/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Contents of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.18 - (hide annotations) (download) (as text)
Sat Jun 23 12:21:01 2007 UTC (17 years, 4 months ago) by wakaba
Branch: MAIN
Changes since 1.17: +502 -83 lines
File MIME type: application/x-wais-source
++ whatpm/t/ChangeLog	23 Jun 2007 11:53:34 -0000
	* HTML-tokenizer.t: Support for new DOCTYPE token syntax.

	* tokenizer-test-1.test: Tests for DOCTYPE tokens
	are revised and added.

2007-06-23  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	23 Jun 2007 11:57:47 -0000
	* HTML.pm.src: HTML5 revisions 908, 909, 912, and 913 (quirks mode).

	* NanoDOM.pm (manakai_is_html, manakai_compat_mode, compat_mode):
	New attributes.

2007-06-23  Wakaba  <wakaba@suika.fam.cx>

package Whatpm::HTML;
use strict;
## The version number is derived from the CVS |Revision| keyword; the
## keyword string itself is maintained by CVS and must not be edited.
our $VERSION = do {
  my @r = (q$Revision: 1.17 $ =~ /\d+/g);
  sprintf "%d." . "%02d" x $#r, @r;
};

## ISSUE:
## var doc = implementation.createDocument (null, null, null);
## doc.write ('');
## alert (doc.compatMode);

## Start tags in which a trailing "/" immediately before ">" is accepted
## by the tokenizer without reporting a |nestc| parse error.
my $permitted_slash_tag_name = {
  base => 1,
  link => 1,
  meta => 1,
  hr => 1,
  br => 1,
  img => 1,
  embed => 1,
  param => 1,
  area => 1,
  col => 1,
  input => 1,
};

## Replacement code points for the code positions 0x80..0x9F.
## NOTE(review): presumably applied to numeric character references;
## the code that consults this table is not part of this section.
my $c1_entity_char = {
  0x80 => 0x20AC,
  0x81 => 0xFFFD,
  0x82 => 0x201A,
  0x83 => 0x0192,
  0x84 => 0x201E,
  0x85 => 0x2026,
  0x86 => 0x2020,
  0x87 => 0x2021,
  0x88 => 0x02C6,
  0x89 => 0x2030,
  0x8A => 0x0160,
  0x8B => 0x2039,
  0x8C => 0x0152,
  0x8D => 0xFFFD,
  0x8E => 0x017D,
  0x8F => 0xFFFD,
  0x90 => 0xFFFD,
  0x91 => 0x2018,
  0x92 => 0x2019,
  0x93 => 0x201C,
  0x94 => 0x201D,
  0x95 => 0x2022,
  0x96 => 0x2013,
  0x97 => 0x2014,
  0x98 => 0x02DC,
  0x99 => 0x2122,
  0x9A => 0x0161,
  0x9B => 0x203A,
  0x9C => 0x0153,
  0x9D => 0xFFFD,
  0x9E => 0x017E,
  0x9F => 0x0178,
}; # $c1_entity_char

## Element name categories.  Any element name that appears in none of
## the three tables below belongs to the "phrasing" category.
my $special_category = {
  address => 1, area => 1, base => 1, basefont => 1, bgsound => 1,
  blockquote => 1, body => 1, br => 1, center => 1, col => 1, colgroup => 1,
  dd => 1, dir => 1, div => 1, dl => 1, dt => 1, embed => 1, fieldset => 1,
  form => 1, frame => 1, frameset => 1, h1 => 1, h2 => 1, h3 => 1,
  h4 => 1, h5 => 1, h6 => 1, head => 1, hr => 1, iframe => 1, image => 1,
  img => 1, input => 1, isindex => 1, li => 1, link => 1, listing => 1,
  menu => 1, meta => 1, noembed => 1, noframes => 1, noscript => 1,
  ol => 1, optgroup => 1, option => 1, p => 1, param => 1, plaintext => 1,
  pre => 1, script => 1, select => 1, spacer => 1, style => 1, tbody => 1,
  textarea => 1, tfoot => 1, thead => 1, title => 1, tr => 1, ul => 1, wbr => 1,
};
my $scoping_category = {
  button => 1, caption => 1, html => 1, marquee => 1, object => 1,
  table => 1, td => 1, th => 1,
};
## |strike| was previously misspelled |strile|, which left the |strike|
## element out of the formatting category.
my $formatting_category = {
  a => 1, b => 1, big => 1, em => 1, font => 1, i => 1, nobr => 1,
  s => 1, small => 1, strike => 1, strong => 1, tt => 1, u => 1,
};
# $phrasing_category: all other elements
80    
sub parse_string ($$$;$) {
  ## Parse a string as an HTML document.
  ##
  ## Arguments (after the invocant, on which |new| is called):
  ##   1. The string to parse (accessed by reference, not copied).
  ##   2. The document object; stored in |$self->{document}| and
  ##      returned at the end.
  ##   3. Optional error callback.  It is called with the key/value
  ##      pairs of the parse error plus |line| and |column|.  When
  ##      omitted, errors are reported through |warn|.
  my $self = shift->new;
  my $input = \$_[0];
  $self->{document} = $_[1];

  ## NOTE: |set_inner_html| copies most of this method's code

  my $pos = 0;       # index of the next unread character of |$$input|
  my $line = 1;
  my $column = 0;
  $self->{set_next_input_character} = sub {
    my $self = shift;

    ## Keep a window of the three most recent characters: drop the
    ## oldest one and put the current character at the front.
    pop @{$self->{prev_input_character}};
    unshift @{$self->{prev_input_character}}, $self->{next_input_character};

    ## End of input is represented by -1.
    if ($pos >= length $$input) {
      $self->{next_input_character} = -1;
      return;
    }

    $self->{next_input_character} = ord substr $$input, $pos++, 1;
    $column++;

    my $c = $self->{next_input_character};
    if ($c == 0x000A) { # LF
      $line++;
      $column = 0;
    } elsif ($c == 0x000D) { # CR
      ## CR and CR LF are both reported as a single LF.
      $pos++ if substr ($$input, $pos, 1) eq "\x0A";
      $self->{next_input_character} = 0x000A; # LF # MUST
      $line++;
      $column = 0;
    } elsif ($c > 0x10FFFF) {
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    } elsif ($c == 0x0000) { # NULL
      !!!parse-error (type => 'NULL');
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    }
  };
  $self->{prev_input_character} = [-1, -1, -1];
  $self->{next_input_character} = -1;

  ## Use the caller's error handler if there is one; otherwise warn.
  my $onerror = $_[2] || sub {
    my (%opt) = @_;
    warn "Parse error ($opt{type}) at line $opt{line} column $opt{column}\n";
  };
  ## Every reported error is tagged with the current input position.
  $self->{parse_error} = sub {
    $onerror->(@_, line => $line, column => $column);
  };

  $self->_initialize_tokenizer;
  $self->_initialize_tree_constructor;
  $self->_construct_tree;
  $self->_terminate_tree_constructor;

  return $self->{document};
} # parse_string
134    
sub new ($) {
  ## Create a parser object with placeholder callbacks.
  ##
  ## |set_next_input_character| defaults to a reader that always
  ## reports end of input (-1); |parse_error| defaults to a handler
  ## that ignores the error.  |parse_string| replaces both.
  ##
  ## Returns the new object, blessed into the class given as the
  ## invocant.
  my $class = shift;
  my $self = bless {}, $class;

  ## The default reader is stored inside the object it refers to.
  ## Referring to the object through a weak reference avoids a
  ## reference cycle, which would otherwise keep the object alive for
  ## as long as this default reader is installed.
  require Scalar::Util;
  my $weak_self = $self;
  Scalar::Util::weaken ($weak_self);
  $self->{set_next_input_character} = sub {
    $weak_self->{next_input_character} = -1;
  };
  $self->{parse_error} = sub {
    #
  };
  return $self;
} # new
146    
147     ## Implementations MUST act as if state machine in the spec
148    
sub _initialize_tokenizer ($) {
  ## Put the tokenizer into its initial condition and read the first
  ## input character.
  my $self = shift;

  $self->{state} = 'data'; # MUST
  $self->{content_model_flag} = 'PCDATA'; # be

  ## Nothing is under construction and nothing has been emitted yet.
  ## |current_token| holds a start tag, end tag, comment, or DOCTYPE.
  $self->{$_} = undef
      for qw(current_token current_attribute
             last_emitted_start_tag_name last_attribute_value_state);

  ## NOTE(review): |char| is emptied before the first character is
  ## read; presumably it holds pushed-back characters -- confirm
  ## against the macro definitions.
  $self->{char} = [];
  # $self->{next_input_character}
  !!!next-input-character;

  ## Queue of tokens waiting to be returned by |_get_next_token|.
  $self->{token} = [];
  # $self->{escape}
} # _initialize_tokenizer
163    
164     ## A token has:
165     ## ->{type} eq 'DOCTYPE', 'start tag', 'end tag', 'comment',
166     ## 'character', or 'end-of-file'
167 wakaba 1.18 ## ->{name} (DOCTYPE, start tag (tag name), end tag (tag name))
168     ## ->{public_identifier} (DOCTYPE)
169     ## ->{system_identifier} (DOCTYPE)
170     ## ->{correct} == 1 or 0 (DOCTYPE)
171 wakaba 1.1 ## ->{attributes} isa HASH (start tag, end tag)
172     ## ->{data} (comment, character)
173    
174     ## Emitted token MUST immediately be handled by the tree construction state.
175    
176     ## Before each step, UA MAY check to see if either one of the scripts in
177     ## "list of scripts that will execute as soon as possible" or the first
178     ## script in the "list of scripts that will execute asynchronously",
179     ## has completed loading. If one has, then it MUST be executed
180     ## and removed from the list.
181    
182     sub _get_next_token ($) {
183     my $self = shift;
184     if (@{$self->{token}}) {
185     return shift @{$self->{token}};
186     }
187    
188     A: {
189     if ($self->{state} eq 'data') {
190     if ($self->{next_input_character} == 0x0026) { # &
191     if ($self->{content_model_flag} eq 'PCDATA' or
192     $self->{content_model_flag} eq 'RCDATA') {
193     $self->{state} = 'entity data';
194     !!!next-input-character;
195     redo A;
196     } else {
197     #
198     }
199 wakaba 1.13 } elsif ($self->{next_input_character} == 0x002D) { # -
200     if ($self->{content_model_flag} eq 'RCDATA' or
201     $self->{content_model_flag} eq 'CDATA') {
202     unless ($self->{escape}) {
203     if ($self->{prev_input_character}->[0] == 0x002D and # -
204     $self->{prev_input_character}->[1] == 0x0021 and # !
205     $self->{prev_input_character}->[2] == 0x003C) { # <
206     $self->{escape} = 1;
207     }
208     }
209     }
210    
211     #
212 wakaba 1.1 } elsif ($self->{next_input_character} == 0x003C) { # <
213 wakaba 1.13 if ($self->{content_model_flag} eq 'PCDATA' or
214     (($self->{content_model_flag} eq 'CDATA' or
215     $self->{content_model_flag} eq 'RCDATA') and
216     not $self->{escape})) {
217 wakaba 1.1 $self->{state} = 'tag open';
218     !!!next-input-character;
219     redo A;
220     } else {
221     #
222     }
223 wakaba 1.13 } elsif ($self->{next_input_character} == 0x003E) { # >
224     if ($self->{escape} and
225     ($self->{content_model_flag} eq 'RCDATA' or
226     $self->{content_model_flag} eq 'CDATA')) {
227     if ($self->{prev_input_character}->[0] == 0x002D and # -
228     $self->{prev_input_character}->[1] == 0x002D) { # -
229     delete $self->{escape};
230     }
231     }
232    
233     #
234 wakaba 1.1 } elsif ($self->{next_input_character} == -1) {
235     !!!emit ({type => 'end-of-file'});
236     last A; ## TODO: ok?
237     }
238     # Anything else
239     my $token = {type => 'character',
240     data => chr $self->{next_input_character}};
241     ## Stay in the data state
242     !!!next-input-character;
243    
244     !!!emit ($token);
245    
246     redo A;
247     } elsif ($self->{state} eq 'entity data') {
248     ## (cannot happen in CDATA state)
249    
250     my $token = $self->_tokenize_attempt_to_consume_an_entity;
251    
252     $self->{state} = 'data';
253     # next-input-character is already done
254    
255     unless (defined $token) {
256     !!!emit ({type => 'character', data => '&'});
257     } else {
258     !!!emit ($token);
259     }
260    
261     redo A;
262     } elsif ($self->{state} eq 'tag open') {
263     if ($self->{content_model_flag} eq 'RCDATA' or
264     $self->{content_model_flag} eq 'CDATA') {
265     if ($self->{next_input_character} == 0x002F) { # /
266     !!!next-input-character;
267     $self->{state} = 'close tag open';
268     redo A;
269     } else {
270     ## reconsume
271     $self->{state} = 'data';
272    
273     !!!emit ({type => 'character', data => '<'});
274    
275     redo A;
276     }
277     } elsif ($self->{content_model_flag} eq 'PCDATA') {
278     if ($self->{next_input_character} == 0x0021) { # !
279     $self->{state} = 'markup declaration open';
280     !!!next-input-character;
281     redo A;
282     } elsif ($self->{next_input_character} == 0x002F) { # /
283     $self->{state} = 'close tag open';
284     !!!next-input-character;
285     redo A;
286     } elsif (0x0041 <= $self->{next_input_character} and
287     $self->{next_input_character} <= 0x005A) { # A..Z
288     $self->{current_token}
289     = {type => 'start tag',
290     tag_name => chr ($self->{next_input_character} + 0x0020)};
291     $self->{state} = 'tag name';
292     !!!next-input-character;
293     redo A;
294     } elsif (0x0061 <= $self->{next_input_character} and
295     $self->{next_input_character} <= 0x007A) { # a..z
296     $self->{current_token} = {type => 'start tag',
297     tag_name => chr ($self->{next_input_character})};
298     $self->{state} = 'tag name';
299     !!!next-input-character;
300     redo A;
301     } elsif ($self->{next_input_character} == 0x003E) { # >
302 wakaba 1.3 !!!parse-error (type => 'empty start tag');
303 wakaba 1.1 $self->{state} = 'data';
304     !!!next-input-character;
305    
306     !!!emit ({type => 'character', data => '<>'});
307    
308     redo A;
309     } elsif ($self->{next_input_character} == 0x003F) { # ?
310 wakaba 1.3 !!!parse-error (type => 'pio');
311 wakaba 1.1 $self->{state} = 'bogus comment';
312     ## $self->{next_input_character} is intentionally left as is
313     redo A;
314     } else {
315 wakaba 1.3 !!!parse-error (type => 'bare stago');
316 wakaba 1.1 $self->{state} = 'data';
317     ## reconsume
318    
319     !!!emit ({type => 'character', data => '<'});
320    
321     redo A;
322     }
323     } else {
324     die "$0: $self->{content_model_flag}: Unknown content model flag";
325     }
326     } elsif ($self->{state} eq 'close tag open') {
327     if ($self->{content_model_flag} eq 'RCDATA' or
328     $self->{content_model_flag} eq 'CDATA') {
329     my @next_char;
330     TAGNAME: for (my $i = 0; $i < length $self->{last_emitted_start_tag_name}; $i++) {
331     push @next_char, $self->{next_input_character};
332     my $c = ord substr ($self->{last_emitted_start_tag_name}, $i, 1);
333     my $C = 0x0061 <= $c && $c <= 0x007A ? $c - 0x0020 : $c;
334     if ($self->{next_input_character} == $c or $self->{next_input_character} == $C) {
335     !!!next-input-character;
336     next TAGNAME;
337     } else {
338 wakaba 1.3 !!!parse-error (type => 'unmatched end tag');
339 wakaba 1.1 $self->{next_input_character} = shift @next_char; # reconsume
340     !!!back-next-input-character (@next_char);
341     $self->{state} = 'data';
342    
343     !!!emit ({type => 'character', data => '</'});
344    
345     redo A;
346     }
347     }
348     push @next_char, $self->{next_input_character};
349    
350     unless ($self->{next_input_character} == 0x0009 or # HT
351     $self->{next_input_character} == 0x000A or # LF
352     $self->{next_input_character} == 0x000B or # VT
353     $self->{next_input_character} == 0x000C or # FF
354     $self->{next_input_character} == 0x0020 or # SP
355     $self->{next_input_character} == 0x003E or # >
356     $self->{next_input_character} == 0x002F or # /
357     $self->{next_input_character} == -1) {
358 wakaba 1.3 !!!parse-error (type => 'unmatched end tag');
359 wakaba 1.1 $self->{next_input_character} = shift @next_char; # reconsume
360     !!!back-next-input-character (@next_char);
361     $self->{state} = 'data';
362    
363     !!!emit ({type => 'character', data => '</'});
364    
365     redo A;
366     } else {
367     $self->{next_input_character} = shift @next_char;
368     !!!back-next-input-character (@next_char);
369     # and consume...
370     }
371     }
372    
373     if (0x0041 <= $self->{next_input_character} and
374     $self->{next_input_character} <= 0x005A) { # A..Z
375     $self->{current_token} = {type => 'end tag',
376     tag_name => chr ($self->{next_input_character} + 0x0020)};
377     $self->{state} = 'tag name';
378     !!!next-input-character;
379     redo A;
380     } elsif (0x0061 <= $self->{next_input_character} and
381     $self->{next_input_character} <= 0x007A) { # a..z
382     $self->{current_token} = {type => 'end tag',
383     tag_name => chr ($self->{next_input_character})};
384     $self->{state} = 'tag name';
385     !!!next-input-character;
386     redo A;
387     } elsif ($self->{next_input_character} == 0x003E) { # >
388 wakaba 1.3 !!!parse-error (type => 'empty end tag');
389 wakaba 1.1 $self->{state} = 'data';
390     !!!next-input-character;
391     redo A;
392     } elsif ($self->{next_input_character} == -1) {
393 wakaba 1.3 !!!parse-error (type => 'bare etago');
394 wakaba 1.1 $self->{state} = 'data';
395     # reconsume
396    
397     !!!emit ({type => 'character', data => '</'});
398    
399     redo A;
400     } else {
401 wakaba 1.3 !!!parse-error (type => 'bogus end tag');
402 wakaba 1.1 $self->{state} = 'bogus comment';
403     ## $self->{next_input_character} is intentionally left as is
404     redo A;
405     }
406     } elsif ($self->{state} eq 'tag name') {
407     if ($self->{next_input_character} == 0x0009 or # HT
408     $self->{next_input_character} == 0x000A or # LF
409     $self->{next_input_character} == 0x000B or # VT
410     $self->{next_input_character} == 0x000C or # FF
411     $self->{next_input_character} == 0x0020) { # SP
412     $self->{state} = 'before attribute name';
413     !!!next-input-character;
414     redo A;
415     } elsif ($self->{next_input_character} == 0x003E) { # >
416     if ($self->{current_token}->{type} eq 'start tag') {
417     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
418     } elsif ($self->{current_token}->{type} eq 'end tag') {
419     $self->{content_model_flag} = 'PCDATA'; # MUST
420     if ($self->{current_token}->{attributes}) {
421 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
422 wakaba 1.1 }
423     } else {
424     die "$0: $self->{current_token}->{type}: Unknown token type";
425     }
426     $self->{state} = 'data';
427     !!!next-input-character;
428    
429     !!!emit ($self->{current_token}); # start tag or end tag
430     undef $self->{current_token};
431    
432     redo A;
433     } elsif (0x0041 <= $self->{next_input_character} and
434     $self->{next_input_character} <= 0x005A) { # A..Z
435     $self->{current_token}->{tag_name} .= chr ($self->{next_input_character} + 0x0020);
436     # start tag or end tag
437     ## Stay in this state
438     !!!next-input-character;
439     redo A;
440 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
441 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
442 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
443     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
444     } elsif ($self->{current_token}->{type} eq 'end tag') {
445     $self->{content_model_flag} = 'PCDATA'; # MUST
446     if ($self->{current_token}->{attributes}) {
447 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
448 wakaba 1.1 }
449     } else {
450     die "$0: $self->{current_token}->{type}: Unknown token type";
451     }
452     $self->{state} = 'data';
453     # reconsume
454    
455     !!!emit ($self->{current_token}); # start tag or end tag
456     undef $self->{current_token};
457    
458     redo A;
459     } elsif ($self->{next_input_character} == 0x002F) { # /
460     !!!next-input-character;
461     if ($self->{next_input_character} == 0x003E and # >
462     $self->{current_token}->{type} eq 'start tag' and
463     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
464     # permitted slash
465     #
466     } else {
467 wakaba 1.3 !!!parse-error (type => 'nestc');
468 wakaba 1.1 }
469     $self->{state} = 'before attribute name';
470     # next-input-character is already done
471     redo A;
472     } else {
473     $self->{current_token}->{tag_name} .= chr $self->{next_input_character};
474     # start tag or end tag
475     ## Stay in the state
476     !!!next-input-character;
477     redo A;
478     }
479     } elsif ($self->{state} eq 'before attribute name') {
480     if ($self->{next_input_character} == 0x0009 or # HT
481     $self->{next_input_character} == 0x000A or # LF
482     $self->{next_input_character} == 0x000B or # VT
483     $self->{next_input_character} == 0x000C or # FF
484     $self->{next_input_character} == 0x0020) { # SP
485     ## Stay in the state
486     !!!next-input-character;
487     redo A;
488     } elsif ($self->{next_input_character} == 0x003E) { # >
489     if ($self->{current_token}->{type} eq 'start tag') {
490     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
491     } elsif ($self->{current_token}->{type} eq 'end tag') {
492     $self->{content_model_flag} = 'PCDATA'; # MUST
493     if ($self->{current_token}->{attributes}) {
494 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
495 wakaba 1.1 }
496     } else {
497     die "$0: $self->{current_token}->{type}: Unknown token type";
498     }
499     $self->{state} = 'data';
500     !!!next-input-character;
501    
502     !!!emit ($self->{current_token}); # start tag or end tag
503     undef $self->{current_token};
504    
505     redo A;
506     } elsif (0x0041 <= $self->{next_input_character} and
507     $self->{next_input_character} <= 0x005A) { # A..Z
508     $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
509     value => ''};
510     $self->{state} = 'attribute name';
511     !!!next-input-character;
512     redo A;
513     } elsif ($self->{next_input_character} == 0x002F) { # /
514     !!!next-input-character;
515     if ($self->{next_input_character} == 0x003E and # >
516     $self->{current_token}->{type} eq 'start tag' and
517     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
518     # permitted slash
519     #
520     } else {
521 wakaba 1.3 !!!parse-error (type => 'nestc');
522 wakaba 1.1 }
523     ## Stay in the state
524     # next-input-character is already done
525     redo A;
526 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
527 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
528 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
529     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
530     } elsif ($self->{current_token}->{type} eq 'end tag') {
531     $self->{content_model_flag} = 'PCDATA'; # MUST
532     if ($self->{current_token}->{attributes}) {
533 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
534 wakaba 1.1 }
535     } else {
536     die "$0: $self->{current_token}->{type}: Unknown token type";
537     }
538     $self->{state} = 'data';
539     # reconsume
540    
541     !!!emit ($self->{current_token}); # start tag or end tag
542     undef $self->{current_token};
543    
544     redo A;
545     } else {
546     $self->{current_attribute} = {name => chr ($self->{next_input_character}),
547     value => ''};
548     $self->{state} = 'attribute name';
549     !!!next-input-character;
550     redo A;
551     }
552     } elsif ($self->{state} eq 'attribute name') {
553     my $before_leave = sub {
554     if (exists $self->{current_token}->{attributes} # start tag or end tag
555     ->{$self->{current_attribute}->{name}}) { # MUST
556 wakaba 1.3 !!!parse-error (type => 'dupulicate attribute');
557 wakaba 1.1 ## Discard $self->{current_attribute} # MUST
558     } else {
559     $self->{current_token}->{attributes}->{$self->{current_attribute}->{name}}
560     = $self->{current_attribute};
561     }
562     }; # $before_leave
563    
564     if ($self->{next_input_character} == 0x0009 or # HT
565     $self->{next_input_character} == 0x000A or # LF
566     $self->{next_input_character} == 0x000B or # VT
567     $self->{next_input_character} == 0x000C or # FF
568     $self->{next_input_character} == 0x0020) { # SP
569     $before_leave->();
570     $self->{state} = 'after attribute name';
571     !!!next-input-character;
572     redo A;
573     } elsif ($self->{next_input_character} == 0x003D) { # =
574     $before_leave->();
575     $self->{state} = 'before attribute value';
576     !!!next-input-character;
577     redo A;
578     } elsif ($self->{next_input_character} == 0x003E) { # >
579     $before_leave->();
580     if ($self->{current_token}->{type} eq 'start tag') {
581     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
582     } elsif ($self->{current_token}->{type} eq 'end tag') {
583     $self->{content_model_flag} = 'PCDATA'; # MUST
584     if ($self->{current_token}->{attributes}) {
585 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
586 wakaba 1.1 }
587     } else {
588     die "$0: $self->{current_token}->{type}: Unknown token type";
589     }
590     $self->{state} = 'data';
591     !!!next-input-character;
592    
593     !!!emit ($self->{current_token}); # start tag or end tag
594     undef $self->{current_token};
595    
596     redo A;
597     } elsif (0x0041 <= $self->{next_input_character} and
598     $self->{next_input_character} <= 0x005A) { # A..Z
599     $self->{current_attribute}->{name} .= chr ($self->{next_input_character} + 0x0020);
600     ## Stay in the state
601     !!!next-input-character;
602     redo A;
603     } elsif ($self->{next_input_character} == 0x002F) { # /
604     $before_leave->();
605     !!!next-input-character;
606     if ($self->{next_input_character} == 0x003E and # >
607     $self->{current_token}->{type} eq 'start tag' and
608     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
609     # permitted slash
610     #
611     } else {
612 wakaba 1.3 !!!parse-error (type => 'nestc');
613 wakaba 1.1 }
614     $self->{state} = 'before attribute name';
615     # next-input-character is already done
616     redo A;
617 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
618 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
619 wakaba 1.1 $before_leave->();
620     if ($self->{current_token}->{type} eq 'start tag') {
621     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
622     } elsif ($self->{current_token}->{type} eq 'end tag') {
623     $self->{content_model_flag} = 'PCDATA'; # MUST
624     if ($self->{current_token}->{attributes}) {
625 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
626 wakaba 1.1 }
627     } else {
628     die "$0: $self->{current_token}->{type}: Unknown token type";
629     }
630     $self->{state} = 'data';
631     # reconsume
632    
633     !!!emit ($self->{current_token}); # start tag or end tag
634     undef $self->{current_token};
635    
636     redo A;
637     } else {
638     $self->{current_attribute}->{name} .= chr ($self->{next_input_character});
639     ## Stay in the state
640     !!!next-input-character;
641     redo A;
642     }
643     } elsif ($self->{state} eq 'after attribute name') {
644     if ($self->{next_input_character} == 0x0009 or # HT
645     $self->{next_input_character} == 0x000A or # LF
646     $self->{next_input_character} == 0x000B or # VT
647     $self->{next_input_character} == 0x000C or # FF
648     $self->{next_input_character} == 0x0020) { # SP
649     ## Stay in the state
650     !!!next-input-character;
651     redo A;
652     } elsif ($self->{next_input_character} == 0x003D) { # =
653     $self->{state} = 'before attribute value';
654     !!!next-input-character;
655     redo A;
656     } elsif ($self->{next_input_character} == 0x003E) { # >
657     if ($self->{current_token}->{type} eq 'start tag') {
658     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
659     } elsif ($self->{current_token}->{type} eq 'end tag') {
660     $self->{content_model_flag} = 'PCDATA'; # MUST
661     if ($self->{current_token}->{attributes}) {
662 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
663 wakaba 1.1 }
664     } else {
665     die "$0: $self->{current_token}->{type}: Unknown token type";
666     }
667     $self->{state} = 'data';
668     !!!next-input-character;
669    
670     !!!emit ($self->{current_token}); # start tag or end tag
671     undef $self->{current_token};
672    
673     redo A;
674     } elsif (0x0041 <= $self->{next_input_character} and
675     $self->{next_input_character} <= 0x005A) { # A..Z
676     $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
677     value => ''};
678     $self->{state} = 'attribute name';
679     !!!next-input-character;
680     redo A;
681     } elsif ($self->{next_input_character} == 0x002F) { # /
682     !!!next-input-character;
683     if ($self->{next_input_character} == 0x003E and # >
684     $self->{current_token}->{type} eq 'start tag' and
685     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
686     # permitted slash
687     #
688     } else {
689 wakaba 1.3 !!!parse-error (type => 'nestc');
690 wakaba 1.1 }
691     $self->{state} = 'before attribute name';
692     # next-input-character is already done
693     redo A;
694 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
695 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
696 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
697     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
698     } elsif ($self->{current_token}->{type} eq 'end tag') {
699     $self->{content_model_flag} = 'PCDATA'; # MUST
700     if ($self->{current_token}->{attributes}) {
701 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
702 wakaba 1.1 }
703     } else {
704     die "$0: $self->{current_token}->{type}: Unknown token type";
705     }
706     $self->{state} = 'data';
707     # reconsume
708    
709     !!!emit ($self->{current_token}); # start tag or end tag
710     undef $self->{current_token};
711    
712     redo A;
713     } else {
714     $self->{current_attribute} = {name => chr ($self->{next_input_character}),
715     value => ''};
716     $self->{state} = 'attribute name';
717     !!!next-input-character;
718     redo A;
719     }
720     } elsif ($self->{state} eq 'before attribute value') {
721     if ($self->{next_input_character} == 0x0009 or # HT
722     $self->{next_input_character} == 0x000A or # LF
723     $self->{next_input_character} == 0x000B or # VT
724     $self->{next_input_character} == 0x000C or # FF
725     $self->{next_input_character} == 0x0020) { # SP
726     ## Stay in the state
727     !!!next-input-character;
728     redo A;
729     } elsif ($self->{next_input_character} == 0x0022) { # "
730     $self->{state} = 'attribute value (double-quoted)';
731     !!!next-input-character;
732     redo A;
733     } elsif ($self->{next_input_character} == 0x0026) { # &
734     $self->{state} = 'attribute value (unquoted)';
735     ## reconsume
736     redo A;
737     } elsif ($self->{next_input_character} == 0x0027) { # '
738     $self->{state} = 'attribute value (single-quoted)';
739     !!!next-input-character;
740     redo A;
741     } elsif ($self->{next_input_character} == 0x003E) { # >
742     if ($self->{current_token}->{type} eq 'start tag') {
743     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
744     } elsif ($self->{current_token}->{type} eq 'end tag') {
745     $self->{content_model_flag} = 'PCDATA'; # MUST
746     if ($self->{current_token}->{attributes}) {
747 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
748 wakaba 1.1 }
749     } else {
750     die "$0: $self->{current_token}->{type}: Unknown token type";
751     }
752     $self->{state} = 'data';
753     !!!next-input-character;
754    
755     !!!emit ($self->{current_token}); # start tag or end tag
756     undef $self->{current_token};
757    
758     redo A;
759 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
760 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
761 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
762     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
763     } elsif ($self->{current_token}->{type} eq 'end tag') {
764     $self->{content_model_flag} = 'PCDATA'; # MUST
765     if ($self->{current_token}->{attributes}) {
766 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
767 wakaba 1.1 }
768     } else {
769     die "$0: $self->{current_token}->{type}: Unknown token type";
770     }
771     $self->{state} = 'data';
772     ## reconsume
773    
774     !!!emit ($self->{current_token}); # start tag or end tag
775     undef $self->{current_token};
776    
777     redo A;
778     } else {
779     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
780     $self->{state} = 'attribute value (unquoted)';
781     !!!next-input-character;
782     redo A;
783     }
784     } elsif ($self->{state} eq 'attribute value (double-quoted)') {
785     if ($self->{next_input_character} == 0x0022) { # "
786     $self->{state} = 'before attribute name';
787     !!!next-input-character;
788     redo A;
789     } elsif ($self->{next_input_character} == 0x0026) { # &
790     $self->{last_attribute_value_state} = 'attribute value (double-quoted)';
791     $self->{state} = 'entity in attribute value';
792     !!!next-input-character;
793     redo A;
794     } elsif ($self->{next_input_character} == -1) {
795 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
796 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
797     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
798     } elsif ($self->{current_token}->{type} eq 'end tag') {
799     $self->{content_model_flag} = 'PCDATA'; # MUST
800     if ($self->{current_token}->{attributes}) {
801 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
802 wakaba 1.1 }
803     } else {
804     die "$0: $self->{current_token}->{type}: Unknown token type";
805     }
806     $self->{state} = 'data';
807     ## reconsume
808    
809     !!!emit ($self->{current_token}); # start tag or end tag
810     undef $self->{current_token};
811    
812     redo A;
813     } else {
814     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
815     ## Stay in the state
816     !!!next-input-character;
817     redo A;
818     }
819     } elsif ($self->{state} eq 'attribute value (single-quoted)') {
820     if ($self->{next_input_character} == 0x0027) { # '
821     $self->{state} = 'before attribute name';
822     !!!next-input-character;
823     redo A;
824     } elsif ($self->{next_input_character} == 0x0026) { # &
825     $self->{last_attribute_value_state} = 'attribute value (single-quoted)';
826     $self->{state} = 'entity in attribute value';
827     !!!next-input-character;
828     redo A;
829     } elsif ($self->{next_input_character} == -1) {
830 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
831 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
832     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
833     } elsif ($self->{current_token}->{type} eq 'end tag') {
834     $self->{content_model_flag} = 'PCDATA'; # MUST
835     if ($self->{current_token}->{attributes}) {
836 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
837 wakaba 1.1 }
838     } else {
839     die "$0: $self->{current_token}->{type}: Unknown token type";
840     }
841     $self->{state} = 'data';
842     ## reconsume
843    
844     !!!emit ($self->{current_token}); # start tag or end tag
845     undef $self->{current_token};
846    
847     redo A;
848     } else {
849     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
850     ## Stay in the state
851     !!!next-input-character;
852     redo A;
853     }
854     } elsif ($self->{state} eq 'attribute value (unquoted)') {
855     if ($self->{next_input_character} == 0x0009 or # HT
856     $self->{next_input_character} == 0x000A or # LF
857     $self->{next_input_character} == 0x000B or # VT
858     $self->{next_input_character} == 0x000C or # FF
859     $self->{next_input_character} == 0x0020) { # SP
860     $self->{state} = 'before attribute name';
861     !!!next-input-character;
862     redo A;
863     } elsif ($self->{next_input_character} == 0x0026) { # &
864     $self->{last_attribute_value_state} = 'attribute value (unquoted)';
865     $self->{state} = 'entity in attribute value';
866     !!!next-input-character;
867     redo A;
868     } elsif ($self->{next_input_character} == 0x003E) { # >
869     if ($self->{current_token}->{type} eq 'start tag') {
870     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
871     } elsif ($self->{current_token}->{type} eq 'end tag') {
872     $self->{content_model_flag} = 'PCDATA'; # MUST
873     if ($self->{current_token}->{attributes}) {
874 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
875 wakaba 1.1 }
876     } else {
877     die "$0: $self->{current_token}->{type}: Unknown token type";
878     }
879     $self->{state} = 'data';
880     !!!next-input-character;
881    
882     !!!emit ($self->{current_token}); # start tag or end tag
883     undef $self->{current_token};
884    
885     redo A;
886 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
887 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
888 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
889     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
890     } elsif ($self->{current_token}->{type} eq 'end tag') {
891     $self->{content_model_flag} = 'PCDATA'; # MUST
892     if ($self->{current_token}->{attributes}) {
893 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
894 wakaba 1.1 }
895     } else {
896     die "$0: $self->{current_token}->{type}: Unknown token type";
897     }
898     $self->{state} = 'data';
899     ## reconsume
900    
901     !!!emit ($self->{current_token}); # start tag or end tag
902     undef $self->{current_token};
903    
904     redo A;
905     } else {
906     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
907     ## Stay in the state
908     !!!next-input-character;
909     redo A;
910     }
911     } elsif ($self->{state} eq 'entity in attribute value') {
912     my $token = $self->_tokenize_attempt_to_consume_an_entity;
913    
914     unless (defined $token) {
915     $self->{current_attribute}->{value} .= '&';
916     } else {
917     $self->{current_attribute}->{value} .= $token->{data};
918     ## ISSUE: spec says "append the returned character token to the current attribute's value"
919     }
920    
921     $self->{state} = $self->{last_attribute_value_state};
922     # next-input-character is already done
923     redo A;
924     } elsif ($self->{state} eq 'bogus comment') {
925     ## (only happen if PCDATA state)
926    
927     my $token = {type => 'comment', data => ''};
928    
929     BC: {
930     if ($self->{next_input_character} == 0x003E) { # >
931     $self->{state} = 'data';
932     !!!next-input-character;
933    
934     !!!emit ($token);
935    
936     redo A;
937     } elsif ($self->{next_input_character} == -1) {
938     $self->{state} = 'data';
939     ## reconsume
940    
941     !!!emit ($token);
942    
943     redo A;
944     } else {
945     $token->{data} .= chr ($self->{next_input_character});
946     !!!next-input-character;
947     redo BC;
948     }
949     } # BC
950     } elsif ($self->{state} eq 'markup declaration open') {
951     ## (only happen if PCDATA state)
952    
953     my @next_char;
954     push @next_char, $self->{next_input_character};
955    
956     if ($self->{next_input_character} == 0x002D) { # -
957     !!!next-input-character;
958     push @next_char, $self->{next_input_character};
959     if ($self->{next_input_character} == 0x002D) { # -
960     $self->{current_token} = {type => 'comment', data => ''};
961     $self->{state} = 'comment';
962     !!!next-input-character;
963     redo A;
964     }
965     } elsif ($self->{next_input_character} == 0x0044 or # D
966     $self->{next_input_character} == 0x0064) { # d
967     !!!next-input-character;
968     push @next_char, $self->{next_input_character};
969     if ($self->{next_input_character} == 0x004F or # O
970     $self->{next_input_character} == 0x006F) { # o
971     !!!next-input-character;
972     push @next_char, $self->{next_input_character};
973     if ($self->{next_input_character} == 0x0043 or # C
974     $self->{next_input_character} == 0x0063) { # c
975     !!!next-input-character;
976     push @next_char, $self->{next_input_character};
977     if ($self->{next_input_character} == 0x0054 or # T
978     $self->{next_input_character} == 0x0074) { # t
979     !!!next-input-character;
980     push @next_char, $self->{next_input_character};
981     if ($self->{next_input_character} == 0x0059 or # Y
982     $self->{next_input_character} == 0x0079) { # y
983     !!!next-input-character;
984     push @next_char, $self->{next_input_character};
985     if ($self->{next_input_character} == 0x0050 or # P
986     $self->{next_input_character} == 0x0070) { # p
987     !!!next-input-character;
988     push @next_char, $self->{next_input_character};
989     if ($self->{next_input_character} == 0x0045 or # E
990     $self->{next_input_character} == 0x0065) { # e
991     ## ISSUE: What a stupid code this is!
992     $self->{state} = 'DOCTYPE';
993     !!!next-input-character;
994     redo A;
995     }
996     }
997     }
998     }
999     }
1000     }
1001     }
1002    
1003 wakaba 1.3 !!!parse-error (type => 'bogus comment open');
1004 wakaba 1.1 $self->{next_input_character} = shift @next_char;
1005     !!!back-next-input-character (@next_char);
1006     $self->{state} = 'bogus comment';
1007     redo A;
1008    
1009     ## ISSUE: typos in spec: chacacters, is is a parse error
1010     ## ISSUE: spec is somewhat unclear on "is the first character that will be in the comment"; what is "that will be in the comment" is what the algorithm defines, isn't it?
1011     } elsif ($self->{state} eq 'comment') {
1012     if ($self->{next_input_character} == 0x002D) { # -
1013     $self->{state} = 'comment dash';
1014     !!!next-input-character;
1015     redo A;
1016     } elsif ($self->{next_input_character} == -1) {
1017 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1018 wakaba 1.1 $self->{state} = 'data';
1019     ## reconsume
1020    
1021     !!!emit ($self->{current_token}); # comment
1022     undef $self->{current_token};
1023    
1024     redo A;
1025     } else {
1026     $self->{current_token}->{data} .= chr ($self->{next_input_character}); # comment
1027     ## Stay in the state
1028     !!!next-input-character;
1029     redo A;
1030     }
1031     } elsif ($self->{state} eq 'comment dash') {
1032     if ($self->{next_input_character} == 0x002D) { # -
1033     $self->{state} = 'comment end';
1034     !!!next-input-character;
1035     redo A;
1036     } elsif ($self->{next_input_character} == -1) {
1037 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1038 wakaba 1.1 $self->{state} = 'data';
1039     ## reconsume
1040    
1041     !!!emit ($self->{current_token}); # comment
1042     undef $self->{current_token};
1043    
1044     redo A;
1045     } else {
1046     $self->{current_token}->{data} .= '-' . chr ($self->{next_input_character}); # comment
1047     $self->{state} = 'comment';
1048     !!!next-input-character;
1049     redo A;
1050     }
1051     } elsif ($self->{state} eq 'comment end') {
1052     if ($self->{next_input_character} == 0x003E) { # >
1053     $self->{state} = 'data';
1054     !!!next-input-character;
1055    
1056     !!!emit ($self->{current_token}); # comment
1057     undef $self->{current_token};
1058    
1059     redo A;
1060     } elsif ($self->{next_input_character} == 0x002D) { # -
1061 wakaba 1.3 !!!parse-error (type => 'dash in comment');
1062 wakaba 1.1 $self->{current_token}->{data} .= '-'; # comment
1063     ## Stay in the state
1064     !!!next-input-character;
1065     redo A;
1066     } elsif ($self->{next_input_character} == -1) {
1067 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1068 wakaba 1.1 $self->{state} = 'data';
1069     ## reconsume
1070    
1071     !!!emit ($self->{current_token}); # comment
1072     undef $self->{current_token};
1073    
1074     redo A;
1075     } else {
1076 wakaba 1.3 !!!parse-error (type => 'dash in comment');
1077 wakaba 1.1 $self->{current_token}->{data} .= '--' . chr ($self->{next_input_character}); # comment
1078     $self->{state} = 'comment';
1079     !!!next-input-character;
1080     redo A;
1081     }
1082     } elsif ($self->{state} eq 'DOCTYPE') {
1083     if ($self->{next_input_character} == 0x0009 or # HT
1084     $self->{next_input_character} == 0x000A or # LF
1085     $self->{next_input_character} == 0x000B or # VT
1086     $self->{next_input_character} == 0x000C or # FF
1087     $self->{next_input_character} == 0x0020) { # SP
1088     $self->{state} = 'before DOCTYPE name';
1089     !!!next-input-character;
1090     redo A;
1091     } else {
1092 wakaba 1.3 !!!parse-error (type => 'no space before DOCTYPE name');
1093 wakaba 1.1 $self->{state} = 'before DOCTYPE name';
1094     ## reconsume
1095     redo A;
1096     }
1097     } elsif ($self->{state} eq 'before DOCTYPE name') {
1098     if ($self->{next_input_character} == 0x0009 or # HT
1099     $self->{next_input_character} == 0x000A or # LF
1100     $self->{next_input_character} == 0x000B or # VT
1101     $self->{next_input_character} == 0x000C or # FF
1102     $self->{next_input_character} == 0x0020) { # SP
1103     ## Stay in the state
1104     !!!next-input-character;
1105     redo A;
1106     } elsif ($self->{next_input_character} == 0x003E) { # >
1107 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
1108 wakaba 1.1 $self->{state} = 'data';
1109     !!!next-input-character;
1110    
1111 wakaba 1.18 !!!emit ({type => 'DOCTYPE'}); # incorrect
1112 wakaba 1.1
1113     redo A;
1114     } elsif ($self->{next_input_character} == -1) {
1115 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
1116 wakaba 1.1 $self->{state} = 'data';
1117     ## reconsume
1118    
1119 wakaba 1.18 !!!emit ({type => 'DOCTYPE'}); # incorrect
1120 wakaba 1.1
1121     redo A;
1122     } else {
1123 wakaba 1.18 $self->{current_token}
1124     = {type => 'DOCTYPE',
1125     name => chr ($self->{next_input_character}),
1126     correct => 1};
1127 wakaba 1.4 ## ISSUE: "Set the token's name name to the" in the spec
1128 wakaba 1.1 $self->{state} = 'DOCTYPE name';
1129     !!!next-input-character;
1130     redo A;
1131     }
1132     } elsif ($self->{state} eq 'DOCTYPE name') {
1133 wakaba 1.18 ## ISSUE: Redundant "First," in the spec.
1134 wakaba 1.1 if ($self->{next_input_character} == 0x0009 or # HT
1135     $self->{next_input_character} == 0x000A or # LF
1136     $self->{next_input_character} == 0x000B or # VT
1137     $self->{next_input_character} == 0x000C or # FF
1138     $self->{next_input_character} == 0x0020) { # SP
1139     $self->{state} = 'after DOCTYPE name';
1140     !!!next-input-character;
1141     redo A;
1142     } elsif ($self->{next_input_character} == 0x003E) { # >
1143     $self->{state} = 'data';
1144     !!!next-input-character;
1145    
1146     !!!emit ($self->{current_token}); # DOCTYPE
1147     undef $self->{current_token};
1148    
1149     redo A;
1150     } elsif ($self->{next_input_character} == -1) {
1151 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1152 wakaba 1.1 $self->{state} = 'data';
1153     ## reconsume
1154    
1155 wakaba 1.18 delete $self->{current_token}->{correct};
1156     !!!emit ($self->{current_token}); # DOCTYPE
1157 wakaba 1.1 undef $self->{current_token};
1158    
1159     redo A;
1160     } else {
1161     $self->{current_token}->{name}
1162     .= chr ($self->{next_input_character}); # DOCTYPE
1163     ## Stay in the state
1164     !!!next-input-character;
1165     redo A;
1166     }
1167     } elsif ($self->{state} eq 'after DOCTYPE name') {
1168     if ($self->{next_input_character} == 0x0009 or # HT
1169     $self->{next_input_character} == 0x000A or # LF
1170     $self->{next_input_character} == 0x000B or # VT
1171     $self->{next_input_character} == 0x000C or # FF
1172     $self->{next_input_character} == 0x0020) { # SP
1173     ## Stay in the state
1174     !!!next-input-character;
1175     redo A;
1176     } elsif ($self->{next_input_character} == 0x003E) { # >
1177     $self->{state} = 'data';
1178     !!!next-input-character;
1179    
1180     !!!emit ($self->{current_token}); # DOCTYPE
1181     undef $self->{current_token};
1182    
1183     redo A;
1184     } elsif ($self->{next_input_character} == -1) {
1185 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1186 wakaba 1.1 $self->{state} = 'data';
1187     ## reconsume
1188    
1189 wakaba 1.18 delete $self->{current_token}->{correct};
1190     !!!emit ($self->{current_token}); # DOCTYPE
1191     undef $self->{current_token};
1192    
1193     redo A;
1194     } elsif ($self->{next_input_character} == 0x0050 or # P
1195     $self->{next_input_character} == 0x0070) { # p
1196     !!!next-input-character;
1197     if ($self->{next_input_character} == 0x0055 or # U
1198     $self->{next_input_character} == 0x0075) { # u
1199     !!!next-input-character;
1200     if ($self->{next_input_character} == 0x0042 or # B
1201     $self->{next_input_character} == 0x0062) { # b
1202     !!!next-input-character;
1203     if ($self->{next_input_character} == 0x004C or # L
1204     $self->{next_input_character} == 0x006C) { # l
1205     !!!next-input-character;
1206     if ($self->{next_input_character} == 0x0049 or # I
1207     $self->{next_input_character} == 0x0069) { # i
1208     !!!next-input-character;
1209     if ($self->{next_input_character} == 0x0043 or # C
1210     $self->{next_input_character} == 0x0063) { # c
1211     $self->{state} = 'before DOCTYPE public identifier';
1212     !!!next-input-character;
1213     redo A;
1214     }
1215     }
1216     }
1217     }
1218     }
1219    
1220     #
1221     } elsif ($self->{next_input_character} == 0x0053 or # S
1222     $self->{next_input_character} == 0x0073) { # s
1223     !!!next-input-character;
1224     if ($self->{next_input_character} == 0x0059 or # Y
1225     $self->{next_input_character} == 0x0079) { # y
1226     !!!next-input-character;
1227     if ($self->{next_input_character} == 0x0053 or # S
1228     $self->{next_input_character} == 0x0073) { # s
1229     !!!next-input-character;
1230     if ($self->{next_input_character} == 0x0054 or # T
1231     $self->{next_input_character} == 0x0074) { # t
1232     !!!next-input-character;
1233     if ($self->{next_input_character} == 0x0045 or # E
1234     $self->{next_input_character} == 0x0065) { # e
1235     !!!next-input-character;
1236     if ($self->{next_input_character} == 0x004D or # M
1237     $self->{next_input_character} == 0x006D) { # m
1238     $self->{state} = 'before DOCTYPE system identifier';
1239     !!!next-input-character;
1240     redo A;
1241     }
1242     }
1243     }
1244     }
1245     }
1246    
1247     #
1248     } else {
1249     !!!next-input-character;
1250     #
1251     }
1252    
1253     !!!parse-error (type => 'string after DOCTYPE name');
1254     $self->{state} = 'bogus DOCTYPE';
1255     # next-input-character is already done
1256     redo A;
1257     } elsif ($self->{state} eq 'before DOCTYPE public identifier') {
1258     if ({
1259     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
1260     #0x000D => 1, # HT, LF, VT, FF, SP, CR
1261     }->{$self->{next_input_character}}) {
1262     ## Stay in the state
1263     !!!next-input-character;
1264     redo A;
1265     } elsif ($self->{next_input_character} eq 0x0022) { # "
1266     $self->{current_token}->{public_identifier} = ''; # DOCTYPE
1267     $self->{state} = 'DOCTYPE public identifier (double-quoted)';
1268     !!!next-input-character;
1269     redo A;
1270     } elsif ($self->{next_input_character} eq 0x0027) { # '
1271     $self->{current_token}->{public_identifier} = ''; # DOCTYPE
1272     $self->{state} = 'DOCTYPE public identifier (single-quoted)';
1273     !!!next-input-character;
1274     redo A;
1275     } elsif ($self->{next_input_character} eq 0x003E) { # >
1276     !!!parse-error (type => 'no PUBLIC literal');
1277    
1278     $self->{state} = 'data';
1279     !!!next-input-character;
1280    
1281     delete $self->{current_token}->{correct};
1282     !!!emit ($self->{current_token}); # DOCTYPE
1283     undef $self->{current_token};
1284    
1285     redo A;
1286     } elsif ($self->{next_input_character} == -1) {
1287     !!!parse-error (type => 'unclosed DOCTYPE');
1288    
1289     $self->{state} = 'data';
1290     ## reconsume
1291    
1292     delete $self->{current_token}->{correct};
1293     !!!emit ($self->{current_token}); # DOCTYPE
1294     undef $self->{current_token};
1295    
1296     redo A;
1297     } else {
1298     !!!parse-error (type => 'string after PUBLIC');
1299     $self->{state} = 'bogus DOCTYPE';
1300     !!!next-input-character;
1301     redo A;
1302     }
1303     } elsif ($self->{state} eq 'DOCTYPE public identifier (double-quoted)') {
1304     if ($self->{next_input_character} == 0x0022) { # "
1305     $self->{state} = 'after DOCTYPE public identifier';
1306     !!!next-input-character;
1307     redo A;
1308     } elsif ($self->{next_input_character} == -1) {
1309     !!!parse-error (type => 'unclosed PUBLIC literal');
1310    
1311     $self->{state} = 'data';
1312     ## reconsume
1313    
1314     delete $self->{current_token}->{correct};
1315     !!!emit ($self->{current_token}); # DOCTYPE
1316     undef $self->{current_token};
1317    
1318     redo A;
1319     } else {
1320     $self->{current_token}->{public_identifier} # DOCTYPE
1321     .= chr $self->{next_input_character};
1322     ## Stay in the state
1323     !!!next-input-character;
1324     redo A;
1325     }
1326     } elsif ($self->{state} eq 'DOCTYPE public identifier (single-quoted)') {
1327     if ($self->{next_input_character} == 0x0027) { # '
1328     $self->{state} = 'after DOCTYPE public identifier';
1329     !!!next-input-character;
1330     redo A;
1331     } elsif ($self->{next_input_character} == -1) {
1332     !!!parse-error (type => 'unclosed PUBLIC literal');
1333    
1334     $self->{state} = 'data';
1335     ## reconsume
1336    
1337     delete $self->{current_token}->{correct};
1338     !!!emit ($self->{current_token}); # DOCTYPE
1339     undef $self->{current_token};
1340    
1341     redo A;
1342     } else {
1343     $self->{current_token}->{public_identifier} # DOCTYPE
1344     .= chr $self->{next_input_character};
1345     ## Stay in the state
1346     !!!next-input-character;
1347     redo A;
1348     }
1349     } elsif ($self->{state} eq 'after DOCTYPE public identifier') {
1350     if ({
1351     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
1352     #0x000D => 1, # HT, LF, VT, FF, SP, CR
1353     }->{$self->{next_input_character}}) {
1354     ## Stay in the state
1355     !!!next-input-character;
1356     redo A;
1357     } elsif ($self->{next_input_character} == 0x0022) { # "
1358     $self->{current_token}->{system_identifier} = ''; # DOCTYPE
1359     $self->{state} = 'DOCTYPE system identifier (double-quoted)';
1360     !!!next-input-character;
1361     redo A;
1362     } elsif ($self->{next_input_character} == 0x0027) { # '
1363     $self->{current_token}->{system_identifier} = ''; # DOCTYPE
1364     $self->{state} = 'DOCTYPE system identifier (single-quoted)';
1365     !!!next-input-character;
1366     redo A;
1367     } elsif ($self->{next_input_character} == 0x003E) { # >
1368     $self->{state} = 'data';
1369     !!!next-input-character;
1370    
1371     !!!emit ($self->{current_token}); # DOCTYPE
1372     undef $self->{current_token};
1373    
1374     redo A;
1375     } elsif ($self->{next_input_character} == -1) {
1376     !!!parse-error (type => 'unclosed DOCTYPE');
1377    
1378     $self->{state} = 'data';
1379     ## reconsume
1380    
1381     delete $self->{current_token}->{correct};
1382     !!!emit ($self->{current_token}); # DOCTYPE
1383     undef $self->{current_token};
1384    
1385     redo A;
1386     } else {
1387     !!!parse-error (type => 'string after PUBLIC literal');
1388     $self->{state} = 'bogus DOCTYPE';
1389     !!!next-input-character;
1390     redo A;
1391     }
1392     } elsif ($self->{state} eq 'before DOCTYPE system identifier') {
1393     if ({
1394     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
1395     #0x000D => 1, # HT, LF, VT, FF, SP, CR
1396     }->{$self->{next_input_character}}) {
1397     ## Stay in the state
1398     !!!next-input-character;
1399     redo A;
1400     } elsif ($self->{next_input_character} == 0x0022) { # "
1401     $self->{current_token}->{system_identifier} = ''; # DOCTYPE
1402     $self->{state} = 'DOCTYPE system identifier (double-quoted)';
1403     !!!next-input-character;
1404     redo A;
1405     } elsif ($self->{next_input_character} == 0x0027) { # '
1406     $self->{current_token}->{system_identifier} = ''; # DOCTYPE
1407     $self->{state} = 'DOCTYPE system identifier (single-quoted)';
1408     !!!next-input-character;
1409     redo A;
1410     } elsif ($self->{next_input_character} == 0x003E) { # >
1411     !!!parse-error (type => 'no SYSTEM literal');
1412     $self->{state} = 'data';
1413     !!!next-input-character;
1414    
1415     delete $self->{current_token}->{correct};
1416     !!!emit ($self->{current_token}); # DOCTYPE
1417     undef $self->{current_token};
1418    
1419     redo A;
1420     } elsif ($self->{next_input_character} == -1) {
1421     !!!parse-error (type => 'unclosed DOCTYPE');
1422    
1423     $self->{state} = 'data';
1424     ## reconsume
1425    
1426     delete $self->{current_token}->{correct};
1427     !!!emit ($self->{current_token}); # DOCTYPE
1428     undef $self->{current_token};
1429    
1430     redo A;
1431     } else {
1432     !!!parse-error (type => 'string after PUBLIC literal');
1433     $self->{state} = 'bogus DOCTYPE';
1434     !!!next-input-character;
1435     redo A;
1436     }
1437     } elsif ($self->{state} eq 'DOCTYPE system identifier (double-quoted)') {
1438     if ($self->{next_input_character} == 0x0022) { # "
1439     $self->{state} = 'after DOCTYPE system identifier';
1440     !!!next-input-character;
1441     redo A;
1442     } elsif ($self->{next_input_character} == -1) {
1443     !!!parse-error (type => 'unclosed SYSTEM literal');
1444    
1445     $self->{state} = 'data';
1446     ## reconsume
1447    
1448     delete $self->{current_token}->{correct};
1449     !!!emit ($self->{current_token}); # DOCTYPE
1450     undef $self->{current_token};
1451    
1452     redo A;
1453     } else {
1454     $self->{current_token}->{system_identifier} # DOCTYPE
1455     .= chr $self->{next_input_character};
1456     ## Stay in the state
1457     !!!next-input-character;
1458     redo A;
1459     }
1460     } elsif ($self->{state} eq 'DOCTYPE system identifier (single-quoted)') {
1461     if ($self->{next_input_character} == 0x0027) { # '
1462     $self->{state} = 'after DOCTYPE system identifier';
1463     !!!next-input-character;
1464     redo A;
1465     } elsif ($self->{next_input_character} == -1) {
1466     !!!parse-error (type => 'unclosed SYSTEM literal');
1467    
1468     $self->{state} = 'data';
1469     ## reconsume
1470    
1471     delete $self->{current_token}->{correct};
1472 wakaba 1.1 !!!emit ($self->{current_token}); # DOCTYPE
1473     undef $self->{current_token};
1474    
1475     redo A;
1476     } else {
1477 wakaba 1.18 $self->{current_token}->{system_identifier} # DOCTYPE
1478     .= chr $self->{next_input_character};
1479     ## Stay in the state
1480     !!!next-input-character;
1481     redo A;
1482     }
1483     } elsif ($self->{state} eq 'after DOCTYPE system identifier') {
1484     if ({
1485     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
1486     #0x000D => 1, # HT, LF, VT, FF, SP, CR
1487     }->{$self->{next_input_character}}) {
1488     ## Stay in the state
1489     !!!next-input-character;
1490     redo A;
1491     } elsif ($self->{next_input_character} == 0x003E) { # >
1492     $self->{state} = 'data';
1493     !!!next-input-character;
1494    
1495     !!!emit ($self->{current_token}); # DOCTYPE
1496     undef $self->{current_token};
1497    
1498     redo A;
1499     } elsif ($self->{next_input_character} == -1) {
1500     !!!parse-error (type => 'unclosed DOCTYPE');
1501    
1502     $self->{state} = 'data';
1503     ## reconsume
1504    
1505     delete $self->{current_token}->{correct};
1506     !!!emit ($self->{current_token}); # DOCTYPE
1507     undef $self->{current_token};
1508    
1509     redo A;
1510     } else {
1511     !!!parse-error (type => 'string after SYSTEM literal');
1512 wakaba 1.1 $self->{state} = 'bogus DOCTYPE';
1513     !!!next-input-character;
1514     redo A;
1515     }
1516     } elsif ($self->{state} eq 'bogus DOCTYPE') {
1517     if ($self->{next_input_character} == 0x003E) { # >
1518     $self->{state} = 'data';
1519     !!!next-input-character;
1520    
1521 wakaba 1.18 delete $self->{current_token}->{correct};
1522 wakaba 1.1 !!!emit ($self->{current_token}); # DOCTYPE
1523     undef $self->{current_token};
1524    
1525     redo A;
1526     } elsif ($self->{next_input_character} == -1) {
1527 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1528 wakaba 1.1 $self->{state} = 'data';
1529     ## reconsume
1530    
1531 wakaba 1.18 delete $self->{current_token}->{correct};
1532 wakaba 1.1 !!!emit ($self->{current_token}); # DOCTYPE
1533     undef $self->{current_token};
1534    
1535     redo A;
1536     } else {
1537     ## Stay in the state
1538     !!!next-input-character;
1539     redo A;
1540     }
1541     } else {
1542     die "$0: $self->{state}: Unknown state";
1543     }
1544     } # A
1545    
1546     die "$0: _get_next_token: unexpected case";
1547     } # _get_next_token
1548    
1549     sub _tokenize_attempt_to_consume_an_entity ($) {
1550     my $self = shift;
1551    
1552     if ($self->{next_input_character} == 0x0023) { # #
1553     !!!next-input-character;
1554     if ($self->{next_input_character} == 0x0078 or # x
1555     $self->{next_input_character} == 0x0058) { # X
1556 wakaba 1.4 my $num;
1557 wakaba 1.1 X: {
1558     my $x_char = $self->{next_input_character};
1559     !!!next-input-character;
1560     if (0x0030 <= $self->{next_input_character} and
1561     $self->{next_input_character} <= 0x0039) { # 0..9
1562     $num ||= 0;
1563     $num *= 0x10;
1564     $num += $self->{next_input_character} - 0x0030;
1565     redo X;
1566     } elsif (0x0061 <= $self->{next_input_character} and
1567     $self->{next_input_character} <= 0x0066) { # a..f
1568     ## ISSUE: the spec says U+0078, which is apparently incorrect
1569     $num ||= 0;
1570     $num *= 0x10;
1571     $num += $self->{next_input_character} - 0x0060 + 9;
1572     redo X;
1573     } elsif (0x0041 <= $self->{next_input_character} and
1574     $self->{next_input_character} <= 0x0046) { # A..F
1575     ## ISSUE: the spec says U+0058, which is apparently incorrect
1576     $num ||= 0;
1577     $num *= 0x10;
1578     $num += $self->{next_input_character} - 0x0040 + 9;
1579     redo X;
1580     } elsif (not defined $num) { # no hexadecimal digit
1581 wakaba 1.3 !!!parse-error (type => 'bare hcro');
1582 wakaba 1.1 $self->{next_input_character} = 0x0023; # #
1583     !!!back-next-input-character ($x_char);
1584     return undef;
1585     } elsif ($self->{next_input_character} == 0x003B) { # ;
1586     !!!next-input-character;
1587     } else {
1588 wakaba 1.3 !!!parse-error (type => 'no refc');
1589 wakaba 1.1 }
1590    
1591     ## TODO: check the definition for |a valid Unicode character|.
1592 wakaba 1.4 ## <http://lists.whatwg.org/pipermail/whatwg-whatwg.org/2006-December/thread.html#8189>
1593 wakaba 1.1 if ($num > 1114111 or $num == 0) {
1594     $num = 0xFFFD; # REPLACEMENT CHARACTER
1595     ## ISSUE: Why this is not an error?
1596 wakaba 1.4 } elsif (0x80 <= $num and $num <= 0x9F) {
1597 wakaba 1.8 !!!parse-error (type => sprintf 'c1 entity:U+%04X', $num);
1598 wakaba 1.4 $num = $c1_entity_char->{$num};
1599 wakaba 1.1 }
1600    
1601     return {type => 'character', data => chr $num};
1602     } # X
1603     } elsif (0x0030 <= $self->{next_input_character} and
1604     $self->{next_input_character} <= 0x0039) { # 0..9
1605     my $code = $self->{next_input_character} - 0x0030;
1606     !!!next-input-character;
1607    
1608     while (0x0030 <= $self->{next_input_character} and
1609     $self->{next_input_character} <= 0x0039) { # 0..9
1610     $code *= 10;
1611     $code += $self->{next_input_character} - 0x0030;
1612    
1613     !!!next-input-character;
1614     }
1615    
1616     if ($self->{next_input_character} == 0x003B) { # ;
1617     !!!next-input-character;
1618     } else {
1619 wakaba 1.3 !!!parse-error (type => 'no refc');
1620 wakaba 1.1 }
1621    
1622     ## TODO: check the definition for |a valid Unicode character|.
1623     if ($code > 1114111 or $code == 0) {
1624     $code = 0xFFFD; # REPLACEMENT CHARACTER
1625     ## ISSUE: Why this is not an error?
1626 wakaba 1.4 } elsif (0x80 <= $code and $code <= 0x9F) {
1627 wakaba 1.8 !!!parse-error (type => sprintf 'c1 entity:U+%04X', $code);
1628 wakaba 1.4 $code = $c1_entity_char->{$code};
1629 wakaba 1.1 }
1630    
1631     return {type => 'character', data => chr $code};
1632     } else {
1633 wakaba 1.3 !!!parse-error (type => 'bare nero');
1634 wakaba 1.1 !!!back-next-input-character ($self->{next_input_character});
1635     $self->{next_input_character} = 0x0023; # #
1636     return undef;
1637     }
1638     } elsif ((0x0041 <= $self->{next_input_character} and
1639     $self->{next_input_character} <= 0x005A) or
1640     (0x0061 <= $self->{next_input_character} and
1641     $self->{next_input_character} <= 0x007A)) {
1642     my $entity_name = chr $self->{next_input_character};
1643     !!!next-input-character;
1644    
1645     my $value = $entity_name;
1646     my $match;
1647 wakaba 1.16 require Whatpm::_NamedEntityList;
1648     our $EntityChar;
1649 wakaba 1.1
1650     while (length $entity_name < 10 and
1651     ## NOTE: Some number greater than the maximum length of entity name
1652 wakaba 1.16 ((0x0041 <= $self->{next_input_character} and # a
1653     $self->{next_input_character} <= 0x005A) or # x
1654     (0x0061 <= $self->{next_input_character} and # a
1655     $self->{next_input_character} <= 0x007A) or # z
1656     (0x0030 <= $self->{next_input_character} and # 0
1657     $self->{next_input_character} <= 0x0039) or # 9
1658     $self->{next_input_character} == 0x003B)) { # ;
1659 wakaba 1.1 $entity_name .= chr $self->{next_input_character};
1660 wakaba 1.16 if (defined $EntityChar->{$entity_name}) {
1661     $value = $EntityChar->{$entity_name};
1662     if ($self->{next_input_character} == 0x003B) { # ;
1663     $match = 1;
1664     !!!next-input-character;
1665     last;
1666     } else {
1667     $match = -1;
1668     }
1669 wakaba 1.1 } else {
1670     $value .= chr $self->{next_input_character};
1671     }
1672     !!!next-input-character;
1673     }
1674    
1675 wakaba 1.16 if ($match > 0) {
1676     return {type => 'character', data => $value};
1677     } elsif ($match < 0) {
1678     !!!parse-error (type => 'refc');
1679 wakaba 1.1 return {type => 'character', data => $value};
1680     } else {
1681 wakaba 1.3 !!!parse-error (type => 'bare ero');
1682 wakaba 1.1 ## NOTE: No characters are consumed in the spec.
1683     !!!back-token ({type => 'character', data => $value});
1684     return undef;
1685     }
1686     } else {
1687     ## no characters are consumed
1688 wakaba 1.3 !!!parse-error (type => 'bare ero');
1689 wakaba 1.1 return undef;
1690     }
1691     } # _tokenize_attempt_to_consume_an_entity
1692    
## Prepares the document for tree construction: turns strict error
## checking off and flags the document as an HTML document.
##
## NOTE: The caller MUST have set |$self->{document}| before invoking
## this method.
sub _initialize_tree_constructor ($) {
  my ($self) = @_;
  my $doc = $self->{document};

  $doc->strict_error_checking (0);
  ## TODO: Turn mutation events off # MUST
  ## TODO: Turn loose Document option (manakai extension) on
  $doc->manakai_is_html (1); # MUST
} # _initialize_tree_constructor
1701    
## Counterpart of |_initialize_tree_constructor|: switches strict error
## checking on the document back on once tree construction is over.
sub _terminate_tree_constructor ($) {
  my ($self) = @_;
  my $doc = $self->{document};

  $doc->strict_error_checking (1);
  ## TODO: Turn mutation events on
} # _terminate_tree_constructor
1707    
1708     ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
1709    
1710 wakaba 1.3 { # tree construction stage
1711     my $token;
1712    
## Entry point of the tree construction stage.  Fetches the first
## token, resets the per-parse state kept on |$self| and then runs the
## initial, root element and main phases in that order.
sub _construct_tree ($) {
  my $self = shift;

  ## It is not defined when an interactive UA renders
  ## $self->{document} to the user, nor when it begins accepting user
  ## input.

  ## Appending a character means: collect it together with all
  ## subsequent consecutive characters and insert a single Text node
  ## whose data is the concatenation of those characters. # MUST

  !!!next-token;

  ## Fresh parser state.
  $self->{insertion_mode} = 'before head';
  $self->{open_elements} = [];
  $self->{$_} = undef for qw(form_element head_element inner_html_node);

  $self->_tree_construction_initial; # MUST
  $self->_tree_construction_root_element;
  return $self->_tree_construction_main;
} # _construct_tree
1736    
## Initial phase of tree construction.
##
## Works on the current token held in the file-scoped lexical |$token|:
##
##   - DOCTYPE: reports 'not HTML5' when the DOCTYPE has no name, has a
##     public or system identifier, or has a name other than "HTML"
##     (compared case-insensitively); appends a document type
##     definition node to |$self->{document}|; selects the document's
##     compatibility mode ('quirks' / 'limited quirks') from the token's
##     |correct| flag, name, public identifier and system identifier;
##     then fetches the next token and returns.
##   - start tag, end tag, end-of-file: reports 'no DOCTYPE', selects
##     quirks mode and returns with the token left for reprocessing.
##   - character: leading white space (U+0009..U+000C, U+0020) is
##     dropped; any remaining data is handled like a start tag above.
##   - comment: appended to the document; the phase continues.
##
## Dies on any other token type.
sub _tree_construction_initial ($) {
  my $self = shift;
  INITIAL: {
    if ($token->{type} eq 'DOCTYPE') {
      ## NOTE: Conformance checkers MAY, instead of reporting "not HTML5"
      ## error, switch to a conformance checking mode for another
      ## language.
      my $doctype_name = $token->{name};
      $doctype_name = '' unless defined $doctype_name;
      $doctype_name =~ tr/a-z/A-Z/;
      if (not defined $token->{name} or # <!DOCTYPE>
          defined $token->{public_identifier} or
          defined $token->{system_identifier}) {
        !!!parse-error (type => 'not HTML5');
      } elsif ($doctype_name ne 'HTML') {
        ## ISSUE: ASCII case-insensitive? (in fact it does not matter)
        !!!parse-error (type => 'not HTML5');
      }

      my $doctype = $self->{document}->create_document_type_definition
        ($token->{name}); ## ISSUE: If name is missing (e.g. <!DOCTYPE>)?
      $doctype->public_id ($token->{public_identifier})
          if defined $token->{public_identifier};
      $doctype->system_id ($token->{system_identifier})
          if defined $token->{system_identifier};
      ## NOTE: Other DocumentType attributes are null or empty lists.
      ## ISSUE: internalSubset = null??
      $self->{document}->append_child ($doctype);

      if (not $token->{correct} or $doctype_name ne 'HTML') {
        $self->{document}->manakai_compat_mode ('quirks');
      } elsif (defined $token->{public_identifier}) {
        ## The public identifier is upper-cased once so that every
        ## comparison below is ASCII case-insensitive.  All literals it
        ## is compared with therefore MUST be written in upper case.
        my $pubid = $token->{public_identifier};
        $pubid =~ tr/a-z/A-Z/;
        if ({
          "+//SILMARIL//DTD HTML PRO V0R11 19970101//EN" => 1,
          "-//ADVASOFT LTD//DTD HTML 3.0 ASWEDIT + EXTENSIONS//EN" => 1,
          "-//AS//DTD HTML 3.0 ASWEDIT + EXTENSIONS//EN" => 1,
          "-//IETF//DTD HTML 2.0 LEVEL 1//EN" => 1,
          "-//IETF//DTD HTML 2.0 LEVEL 2//EN" => 1,
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 1//EN" => 1,
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 2//EN" => 1,
          "-//IETF//DTD HTML 2.0 STRICT//EN" => 1,
          "-//IETF//DTD HTML 2.0//EN" => 1,
          "-//IETF//DTD HTML 2.1E//EN" => 1,
          "-//IETF//DTD HTML 3.0//EN" => 1,
          "-//IETF//DTD HTML 3.0//EN//" => 1,
          "-//IETF//DTD HTML 3.2 FINAL//EN" => 1,
          "-//IETF//DTD HTML 3.2//EN" => 1,
          "-//IETF//DTD HTML 3//EN" => 1,
          "-//IETF//DTD HTML LEVEL 0//EN" => 1,
          "-//IETF//DTD HTML LEVEL 0//EN//2.0" => 1,
          "-//IETF//DTD HTML LEVEL 1//EN" => 1,
          "-//IETF//DTD HTML LEVEL 1//EN//2.0" => 1,
          "-//IETF//DTD HTML LEVEL 2//EN" => 1,
          "-//IETF//DTD HTML LEVEL 2//EN//2.0" => 1,
          "-//IETF//DTD HTML LEVEL 3//EN" => 1,
          "-//IETF//DTD HTML LEVEL 3//EN//3.0" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 0//EN" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 0//EN//2.0" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 1//EN" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 1//EN//2.0" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 2//EN" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 2//EN//2.0" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 3//EN" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 3//EN//3.0" => 1,
          "-//IETF//DTD HTML STRICT//EN" => 1,
          "-//IETF//DTD HTML STRICT//EN//2.0" => 1,
          "-//IETF//DTD HTML STRICT//EN//3.0" => 1,
          "-//IETF//DTD HTML//EN" => 1,
          "-//IETF//DTD HTML//EN//2.0" => 1,
          "-//IETF//DTD HTML//EN//3.0" => 1,
          "-//METRIUS//DTD METRIUS PRESENTATIONAL//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML STRICT//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 TABLES//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML STRICT//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 TABLES//EN" => 1,
          "-//NETSCAPE COMM. CORP.//DTD HTML//EN" => 1,
          "-//NETSCAPE COMM. CORP.//DTD STRICT HTML//EN" => 1,
          "-//O'REILLY AND ASSOCIATES//DTD HTML 2.0//EN" => 1,
          "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED 1.0//EN" => 1,
          "-//SPYGLASS//DTD HTML 2.0 EXTENDED//EN" => 1,
          "-//SQ//DTD HTML 2.0 HOTMETAL + EXTENSIONS//EN" => 1,
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA HTML//EN" => 1,
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA STRICT HTML//EN" => 1,
          "-//W3C//DTD HTML 3 1995-03-24//EN" => 1,
          "-//W3C//DTD HTML 3.2 DRAFT//EN" => 1,
          "-//W3C//DTD HTML 3.2 FINAL//EN" => 1,
          "-//W3C//DTD HTML 3.2//EN" => 1,
          "-//W3C//DTD HTML 3.2S DRAFT//EN" => 1,
          "-//W3C//DTD HTML 4.0 FRAMESET//EN" => 1,
          "-//W3C//DTD HTML 4.0 TRANSITIONAL//EN" => 1,
          ## NOTE(review): the spelling "EXPERIMETNAL" is kept as found;
          ## confirm against the spec's list before changing it.
          "-//W3C//DTD HTML EXPERIMETNAL 19960712//EN" => 1,
          "-//W3C//DTD HTML EXPERIMENTAL 970421//EN" => 1,
          "-//W3C//DTD W3 HTML//EN" => 1,
          "-//W3O//DTD W3 HTML 3.0//EN" => 1,
          "-//W3O//DTD W3 HTML 3.0//EN//" => 1,
          "-//W3O//DTD W3 HTML STRICT 3.0//EN//" => 1,
          "-//WEBTECHS//DTD MOZILLA HTML 2.0//EN" => 1,
          "-//WEBTECHS//DTD MOZILLA HTML//EN" => 1,
          "-/W3C/DTD HTML 4.0 TRANSITIONAL/EN" => 1,
          "HTML" => 1,
        }->{$pubid}) {
          $self->{document}->manakai_compat_mode ('quirks');
        } elsif ($pubid eq "-//W3C//DTD HTML 4.01 FRAMESET//EN" or
                 $pubid eq "-//W3C//DTD HTML 4.01 TRANSITIONAL//EN") {
          if (defined $token->{system_identifier}) {
            $self->{document}->manakai_compat_mode ('quirks');
          } else {
            $self->{document}->manakai_compat_mode ('limited quirks');
          }
        } elsif ($pubid eq "-//W3C//DTD XHTML 1.0 FRAMESET//EN" or
                 $pubid eq "-//W3C//DTD XHTML 1.0 TRANSITIONAL//EN") {
          ## These literals are upper case because $pubid has been
          ## upper-cased; mixed-case literals could never match here.
          $self->{document}->manakai_compat_mode ('limited quirks');
        }
      }
      if (defined $token->{system_identifier}) {
        ## The system identifier is lower-cased before the comparison.
        my $sysid = $token->{system_identifier};
        $sysid =~ tr/A-Z/a-z/;
        if ($sysid eq "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
          $self->{document}->manakai_compat_mode ('quirks');
        }
      }

      ## Go to the root element phase.
      !!!next-token;
      return;
    } elsif ({
              'start tag' => 1,
              'end tag' => 1,
              'end-of-file' => 1,
             }->{$token->{type}}) {
      !!!parse-error (type => 'no DOCTYPE');
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the root element phase
      ## reprocess
      return;
    } elsif ($token->{type} eq 'character') {
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D
        ## Ignore the token
        unless (length $token->{data}) {
          ## Stay in the phase
          !!!next-token;
          redo INITIAL;
        }
      }

      !!!parse-error (type => 'no DOCTYPE');
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the root element phase
      ## reprocess
      return;
    } elsif ($token->{type} eq 'comment') {
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);

      ## Stay in the phase.
      !!!next-token;
      redo INITIAL;
    } else {
      die "$0: $token->{type}: Unknown token";
    }
  } # INITIAL
} # _tree_construction_initial
1903    
## Root element phase of tree construction.
##
## Skips over tokens that do not start the document's content (stray
## DOCTYPEs are reported and dropped, comments and leading white space
## are appended to the document), then creates the |html| root element,
## appends it to the document and pushes it onto the stack of open
## elements.  The token that ended the phase is left in |$token| for
## reprocessing.  Dies on an unknown token type.
sub _tree_construction_root_element ($) {
  my ($self) = @_;

  ROOT: while (1) {
    my $type = $token->{type};

    if ($type eq 'DOCTYPE') {
      !!!parse-error (type => 'in html:#DOCTYPE');
      ## The token is ignored; the phase does not change.
      !!!next-token;
      next ROOT;
    }

    if ($type eq 'comment') {
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);
      ## The phase does not change.
      !!!next-token;
      next ROOT;
    }

    if ($type eq 'character') {
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
        $self->{document}->manakai_append_text ($1);
        ## ISSUE: DOM3 Core does not allow Document > Text
        if (not length $token->{data}) {
          ## Nothing but white space: the phase does not change.
          !!!next-token;
          next ROOT;
        }
      }
      ## Non-space data remains: fall through to create the root.
    } elsif ($type eq 'start tag' or
             $type eq 'end tag' or
             $type eq 'end-of-file') {
      ## ISSUE: There is an issue in the spec
      ## Fall through to create the root.
    } else {
      die "$0: $token->{type}: Unknown token";
    }

    my $root_element;
    !!!create-element ($root_element, 'html');
    $self->{document}->append_child ($root_element);
    push @{$self->{open_elements}}, [$root_element, 'html'];
    #$phase = 'main';
    ## reprocess
    return;
  } # ROOT
} # _tree_construction_root_element
1950    
## Recomputes |$self->{insertion_mode}| from the stack of open elements
## (|$self->{open_elements}|, entries of the form [node, local name]),
## walking from the current node towards the bottom of the stack until
## an element that determines the mode is found.
##
## When |$self->{inner_html_node}| is defined and is neither |td| nor
## |th|, it is examined in place of the stack entry.
sub _reset_insertion_mode ($) {
  my ($self) = @_;

  ## Steps 4..13: element name -> insertion mode.
  my %mode_for = (
    select => 'in select',
    td => 'in cell',
    th => 'in cell',
    tr => 'in row',
    tbody => 'in table body',
    thead => 'in table head',
    tfoot => 'in table foot',
    caption => 'in caption',
    colgroup => 'in column group',
    table => 'in table',
    head => 'in body', # not in head!
    body => 'in body',
    frameset => 'in frameset',
  );

  ## Step 1
  my $is_last;

  ## Step 2
  my $index = -1;

  while (1) {
    my $node = $self->{open_elements}->[$index];

    ## Step 3
    $is_last = 1 if $self->{open_elements}->[0]->[0] eq $node->[0];
    my $context = $self->{inner_html_node};
    if (defined $context) {
      unless ($context->[1] eq 'td' or $context->[1] eq 'th') {
        $node = $context;
      }
    }

    ## Steps 4..13
    my $mode = $mode_for{$node->[1]};
    if (defined $mode) {
      $self->{insertion_mode} = $mode;
      return;
    }

    ## Step 14
    if ($node->[1] eq 'html') {
      $self->{insertion_mode}
          = defined $self->{head_element} ? 'after head' : 'before head';
      return;
    }

    ## Step 15
    if ($is_last) {
      $self->{insertion_mode} = 'in body';
      return;
    }

    ## Steps 16 and 17: move one entry down the stack and try again.
    $index--;
  }
} # _reset_insertion_mode
2012    
2013     sub _tree_construction_main ($) {
2014     my $self = shift;
2015    
2016     my $phase = 'main';
2017 wakaba 1.1
2018     my $active_formatting_elements = [];
2019    
## Reconstructs the active formatting elements: every entry at the end
## of |$active_formatting_elements| that is neither a marker nor in the
## stack of open elements is cloned, the clone is handed to the
## |$insert| callback (first argument), pushed onto the stack of open
## elements and put in the list in place of the entry it was cloned
## from.
my $reconstruct_active_formatting_elements = sub { # MUST
  my ($insert) = @_;

  ## Step 1
  return unless @$active_formatting_elements;

  ## True if the given node is in the stack of open elements.
  my $is_open = sub {
    my $el = shift;
    for my $open (@{$self->{open_elements}}) {
      return 1 if $el eq $open->[0];
    }
    return 0;
  };

  ## Step 3
  my $pos = -1;
  my $entry = $active_formatting_elements->[$pos];

  ## Step 2
  return if $entry->[0] eq '#marker' or $is_open->($entry->[0]);

  REWIND: while (1) {
    ## Step 4
    last REWIND if $active_formatting_elements->[0]->[0] eq $entry->[0];

    ## Step 5
    $pos--;
    $entry = $active_formatting_elements->[$pos];

    ## Step 6: keep rewinding until a marker or an open element is hit.
    next REWIND
        unless $entry->[0] eq '#marker' or $is_open->($entry->[0]);

    ## Step 7
    $pos++;
    $entry = $active_formatting_elements->[$pos];
    last REWIND;
  } # REWIND

  CREATE: while (1) {
    ## Step 8
    my $clone = [$entry->[0]->clone_node (0), $entry->[1]];

    ## Step 9
    $insert->($clone->[0]);
    push @{$self->{open_elements}}, $clone;

    ## Step 10
    $active_formatting_elements->[$pos] = $self->{open_elements}->[-1];

    ## Step 11
    last CREATE if $clone->[0] eq $active_formatting_elements->[-1]->[0];

    ## Step 7'
    $pos++;
    $entry = $active_formatting_elements->[$pos];
  } # CREATE
}; # $reconstruct_active_formatting_elements
2090    
## Removes the last '#marker' entry of |$active_formatting_elements|
## together with every entry after it.  The list is left untouched when
## it contains no marker.
my $clear_up_to_marker = sub {
  my $index = $#$active_formatting_elements;
  while ($index >= 0) {
    if ($active_formatting_elements->[$index]->[0] eq '#marker') {
      splice @$active_formatting_elements, $index;
      return;
    }
    $index--;
  }
}; # $clear_up_to_marker
2099    
## Handles a |style| start tag held in |$token|: creates the element,
## appends it to the head element (when the insertion mode is 'in head'
## and a head element is known) or to the current node otherwise,
## gathers the following character tokens as its text under the CDATA
## content model, and finally expects the matching end tag.
my $style_start_tag = sub {
  my $style_el;
  !!!create-element ($style_el, 'style', $token->{attributes});

  ## $self->{insertion_mode} eq 'in head' and ... (always true)
  my $parent;
  if ($self->{insertion_mode} eq 'in head' and
      defined $self->{head_element}) {
    $parent = $self->{head_element};
  } else {
    $parent = $self->{open_elements}->[-1]->[0];
  }
  $parent->append_child ($style_el);

  $self->{content_model_flag} = 'CDATA';
  delete $self->{escape}; # MUST

  ## Collect character tokens until a non-character token shows up or
  ## the tokenizer stops tokenising.
  my @chunks;
  !!!next-token;
  until ($token->{type} ne 'character') {
    push @chunks, $token->{data};
    !!!next-token;
  }
  my $text = join '', @chunks;
  $style_el->manakai_append_text ($text) if length $text;

  $self->{content_model_flag} = 'PCDATA';

  unless ($token->{type} eq 'end tag' and $token->{tag_name} eq 'style') {
    !!!parse-error (type => 'in CDATA:#'.$token->{type});
    ## ISSUE: And ignore?
  }
  ## The end tag itself (or the unexpected token) is skipped.
  !!!next-token;
}; # $style_start_tag
2129    
## Handles a |script| start tag held in |$token|: creates the element,
## gathers the following character tokens as its text under the CDATA
## content model, expects the matching end tag and, unless
## |$self->{inner_html_node}| is defined, appends the element to the
## head element (insertion mode 'in head' with a known head element) or
## to the current node.
my $script_start_tag = sub {
  my $script_el;
  !!!create-element ($script_el, 'script', $token->{attributes});
  ## TODO: mark as "parser-inserted"

  $self->{content_model_flag} = 'CDATA';
  delete $self->{escape}; # MUST

  ## Collect character tokens until a non-character token shows up or
  ## the tokenizer stops tokenising.
  my @chunks;
  !!!next-token;
  until ($token->{type} ne 'character') {
    push @chunks, $token->{data};
    !!!next-token;
  }
  my $text = join '', @chunks;
  $script_el->manakai_append_text ($text) if length $text;

  $self->{content_model_flag} = 'PCDATA';

  unless ($token->{type} eq 'end tag' and
          $token->{tag_name} eq 'script') {
    !!!parse-error (type => 'in CDATA:#'.$token->{type});
    ## ISSUE: And ignore?
    ## TODO: mark as "already executed"
  }

  if (not defined $self->{inner_html_node}) {
    ## TODO: $old_insertion_point = current insertion point
    ## TODO: insertion point = just before the next input character

    my $parent;
    if ($self->{insertion_mode} eq 'in head' and
        defined $self->{head_element}) {
      $parent = $self->{head_element};
    } else {
      $parent = $self->{open_elements}->[-1]->[0];
    }
    $parent->append_child ($script_el);

    ## TODO: insertion point = $old_insertion_point (might be "undefined")

    ## TODO: if there is a script that will execute as soon as the parser resume, then...
  } else {
    ## TODO: mark as "already executed"
  }

  !!!next-token;
}; # $script_start_tag
2175    
## Handles the end tag of a formatting element.
##
## Argument: the tag name of the end tag being processed.
##
## Follows the numbered steps marked below (NOTE(review): this looks
## like the spec's "adoption agency" procedure; confirm the step
## numbering against the spec revision in use).  The steps are repeated
## (|redo FET|) until either no matching entry is left in
## |$active_formatting_elements| or the formatting element has no
## furthest block; in both cases the next token is fetched and the
## closure returns.
##
## Entries of |$active_formatting_elements| and |$self->{open_elements}|
## are [node, local name] pairs; markers have '#marker' as the node.
my $formatting_end_tag = sub {
  my $tag_name = shift;

  FET: {
    ## Step 1: find the last entry with this tag name after the last
    ## marker.
    my $formatting_element;
    my $formatting_element_i_in_active;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[1] eq $tag_name) {
        $formatting_element = $active_formatting_elements->[$_];
        $formatting_element_i_in_active = $_;
        last AFE;
      } elsif ($active_formatting_elements->[$_]->[0] eq '#marker') {
        last AFE;
      }
    } # AFE
    unless (defined $formatting_element) {
      !!!parse-error (type => 'unmatched end tag:'.$tag_name);
      ## Ignore the token
      !!!next-token;
      return;
    }
    ## has an element in scope
    my $in_scope = 1;
    my $formatting_element_i_in_open;
    INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if ($node->[0] eq $formatting_element->[0]) {
        if ($in_scope) {
          $formatting_element_i_in_open = $_;
          last INSCOPE;
        } else { # in open elements but not in scope
          !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
          ## Ignore the token
          !!!next-token;
          return;
        }
      } elsif ({
                table => 1, caption => 1, td => 1, th => 1,
                button => 1, marquee => 1, object => 1, html => 1,
               }->{$node->[1]}) {
        $in_scope = 0;
      }
    } # INSCOPE
    unless (defined $formatting_element_i_in_open) {
      ## The formatting element is in the list but not in the stack of
      ## open elements: drop exactly that entry from the list.  (It is
      ## not necessarily the last entry, so |pop| would be wrong.)
      !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      !!!next-token; ## TODO: ok?
      return;
    }
    if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
      !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
    }

    ## Step 2: the furthest block is the special/scoping, non-formatting
    ## element nearest to the formatting element among those that
    ## follow it in the stack (the loop keeps overwriting the candidate
    ## while walking towards the formatting element).
    my $furthest_block;
    my $furthest_block_i_in_open;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if (not $formatting_category->{$node->[1]} and
          #not $phrasing_category->{$node->[1]} and
          ($special_category->{$node->[1]} or
           $scoping_category->{$node->[1]})) {
        $furthest_block = $node;
        $furthest_block_i_in_open = $_;
      } elsif ($node->[0] eq $formatting_element->[0]) {
        last OE;
      }
    } # OE

    ## Step 3: no furthest block -- pop the stack up to and including
    ## the formatting element and remove it from the list.
    unless (defined $furthest_block) { # MUST
      splice @{$self->{open_elements}}, $formatting_element_i_in_open;
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      !!!next-token;
      return;
    }

    ## Step 4
    my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1];

    ## Step 5
    my $furthest_block_parent = $furthest_block->[0]->parent_node;
    if (defined $furthest_block_parent) {
      $furthest_block_parent->remove_child ($furthest_block->[0]);
    }

    ## Step 6: the bookmark is remembered as the node of the entry that
    ## precedes the formatting element in the list.
    my $bookmark_prev_el
      = $active_formatting_elements->[$formatting_element_i_in_active - 1]
        ->[0];

    ## Step 7
    my $node = $furthest_block;
    my $node_i_in_open = $furthest_block_i_in_open;
    my $last_node = $furthest_block;
    S7: {
      ## Step 1
      $node_i_in_open--;
      $node = $self->{open_elements}->[$node_i_in_open];

      ## Step 2: a node that is not in the list of active formatting
      ## elements is removed from the stack and the walk continues.
      my $node_i_in_active;
      S7S2: {
        for (reverse 0..$#$active_formatting_elements) {
          if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
            $node_i_in_active = $_;
            last S7S2;
          }
        }
        splice @{$self->{open_elements}}, $node_i_in_open, 1;
        redo S7;
      } # S7S2

      ## Step 3
      last S7 if $node->[0] eq $formatting_element->[0];

      ## Step 4
      if ($last_node->[0] eq $furthest_block->[0]) {
        $bookmark_prev_el = $node->[0];
      }

      ## Step 5
      if ($node->[0]->has_child_nodes ()) {
        my $clone = [$node->[0]->clone_node (0), $node->[1]];
        $active_formatting_elements->[$node_i_in_active] = $clone;
        $self->{open_elements}->[$node_i_in_open] = $clone;
        $node = $clone;
      }

      ## Step 6
      $node->[0]->append_child ($last_node->[0]);

      ## Step 7
      $last_node = $node;

      ## Step 8
      redo S7;
    } # S7

    ## Step 8
    $common_ancestor_node->[0]->append_child ($last_node->[0]);

    ## Step 9
    my $clone = [$formatting_element->[0]->clone_node (0),
                 $formatting_element->[1]];

    ## Step 10
    my @cn = @{$furthest_block->[0]->child_nodes};
    $clone->[0]->append_child ($_) for @cn;

    ## Step 11
    $furthest_block->[0]->append_child ($clone->[0]);

    ## Step 12: remove the formatting element from the list and insert
    ## the clone right after the bookmark.  |$i| tracks the bookmark's
    ## index and is decremented when an entry before it is removed.
    my $i;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
        splice @$active_formatting_elements, $_, 1;
        $i-- and last AFE if defined $i;
      } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
        $i = $_;
      }
    } # AFE
    splice @$active_formatting_elements, $i + 1, 0, $clone;

    ## Step 13: remove the formatting element from the stack of open
    ## elements and insert the clone right after the furthest block.
    ## The clone is inserted (length 0), as in Step 12; a length of 1
    ## would replace, and thereby drop, the element that follows the
    ## furthest block in the stack.
    undef $i;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
        splice @{$self->{open_elements}}, $_, 1;
        $i-- and last OE if defined $i;
      } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
        $i = $_;
      }
    } # OE
    splice @{$self->{open_elements}}, $i + 1, 0, $clone;

    ## Step 14
    redo FET;
  } # FET
}; # $formatting_end_tag
2358    
## Insertion callback: appends the given node (first argument) to the
## current node, i.e. the last entry of the stack of open elements.
my $insert_to_current = sub {
  my ($child) = @_;
  my $current = $self->{open_elements}->[-1];
  return $current->[0]->append_child ($child);
}; # $insert_to_current
2362    
## Insertion callback with foster parenting.
##
## When the current node is not |table|, |tbody|, |tfoot|, |thead| or
## |tr|, the given node (first argument) is simply appended to the
## current node.  Otherwise it is inserted relative to the last |table|
## in the stack of open elements: before that table in the table's
## parent if the parent is an element node (node type 1), or else at
## the end of the element that precedes the table in the stack.  If the
## stack holds no |table|, the first element of the stack is used.
my $insert_to_foster = sub {
  my ($child) = @_;
  my $stack = $self->{open_elements};

  my %needs_foster_parent = (
    table => 1, tbody => 1, tfoot => 1,
    thead => 1, tr => 1,
  );
  return $stack->[-1]->[0]->append_child ($child)
      unless $needs_foster_parent{$stack->[-1]->[1]};

  # MUST
  my ($foster_parent, $before);
  my $index = $#$stack;
  while ($index >= 0) {
    if ($stack->[$index]->[1] eq 'table') {
      my $table = $stack->[$index]->[0];
      my $parent = $table->parent_node;
      if (defined $parent and $parent->node_type == 1) {
        $foster_parent = $parent;
        $before = $table;
      } else {
        $foster_parent = $stack->[$index - 1]->[0];
      }
      last;
    }
    $index--;
  }
  $foster_parent = $stack->[0]->[0] unless defined $foster_parent;
  return $foster_parent->insert_before ($child, $before);
}; # $insert_to_foster
2393    
2394     my $in_body = sub {
2395     my $insert = shift;
2396     if ($token->{type} eq 'start tag') {
2397     if ($token->{tag_name} eq 'script') {
2398     $script_start_tag->();
2399     return;
2400     } elsif ($token->{tag_name} eq 'style') {
2401     $style_start_tag->();
2402     return;
2403     } elsif ({
2404     base => 1, link => 1, meta => 1,
2405     }->{$token->{tag_name}}) {
2406 wakaba 1.3 !!!parse-error (type => 'in body:'.$token->{tag_name});
2407 wakaba 1.1 ## NOTE: This is an "as if in head" code clone
2408     my $el;
2409     !!!create-element ($el, $token->{tag_name}, $token->{attributes});
2410 wakaba 1.3 if (defined $self->{head_element}) {
2411     $self->{head_element}->append_child ($el);
2412 wakaba 1.1 } else {
2413     $insert->($el);
2414     }
2415    
2416     !!!next-token;
2417     return;
2418     } elsif ($token->{tag_name} eq 'title') {
2419 wakaba 1.3 !!!parse-error (type => 'in body:title');
2420 wakaba 1.1 ## NOTE: There is an "as if in head" code clone
2421     my $title_el;
2422     !!!create-element ($title_el, 'title', $token->{attributes});
2423 wakaba 1.3 (defined $self->{head_element} ? $self->{head_element} : $self->{open_elements}->[-1]->[0])
2424 wakaba 1.1 ->append_child ($title_el);
2425     $self->{content_model_flag} = 'RCDATA';
2426 wakaba 1.13 delete $self->{escape}; # MUST
2427 wakaba 1.1
2428     my $text = '';
2429     !!!next-token;
2430     while ($token->{type} eq 'character') {
2431     $text .= $token->{data};
2432     !!!next-token;
2433     }
2434     if (length $text) {
2435     $title_el->manakai_append_text ($text);
2436     }
2437    
2438     $self->{content_model_flag} = 'PCDATA';
2439    
2440     if ($token->{type} eq 'end tag' and
2441     $token->{tag_name} eq 'title') {
2442     ## Ignore the token
2443     } else {
2444 wakaba 1.3 !!!parse-error (type => 'in RCDATA:#'.$token->{type});
2445 wakaba 1.1 ## ISSUE: And ignore?
2446     }
2447     !!!next-token;
2448     return;
2449     } elsif ($token->{tag_name} eq 'body') {
2450 wakaba 1.3 !!!parse-error (type => 'in body:body');
2451 wakaba 1.1
2452 wakaba 1.3 if (@{$self->{open_elements}} == 1 or
2453     $self->{open_elements}->[1]->[1] ne 'body') {
2454 wakaba 1.1 ## Ignore the token
2455     } else {
2456 wakaba 1.3 my $body_el = $self->{open_elements}->[1]->[0];
2457 wakaba 1.1 for my $attr_name (keys %{$token->{attributes}}) {
2458     unless ($body_el->has_attribute_ns (undef, $attr_name)) {
2459     $body_el->set_attribute_ns
2460     (undef, [undef, $attr_name],
2461     $token->{attributes}->{$attr_name}->{value});
2462     }
2463     }
2464     }
2465     !!!next-token;
2466     return;
2467     } elsif ({
2468     address => 1, blockquote => 1, center => 1, dir => 1,
2469     div => 1, dl => 1, fieldset => 1, listing => 1,
2470     menu => 1, ol => 1, p => 1, ul => 1,
2471     pre => 1,
2472     }->{$token->{tag_name}}) {
2473     ## has a p element in scope
2474 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2475 wakaba 1.1 if ($_->[1] eq 'p') {
2476     !!!back-token;
2477     $token = {type => 'end tag', tag_name => 'p'};
2478     return;
2479     } elsif ({
2480     table => 1, caption => 1, td => 1, th => 1,
2481     button => 1, marquee => 1, object => 1, html => 1,
2482     }->{$_->[1]}) {
2483     last INSCOPE;
2484     }
2485     } # INSCOPE
2486    
2487     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2488     if ($token->{tag_name} eq 'pre') {
2489     !!!next-token;
2490     if ($token->{type} eq 'character') {
2491     $token->{data} =~ s/^\x0A//;
2492     unless (length $token->{data}) {
2493     !!!next-token;
2494     }
2495     }
2496     } else {
2497     !!!next-token;
2498     }
2499     return;
2500     } elsif ($token->{tag_name} eq 'form') {
2501 wakaba 1.3 if (defined $self->{form_element}) {
2502     !!!parse-error (type => 'in form:form');
2503 wakaba 1.1 ## Ignore the token
2504 wakaba 1.7 !!!next-token;
2505     return;
2506 wakaba 1.1 } else {
2507     ## has a p element in scope
2508 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2509 wakaba 1.1 if ($_->[1] eq 'p') {
2510     !!!back-token;
2511     $token = {type => 'end tag', tag_name => 'p'};
2512     return;
2513     } elsif ({
2514     table => 1, caption => 1, td => 1, th => 1,
2515     button => 1, marquee => 1, object => 1, html => 1,
2516     }->{$_->[1]}) {
2517     last INSCOPE;
2518     }
2519     } # INSCOPE
2520    
2521     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2522 wakaba 1.3 $self->{form_element} = $self->{open_elements}->[-1]->[0];
2523 wakaba 1.1 !!!next-token;
2524     return;
2525     }
2526     } elsif ($token->{tag_name} eq 'li') {
2527     ## has a p element in scope
2528 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2529 wakaba 1.1 if ($_->[1] eq 'p') {
2530     !!!back-token;
2531     $token = {type => 'end tag', tag_name => 'p'};
2532     return;
2533     } elsif ({
2534     table => 1, caption => 1, td => 1, th => 1,
2535     button => 1, marquee => 1, object => 1, html => 1,
2536     }->{$_->[1]}) {
2537     last INSCOPE;
2538     }
2539     } # INSCOPE
2540    
2541     ## Step 1
2542     my $i = -1;
2543 wakaba 1.3 my $node = $self->{open_elements}->[$i];
2544 wakaba 1.1 LI: {
2545     ## Step 2
2546     if ($node->[1] eq 'li') {
2547 wakaba 1.8 if ($i != -1) {
2548     !!!parse-error (type => 'end tag missing:'.
2549     $self->{open_elements}->[-1]->[1]);
2550     ## TODO: test
2551     }
2552 wakaba 1.3 splice @{$self->{open_elements}}, $i;
2553 wakaba 1.1 last LI;
2554     }
2555    
2556     ## Step 3
2557     if (not $formatting_category->{$node->[1]} and
2558     #not $phrasing_category->{$node->[1]} and
2559     ($special_category->{$node->[1]} or
2560     $scoping_category->{$node->[1]}) and
2561     $node->[1] ne 'address' and $node->[1] ne 'div') {
2562     last LI;
2563     }
2564    
2565     ## Step 4
2566     $i--;
2567 wakaba 1.3 $node = $self->{open_elements}->[$i];
2568 wakaba 1.1 redo LI;
2569     } # LI
2570    
2571     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2572     !!!next-token;
2573     return;
2574     } elsif ($token->{tag_name} eq 'dd' or $token->{tag_name} eq 'dt') {
2575     ## has a p element in scope
2576 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2577 wakaba 1.1 if ($_->[1] eq 'p') {
2578     !!!back-token;
2579     $token = {type => 'end tag', tag_name => 'p'};
2580     return;
2581     } elsif ({
2582     table => 1, caption => 1, td => 1, th => 1,
2583     button => 1, marquee => 1, object => 1, html => 1,
2584     }->{$_->[1]}) {
2585     last INSCOPE;
2586     }
2587     } # INSCOPE
2588    
2589     ## Step 1
2590     my $i = -1;
2591 wakaba 1.3 my $node = $self->{open_elements}->[$i];
2592 wakaba 1.1 LI: {
2593     ## Step 2
2594     if ($node->[1] eq 'dt' or $node->[1] eq 'dd') {
2595 wakaba 1.8 if ($i != -1) {
2596     !!!parse-error (type => 'end tag missing:'.
2597     $self->{open_elements}->[-1]->[1]);
2598     ## TODO: test
2599     }
2600 wakaba 1.3 splice @{$self->{open_elements}}, $i;
2601 wakaba 1.1 last LI;
2602     }
2603    
2604     ## Step 3
2605     if (not $formatting_category->{$node->[1]} and
2606     #not $phrasing_category->{$node->[1]} and
2607     ($special_category->{$node->[1]} or
2608     $scoping_category->{$node->[1]}) and
2609     $node->[1] ne 'address' and $node->[1] ne 'div') {
2610     last LI;
2611     }
2612    
2613     ## Step 4
2614     $i--;
2615 wakaba 1.3 $node = $self->{open_elements}->[$i];
2616 wakaba 1.1 redo LI;
2617     } # LI
2618    
2619     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2620     !!!next-token;
2621     return;
2622     } elsif ($token->{tag_name} eq 'plaintext') {
2623     ## has a p element in scope
2624 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2625 wakaba 1.1 if ($_->[1] eq 'p') {
2626     !!!back-token;
2627     $token = {type => 'end tag', tag_name => 'p'};
2628     return;
2629     } elsif ({
2630     table => 1, caption => 1, td => 1, th => 1,
2631     button => 1, marquee => 1, object => 1, html => 1,
2632     }->{$_->[1]}) {
2633     last INSCOPE;
2634     }
2635     } # INSCOPE
2636    
2637     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2638    
2639     $self->{content_model_flag} = 'PLAINTEXT';
2640    
2641     !!!next-token;
2642     return;
2643     } elsif ({
2644     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2645     }->{$token->{tag_name}}) {
2646     ## has a p element in scope
2647 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2648     my $node = $self->{open_elements}->[$_];
2649 wakaba 1.1 if ($node->[1] eq 'p') {
2650     !!!back-token;
2651     $token = {type => 'end tag', tag_name => 'p'};
2652     return;
2653     } elsif ({
2654     table => 1, caption => 1, td => 1, th => 1,
2655     button => 1, marquee => 1, object => 1, html => 1,
2656     }->{$node->[1]}) {
2657     last INSCOPE;
2658     }
2659     } # INSCOPE
2660    
2661     ## has an element in scope
2662     my $i;
2663 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2664     my $node = $self->{open_elements}->[$_];
2665 wakaba 1.1 if ({
2666     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2667     }->{$node->[1]}) {
2668     $i = $_;
2669     last INSCOPE;
2670     } elsif ({
2671     table => 1, caption => 1, td => 1, th => 1,
2672     button => 1, marquee => 1, object => 1, html => 1,
2673     }->{$node->[1]}) {
2674     last INSCOPE;
2675     }
2676     } # INSCOPE
2677    
2678     if (defined $i) {
2679 wakaba 1.3 !!!parse-error (type => 'in hn:hn');
2680     splice @{$self->{open_elements}}, $i;
2681 wakaba 1.1 }
2682    
2683     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2684    
2685     !!!next-token;
2686     return;
2687     } elsif ($token->{tag_name} eq 'a') {
2688     AFE: for my $i (reverse 0..$#$active_formatting_elements) {
2689     my $node = $active_formatting_elements->[$i];
2690     if ($node->[1] eq 'a') {
2691 wakaba 1.3 !!!parse-error (type => 'in a:a');
2692 wakaba 1.1
2693     !!!back-token;
2694     $token = {type => 'end tag', tag_name => 'a'};
2695     $formatting_end_tag->($token->{tag_name});
2696    
2697     AFE2: for (reverse 0..$#$active_formatting_elements) {
2698     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
2699     splice @$active_formatting_elements, $_, 1;
2700     last AFE2;
2701     }
2702     } # AFE2
2703 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
2704     if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
2705     splice @{$self->{open_elements}}, $_, 1;
2706 wakaba 1.1 last OE;
2707     }
2708     } # OE
2709     last AFE;
2710     } elsif ($node->[0] eq '#marker') {
2711     last AFE;
2712     }
2713     } # AFE
2714    
2715     $reconstruct_active_formatting_elements->($insert_to_current);
2716    
2717     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2718 wakaba 1.3 push @$active_formatting_elements, $self->{open_elements}->[-1];
2719 wakaba 1.1
2720     !!!next-token;
2721     return;
2722     } elsif ({ ## start tag of a formatting element
2723     b => 1, big => 1, em => 1, font => 1, i => 1,
2724     nobr => 1, s => 1, small => 1, strike => 1, ## FIX: key was misspelled "strile", so <strike> was not handled by this branch
2725     strong => 1, tt => 1, u => 1,
2726     }->{$token->{tag_name}}) {
2727     $reconstruct_active_formatting_elements->($insert_to_current);
2728    
2729     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2730 wakaba 1.3 push @$active_formatting_elements, $self->{open_elements}->[-1]; ## record the top of the open-elements stack as an active formatting element
2731 wakaba 1.1
2732     !!!next-token;
2733     return;
2734     } elsif ($token->{tag_name} eq 'button') {
2735     ## has a button element in scope
2736 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2737     my $node = $self->{open_elements}->[$_];
2738 wakaba 1.1 if ($node->[1] eq 'button') {
2739 wakaba 1.3 !!!parse-error (type => 'in button:button');
2740 wakaba 1.1 !!!back-token;
2741     $token = {type => 'end tag', tag_name => 'button'};
2742     return;
2743     } elsif ({
2744     table => 1, caption => 1, td => 1, th => 1,
2745     button => 1, marquee => 1, object => 1, html => 1,
2746     }->{$node->[1]}) {
2747     last INSCOPE;
2748     }
2749     } # INSCOPE
2750    
2751     $reconstruct_active_formatting_elements->($insert_to_current);
2752    
2753     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2754     push @$active_formatting_elements, ['#marker', ''];
2755    
2756     !!!next-token;
2757     return;
2758     } elsif ($token->{tag_name} eq 'marquee' or
2759     $token->{tag_name} eq 'object') {
2760     $reconstruct_active_formatting_elements->($insert_to_current);
2761    
2762     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2763     push @$active_formatting_elements, ['#marker', ''];
2764    
2765     !!!next-token;
2766     return;
2767     } elsif ($token->{tag_name} eq 'xmp') {
2768     $reconstruct_active_formatting_elements->($insert_to_current);
2769    
2770     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2771    
2772     $self->{content_model_flag} = 'CDATA';
2773 wakaba 1.13 delete $self->{escape}; # MUST
2774 wakaba 1.1
2775     !!!next-token;
2776     return;
2777     } elsif ($token->{tag_name} eq 'table') {
2778     ## has a p element in scope
2779 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2780 wakaba 1.1 if ($_->[1] eq 'p') {
2781     !!!back-token;
2782     $token = {type => 'end tag', tag_name => 'p'};
2783     return;
2784     } elsif ({
2785     table => 1, caption => 1, td => 1, th => 1,
2786     button => 1, marquee => 1, object => 1, html => 1,
2787     }->{$_->[1]}) {
2788     last INSCOPE;
2789     }
2790     } # INSCOPE
2791    
2792     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2793    
2794 wakaba 1.3 $self->{insertion_mode} = 'in table';
2795 wakaba 1.1
2796     !!!next-token;
2797     return;
2798     } elsif ({
2799     area => 1, basefont => 1, bgsound => 1, br => 1,
2800     embed => 1, img => 1, param => 1, spacer => 1, wbr => 1,
2801     image => 1,
2802     }->{$token->{tag_name}}) {
2803     if ($token->{tag_name} eq 'image') {
2804 wakaba 1.3 !!!parse-error (type => 'image');
2805 wakaba 1.1 $token->{tag_name} = 'img';
2806     }
2807    
2808     $reconstruct_active_formatting_elements->($insert_to_current);
2809    
2810     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2811 wakaba 1.3 pop @{$self->{open_elements}};
2812 wakaba 1.1
2813     !!!next-token;
2814     return;
2815     } elsif ($token->{tag_name} eq 'hr') {
2816     ## has a p element in scope
2817 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2818 wakaba 1.1 if ($_->[1] eq 'p') {
2819     !!!back-token;
2820     $token = {type => 'end tag', tag_name => 'p'};
2821     return;
2822     } elsif ({
2823     table => 1, caption => 1, td => 1, th => 1,
2824     button => 1, marquee => 1, object => 1, html => 1,
2825     }->{$_->[1]}) {
2826     last INSCOPE;
2827     }
2828     } # INSCOPE
2829    
2830     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2831 wakaba 1.3 pop @{$self->{open_elements}};
2832 wakaba 1.1
2833     !!!next-token;
2834     return;
2835     } elsif ($token->{tag_name} eq 'input') {
2836     $reconstruct_active_formatting_elements->($insert_to_current);
2837    
2838     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2839 wakaba 1.3 ## TODO: associate with $self->{form_element} if defined
2840     pop @{$self->{open_elements}};
2841 wakaba 1.1
2842     !!!next-token;
2843     return;
2844     } elsif ($token->{tag_name} eq 'isindex') {
2845 wakaba 1.3 !!!parse-error (type => 'isindex');
2846 wakaba 1.1
2847 wakaba 1.3 if (defined $self->{form_element}) {
2848 wakaba 1.1 ## Ignore the token
2849     !!!next-token;
2850     return;
2851     } else {
2852     my $at = $token->{attributes};
2853     $at->{name} = {name => 'name', value => 'isindex'};
2854     my @tokens = (
2855     {type => 'start tag', tag_name => 'form'},
2856     {type => 'start tag', tag_name => 'hr'},
2857     {type => 'start tag', tag_name => 'p'},
2858     {type => 'start tag', tag_name => 'label'},
2859     {type => 'character',
2860     data => 'This is a searchable index. Insert your search keywords here: '}, # SHOULD
2861     ## TODO: make this configurable
2862     {type => 'start tag', tag_name => 'input', attributes => $at},
2863     #{type => 'character', data => ''}, # SHOULD
2864     {type => 'end tag', tag_name => 'label'},
2865     {type => 'end tag', tag_name => 'p'},
2866     {type => 'start tag', tag_name => 'hr'},
2867     {type => 'end tag', tag_name => 'form'},
2868     );
2869     $token = shift @tokens;
2870     !!!back-token (@tokens);
2871     return;
2872     }
2873     } elsif ({
2874     textarea => 1,
2875 wakaba 1.5 iframe => 1,
2876 wakaba 1.1 noembed => 1,
2877     noframes => 1,
2878     noscript => 0, ## TODO: 1 if scripting is enabled
2879     }->{$token->{tag_name}}) {
2880     my $tag_name = $token->{tag_name}; ## saved here because $token is replaced by the tokens read below
2881     my $el;
2882     !!!create-element ($el, $token->{tag_name}, $token->{attributes});
2883    
2884     if ($token->{tag_name} eq 'textarea') {
2885 wakaba 1.3 ## TODO: $self->{form_element} if defined
2886 wakaba 1.1 $self->{content_model_flag} = 'RCDATA';
2887     } else {
2888     $self->{content_model_flag} = 'CDATA';
2889     }
2890 wakaba 1.13 delete $self->{escape}; # MUST
2891 wakaba 1.1
2892     $insert->($el);
2893    
2894     my $text = '';
2895 wakaba 1.9 if ($token->{tag_name} eq 'textarea') {
2896     !!!next-token;
2897     if ($token->{type} eq 'character') {
2898     $token->{data} =~ s/^\x0A//;
2899     unless (length $token->{data}) {
2900     !!!next-token;
2901     }
2902     }
2903     } else {
2904     !!!next-token;
2905     }
2906 wakaba 1.1 while ($token->{type} eq 'character') {
2907     $text .= $token->{data};
2908     !!!next-token;
2909     }
2910     if (length $text) {
2911     $el->manakai_append_text ($text);
2912     }
2913    
2914     $self->{content_model_flag} = 'PCDATA';
2915    
2916     if ($token->{type} eq 'end tag' and
2917     $token->{tag_name} eq $tag_name) {
2918     ## Ignore the token
2919     } else {
2920 wakaba 1.10 if ($tag_name eq 'textarea') { ## FIX: test the saved start tag name; $token is now the token that followed the content
2921     !!!parse-error (type => 'in RCDATA:#'.$token->{type});
2922     } else {
2923 wakaba 1.3 !!!parse-error (type => 'in CDATA:#'.$token->{type});
2924     }
2925 wakaba 1.1 ## ISSUE: And ignore?
2926     }
2927     !!!next-token;
2928     return;
2929     } elsif ($token->{tag_name} eq 'select') {
2930     $reconstruct_active_formatting_elements->($insert_to_current);
2931    
2932     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2933    
2934 wakaba 1.3 $self->{insertion_mode} = 'in select';
2935 wakaba 1.1 !!!next-token;
2936     return;
2937     } elsif ({
2938     caption => 1, col => 1, colgroup => 1, frame => 1,
2939     frameset => 1, head => 1, option => 1, optgroup => 1,
2940     tbody => 1, td => 1, tfoot => 1, th => 1,
2941     thead => 1, tr => 1,
2942     }->{$token->{tag_name}}) {
2943 wakaba 1.3 !!!parse-error (type => 'in body:'.$token->{tag_name});
2944 wakaba 1.1 ## Ignore the token
2945     !!!next-token;
2946     return;
2947    
2948     ## ISSUE: An issue on HTML5 new elements in the spec.
2949     } else {
2950     $reconstruct_active_formatting_elements->($insert_to_current);
2951    
2952     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2953    
2954     !!!next-token;
2955     return;
2956     }
2957     } elsif ($token->{type} eq 'end tag') {
2958     if ($token->{tag_name} eq 'body') {
2959 wakaba 1.3 if (@{$self->{open_elements}} > 1 and $self->{open_elements}->[1]->[1] eq 'body') {
2960 wakaba 1.1 ## ISSUE: There is an issue in the spec.
2961 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'body') {
2962     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2963 wakaba 1.1 }
2964 wakaba 1.3 $self->{insertion_mode} = 'after body';
2965 wakaba 1.1 !!!next-token;
2966     return;
2967     } else {
2968 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
2969 wakaba 1.1 ## Ignore the token
2970     !!!next-token;
2971     return;
2972     }
2973     } elsif ($token->{tag_name} eq 'html') {
2974 wakaba 1.3 if (@{$self->{open_elements}} > 1 and $self->{open_elements}->[1]->[1] eq 'body') {
2975 wakaba 1.1 ## ISSUE: There is an issue in the spec.
2976 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'body') {
2977     ## FIX: report the unclosed current node ([-1]), as the </body> branch does; [1] is always "body" here
2978     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2978 wakaba 1.1 }
2979 wakaba 1.3 $self->{insertion_mode} = 'after body';
2980 wakaba 1.1 ## reprocess
2981     return;
2982     } else {
2983 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
2984 wakaba 1.1 ## Ignore the token
2985     !!!next-token;
2986     return;
2987     }
2988     } elsif ({
2989     address => 1, blockquote => 1, center => 1, dir => 1,
2990     div => 1, dl => 1, fieldset => 1, listing => 1,
2991     menu => 1, ol => 1, pre => 1, ul => 1,
2992     p => 1,
2993     dd => 1, dt => 1, li => 1,
2994     button => 1, marquee => 1, object => 1,
2995     }->{$token->{tag_name}}) {
2996     ## has an element in scope
2997     my $i;
2998 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2999     my $node = $self->{open_elements}->[$_];
3000 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
3001     ## generate implied end tags
3002     if ({
3003     dd => ($token->{tag_name} ne 'dd'),
3004     dt => ($token->{tag_name} ne 'dt'),
3005     li => ($token->{tag_name} ne 'li'),
3006     p => ($token->{tag_name} ne 'p'),
3007     td => 1, th => 1, tr => 1,
3008 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3009 wakaba 1.1 !!!back-token;
3010     $token = {type => 'end tag',
3011 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3012 wakaba 1.1 return;
3013     }
3014     $i = $_;
3015     last INSCOPE unless $token->{tag_name} eq 'p';
3016     } elsif ({
3017     table => 1, caption => 1, td => 1, th => 1,
3018     button => 1, marquee => 1, object => 1, html => 1,
3019     }->{$node->[1]}) {
3020     last INSCOPE;
3021     }
3022     } # INSCOPE
3023    
3024 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
3025     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3026 wakaba 1.1 }
3027    
3028 wakaba 1.3 splice @{$self->{open_elements}}, $i if defined $i;
3029 wakaba 1.1 $clear_up_to_marker->()
3030     if {
3031     button => 1, marquee => 1, object => 1,
3032     }->{$token->{tag_name}};
3033     !!!next-token;
3034     return;
3035 wakaba 1.12 } elsif ($token->{tag_name} eq 'form') {
3036     ## has an element in scope
3037     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3038     my $node = $self->{open_elements}->[$_];
3039     if ($node->[1] eq $token->{tag_name}) {
3040     ## generate implied end tags
3041     if ({
3042     dd => 1, dt => 1, li => 1, p => 1,
3043     td => 1, th => 1, tr => 1,
3044     }->{$self->{open_elements}->[-1]->[1]}) {
3045     !!!back-token;
3046     $token = {type => 'end tag',
3047     tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3048     return;
3049     }
3050     last INSCOPE;
3051     } elsif ({
3052     table => 1, caption => 1, td => 1, th => 1,
3053     button => 1, marquee => 1, object => 1, html => 1,
3054     }->{$node->[1]}) {
3055     last INSCOPE;
3056     }
3057     } # INSCOPE
3058    
3059     if ($self->{open_elements}->[-1]->[1] eq $token->{tag_name}) {
3060     pop @{$self->{open_elements}};
3061     } else {
3062     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3063     }
3064    
3065     undef $self->{form_element};
3066     !!!next-token;
3067     return;
3068 wakaba 1.1 } elsif ({
3069     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
3070     }->{$token->{tag_name}}) {
3071     ## has an element in scope
3072     my $i;
3073 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3074     my $node = $self->{open_elements}->[$_];
3075 wakaba 1.1 if ({
3076     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
3077     }->{$node->[1]}) {
3078     ## generate implied end tags
3079     if ({
3080     dd => 1, dt => 1, li => 1, p => 1,
3081     td => 1, th => 1, tr => 1,
3082 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3083 wakaba 1.1 !!!back-token;
3084     $token = {type => 'end tag',
3085 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3086 wakaba 1.1 return;
3087     }
3088     $i = $_;
3089     last INSCOPE;
3090     } elsif ({
3091     table => 1, caption => 1, td => 1, th => 1,
3092     button => 1, marquee => 1, object => 1, html => 1,
3093     }->{$node->[1]}) {
3094     last INSCOPE;
3095     }
3096     } # INSCOPE
3097    
3098 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
3099     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3100 wakaba 1.1 }
3101    
3102 wakaba 1.3 splice @{$self->{open_elements}}, $i if defined $i;
3103 wakaba 1.1 !!!next-token;
3104     return;
3105     } elsif ({ ## end tag of a formatting element; must list the same names as the formatting start-tag branch, plus "a"
3106     a => 1,
3107     b => 1, big => 1, em => 1, font => 1, i => 1,
3108     nobr => 1, s => 1, small => 1, strike => 1, ## FIX: key was misspelled "strile", so </strike> never reached $formatting_end_tag
3109     strong => 1, tt => 1, u => 1,
3110     }->{$token->{tag_name}}) {
3111     $formatting_end_tag->($token->{tag_name});
3112 wakaba 1.8 ## TODO: <http://html5.org/tools/web-apps-tracker?from=883&to=884>
3113 wakaba 1.1 return;
3114     } elsif ({
3115     caption => 1, col => 1, colgroup => 1, frame => 1,
3116     frameset => 1, head => 1, option => 1, optgroup => 1,
3117     tbody => 1, td => 1, tfoot => 1, th => 1,
3118     thead => 1, tr => 1,
3119     area => 1, basefont => 1, bgsound => 1, br => 1,
3120     embed => 1, hr => 1, iframe => 1, image => 1,
3121 wakaba 1.5 img => 1, input => 1, isindex => 1, noembed => 1,
3122 wakaba 1.1 noframes => 1, param => 1, select => 1, spacer => 1,
3123     table => 1, textarea => 1, wbr => 1,
3124     noscript => 0, ## TODO: if scripting is enabled
3125     }->{$token->{tag_name}}) {
3126 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3127 wakaba 1.1 ## Ignore the token
3128     !!!next-token;
3129     return;
3130    
3131     ## ISSUE: Issue on HTML5 new elements in spec
3132    
3133     } else {
3134     ## Step 1
3135     my $node_i = -1;
3136 wakaba 1.3 my $node = $self->{open_elements}->[$node_i];
3137 wakaba 1.1
3138     ## Step 2
3139     S2: {
3140     if ($node->[1] eq $token->{tag_name}) {
3141     ## Step 1
3142     ## generate implied end tags
3143     if ({
3144     dd => 1, dt => 1, li => 1, p => 1,
3145     td => 1, th => 1, tr => 1,
3146 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3147 wakaba 1.1 !!!back-token;
3148     $token = {type => 'end tag',
3149 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3150 wakaba 1.1 return;
3151     }
3152    
3153     ## Step 2
3154 wakaba 1.3 if ($token->{tag_name} ne $self->{open_elements}->[-1]->[1]) {
3155     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3156 wakaba 1.1 }
3157    
3158     ## Step 3
3159 wakaba 1.3 splice @{$self->{open_elements}}, $node_i;
3160    
3161     !!!next-token;
3162 wakaba 1.1 last S2;
3163     } else {
3164     ## Step 3
3165     if (not $formatting_category->{$node->[1]} and
3166     #not $phrasing_category->{$node->[1]} and
3167     ($special_category->{$node->[1]} or
3168     $scoping_category->{$node->[1]})) {
3169 wakaba 1.3 !!!parse-error (type => 'not closed:'.$node->[1]);
3170 wakaba 1.1 ## Ignore the token
3171     !!!next-token;
3172     last S2;
3173     }
3174     }
3175    
3176     ## Step 4
3177     $node_i--;
3178 wakaba 1.3 $node = $self->{open_elements}->[$node_i];
3179 wakaba 1.1
3180     ## Step 5;
3181     redo S2;
3182     } # S2
3183 wakaba 1.3 return;
3184 wakaba 1.1 }
3185     }
3186     }; # $in_body
3187    
3188     B: {
3189 wakaba 1.3 if ($phase eq 'main') {
3190 wakaba 1.1 if ($token->{type} eq 'DOCTYPE') {
3191 wakaba 1.3 !!!parse-error (type => 'in html:#DOCTYPE');
3192 wakaba 1.1 ## Ignore the token
3193     ## Stay in the phase
3194     !!!next-token;
3195     redo B;
3196     } elsif ($token->{type} eq 'start tag' and
3197     $token->{tag_name} eq 'html') {
3198     ## TODO: unless it is the first start tag token, parse-error
3199 wakaba 1.3 my $top_el = $self->{open_elements}->[0]->[0];
3200 wakaba 1.1 for my $attr_name (keys %{$token->{attributes}}) {
3201     unless ($top_el->has_attribute_ns (undef, $attr_name)) {
3202     $top_el->set_attribute_ns
3203     (undef, [undef, $attr_name],
3204     $token->{attributes}->{$attr_name}->{value});
3205     }
3206     }
3207     !!!next-token;
3208     redo B;
3209     } elsif ($token->{type} eq 'end-of-file') {
3210     ## Generate implied end tags
3211     if ({
3212     dd => 1, dt => 1, li => 1, p => 1, td => 1, th => 1, tr => 1,
3213 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3214 wakaba 1.1 !!!back-token;
3215 wakaba 1.3 $token = {type => 'end tag', tag_name => $self->{open_elements}->[-1]->[1]};
3216 wakaba 1.1 redo B;
3217     }
3218    
3219 wakaba 1.3 if (@{$self->{open_elements}} > 2 or
3220     (@{$self->{open_elements}} == 2 and $self->{open_elements}->[1]->[1] ne 'body')) {
3221     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3222     } elsif (defined $self->{inner_html_node} and
3223     @{$self->{open_elements}} > 1 and
3224     $self->{open_elements}->[1]->[1] ne 'body') {
3225     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3226 wakaba 1.1 }
3227    
3228     ## Stop parsing
3229     last B;
3230    
3231     ## ISSUE: There is an issue in the spec.
3232     } else {
3233 wakaba 1.3 if ($self->{insertion_mode} eq 'before head') {
3234 wakaba 1.1 if ($token->{type} eq 'character') {
3235     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3236 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3237 wakaba 1.1 unless (length $token->{data}) {
3238     !!!next-token;
3239     redo B;
3240     }
3241     }
3242     ## As if <head>
3243 wakaba 1.3 !!!create-element ($self->{head_element}, 'head');
3244     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3245     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3246     $self->{insertion_mode} = 'in head';
3247 wakaba 1.1 ## reprocess
3248     redo B;
3249     } elsif ($token->{type} eq 'comment') {
3250     my $comment = $self->{document}->create_comment ($token->{data});
3251 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3252 wakaba 1.1 !!!next-token;
3253     redo B;
3254     } elsif ($token->{type} eq 'start tag') {
3255     my $attr = $token->{tag_name} eq 'head' ? $token->{attributes} : {};
3256 wakaba 1.3 !!!create-element ($self->{head_element}, 'head', $attr);
3257     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3258     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3259     $self->{insertion_mode} = 'in head';
3260 wakaba 1.1 if ($token->{tag_name} eq 'head') {
3261     !!!next-token;
3262     #} elsif ({
3263     # base => 1, link => 1, meta => 1,
3264     # script => 1, style => 1, title => 1,
3265     # }->{$token->{tag_name}}) {
3266     # ## reprocess
3267     } else {
3268     ## reprocess
3269     }
3270     redo B;
3271     } elsif ($token->{type} eq 'end tag') {
3272     if ($token->{tag_name} eq 'html') {
3273     ## As if <head>
3274 wakaba 1.3 !!!create-element ($self->{head_element}, 'head');
3275     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3276     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3277     $self->{insertion_mode} = 'in head';
3278 wakaba 1.1 ## reprocess
3279     redo B;
3280     } else {
3281 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3282 wakaba 1.1 ## Ignore the token
3283     !!!next-token;
3284     redo B;
3285     }
3286     } else {
3287     die "$0: $token->{type}: Unknown type";
3288     }
3289 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in head') {
3290 wakaba 1.1 if ($token->{type} eq 'character') {
3291     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3292 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3293 wakaba 1.1 unless (length $token->{data}) {
3294     !!!next-token;
3295     redo B;
3296     }
3297     }
3298    
3299     #
3300     } elsif ($token->{type} eq 'comment') {
3301     my $comment = $self->{document}->create_comment ($token->{data});
3302 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3303 wakaba 1.1 !!!next-token;
3304     redo B;
3305     } elsif ($token->{type} eq 'start tag') {
3306     if ($token->{tag_name} eq 'title') {
3307     ## NOTE: There is an "as if in head" code clone
3308     my $title_el;
3309     !!!create-element ($title_el, 'title', $token->{attributes});
3310 wakaba 1.3 (defined $self->{head_element} ? $self->{head_element} : $self->{open_elements}->[-1]->[0])
3311 wakaba 1.1 ->append_child ($title_el);
3312     $self->{content_model_flag} = 'RCDATA';
3313 wakaba 1.13 delete $self->{escape}; # MUST
3314 wakaba 1.1
3315     my $text = '';
3316     !!!next-token;
3317     while ($token->{type} eq 'character') {
3318     $text .= $token->{data};
3319     !!!next-token;
3320     }
3321     if (length $text) {
3322     $title_el->manakai_append_text ($text);
3323     }
3324    
3325     $self->{content_model_flag} = 'PCDATA';
3326    
3327     if ($token->{type} eq 'end tag' and
3328     $token->{tag_name} eq 'title') {
3329     ## Ignore the token
3330     } else {
3331 wakaba 1.3 !!!parse-error (type => 'in RCDATA:#'.$token->{type});
3332 wakaba 1.1 ## ISSUE: And ignore?
3333     }
3334     !!!next-token;
3335     redo B;
3336     } elsif ($token->{tag_name} eq 'style') {
3337     $style_start_tag->();
3338     redo B;
3339     } elsif ($token->{tag_name} eq 'script') {
3340     $script_start_tag->();
3341     redo B;
3342     } elsif ({base => 1, link => 1, meta => 1}->{$token->{tag_name}}) {
3343     ## NOTE: There are "as if in head" code clones
3344     my $el;
3345     !!!create-element ($el, $token->{tag_name}, $token->{attributes});
3346 wakaba 1.3 (defined $self->{head_element} ? $self->{head_element} : $self->{open_elements}->[-1]->[0])
3347 wakaba 1.1 ->append_child ($el);
3348    
3349     !!!next-token;
3350     redo B;
3351     } elsif ($token->{tag_name} eq 'head') {
3352 wakaba 1.3 !!!parse-error (type => 'in head:head');
3353 wakaba 1.1 ## Ignore the token
3354     !!!next-token;
3355     redo B;
3356     } else {
3357     #
3358     }
3359     } elsif ($token->{type} eq 'end tag') {
3360     if ($token->{tag_name} eq 'head') {
3361 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'head') {
3362     pop @{$self->{open_elements}};
3363 wakaba 1.1 } else {
3364 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:head');
3365 wakaba 1.1 }
3366 wakaba 1.3 $self->{insertion_mode} = 'after head';
3367 wakaba 1.1 !!!next-token;
3368     redo B;
3369     } elsif ($token->{tag_name} eq 'html') {
3370     #
3371     } else {
3372 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3373 wakaba 1.1 ## Ignore the token
3374     !!!next-token;
3375     redo B;
3376     }
3377     } else {
3378     #
3379     }
3380    
3381 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'head') {
3382 wakaba 1.1 ## As if </head>
3383 wakaba 1.3 pop @{$self->{open_elements}};
3384 wakaba 1.1 }
3385 wakaba 1.3 $self->{insertion_mode} = 'after head';
3386 wakaba 1.1 ## reprocess
3387     redo B;
3388    
3389     ## ISSUE: An issue in the spec.
3390 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after head') {
3391 wakaba 1.1 if ($token->{type} eq 'character') {
3392     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3393 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3394 wakaba 1.1 unless (length $token->{data}) {
3395     !!!next-token;
3396     redo B;
3397     }
3398     }
3399    
3400     #
3401     } elsif ($token->{type} eq 'comment') {
3402     my $comment = $self->{document}->create_comment ($token->{data});
3403 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3404 wakaba 1.1 !!!next-token;
3405     redo B;
3406     } elsif ($token->{type} eq 'start tag') {
3407     if ($token->{tag_name} eq 'body') {
3408     !!!insert-element ('body', $token->{attributes});
3409 wakaba 1.3 $self->{insertion_mode} = 'in body';
3410 wakaba 1.1 !!!next-token;
3411     redo B;
3412     } elsif ($token->{tag_name} eq 'frameset') {
3413     !!!insert-element ('frameset', $token->{attributes});
3414 wakaba 1.3 $self->{insertion_mode} = 'in frameset';
3415 wakaba 1.1 !!!next-token;
3416     redo B;
3417     } elsif ({
3418     base => 1, link => 1, meta => 1,
3419 wakaba 1.3 script => 1, style => 1, title => 1,
3420 wakaba 1.1 }->{$token->{tag_name}}) {
3421 wakaba 1.3 !!!parse-error (type => 'after head:'.$token->{tag_name});
3422     $self->{insertion_mode} = 'in head';
3423 wakaba 1.1 ## reprocess
3424     redo B;
3425     } else {
3426     #
3427     }
3428     } else {
3429     #
3430     }
3431    
3432     ## As if <body>
3433     !!!insert-element ('body');
3434 wakaba 1.3 $self->{insertion_mode} = 'in body';
3435 wakaba 1.1 ## reprocess
3436     redo B;
3437 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in body') {
3438 wakaba 1.1 if ($token->{type} eq 'character') {
3439     ## NOTE: There is a code clone of "character in body".
3440     $reconstruct_active_formatting_elements->($insert_to_current);
3441    
3442 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3443 wakaba 1.1
3444     !!!next-token;
3445     redo B;
3446     } elsif ($token->{type} eq 'comment') {
3447     ## NOTE: There is a code clone of "comment in body".
3448     my $comment = $self->{document}->create_comment ($token->{data});
3449 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3450 wakaba 1.1 !!!next-token;
3451     redo B;
3452     } else {
3453     $in_body->($insert_to_current);
3454     redo B;
3455     }
3456 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in table') {
3457 wakaba 1.1 if ($token->{type} eq 'character') {
3458     ## NOTE: There are "character in table" code clones.
3459     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3460 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3461 wakaba 1.1
3462     unless (length $token->{data}) {
3463     !!!next-token;
3464     redo B;
3465     }
3466     }
3467    
3468 wakaba 1.3 !!!parse-error (type => 'in table:#character');
3469    
3470 wakaba 1.1 ## As if in body, but insert into foster parent element
3471     ## ISSUE: Spec says that "whenever a node would be inserted
3472     ## into the current node" while characters might not
3473     ## result in a new Text node.
3474     $reconstruct_active_formatting_elements->($insert_to_foster);
3475    
3476     if ({
3477     table => 1, tbody => 1, tfoot => 1,
3478     thead => 1, tr => 1,
3479 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3480 wakaba 1.1 # MUST
3481     my $foster_parent_element;
3482     my $next_sibling;
3483     my $prev_sibling;
3484 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3485     if ($self->{open_elements}->[$_]->[1] eq 'table') {
3486     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
3487 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
3488     $foster_parent_element = $parent;
3489 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
3490 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
3491     } else {
3492 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
3493 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
3494     }
3495     last OE;
3496     }
3497     } # OE
3498 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
3499 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
3500     unless defined $foster_parent_element;
3501     if (defined $prev_sibling and
3502     $prev_sibling->node_type == 3) {
3503     $prev_sibling->manakai_append_text ($token->{data});
3504     } else {
3505     $foster_parent_element->insert_before
3506     ($self->{document}->create_text_node ($token->{data}),
3507     $next_sibling);
3508     }
3509     } else {
3510 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3511 wakaba 1.1 }
3512    
3513     !!!next-token;
3514     redo B;
3515     } elsif ($token->{type} eq 'comment') {
3516     my $comment = $self->{document}->create_comment ($token->{data});
3517 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3518 wakaba 1.1 !!!next-token;
3519     redo B;
3520     } elsif ($token->{type} eq 'start tag') {
3521     if ({
3522     caption => 1,
3523     colgroup => 1,
3524     tbody => 1, tfoot => 1, thead => 1,
3525     }->{$token->{tag_name}}) {
3526     ## Clear back to table context
3527 wakaba 1.3 while ($self->{open_elements}->[-1]->[1] ne 'table' and
3528     $self->{open_elements}->[-1]->[1] ne 'html') {
3529     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3530     pop @{$self->{open_elements}};
3531 wakaba 1.1 }
3532    
3533     push @$active_formatting_elements, ['#marker', '']
3534     if $token->{tag_name} eq 'caption';
3535    
3536     !!!insert-element ($token->{tag_name}, $token->{attributes});
3537 wakaba 1.3 $self->{insertion_mode} = {
3538 wakaba 1.1 caption => 'in caption',
3539     colgroup => 'in column group',
3540     tbody => 'in table body',
3541     tfoot => 'in table body',
3542     thead => 'in table body',
3543     }->{$token->{tag_name}};
3544     !!!next-token;
3545     redo B;
3546     } elsif ({
3547     col => 1,
3548     td => 1, th => 1, tr => 1,
3549     }->{$token->{tag_name}}) {
3550     ## Clear back to table context
3551 wakaba 1.3 while ($self->{open_elements}->[-1]->[1] ne 'table' and
3552     $self->{open_elements}->[-1]->[1] ne 'html') {
3553     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3554     pop @{$self->{open_elements}};
3555 wakaba 1.1 }
3556    
3557     !!!insert-element ($token->{tag_name} eq 'col' ? 'colgroup' : 'tbody');
3558 wakaba 1.3 $self->{insertion_mode} = $token->{tag_name} eq 'col'
3559 wakaba 1.1 ? 'in column group' : 'in table body';
3560     ## reprocess
3561     redo B;
3562     } elsif ($token->{tag_name} eq 'table') {
3563     ## NOTE: There are code clones for this "table in table"
3564 wakaba 1.3 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3565 wakaba 1.1
3566     ## As if </table>
3567     ## have a table element in table scope
3568     my $i;
3569 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3570     my $node = $self->{open_elements}->[$_];
3571 wakaba 1.1 if ($node->[1] eq 'table') {
3572     $i = $_;
3573     last INSCOPE;
3574     } elsif ({
3575     table => 1, html => 1,
3576     }->{$node->[1]}) {
3577     last INSCOPE;
3578     }
3579     } # INSCOPE
3580     unless (defined $i) {
3581 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:table');
3582 wakaba 1.1 ## Ignore tokens </table><table>
3583     !!!next-token;
3584     redo B;
3585     }
3586    
3587     ## generate implied end tags
3588     if ({
3589     dd => 1, dt => 1, li => 1, p => 1,
3590     td => 1, th => 1, tr => 1,
3591 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3592 wakaba 1.1 !!!back-token; # <table>
3593     $token = {type => 'end tag', tag_name => 'table'};
3594     !!!back-token;
3595     $token = {type => 'end tag',
3596 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3597 wakaba 1.1 redo B;
3598     }
3599    
3600 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
3601     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3602 wakaba 1.1 }
3603    
3604 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3605 wakaba 1.1
3606 wakaba 1.3 $self->_reset_insertion_mode;
3607 wakaba 1.1
3608     ## reprocess
3609     redo B;
3610     } else {
3611     #
3612     }
3613     } elsif ($token->{type} eq 'end tag') {
3614     if ($token->{tag_name} eq 'table') {
3615     ## have a table element in table scope
3616     my $i;
3617 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3618     my $node = $self->{open_elements}->[$_];
3619 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
3620     $i = $_;
3621     last INSCOPE;
3622     } elsif ({
3623     table => 1, html => 1,
3624     }->{$node->[1]}) {
3625     last INSCOPE;
3626     }
3627     } # INSCOPE
3628     unless (defined $i) {
3629 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3630 wakaba 1.1 ## Ignore the token
3631     !!!next-token;
3632     redo B;
3633     }
3634    
3635     ## generate implied end tags
3636     if ({
3637     dd => 1, dt => 1, li => 1, p => 1,
3638     td => 1, th => 1, tr => 1,
3639 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3640 wakaba 1.1 !!!back-token;
3641     $token = {type => 'end tag',
3642 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3643 wakaba 1.1 redo B;
3644     }
3645    
3646 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
3647     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3648 wakaba 1.1 }
3649    
3650 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3651 wakaba 1.1
3652 wakaba 1.3 $self->_reset_insertion_mode;
3653 wakaba 1.1
3654     !!!next-token;
3655     redo B;
3656     } elsif ({
3657     body => 1, caption => 1, col => 1, colgroup => 1,
3658     html => 1, tbody => 1, td => 1, tfoot => 1, th => 1,
3659     thead => 1, tr => 1,
3660     }->{$token->{tag_name}}) {
3661 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3662 wakaba 1.1 ## Ignore the token
3663     !!!next-token;
3664     redo B;
3665     } else {
3666     #
3667     }
3668     } else {
3669     #
3670     }
3671    
3672 wakaba 1.3 !!!parse-error (type => 'in table:'.$token->{tag_name});
3673 wakaba 1.1 $in_body->($insert_to_foster);
3674     redo B;
3675 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in caption') {
3676 wakaba 1.1 if ($token->{type} eq 'character') {
3677     ## NOTE: This is a code clone of "character in body".
3678     $reconstruct_active_formatting_elements->($insert_to_current);
3679    
3680 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3681 wakaba 1.1
3682     !!!next-token;
3683     redo B;
3684     } elsif ($token->{type} eq 'comment') {
3685     ## NOTE: This is a code clone of "comment in body".
3686     my $comment = $self->{document}->create_comment ($token->{data});
3687 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3688 wakaba 1.1 !!!next-token;
3689     redo B;
3690     } elsif ($token->{type} eq 'start tag') {
3691     if ({
3692     caption => 1, col => 1, colgroup => 1, tbody => 1,
3693     td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
3694     }->{$token->{tag_name}}) {
3695 wakaba 1.3 !!!parse-error (type => 'not closed:caption');
3696 wakaba 1.1
3697     ## As if </caption>
3698     ## have a table element in table scope
3699     my $i;
3700 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3701     my $node = $self->{open_elements}->[$_];
3702 wakaba 1.1 if ($node->[1] eq 'caption') {
3703     $i = $_;
3704     last INSCOPE;
3705     } elsif ({
3706     table => 1, html => 1,
3707     }->{$node->[1]}) {
3708     last INSCOPE;
3709     }
3710     } # INSCOPE
3711     unless (defined $i) {
3712 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:caption');
3713 wakaba 1.1 ## Ignore the token
3714     !!!next-token;
3715     redo B;
3716     }
3717    
3718     ## generate implied end tags
3719     if ({
3720     dd => 1, dt => 1, li => 1, p => 1,
3721     td => 1, th => 1, tr => 1,
3722 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3723 wakaba 1.1 !!!back-token; # <?>
3724     $token = {type => 'end tag', tag_name => 'caption'};
3725     !!!back-token;
3726     $token = {type => 'end tag',
3727 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3728 wakaba 1.1 redo B;
3729     }
3730    
3731 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
3732     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3733 wakaba 1.1 }
3734    
3735 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3736 wakaba 1.1
3737     $clear_up_to_marker->();
3738    
3739 wakaba 1.3 $self->{insertion_mode} = 'in table';
3740 wakaba 1.1
3741     ## reprocess
3742     redo B;
3743     } else {
3744     #
3745     }
3746     } elsif ($token->{type} eq 'end tag') {
3747     if ($token->{tag_name} eq 'caption') {
3748     ## have a table element in table scope
3749     my $i;
3750 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3751     my $node = $self->{open_elements}->[$_];
3752 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
3753     $i = $_;
3754     last INSCOPE;
3755     } elsif ({
3756     table => 1, html => 1,
3757     }->{$node->[1]}) {
3758     last INSCOPE;
3759     }
3760     } # INSCOPE
3761     unless (defined $i) {
3762 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3763 wakaba 1.1 ## Ignore the token
3764     !!!next-token;
3765     redo B;
3766     }
3767    
3768     ## generate implied end tags
3769     if ({
3770     dd => 1, dt => 1, li => 1, p => 1,
3771     td => 1, th => 1, tr => 1,
3772 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3773 wakaba 1.1 !!!back-token;
3774     $token = {type => 'end tag',
3775 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3776 wakaba 1.1 redo B;
3777     }
3778    
3779 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
3780     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3781 wakaba 1.1 }
3782    
3783 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3784 wakaba 1.1
3785     $clear_up_to_marker->();
3786    
3787 wakaba 1.3 $self->{insertion_mode} = 'in table';
3788 wakaba 1.1
3789     !!!next-token;
3790     redo B;
3791     } elsif ($token->{tag_name} eq 'table') {
3792 wakaba 1.3 !!!parse-error (type => 'not closed:caption');
3793 wakaba 1.1
3794     ## As if </caption>
3795     ## have a table element in table scope
3796     my $i;
3797 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3798     my $node = $self->{open_elements}->[$_];
3799 wakaba 1.1 if ($node->[1] eq 'caption') {
3800     $i = $_;
3801     last INSCOPE;
3802     } elsif ({
3803     table => 1, html => 1,
3804     }->{$node->[1]}) {
3805     last INSCOPE;
3806     }
3807     } # INSCOPE
3808     unless (defined $i) {
3809 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:caption');
3810 wakaba 1.1 ## Ignore the token
3811     !!!next-token;
3812     redo B;
3813     }
3814    
3815     ## generate implied end tags
3816     if ({
3817     dd => 1, dt => 1, li => 1, p => 1,
3818     td => 1, th => 1, tr => 1,
3819 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3820 wakaba 1.1 !!!back-token; # </table>
3821     $token = {type => 'end tag', tag_name => 'caption'};
3822     !!!back-token;
3823     $token = {type => 'end tag',
3824 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3825 wakaba 1.1 redo B;
3826     }
3827    
3828 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
3829     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3830 wakaba 1.1 }
3831    
3832 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3833 wakaba 1.1
3834     $clear_up_to_marker->();
3835    
3836 wakaba 1.3 $self->{insertion_mode} = 'in table';
3837 wakaba 1.1
3838     ## reprocess
3839     redo B;
3840     } elsif ({
3841     body => 1, col => 1, colgroup => 1,
3842     html => 1, tbody => 1, td => 1, tfoot => 1,
3843     th => 1, thead => 1, tr => 1,
3844     }->{$token->{tag_name}}) {
3845 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3846 wakaba 1.1 ## Ignore the token
         ## Ignoring a token means consuming it: fetch the following token
         ## before restarting the loop.  Nothing in this branch changes the
         ## insertion mode, so a bare |redo B| would reprocess this same end
         ## tag and report the same parse error again without ever advancing.
         !!!next-token;
3847     redo B;
3848     } else {
3849     #
3850     }
3851     } else {
3852     #
3853     }
3854    
3855     $in_body->($insert_to_current);
3856     redo B;
3857 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in column group') {
3858 wakaba 1.1 if ($token->{type} eq 'character') {
3859     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3860 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3861 wakaba 1.1 unless (length $token->{data}) {
3862     !!!next-token;
3863     redo B;
3864     }
3865     }
3866    
3867     #
3868     } elsif ($token->{type} eq 'comment') {
3869     my $comment = $self->{document}->create_comment ($token->{data});
3870 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3871 wakaba 1.1 !!!next-token;
3872     redo B;
3873     } elsif ($token->{type} eq 'start tag') {
3874     if ($token->{tag_name} eq 'col') {
3875     !!!insert-element ($token->{tag_name}, $token->{attributes});
3876 wakaba 1.3 pop @{$self->{open_elements}};
3877 wakaba 1.1 !!!next-token;
3878     redo B;
3879     } else {
3880     #
3881     }
3882     } elsif ($token->{type} eq 'end tag') {
3883     if ($token->{tag_name} eq 'colgroup') {
3884 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html') {
3885     !!!parse-error (type => 'unmatched end tag:colgroup');
3886 wakaba 1.1 ## Ignore the token
3887     !!!next-token;
3888     redo B;
3889     } else {
3890 wakaba 1.3 pop @{$self->{open_elements}}; # colgroup
3891     $self->{insertion_mode} = 'in table';
3892 wakaba 1.1 !!!next-token;
3893     redo B;
3894     }
3895     } elsif ($token->{tag_name} eq 'col') {
3896 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:col');
3897 wakaba 1.1 ## Ignore the token
3898     !!!next-token;
3899     redo B;
3900     } else {
3901     #
3902     }
3903     } else {
3904     #
3905     }
3906    
3907     ## As if </colgroup>
3908 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html') {
3909     !!!parse-error (type => 'unmatched end tag:colgroup');
3910 wakaba 1.1 ## Ignore the token
3911     !!!next-token;
3912     redo B;
3913     } else {
3914 wakaba 1.3 pop @{$self->{open_elements}}; # colgroup
3915     $self->{insertion_mode} = 'in table';
3916 wakaba 1.1 ## reprocess
3917     redo B;
3918     }
3919 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in table body') {
3920 wakaba 1.1 if ($token->{type} eq 'character') {
3921     ## NOTE: This is a "character in table" code clone.
3922     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3923 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3924 wakaba 1.1
3925     unless (length $token->{data}) {
3926     !!!next-token;
3927     redo B;
3928     }
3929     }
3930    
3931 wakaba 1.3 !!!parse-error (type => 'in table:#character');
3932    
3933 wakaba 1.1 ## As if in body, but insert into foster parent element
3934     ## ISSUE: Spec says that "whenever a node would be inserted
3935     ## into the current node" while characters might not
3936     ## result in a new Text node.
3937     $reconstruct_active_formatting_elements->($insert_to_foster);
3938    
3939     if ({
3940     table => 1, tbody => 1, tfoot => 1,
3941     thead => 1, tr => 1,
3942 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3943 wakaba 1.1 # MUST
3944     my $foster_parent_element;
3945     my $next_sibling;
3946     my $prev_sibling;
3947 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3948     if ($self->{open_elements}->[$_]->[1] eq 'table') {
3949     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
3950 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
3951     $foster_parent_element = $parent;
3952 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
3953 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
3954     } else {
3955 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
3956 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
3957     }
3958     last OE;
3959     }
3960     } # OE
3961 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
3962 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
3963     unless defined $foster_parent_element;
3964     if (defined $prev_sibling and
3965     $prev_sibling->node_type == 3) {
3966     $prev_sibling->manakai_append_text ($token->{data});
3967     } else {
3968     $foster_parent_element->insert_before
3969     ($self->{document}->create_text_node ($token->{data}),
3970     $next_sibling);
3971     }
3972     } else {
3973 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3974 wakaba 1.1 }
3975    
3976     !!!next-token;
3977     redo B;
3978     } elsif ($token->{type} eq 'comment') {
3979     ## Copied from 'in table'
3980     my $comment = $self->{document}->create_comment ($token->{data});
3981 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3982 wakaba 1.1 !!!next-token;
3983     redo B;
3984     } elsif ($token->{type} eq 'start tag') {
3985     if ({
3986     tr => 1,
3987     th => 1, td => 1,
3988     }->{$token->{tag_name}}) {
3989 wakaba 1.3 unless ($token->{tag_name} eq 'tr') {
3990     !!!parse-error (type => 'missing start tag:tr');
3991     }
3992    
3993 wakaba 1.1 ## Clear back to table body context
3994     while (not {
3995     tbody => 1, tfoot => 1, thead => 1, html => 1,
3996 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3997     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3998     pop @{$self->{open_elements}};
3999 wakaba 1.1 }
4000    
4001 wakaba 1.3 $self->{insertion_mode} = 'in row';
4002 wakaba 1.1 if ($token->{tag_name} eq 'tr') {
4003     !!!insert-element ($token->{tag_name}, $token->{attributes});
4004     !!!next-token;
4005     } else {
4006     !!!insert-element ('tr');
4007     ## reprocess
4008     }
4009     redo B;
4010     } elsif ({
4011     caption => 1, col => 1, colgroup => 1,
4012     tbody => 1, tfoot => 1, thead => 1,
4013     }->{$token->{tag_name}}) {
4014     ## have an element in table scope
4015     my $i;
4016 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4017     my $node = $self->{open_elements}->[$_];
4018 wakaba 1.1 if ({
4019     tbody => 1, thead => 1, tfoot => 1,
4020     }->{$node->[1]}) {
4021     $i = $_;
4022     last INSCOPE;
4023     } elsif ({
4024     table => 1, html => 1,
4025     }->{$node->[1]}) {
4026     last INSCOPE;
4027     }
4028     } # INSCOPE
4029     unless (defined $i) {
4030 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4031 wakaba 1.1 ## Ignore the token
4032     !!!next-token;
4033     redo B;
4034     }
4035    
4036     ## Clear back to table body context
4037     while (not {
4038     tbody => 1, tfoot => 1, thead => 1, html => 1,
4039 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4040     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4041     pop @{$self->{open_elements}};
4042 wakaba 1.1 }
4043    
4044     ## As if <{current node}>
4045     ## have an element in table scope
4046     ## true by definition
4047    
4048     ## Clear back to table body context
4049     ## nop by definition
4050    
4051 wakaba 1.3 pop @{$self->{open_elements}};
4052     $self->{insertion_mode} = 'in table';
4053 wakaba 1.1 ## reprocess
4054     redo B;
4055     } elsif ($token->{tag_name} eq 'table') {
4056     ## NOTE: This is a code clone of "table in table"
4057 wakaba 1.3 !!!parse-error (type => 'not closed:table');
4058 wakaba 1.1
4059     ## As if </table>
4060     ## have a table element in table scope
4061     my $i;
4062 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4063     my $node = $self->{open_elements}->[$_];
4064 wakaba 1.1 if ($node->[1] eq 'table') {
4065     $i = $_;
4066     last INSCOPE;
4067     } elsif ({
4068     table => 1, html => 1,
4069     }->{$node->[1]}) {
4070     last INSCOPE;
4071     }
4072     } # INSCOPE
4073     unless (defined $i) {
4074 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:table');
4075 wakaba 1.1 ## Ignore tokens </table><table>
4076     !!!next-token;
4077     redo B;
4078     }
4079    
4080     ## generate implied end tags
4081     if ({
4082     dd => 1, dt => 1, li => 1, p => 1,
4083     td => 1, th => 1, tr => 1,
4084 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4085 wakaba 1.1 !!!back-token; # <table>
4086     $token = {type => 'end tag', tag_name => 'table'};
4087     !!!back-token;
4088     $token = {type => 'end tag',
4089 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4090 wakaba 1.1 redo B;
4091     }
4092    
4093 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
4094     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4095 wakaba 1.1 }
4096    
4097 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4098 wakaba 1.1
4099 wakaba 1.3 $self->_reset_insertion_mode;
4100 wakaba 1.1
4101     ## reprocess
4102     redo B;
4103     } else {
4104     #
4105     }
4106     } elsif ($token->{type} eq 'end tag') {
4107     if ({
4108     tbody => 1, tfoot => 1, thead => 1,
4109     }->{$token->{tag_name}}) {
4110     ## have an element in table scope
4111     my $i;
4112 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4113     my $node = $self->{open_elements}->[$_];
4114 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4115     $i = $_;
4116     last INSCOPE;
4117     } elsif ({
4118     table => 1, html => 1,
4119     }->{$node->[1]}) {
4120     last INSCOPE;
4121     }
4122     } # INSCOPE
4123     unless (defined $i) {
4124 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4125 wakaba 1.1 ## Ignore the token
4126     !!!next-token;
4127     redo B;
4128     }
4129    
4130     ## Clear back to table body context
4131     while (not {
4132     tbody => 1, tfoot => 1, thead => 1, html => 1,
4133 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4134     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4135     pop @{$self->{open_elements}};
4136 wakaba 1.1 }
4137    
4138 wakaba 1.3 pop @{$self->{open_elements}};
4139     $self->{insertion_mode} = 'in table';
4140 wakaba 1.1 !!!next-token;
4141     redo B;
4142     } elsif ($token->{tag_name} eq 'table') {
4143     ## have an element in table scope
4144     my $i;
4145 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4146     my $node = $self->{open_elements}->[$_];
4147 wakaba 1.1 if ({
4148     tbody => 1, thead => 1, tfoot => 1,
4149     }->{$node->[1]}) {
4150     $i = $_;
4151     last INSCOPE;
4152     } elsif ({
4153     table => 1, html => 1,
4154     }->{$node->[1]}) {
4155     last INSCOPE;
4156     }
4157     } # INSCOPE
4158     unless (defined $i) {
4159 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4160 wakaba 1.1 ## Ignore the token
4161     !!!next-token;
4162     redo B;
4163     }
4164    
4165     ## Clear back to table body context
4166     while (not {
4167     tbody => 1, tfoot => 1, thead => 1, html => 1,
4168 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4169     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4170     pop @{$self->{open_elements}};
4171 wakaba 1.1 }
4172    
4173     ## As if <{current node}>
4174     ## have an element in table scope
4175     ## true by definition
4176    
4177     ## Clear back to table body context
4178     ## nop by definition
4179    
4180 wakaba 1.3 pop @{$self->{open_elements}};
4181     $self->{insertion_mode} = 'in table';
4182 wakaba 1.1 ## reprocess
4183     redo B;
4184     } elsif ({
4185     body => 1, caption => 1, col => 1, colgroup => 1,
4186     html => 1, td => 1, th => 1, tr => 1,
4187     }->{$token->{tag_name}}) {
4188 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4189 wakaba 1.1 ## Ignore the token
4190     !!!next-token;
4191     redo B;
4192     } else {
4193     #
4194     }
4195     } else {
4196     #
4197     }
4198    
4199     ## As if in table
4200 wakaba 1.3 !!!parse-error (type => 'in table:'.$token->{tag_name});
4201 wakaba 1.1 $in_body->($insert_to_foster);
4202     redo B;
4203 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in row') {
4204 wakaba 1.1 if ($token->{type} eq 'character') {
4205     ## NOTE: This is a "character in table" code clone.
4206     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4207 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4208 wakaba 1.1
4209     unless (length $token->{data}) {
4210     !!!next-token;
4211     redo B;
4212     }
4213     }
4214    
4215 wakaba 1.3 !!!parse-error (type => 'in table:#character');
4216    
4217 wakaba 1.1 ## As if in body, but insert into foster parent element
4218     ## ISSUE: Spec says that "whenever a node would be inserted
4219     ## into the current node" while characters might not
4220     ## result in a new Text node.
4221     $reconstruct_active_formatting_elements->($insert_to_foster);
4222    
4223     if ({
4224     table => 1, tbody => 1, tfoot => 1,
4225     thead => 1, tr => 1,
4226 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4227 wakaba 1.1 # MUST
4228     my $foster_parent_element;
4229     my $next_sibling;
4230     my $prev_sibling;
4231 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
4232     if ($self->{open_elements}->[$_]->[1] eq 'table') {
4233     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
4234 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
4235     $foster_parent_element = $parent;
4236 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
4237 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
4238     } else {
4239 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
4240 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
4241     }
4242     last OE;
4243     }
4244     } # OE
4245 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
4246 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
4247     unless defined $foster_parent_element;
4248     if (defined $prev_sibling and
4249     $prev_sibling->node_type == 3) {
4250     $prev_sibling->manakai_append_text ($token->{data});
4251     } else {
4252     $foster_parent_element->insert_before
4253     ($self->{document}->create_text_node ($token->{data}),
4254     $next_sibling);
4255     }
4256     } else {
4257 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4258 wakaba 1.1 }
4259    
4260     !!!next-token;
4261     redo B;
4262     } elsif ($token->{type} eq 'comment') {
4263     ## Copied from 'in table'
4264     my $comment = $self->{document}->create_comment ($token->{data});
4265 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4266 wakaba 1.1 !!!next-token;
4267     redo B;
4268     } elsif ($token->{type} eq 'start tag') {
4269     if ($token->{tag_name} eq 'th' or
4270     $token->{tag_name} eq 'td') {
4271     ## Clear back to table row context
4272     while (not {
4273     tr => 1, html => 1,
4274 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4275     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4276     pop @{$self->{open_elements}};
4277 wakaba 1.1 }
4278    
4279     !!!insert-element ($token->{tag_name}, $token->{attributes});
4280 wakaba 1.3 $self->{insertion_mode} = 'in cell';
4281 wakaba 1.1
4282     push @$active_formatting_elements, ['#marker', ''];
4283    
4284     !!!next-token;
4285     redo B;
4286     } elsif ({
4287     caption => 1, col => 1, colgroup => 1,
4288     tbody => 1, tfoot => 1, thead => 1, tr => 1,
4289     }->{$token->{tag_name}}) {
4290     ## As if </tr>
4291     ## have an element in table scope
4292     my $i;
4293 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4294     my $node = $self->{open_elements}->[$_];
4295 wakaba 1.1 if ($node->[1] eq 'tr') {
4296     $i = $_;
4297     last INSCOPE;
4298     } elsif ({
4299     table => 1, html => 1,
4300     }->{$node->[1]}) {
4301     last INSCOPE;
4302     }
4303     } # INSCOPE
4304     unless (defined $i) {
4305 wakaba 1.3 !!!parse-error (type => 'unmacthed end tag:'.$token->{tag_name});
4306 wakaba 1.1 ## Ignore the token
4307     !!!next-token;
4308     redo B;
4309     }
4310    
4311     ## Clear back to table row context
4312     while (not {
4313     tr => 1, html => 1,
4314 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4315     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4316     pop @{$self->{open_elements}};
4317 wakaba 1.1 }
4318    
4319 wakaba 1.3 pop @{$self->{open_elements}}; # tr
4320     $self->{insertion_mode} = 'in table body';
4321 wakaba 1.1 ## reprocess
4322     redo B;
4323     } elsif ($token->{tag_name} eq 'table') {
4324     ## NOTE: This is a code clone of "table in table"
4325 wakaba 1.3 !!!parse-error (type => 'not closed:table');
4326 wakaba 1.1
4327     ## As if </table>
4328     ## have a table element in table scope
4329     my $i;
4330 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4331     my $node = $self->{open_elements}->[$_];
4332 wakaba 1.1 if ($node->[1] eq 'table') {
4333     $i = $_;
4334     last INSCOPE;
4335     } elsif ({
4336     table => 1, html => 1,
4337     }->{$node->[1]}) {
4338     last INSCOPE;
4339     }
4340     } # INSCOPE
4341     unless (defined $i) {
4342 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:table');
4343 wakaba 1.1 ## Ignore tokens </table><table>
4344     !!!next-token;
4345     redo B;
4346     }
4347    
4348     ## generate implied end tags
4349     if ({
4350     dd => 1, dt => 1, li => 1, p => 1,
4351     td => 1, th => 1, tr => 1,
4352 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4353 wakaba 1.1 !!!back-token; # <table>
4354     $token = {type => 'end tag', tag_name => 'table'};
4355     !!!back-token;
4356     $token = {type => 'end tag',
4357 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4358 wakaba 1.1 redo B;
4359     }
4360    
4361 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
4362     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4363 wakaba 1.1 }
4364    
4365 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4366 wakaba 1.1
4367 wakaba 1.3 $self->_reset_insertion_mode;
4368 wakaba 1.1
4369     ## reprocess
4370     redo B;
4371     } else {
4372     #
4373     }
4374     } elsif ($token->{type} eq 'end tag') {
4375     if ($token->{tag_name} eq 'tr') {
4376     ## have an element in table scope
4377     my $i;
4378 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4379     my $node = $self->{open_elements}->[$_];
4380 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4381     $i = $_;
4382     last INSCOPE;
4383     } elsif ({
4384     table => 1, html => 1,
4385     }->{$node->[1]}) {
4386     last INSCOPE;
4387     }
4388     } # INSCOPE
4389     unless (defined $i) {
4390 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4391 wakaba 1.1 ## Ignore the token
4392     !!!next-token;
4393     redo B;
4394     }
4395    
4396     ## Clear back to table row context
4397     while (not {
4398     tr => 1, html => 1,
4399 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4400     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4401     pop @{$self->{open_elements}};
4402 wakaba 1.1 }
4403    
4404 wakaba 1.3 pop @{$self->{open_elements}}; # tr
4405     $self->{insertion_mode} = 'in table body';
4406 wakaba 1.1 !!!next-token;
4407     redo B;
4408     } elsif ($token->{tag_name} eq 'table') {
4409     ## As if </tr>
4410     ## have an element in table scope
4411     my $i;
4412 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4413     my $node = $self->{open_elements}->[$_];
4414 wakaba 1.1 if ($node->[1] eq 'tr') {
4415     $i = $_;
4416     last INSCOPE;
4417     } elsif ({
4418     table => 1, html => 1,
4419     }->{$node->[1]}) {
4420     last INSCOPE;
4421     }
4422     } # INSCOPE
4423     unless (defined $i) {
4424 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{type});
4425 wakaba 1.1 ## Ignore the token
4426     !!!next-token;
4427     redo B;
4428     }
4429    
4430     ## Clear back to table row context
4431     while (not {
4432     tr => 1, html => 1,
4433 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4434     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4435     pop @{$self->{open_elements}};
4436 wakaba 1.1 }
4437    
4438 wakaba 1.3 pop @{$self->{open_elements}}; # tr
4439     $self->{insertion_mode} = 'in table body';
4440 wakaba 1.1 ## reprocess
4441     redo B;
4442     } elsif ({
4443     tbody => 1, tfoot => 1, thead => 1,
4444     }->{$token->{tag_name}}) {
4445     ## have an element in table scope
4446     my $i;
4447 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4448     my $node = $self->{open_elements}->[$_];
4449 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4450     $i = $_;
4451     last INSCOPE;
4452     } elsif ({
4453     table => 1, html => 1,
4454     }->{$node->[1]}) {
4455     last INSCOPE;
4456     }
4457     } # INSCOPE
4458     unless (defined $i) {
4459 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4460 wakaba 1.1 ## Ignore the token
4461     !!!next-token;
4462     redo B;
4463     }
4464    
4465     ## As if </tr>
4466     ## have an element in table scope
4467     my $i;
4468 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4469     my $node = $self->{open_elements}->[$_];
4470 wakaba 1.1 if ($node->[1] eq 'tr') {
4471     $i = $_;
4472     last INSCOPE;
4473     } elsif ({
4474     table => 1, html => 1,
4475     }->{$node->[1]}) {
4476     last INSCOPE;
4477     }
4478     } # INSCOPE
4479     unless (defined $i) {
4480 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:tr');
4481 wakaba 1.1 ## Ignore the token
4482     !!!next-token;
4483     redo B;
4484     }
4485    
4486     ## Clear back to table row context
4487     while (not {
4488     tr => 1, html => 1,
4489 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4490     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4491     pop @{$self->{open_elements}};
4492 wakaba 1.1 }
4493    
4494 wakaba 1.3 pop @{$self->{open_elements}}; # tr
4495     $self->{insertion_mode} = 'in table body';
4496 wakaba 1.1 ## reprocess
4497     redo B;
4498     } elsif ({
4499     body => 1, caption => 1, col => 1,
4500     colgroup => 1, html => 1, td => 1, th => 1,
4501     }->{$token->{tag_name}}) {
4502 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4503 wakaba 1.1 ## Ignore the token
4504     !!!next-token;
4505     redo B;
4506     } else {
4507     #
4508     }
4509     } else {
4510     #
4511     }
4512    
4513     ## As if in table
4514 wakaba 1.3 !!!parse-error (type => 'in table:'.$token->{tag_name});
4515 wakaba 1.1 $in_body->($insert_to_foster);
4516     redo B;
4517 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in cell') {
4518 wakaba 1.1 if ($token->{type} eq 'character') {
4519     ## NOTE: This is a code clone of "character in body".
4520     $reconstruct_active_formatting_elements->($insert_to_current);
4521    
4522 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4523 wakaba 1.1
4524     !!!next-token;
4525     redo B;
4526     } elsif ($token->{type} eq 'comment') {
4527     ## NOTE: This is a code clone of "comment in body".
4528     my $comment = $self->{document}->create_comment ($token->{data});
4529 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4530 wakaba 1.1 !!!next-token;
4531     redo B;
4532     } elsif ($token->{type} eq 'start tag') {
4533     if ({
4534     caption => 1, col => 1, colgroup => 1,
4535     tbody => 1, td => 1, tfoot => 1, th => 1,
4536     thead => 1, tr => 1,
4537     }->{$token->{tag_name}}) {
4538     ## have an element in table scope
4539     my $tn;
4540 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4541     my $node = $self->{open_elements}->[$_];
4542 wakaba 1.1 if ($node->[1] eq 'td' or $node->[1] eq 'th') {
4543     $tn = $node->[1];
4544     last INSCOPE;
4545     } elsif ({
4546     table => 1, html => 1,
4547     }->{$node->[1]}) {
4548     last INSCOPE;
4549     }
4550     } # INSCOPE
4551     unless (defined $tn) {
4552 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4553 wakaba 1.1 ## Ignore the token
4554     !!!next-token;
4555     redo B;
4556     }
4557    
4558     ## Close the cell
4559     !!!back-token; # <?>
4560     $token = {type => 'end tag', tag_name => $tn};
4561     redo B;
4562     } else {
4563     #
4564     }
4565     } elsif ($token->{type} eq 'end tag') {
4566     if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
4567     ## have an element in table scope
4568     my $i;
4569 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4570     my $node = $self->{open_elements}->[$_];
4571 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4572     $i = $_;
4573     last INSCOPE;
4574     } elsif ({
4575     table => 1, html => 1,
4576     }->{$node->[1]}) {
4577     last INSCOPE;
4578     }
4579     } # INSCOPE
4580     unless (defined $i) {
4581 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4582 wakaba 1.1 ## Ignore the token
4583     !!!next-token;
4584     redo B;
4585     }
4586    
4587     ## generate implied end tags
4588     if ({
4589     dd => 1, dt => 1, li => 1, p => 1,
4590     td => ($token->{tag_name} eq 'th'),
4591     th => ($token->{tag_name} eq 'td'),
4592     tr => 1,
4593 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4594 wakaba 1.1 !!!back-token;
4595     $token = {type => 'end tag',
4596 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4597 wakaba 1.1 redo B;
4598     }
4599    
4600 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
4601     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4602 wakaba 1.1 }
4603    
4604 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4605 wakaba 1.1
4606     $clear_up_to_marker->();
4607    
4608 wakaba 1.3 $self->{insertion_mode} = 'in row';
4609 wakaba 1.1
4610     !!!next-token;
4611     redo B;
4612     } elsif ({
4613     body => 1, caption => 1, col => 1,
4614     colgroup => 1, html => 1,
4615     }->{$token->{tag_name}}) {
4616 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4617 wakaba 1.1 ## Ignore the token
4618     !!!next-token;
4619     redo B;
4620     } elsif ({
4621     table => 1, tbody => 1, tfoot => 1,
4622     thead => 1, tr => 1,
4623     }->{$token->{tag_name}}) {
4624     ## have an element in table scope
4625     my $i;
4626     my $tn;
4627 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4628     my $node = $self->{open_elements}->[$_];
4629 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4630     $i = $_;
4631     last INSCOPE;
4632     } elsif ($node->[1] eq 'td' or $node->[1] eq 'th') {
4633     $tn = $node->[1];
4634     ## NOTE: There is exactly one |td| or |th| element
4635     ## in scope in the stack of open elements by definition.
4636     } elsif ({
4637     table => 1, html => 1,
4638     }->{$node->[1]}) {
4639     last INSCOPE;
4640     }
4641     } # INSCOPE
4642     unless (defined $i) {
4643 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4644 wakaba 1.1 ## Ignore the token
4645     !!!next-token;
4646     redo B;
4647     }
4648    
4649     ## Close the cell
4650     !!!back-token; # </?>
4651     $token = {type => 'end tag', tag_name => $tn};
4652     redo B;
4653     } else {
4654     #
4655     }
4656     } else {
4657     #
4658     }
4659    
4660     $in_body->($insert_to_current);
4661     redo B;
4662 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in select') {
4663 wakaba 1.1 if ($token->{type} eq 'character') {
4664 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4665 wakaba 1.1 !!!next-token;
4666     redo B;
4667     } elsif ($token->{type} eq 'comment') {
4668     my $comment = $self->{document}->create_comment ($token->{data});
4669 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4670 wakaba 1.1 !!!next-token;
4671     redo B;
4672     } elsif ($token->{type} eq 'start tag') {
4673     if ($token->{tag_name} eq 'option') {
4674 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
4675 wakaba 1.1 ## As if </option>
4676 wakaba 1.3 pop @{$self->{open_elements}};
4677 wakaba 1.1 }
4678    
4679     !!!insert-element ($token->{tag_name}, $token->{attributes});
4680     !!!next-token;
4681     redo B;
4682     } elsif ($token->{tag_name} eq 'optgroup') {
4683 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
4684 wakaba 1.1 ## As if </option>
4685 wakaba 1.3 pop @{$self->{open_elements}};
4686 wakaba 1.1 }
4687    
4688 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
4689 wakaba 1.1 ## As if </optgroup>
4690 wakaba 1.3 pop @{$self->{open_elements}};
4691 wakaba 1.1 }
4692    
4693     !!!insert-element ($token->{tag_name}, $token->{attributes});
4694     !!!next-token;
4695     redo B;
4696     } elsif ($token->{tag_name} eq 'select') {
4697 wakaba 1.3 !!!parse-error (type => 'not closed:select');
4698 wakaba 1.1 ## As if </select> instead
4699     ## have an element in table scope
4700     my $i;
4701 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4702     my $node = $self->{open_elements}->[$_];
4703 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4704     $i = $_;
4705     last INSCOPE;
4706     } elsif ({
4707     table => 1, html => 1,
4708     }->{$node->[1]}) {
4709     last INSCOPE;
4710     }
4711     } # INSCOPE
4712     unless (defined $i) {
4713 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:select');
4714 wakaba 1.1 ## Ignore the token
4715     !!!next-token;
4716     redo B;
4717     }
4718    
4719 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4720 wakaba 1.1
4721 wakaba 1.3 $self->_reset_insertion_mode;
4722 wakaba 1.1
4723     !!!next-token;
4724     redo B;
4725     } else {
4726     #
4727     }
4728     } elsif ($token->{type} eq 'end tag') {
4729     if ($token->{tag_name} eq 'optgroup') {
4730 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option' and
4731     $self->{open_elements}->[-2]->[1] eq 'optgroup') {
4732 wakaba 1.1 ## As if </option>
4733 wakaba 1.3 splice @{$self->{open_elements}}, -2;
4734     } elsif ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
4735     pop @{$self->{open_elements}};
4736 wakaba 1.1 } else {
4737 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4738 wakaba 1.1 ## Ignore the token
4739     }
4740     !!!next-token;
4741     redo B;
4742     } elsif ($token->{tag_name} eq 'option') {
4743 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
4744     pop @{$self->{open_elements}};
4745 wakaba 1.1 } else {
4746 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4747 wakaba 1.1 ## Ignore the token
4748     }
4749     !!!next-token;
4750     redo B;
4751     } elsif ($token->{tag_name} eq 'select') {
4752     ## have an element in table scope
4753     my $i;
4754 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4755     my $node = $self->{open_elements}->[$_];
4756 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4757     $i = $_;
4758     last INSCOPE;
4759     } elsif ({
4760     table => 1, html => 1,
4761     }->{$node->[1]}) {
4762     last INSCOPE;
4763     }
4764     } # INSCOPE
4765     unless (defined $i) {
4766 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4767 wakaba 1.1 ## Ignore the token
4768     !!!next-token;
4769     redo B;
4770     }
4771    
4772 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4773 wakaba 1.1
4774 wakaba 1.3 $self->_reset_insertion_mode;
4775 wakaba 1.1
4776     !!!next-token;
4777     redo B;
4778     } elsif ({
4779     caption => 1, table => 1, tbody => 1,
4780     tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
4781     }->{$token->{tag_name}}) {
4782 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4783 wakaba 1.1
4784     ## have an element in table scope
4785     my $i;
4786 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4787     my $node = $self->{open_elements}->[$_];
4788 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4789     $i = $_;
4790     last INSCOPE;
4791     } elsif ({
4792     table => 1, html => 1,
4793     }->{$node->[1]}) {
4794     last INSCOPE;
4795     }
4796     } # INSCOPE
4797     unless (defined $i) {
4798     ## Ignore the token
4799     !!!next-token;
4800     redo B;
4801     }
4802    
4803     ## As if </select>
4804     ## have an element in table scope
4805     undef $i;
4806 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4807     my $node = $self->{open_elements}->[$_];
4808 wakaba 1.1 if ($node->[1] eq 'select') {
4809     $i = $_;
4810     last INSCOPE;
4811     } elsif ({
4812     table => 1, html => 1,
4813     }->{$node->[1]}) {
4814     last INSCOPE;
4815     }
4816     } # INSCOPE
4817     unless (defined $i) {
4818 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:select');
4819 wakaba 1.1 ## Ignore the </select> token
4820     !!!next-token; ## TODO: ok?
4821     redo B;
4822     }
4823    
4824 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4825 wakaba 1.1
4826 wakaba 1.3 $self->_reset_insertion_mode;
4827 wakaba 1.1
4828     ## reprocess
4829     redo B;
4830     } else {
4831     #
4832     }
4833     } else {
4834     #
4835     }
4836    
4837 wakaba 1.3 !!!parse-error (type => 'in select:'.$token->{tag_name});
4838 wakaba 1.1 ## Ignore the token
4839     !!!next-token;
4840     redo B;
4841 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after body') {
4842 wakaba 1.1 if ($token->{type} eq 'character') {
4843     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4844     ## As if in body
4845     $reconstruct_active_formatting_elements->($insert_to_current);
4846    
4847 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4848 wakaba 1.1
4849     unless (length $token->{data}) {
4850     !!!next-token;
4851     redo B;
4852     }
4853     }
4854    
4855     #
4856 wakaba 1.3 !!!parse-error (type => 'after body:#'.$token->{type});
4857 wakaba 1.1 } elsif ($token->{type} eq 'comment') {
4858     my $comment = $self->{document}->create_comment ($token->{data});
4859 wakaba 1.3 $self->{open_elements}->[0]->[0]->append_child ($comment);
4860 wakaba 1.1 !!!next-token;
4861     redo B;
4862 wakaba 1.3 } elsif ($token->{type} eq 'start tag') {
4863     !!!parse-error (type => 'after body:'.$token->{tag_name});
4864     #
4865 wakaba 1.1 } elsif ($token->{type} eq 'end tag') {
4866     if ($token->{tag_name} eq 'html') {
4867 wakaba 1.3 if (defined $self->{inner_html_node}) {
4868     !!!parse-error (type => 'unmatched end tag:html');
4869     ## Ignore the token
4870     !!!next-token;
4871     redo B;
4872     } else {
4873     $phase = 'trailing end';
4874     !!!next-token;
4875     redo B;
4876     }
4877 wakaba 1.1 } else {
4878 wakaba 1.3 !!!parse-error (type => 'after body:/'.$token->{tag_name});
4879 wakaba 1.1 }
4880     } else {
4881 wakaba 1.3 !!!parse-error (type => 'after body:#'.$token->{type});
4882 wakaba 1.1 }
4883    
4884 wakaba 1.3 $self->{insertion_mode} = 'in body';
4885 wakaba 1.1 ## reprocess
4886     redo B;
4887 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in frameset') {
4888 wakaba 1.1 if ($token->{type} eq 'character') {
4889     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4890 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4891 wakaba 1.1
4892     unless (length $token->{data}) {
4893     !!!next-token;
4894     redo B;
4895     }
4896     }
4897    
4898     #
4899     } elsif ($token->{type} eq 'comment') {
4900     my $comment = $self->{document}->create_comment ($token->{data});
4901 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4902 wakaba 1.1 !!!next-token;
4903     redo B;
4904     } elsif ($token->{type} eq 'start tag') {
4905     if ($token->{tag_name} eq 'frameset') {
4906     !!!insert-element ($token->{tag_name}, $token->{attributes});
4907     !!!next-token;
4908     redo B;
4909     } elsif ($token->{tag_name} eq 'frame') {
4910     !!!insert-element ($token->{tag_name}, $token->{attributes});
4911 wakaba 1.3 pop @{$self->{open_elements}};
4912 wakaba 1.1 !!!next-token;
4913     redo B;
4914     } elsif ($token->{tag_name} eq 'noframes') {
4915     $in_body->($insert_to_current);
4916     redo B;
4917     } else {
4918     #
4919     }
4920     } elsif ($token->{type} eq 'end tag') {
4921     if ($token->{tag_name} eq 'frameset') {
4922 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html' and
4923     @{$self->{open_elements}} == 1) {
4924     !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4925 wakaba 1.1 ## Ignore the token
4926     !!!next-token;
4927     } else {
4928 wakaba 1.3 pop @{$self->{open_elements}};
4929 wakaba 1.1 !!!next-token;
4930     }
4931    
4932     ## if not inner_html and
4933 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'frameset') {
4934     $self->{insertion_mode} = 'after frameset';
4935 wakaba 1.1 }
4936     redo B;
4937     } else {
4938     #
4939     }
4940     } else {
4941     #
4942     }
4943    
4944 wakaba 1.3 if (defined $token->{tag_name}) {
4945     !!!parse-error (type => 'in frameset:'.$token->{tag_name});
4946     } else {
4947     !!!parse-error (type => 'in frameset:#'.$token->{type});
4948     }
4949 wakaba 1.1 ## Ignore the token
4950     !!!next-token;
4951     redo B;
4952 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after frameset') {
4953 wakaba 1.1 if ($token->{type} eq 'character') {
4954     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4955 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4956 wakaba 1.1
4957     unless (length $token->{data}) {
4958     !!!next-token;
4959     redo B;
4960     }
4961     }
4962    
4963     #
4964     } elsif ($token->{type} eq 'comment') {
4965     my $comment = $self->{document}->create_comment ($token->{data});
4966 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4967 wakaba 1.1 !!!next-token;
4968     redo B;
4969     } elsif ($token->{type} eq 'start tag') {
4970     if ($token->{tag_name} eq 'noframes') {
4971     $in_body->($insert_to_current);
4972     redo B;
4973     } else {
4974     #
4975     }
4976     } elsif ($token->{type} eq 'end tag') {
4977     if ($token->{tag_name} eq 'html') {
4978     $phase = 'trailing end';
4979     !!!next-token;
4980     redo B;
4981     } else {
4982     #
4983     }
4984     } else {
4985     #
4986     }
4987    
4988 wakaba 1.3 if (defined $token->{tag_name}) {
4989     !!!parse-error (type => 'after frameset:'.$token->{tag_name});
4990     } else {
4991     !!!parse-error (type => 'after frameset:#'.$token->{type});
4992     }
4993 wakaba 1.1 ## Ignore the token
4994     !!!next-token;
4995     redo B;
4996    
4997     ## ISSUE: An issue in spec there
4998     } else {
4999 wakaba 1.3 die "$0: $self->{insertion_mode}: Unknown insertion mode";
5000 wakaba 1.1 }
5001     }
5002     } elsif ($phase eq 'trailing end') {
5003     ## states in the main stage is preserved yet # MUST
5004    
5005     if ($token->{type} eq 'DOCTYPE') {
5006 wakaba 1.3 !!!parse-error (type => 'after html:#DOCTYPE');
5007 wakaba 1.1 ## Ignore the token
5008     !!!next-token;
5009     redo B;
5010     } elsif ($token->{type} eq 'comment') {
5011     my $comment = $self->{document}->create_comment ($token->{data});
5012     $self->{document}->append_child ($comment);
5013     !!!next-token;
5014     redo B;
5015     } elsif ($token->{type} eq 'character') {
5016     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5017     my $data = $1;
5018     ## As if in the main phase.
5019     ## NOTE: The insertion mode in the main phase
5020     ## just before the phase has been changed to the trailing
5021     ## end phase is either "after body" or "after frameset".
5022     $reconstruct_active_formatting_elements->($insert_to_current)
5023     if $phase eq 'main';
5024    
5025 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($data);
5026 wakaba 1.1
5027     unless (length $token->{data}) {
5028     !!!next-token;
5029     redo B;
5030     }
5031     }
5032    
5033 wakaba 1.3 !!!parse-error (type => 'after html:#character');
5034 wakaba 1.1 $phase = 'main';
5035     ## reprocess
5036     redo B;
5037     } elsif ($token->{type} eq 'start tag' or
5038     $token->{type} eq 'end tag') {
5039 wakaba 1.3 !!!parse-error (type => 'after html:'.$token->{tag_name});
5040 wakaba 1.1 $phase = 'main';
5041     ## reprocess
5042     redo B;
5043     } elsif ($token->{type} eq 'end-of-file') {
5044     ## Stop parsing
5045     last B;
5046     } else {
5047     die "$0: $token->{type}: Unknown token";
5048     }
5049     }
5050     } # B
5051    
5052     ## Stop parsing # MUST
5053    
5054     ## TODO: script stuffs
5055 wakaba 1.3 } # _tree_construct_main
5056    
## Replace the children of |$node| with the result of parsing a
## string.
##
## Invoked as |$class->set_inner_html ($node, $string, $onerror)|:
##   $node    - Node whose content is replaced.  Node type 9 and
##              node type 1 are handled; any other type |die|s.
##   $string  - Source text.  It is accessed through a reference to
##              the argument rather than through a copy.
##   $onerror - Optional code reference invoked for parse errors.
##              In the node type 1 case it additionally receives
##              |line| and |column| named arguments, and when it is
##              omitted errors are reported with |warn|.
##
## The "Step N" markers name the steps of the algorithm being
## implemented.  They are executed in the order written here, which
## is not numerical order.
sub set_inner_html ($$$) {
  my ($class, $node) = @_[0, 1];
  my $s = \$_[2];
  my $onerror = $_[3];

  my $nt = $node->node_type;

  if ($nt == 9) {
    # MUST

    ## Step 1 # MUST
    ## TODO: If the document has an active parser, ...
    ## ISSUE: There is an issue in the spec.

    ## Step 2 # MUST
    ## Snapshot the child list before removing anything from it.
    my @old_children = @{$node->child_nodes};
    for my $old_child (@old_children) {
      $node->remove_child ($old_child);
    }

    ## Step 3, 4, 5 # MUST
    return $class->parse_string ($$s => $node, $onerror);
  }

  die "$0: |set_inner_html| is not defined for node of type $nt"
      unless $nt == 1;

  ## From here on |$node| is of node type 1.

  ## TODO: If non-html element

  ## NOTE: Most of this code is copied from |parse_string|

  my $xhtml_ns = 'http://www.w3.org/1999/xhtml';

  ## Step 1 # MUST
  ## The text is parsed into a separate document created from the
  ## implementation of the node's owner document.
  my $this_doc = $node->owner_document;
  my $doc = $this_doc->implementation->create_document;
  $doc->manakai_is_html (1);
  my $p = $class->new;
  $p->{document} = $doc;

  ## Step 9 # MUST
  ## Input position shared by the two closures below: |$i| is the
  ## offset of the next unread character of |$$s|; |$line| and
  ## |$column| are what gets reported with parse errors.
  my $i = 0;
  my $line = 1;
  my $column = 0;
  $p->{set_next_input_character} = sub {
    ## NOTE: This variable has to be called |$self|, since the
    ## |!!!parse-error| line below is a macro and is kept together
    ## with the name the original code had in scope.
    my $self = shift;

    ## Push the current character onto the front of the history
    ## list and drop the oldest entry from its end.
    pop @{$self->{prev_input_character}};
    unshift @{$self->{prev_input_character}}, $self->{next_input_character};

    ## End of input is represented by -1.
    if ($i >= length $$s) {
      $self->{next_input_character} = -1;
      return;
    }

    my $c = ord substr $$s, $i++, 1;
    $self->{next_input_character} = $c;
    $column++;

    if ($c == 0x000A) { # LF
      $line++;
      $column = 0;
    } elsif ($c == 0x000D) { # CR
      ## CR LF and a lone CR are both delivered as one LF.
      $i++ if substr ($$s, $i, 1) eq "\x0A";
      $self->{next_input_character} = 0x000A; # LF # MUST
      $line++;
      $column = 0;
    } elsif ($c > 0x10FFFF) {
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    } elsif ($c == 0x0000) { # NULL
      !!!parse-error (type => 'NULL');
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    }
  };
  $p->{prev_input_character} = [-1, -1, -1];
  $p->{next_input_character} = -1;

  ## Without a handler supplied by the caller, fall back to |warn|.
  my $report_error = $onerror || sub {
    my %opt = @_;
    warn "Parse error ($opt{type}) at line $opt{line} column $opt{column}\n";
  };
  $p->{parse_error} = sub {
    ## Attach the current input position to every report.
    $report_error->(@_, line => $line, column => $column);
  };

  $p->_initialize_tokenizer;
  $p->_initialize_tree_constructor;

  ## Step 2
  ## The initial content model flag depends on the local name of
  ## the context element; names not listed here get |PCDATA|.
  my %content_model_of = (
    (map { $_ => 'RCDATA' } qw(title textarea)),
    (map { $_ => 'CDATA' }
         qw(style script xmp iframe noembed noframes noscript)),
    plaintext => 'PLAINTEXT',
  );
  my $node_ln = $node->local_name;
  $p->{content_model_flag} = $content_model_of{$node_ln} || 'PCDATA';
  ## ISSUE: What is "the name of the element"? local name?

  $p->{inner_html_node} = [$node, $node_ln];

  ## Step 4
  my $root = $doc->create_element_ns ($xhtml_ns, [undef, 'html']);

  ## Step 5 # MUST
  $doc->append_child ($root);

  ## Step 6 # MUST
  push @{$p->{open_elements}}, [$root, 'html'];

  undef $p->{head_element};

  ## Step 7 # MUST
  $p->_reset_insertion_mode;

  ## Step 8 # MUST
  ## Starting from |$node| itself, walk up the parent chain and
  ## remember the first XHTML-namespace |form| element found.
  for (my $anode = $node; defined $anode; $anode = $anode->parent_node) {
    next unless $anode->node_type == 1;

    my $nsuri = $anode->namespace_uri;
    next unless defined $nsuri and $nsuri eq $xhtml_ns;

    next unless $anode->local_name eq 'form'; ## TODO: case?

    $p->{form_element} = $anode;
    last;
  }

  ## Step 3 # MUST
  ## Step 10 # MUST
  {
    ## |!!!next-token| is a macro too; |$self| is set up for it
    ## exactly as in the original code.
    my $self = $p;
    !!!next-token;
  }
  $p->_tree_construction_main;

  ## Step 11 # MUST
  ## Snapshot the child list before removing anything from it.
  my @old_children = @{$node->child_nodes};
  for my $old_child (@old_children) {
    $node->remove_child ($old_child);
  }
  ## ISSUE: mutation events? read-only?

  ## Step 12 # MUST
  ## Move every child of the scratch root element under |$node|,
  ## adopting each one into the owner document of |$node| first.
  my @new_children = @{$root->child_nodes};
  for my $new_child (@new_children) {
    $this_doc->adopt_node ($new_child);
    $node->append_child ($new_child);
  }
  ## ISSUE: mutation events?

  return $p->_terminate_tree_constructor;
} # set_inner_html
5210    
5211     } # tree construction stage
5212 wakaba 1.1
## get_inner_html ($class_or_self, $node, $on_error)
##
## Serializes the children of |$node| as an HTML fragment string.
##
## Arguments:
##   (first)   - invocant; ignored.
##   $node     - DOM node whose |child_nodes| are serialized.  |$node|
##               itself is not written to the output, but it and its
##               ancestors decide whether text starts out in the "raw
##               text" state (see Step 1).
##   $on_error - optional code reference.  It is called with the
##               offending node for every node whose type this
##               serializer does not handle (e.g. processing
##               instructions); such nodes produce no output.
##
## Returns a reference to the serialized string.
sub get_inner_html ($$$) {
  my (undef, $node, $on_error) = @_;

  my $html_ns = 'http://www.w3.org/1999/xhtml';

  ## Elements whose text content is written without escaping.
  my %is_raw_text_element = map { $_ => 1 } qw(
    style script xmp iframe noembed noframes noscript
  );

  ## Elements that have neither content nor an end tag.
  my %is_void_element = map { $_ => 1 } qw(
    area base basefont bgsound br col embed frame hr
    img input link meta param spacer wbr
  );

  ## Escapes the four characters that are replaced both in attribute
  ## values and in ordinary text.  |&| has to be handled first so that
  ## the entities generated for the other characters are not escaped
  ## a second time.
  my $escape = sub {
    my $value = shift;
    $value =~ s/&/&amp;/g;
    $value =~ s/</&lt;/g;
    $value =~ s/>/&gt;/g;
    $value =~ s/"/&quot;/g;
    return $value;
  };

  ## Step 1
  my $s = '';

  ## If |$node| or any of its ancestors is an HTML raw text element,
  ## every text node below it is emitted literally.
  my $in_cdata;
  my $parent = $node;
  while (defined $parent) {
    if ($parent->node_type == 1) {
      ## The namespace URI can be |undef| (element in no namespace),
      ## so test for definedness before the string comparison.
      my $nsuri = $parent->namespace_uri;
      if (defined $nsuri and $nsuri eq $html_ns and
          $is_raw_text_element{$parent->local_name}) { ## TODO: case thingy
        $in_cdata = 1;
      }
    }
    $parent = $parent->parent_node;
  }

  ## Step 2
  ## |@node| is a work list processed front to back.  Besides node
  ## objects it holds two kinds of plain strings: end tags waiting to
  ## be emitted, and the marker |cdata-out|, which ends the raw text
  ## state opened by the element queued in front of it.
  my @node = @{$node->child_nodes};
  C: while (@node) {
    my $child = shift @node;
    unless (ref $child) {
      if ($child eq 'cdata-out') {
        $in_cdata = 0;
      } else {
        $s .= $child; # end tag
      }
      next C;
    }

    my $nt = $child->node_type;
    if ($nt == 1) { # Element
      my $tag_name = lc $child->tag_name; ## ISSUE: Definition of "lowercase"
      $s .= '<' . $tag_name;

      ## ISSUE: Non-html elements

      my @attrs = @{$child->attributes}; # sort order MUST be stable
      for my $attr (@attrs) { # order is implementation dependent
        my $attr_name = lc $attr->name; ## ISSUE: Definition of "lowercase"
        $s .= ' ' . $attr_name . '="' . $escape->($attr->value) . '"';
      }
      $s .= '>';

      next C if $is_void_element{$tag_name};

      ## Entering a raw text element from the escaped state: queue the
      ## marker that switches back once the end tag has been emitted.
      if (not $in_cdata and $is_raw_text_element{$tag_name}) {
        unshift @node, 'cdata-out';
        $in_cdata = 1;
      }

      unshift @node, @{$child->child_nodes}, '</' . $tag_name . '>';
    } elsif ($nt == 3 or $nt == 4) { # Text, CDATASection
      $s .= $in_cdata ? $child->data : $escape->($child->data);
    } elsif ($nt == 8) { # Comment
      $s .= '<!--' . $child->data . '-->';
    } elsif ($nt == 10) { # DocumentType
      $s .= '<!DOCTYPE ' . $child->name . '>';
    } elsif ($nt == 5) { # entrefs
      ## The children must go to the FRONT of the work list so that the
      ## expansion is serialized where the reference occurs.  Appending
      ## them would emit them after all following siblings and after
      ## the end tags of the enclosing elements.
      unshift @node, @{$child->child_nodes};
    } else {
      $on_error->($child) if defined $on_error;
    }
    ## ISSUE: This code does not support PIs.
  } # C

  ## Step 3
  return \$s;
} # get_inner_html
5309    
5310     1;
5311 wakaba 1.18 # $Date: 2007/06/23 08:15:21 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24