/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Contents of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.16 - (hide annotations) (download) (as text)
Sat Jun 23 07:42:11 2007 UTC (17 years, 4 months ago) by wakaba
Branch: MAIN
Changes since 1.15: +24 -280 lines
File MIME type: application/x-wais-source
++ whatpm/t/ChangeLog	23 Jun 2007 07:42:02 -0000
	* tokenizer-test-1.test: |⟨|, and |&rlang;| tests
	are added.  (HTML5 revision 895.)  A test for |&;|
	is added.

2007-06-23  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	23 Jun 2007 07:40:34 -0000
	* .cvsignore: |Entities.html| is added.

	* HTML.pm.src: |$entity_char| is removed and
	requires |Whatpm::_NamedEntityList| instead.
	HTML5 revision 898 (refc), except that lack of refc
	is parse error.

	* mkentitylist.pl: New script.

	* Makefile (all): |_NamedEntityList.pm| is added.
	(_NamedEntityList.pm, Entities.html): New rules.

2007-06-23  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.2 package Whatpm::HTML;
2 wakaba 1.1 use strict;
3 wakaba 1.16 our $VERSION=do{my @r=(q$Revision: 1.15 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.1
5     ## This is an early version of an HTML parser.
6    
## Start tag names for which a "/" immediately before the closing ">"
## is accepted by the tokenizer without raising a |nestc| parse error.
## Keys are lowercase tag names; every value is 1.
my $permitted_slash_tag_name = {
  map { ($_, 1) } qw(
    base link meta hr br img embed param area col input
  )
};
20    
## Replacement table for code points 0x80..0x9F: each key is a code
## point in that range (as a number), each value the code point to be
## used instead.  Positions with no assigned replacement map to
## U+FFFD REPLACEMENT CHARACTER.
## NOTE(review): presumably consulted when resolving numeric character
## references; the consumer is outside this part of the file.
my $c1_entity_char = do {
  ## Replacements listed in key order, eight per row.
  my @replacement = (
    0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, # 0x80..0x87
    0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD, # 0x88..0x8F
    0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, # 0x90..0x97
    0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178, # 0x98..0x9F
  );
  +{ map { (0x80 + $_, $replacement[$_]) } 0 .. $#replacement };
}; # $c1_entity_char
55 wakaba 1.1
## Lookup table of the element names that belong to the "special"
## category.  Keys are lowercase element names; every value is 1.
my $special_category = {
  map { ($_, 1) } qw(
    address area base basefont bgsound
    blockquote body br center col colgroup
    dd dir div dl dt embed fieldset
    form frame frameset h1 h2 h3
    h4 h5 h6 head hr iframe image
    img input isindex li link listing
    menu meta noembed noframes noscript
    ol optgroup option p param plaintext
    pre script select spacer style tbody
    textarea tfoot thead title tr ul wbr
  )
};
## Lookup table of the element names that belong to the "scoping"
## category.  Keys are lowercase element names; every value is 1.
my $scoping_category = {
  map { ($_, 1) } qw(button caption html marquee object table td th)
};
## Lookup table of the element names that belong to the "formatting"
## category.  Keys are lowercase element names; every value is 1.
## (The key |strike| was previously misspelled |strile|, so <strike>
## elements were never recognized as formatting elements.)
my $formatting_category = {
  a => 1, b => 1, big => 1, em => 1, font => 1, i => 1, nobr => 1,
  s => 1, small => 1, strike => 1, strong => 1, tt => 1, u => 1,
};
76     # $phrasing_category: all other elements
77    
## parse_string ($invocant, $string, $document [, $onerror])
##
## Creates a fresh parser object by calling |new| on the invocant,
## tokenizes |$string| and builds the tree into |$document|.
##
##   $string    The text to parse.  It is referenced, not copied.
##   $document  The document object; stored as |$self->{document}|.
##   $onerror   Optional code reference invoked for each parse error
##              with the arguments given to the error macro followed by
##              |line => ..., column => ...|.  When omitted (or false),
##              a handler that |warn|s a one-line message is used.
##
## Returns |$self->{document}|.
sub parse_string ($$$;$) {
  my $self = shift->new;
  my $input_ref = \$_[0];
  $self->{document} = $_[1];

  ## NOTE: |set_inner_html| copies most of this method's code

  ## Read position within the input, and the position reported to
  ## the error handler.
  my ($offset, $line, $column) = (0, 1, 0);

  $self->{set_next_input_character} = sub {
    my $self = shift;

    ## Keep the three most recent characters, newest first.
    pop @{$self->{prev_input_character}};
    unshift @{$self->{prev_input_character}}, $self->{next_input_character};

    if ($offset >= length $$input_ref) {
      $self->{next_input_character} = -1; # end of input
      return;
    }

    my $c = ord substr $$input_ref, $offset++, 1;
    $self->{next_input_character} = $c;
    $column++;

    if ($c == 0x000A or $c == 0x000D) { # LF or CR
      if ($c == 0x000D) {
        ## CR and CR LF are both delivered as a single LF.
        $offset++ if substr ($$input_ref, $offset, 1) eq "\x0A";
        $self->{next_input_character} = 0x000A; # LF # MUST
      }
      $line++;
      $column = 0;
    } elsif ($c > 0x10FFFF) {
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    } elsif ($c == 0x0000) { # NULL
      !!!parse-error (type => 'NULL');
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    }
  };
  $self->{prev_input_character} = [-1, -1, -1];
  $self->{next_input_character} = -1;

  my $onerror = $_[2];
  $onerror ||= sub {
    my (%opt) = @_;
    warn "Parse error ($opt{type}) at line $opt{line} column $opt{column}\n";
  };
  ## Every reported error is tagged with the current input position.
  $self->{parse_error} = sub {
    $onerror->(@_, line => $line, column => $column);
  };

  $self->_initialize_tokenizer;
  $self->_initialize_tree_constructor;
  $self->_construct_tree;
  $self->_terminate_tree_constructor;

  return $self->{document};
} # parse_string
131    
## new ($class)
##
## Creates an empty parser object blessed into |$class| and installs
## two default callbacks:
##
##   set_next_input_character  Reports end of input, i.e. sets
##                             |$self->{next_input_character}| to -1.
##   parse_error               Ignores the error.
##
## Returns the new object.
sub new ($) {
  my $class = shift;
  my $self = bless {}, $class;

  ## The callback is stored inside the object it refers to.  Capturing
  ## |$self| directly would create a reference cycle and the parser
  ## would never be freed, so the closure only holds a weak reference.
  require Scalar::Util;
  my $weak_self = $self;
  Scalar::Util::weaken ($weak_self);

  $self->{set_next_input_character} = sub {
    $weak_self->{next_input_character} = -1 if $weak_self;
  };
  $self->{parse_error} = sub {
    #
  };
  return $self;
} # new
143    
144     ## Implementations MUST act as if they used the state machine described in the spec.
145    
## _initialize_tokenizer ($self)
##
## Resets the tokenizer fields of the parser: the state becomes
## |data|, the content model flag |PCDATA|, the token and attribute
## under construction are cleared, and the first input character is
## read via the |next-input-character| macro.
sub _initialize_tokenizer ($) {
  my $self = shift;

  $self->{state} = 'data'; # MUST
  $self->{content_model_flag} = 'PCDATA'; # be

  ## |current_token| holds a start tag, end tag, comment, or DOCTYPE.
  @{$self}{qw(
    current_token current_attribute
    last_emitted_start_tag_name last_attribute_value_state
  )} = ();

  $self->{char} = [];
  ## Sets |$self->{next_input_character}|.
  !!!next-input-character;
  ## Queue from which |_get_next_token| serves tokens first.
  $self->{token} = [];
} # _initialize_tokenizer
159    
160     ## A token has:
161     ## ->{type} eq 'DOCTYPE', 'start tag', 'end tag', 'comment',
162     ## 'character', or 'end-of-file'
163     ## ->{name} (DOCTYPE), ->{tag_name} (start tag (tagname), end tag (tagname))
164     ## ISSUE: the spec needs s/tagname/tag name/
165     ## ->{error} == 1 or 0 (DOCTYPE)
166     ## ->{attributes} isa HASH (start tag, end tag)
167     ## ->{data} (comment, character)
168    
169     ## Macros
170     ## Macros MUST be preceded by three EXCLAMATION MARKs.
171     ## emit ($token)
172     ## Emits the specified token.
173    
174     ## Emitted token MUST immediately be handled by the tree construction state.
175    
176     ## Before each step, UA MAY check to see if either one of the scripts in
177     ## "list of scripts that will execute as soon as possible" or the first
178     ## script in the "list of scripts that will execute asynchronously",
179     ## has completed loading. If one has, then it MUST be executed
180     ## and removed from the list.
181    
182     sub _get_next_token ($) {
183     my $self = shift;
184     if (@{$self->{token}}) {
185     return shift @{$self->{token}};
186     }
187    
188     A: {
189     if ($self->{state} eq 'data') {
190     if ($self->{next_input_character} == 0x0026) { # &
191     if ($self->{content_model_flag} eq 'PCDATA' or
192     $self->{content_model_flag} eq 'RCDATA') {
193     $self->{state} = 'entity data';
194     !!!next-input-character;
195     redo A;
196     } else {
197     #
198     }
199 wakaba 1.13 } elsif ($self->{next_input_character} == 0x002D) { # -
200     if ($self->{content_model_flag} eq 'RCDATA' or
201     $self->{content_model_flag} eq 'CDATA') {
202     unless ($self->{escape}) {
203     if ($self->{prev_input_character}->[0] == 0x002D and # -
204     $self->{prev_input_character}->[1] == 0x0021 and # !
205     $self->{prev_input_character}->[2] == 0x003C) { # <
206     $self->{escape} = 1;
207     }
208     }
209     }
210    
211     #
212 wakaba 1.1 } elsif ($self->{next_input_character} == 0x003C) { # <
213 wakaba 1.13 if ($self->{content_model_flag} eq 'PCDATA' or
214     (($self->{content_model_flag} eq 'CDATA' or
215     $self->{content_model_flag} eq 'RCDATA') and
216     not $self->{escape})) {
217 wakaba 1.1 $self->{state} = 'tag open';
218     !!!next-input-character;
219     redo A;
220     } else {
221     #
222     }
223 wakaba 1.13 } elsif ($self->{next_input_character} == 0x003E) { # >
224     if ($self->{escape} and
225     ($self->{content_model_flag} eq 'RCDATA' or
226     $self->{content_model_flag} eq 'CDATA')) {
227     if ($self->{prev_input_character}->[0] == 0x002D and # -
228     $self->{prev_input_character}->[1] == 0x002D) { # -
229     delete $self->{escape};
230     }
231     }
232    
233     #
234 wakaba 1.1 } elsif ($self->{next_input_character} == -1) {
235     !!!emit ({type => 'end-of-file'});
236     last A; ## TODO: ok?
237     }
238     # Anything else
239     my $token = {type => 'character',
240     data => chr $self->{next_input_character}};
241     ## Stay in the data state
242     !!!next-input-character;
243    
244     !!!emit ($token);
245    
246     redo A;
247     } elsif ($self->{state} eq 'entity data') {
248     ## (cannot happen in CDATA state)
249    
250     my $token = $self->_tokenize_attempt_to_consume_an_entity;
251    
252     $self->{state} = 'data';
253     # next-input-character is already done
254    
255     unless (defined $token) {
256     !!!emit ({type => 'character', data => '&'});
257     } else {
258     !!!emit ($token);
259     }
260    
261     redo A;
262     } elsif ($self->{state} eq 'tag open') {
263     if ($self->{content_model_flag} eq 'RCDATA' or
264     $self->{content_model_flag} eq 'CDATA') {
265     if ($self->{next_input_character} == 0x002F) { # /
266     !!!next-input-character;
267     $self->{state} = 'close tag open';
268     redo A;
269     } else {
270     ## reconsume
271     $self->{state} = 'data';
272    
273     !!!emit ({type => 'character', data => '<'});
274    
275     redo A;
276     }
277     } elsif ($self->{content_model_flag} eq 'PCDATA') {
278     if ($self->{next_input_character} == 0x0021) { # !
279     $self->{state} = 'markup declaration open';
280     !!!next-input-character;
281     redo A;
282     } elsif ($self->{next_input_character} == 0x002F) { # /
283     $self->{state} = 'close tag open';
284     !!!next-input-character;
285     redo A;
286     } elsif (0x0041 <= $self->{next_input_character} and
287     $self->{next_input_character} <= 0x005A) { # A..Z
288     $self->{current_token}
289     = {type => 'start tag',
290     tag_name => chr ($self->{next_input_character} + 0x0020)};
291     $self->{state} = 'tag name';
292     !!!next-input-character;
293     redo A;
294     } elsif (0x0061 <= $self->{next_input_character} and
295     $self->{next_input_character} <= 0x007A) { # a..z
296     $self->{current_token} = {type => 'start tag',
297     tag_name => chr ($self->{next_input_character})};
298     $self->{state} = 'tag name';
299     !!!next-input-character;
300     redo A;
301     } elsif ($self->{next_input_character} == 0x003E) { # >
302 wakaba 1.3 !!!parse-error (type => 'empty start tag');
303 wakaba 1.1 $self->{state} = 'data';
304     !!!next-input-character;
305    
306     !!!emit ({type => 'character', data => '<>'});
307    
308     redo A;
309     } elsif ($self->{next_input_character} == 0x003F) { # ?
310 wakaba 1.3 !!!parse-error (type => 'pio');
311 wakaba 1.1 $self->{state} = 'bogus comment';
312     ## $self->{next_input_character} is intentionally left as is
313     redo A;
314     } else {
315 wakaba 1.3 !!!parse-error (type => 'bare stago');
316 wakaba 1.1 $self->{state} = 'data';
317     ## reconsume
318    
319     !!!emit ({type => 'character', data => '<'});
320    
321     redo A;
322     }
323     } else {
324     die "$0: $self->{content_model_flag}: Unknown content model flag";
325     }
326     } elsif ($self->{state} eq 'close tag open') {
327     if ($self->{content_model_flag} eq 'RCDATA' or
328     $self->{content_model_flag} eq 'CDATA') {
329     my @next_char;
330     TAGNAME: for (my $i = 0; $i < length $self->{last_emitted_start_tag_name}; $i++) {
331     push @next_char, $self->{next_input_character};
332     my $c = ord substr ($self->{last_emitted_start_tag_name}, $i, 1);
333     my $C = 0x0061 <= $c && $c <= 0x007A ? $c - 0x0020 : $c;
334     if ($self->{next_input_character} == $c or $self->{next_input_character} == $C) {
335     !!!next-input-character;
336     next TAGNAME;
337     } else {
338 wakaba 1.3 !!!parse-error (type => 'unmatched end tag');
339 wakaba 1.1 $self->{next_input_character} = shift @next_char; # reconsume
340     !!!back-next-input-character (@next_char);
341     $self->{state} = 'data';
342    
343     !!!emit ({type => 'character', data => '</'});
344    
345     redo A;
346     }
347     }
348     push @next_char, $self->{next_input_character};
349    
350     unless ($self->{next_input_character} == 0x0009 or # HT
351     $self->{next_input_character} == 0x000A or # LF
352     $self->{next_input_character} == 0x000B or # VT
353     $self->{next_input_character} == 0x000C or # FF
354     $self->{next_input_character} == 0x0020 or # SP
355     $self->{next_input_character} == 0x003E or # >
356     $self->{next_input_character} == 0x002F or # /
357     $self->{next_input_character} == 0x003C or # <
358     $self->{next_input_character} == -1) {
359 wakaba 1.3 !!!parse-error (type => 'unmatched end tag');
360 wakaba 1.1 $self->{next_input_character} = shift @next_char; # reconsume
361     !!!back-next-input-character (@next_char);
362     $self->{state} = 'data';
363    
364     !!!emit ({type => 'character', data => '</'});
365    
366     redo A;
367     } else {
368     $self->{next_input_character} = shift @next_char;
369     !!!back-next-input-character (@next_char);
370     # and consume...
371     }
372     }
373    
374     if (0x0041 <= $self->{next_input_character} and
375     $self->{next_input_character} <= 0x005A) { # A..Z
376     $self->{current_token} = {type => 'end tag',
377     tag_name => chr ($self->{next_input_character} + 0x0020)};
378     $self->{state} = 'tag name';
379     !!!next-input-character;
380     redo A;
381     } elsif (0x0061 <= $self->{next_input_character} and
382     $self->{next_input_character} <= 0x007A) { # a..z
383     $self->{current_token} = {type => 'end tag',
384     tag_name => chr ($self->{next_input_character})};
385     $self->{state} = 'tag name';
386     !!!next-input-character;
387     redo A;
388     } elsif ($self->{next_input_character} == 0x003E) { # >
389 wakaba 1.3 !!!parse-error (type => 'empty end tag');
390 wakaba 1.1 $self->{state} = 'data';
391     !!!next-input-character;
392     redo A;
393     } elsif ($self->{next_input_character} == -1) {
394 wakaba 1.3 !!!parse-error (type => 'bare etago');
395 wakaba 1.1 $self->{state} = 'data';
396     # reconsume
397    
398     !!!emit ({type => 'character', data => '</'});
399    
400     redo A;
401     } else {
402 wakaba 1.3 !!!parse-error (type => 'bogus end tag');
403 wakaba 1.1 $self->{state} = 'bogus comment';
404     ## $self->{next_input_character} is intentionally left as is
405     redo A;
406     }
407     } elsif ($self->{state} eq 'tag name') {
408     if ($self->{next_input_character} == 0x0009 or # HT
409     $self->{next_input_character} == 0x000A or # LF
410     $self->{next_input_character} == 0x000B or # VT
411     $self->{next_input_character} == 0x000C or # FF
412     $self->{next_input_character} == 0x0020) { # SP
413     $self->{state} = 'before attribute name';
414     !!!next-input-character;
415     redo A;
416     } elsif ($self->{next_input_character} == 0x003E) { # >
417     if ($self->{current_token}->{type} eq 'start tag') {
418     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
419     } elsif ($self->{current_token}->{type} eq 'end tag') {
420     $self->{content_model_flag} = 'PCDATA'; # MUST
421     if ($self->{current_token}->{attributes}) {
422 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
423 wakaba 1.1 }
424     } else {
425     die "$0: $self->{current_token}->{type}: Unknown token type";
426     }
427     $self->{state} = 'data';
428     !!!next-input-character;
429    
430     !!!emit ($self->{current_token}); # start tag or end tag
431     undef $self->{current_token};
432    
433     redo A;
434     } elsif (0x0041 <= $self->{next_input_character} and
435     $self->{next_input_character} <= 0x005A) { # A..Z
436     $self->{current_token}->{tag_name} .= chr ($self->{next_input_character} + 0x0020);
437     # start tag or end tag
438     ## Stay in this state
439     !!!next-input-character;
440     redo A;
441     } elsif ($self->{next_input_character} == 0x003C or # <
442     $self->{next_input_character} == -1) {
443 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
444 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
445     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
446     } elsif ($self->{current_token}->{type} eq 'end tag') {
447     $self->{content_model_flag} = 'PCDATA'; # MUST
448     if ($self->{current_token}->{attributes}) {
449 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
450 wakaba 1.1 }
451     } else {
452     die "$0: $self->{current_token}->{type}: Unknown token type";
453     }
454     $self->{state} = 'data';
455     # reconsume
456    
457     !!!emit ($self->{current_token}); # start tag or end tag
458     undef $self->{current_token};
459    
460     redo A;
461     } elsif ($self->{next_input_character} == 0x002F) { # /
462     !!!next-input-character;
463     if ($self->{next_input_character} == 0x003E and # >
464     $self->{current_token}->{type} eq 'start tag' and
465     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
466     # permitted slash
467     #
468     } else {
469 wakaba 1.3 !!!parse-error (type => 'nestc');
470 wakaba 1.1 }
471     $self->{state} = 'before attribute name';
472     # next-input-character is already done
473     redo A;
474     } else {
475     $self->{current_token}->{tag_name} .= chr $self->{next_input_character};
476     # start tag or end tag
477     ## Stay in the state
478     !!!next-input-character;
479     redo A;
480     }
481     } elsif ($self->{state} eq 'before attribute name') {
482     if ($self->{next_input_character} == 0x0009 or # HT
483     $self->{next_input_character} == 0x000A or # LF
484     $self->{next_input_character} == 0x000B or # VT
485     $self->{next_input_character} == 0x000C or # FF
486     $self->{next_input_character} == 0x0020) { # SP
487     ## Stay in the state
488     !!!next-input-character;
489     redo A;
490     } elsif ($self->{next_input_character} == 0x003E) { # >
491     if ($self->{current_token}->{type} eq 'start tag') {
492     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
493     } elsif ($self->{current_token}->{type} eq 'end tag') {
494     $self->{content_model_flag} = 'PCDATA'; # MUST
495     if ($self->{current_token}->{attributes}) {
496 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
497 wakaba 1.1 }
498     } else {
499     die "$0: $self->{current_token}->{type}: Unknown token type";
500     }
501     $self->{state} = 'data';
502     !!!next-input-character;
503    
504     !!!emit ($self->{current_token}); # start tag or end tag
505     undef $self->{current_token};
506    
507     redo A;
508     } elsif (0x0041 <= $self->{next_input_character} and
509     $self->{next_input_character} <= 0x005A) { # A..Z
510     $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
511     value => ''};
512     $self->{state} = 'attribute name';
513     !!!next-input-character;
514     redo A;
515     } elsif ($self->{next_input_character} == 0x002F) { # /
516     !!!next-input-character;
517     if ($self->{next_input_character} == 0x003E and # >
518     $self->{current_token}->{type} eq 'start tag' and
519     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
520     # permitted slash
521     #
522     } else {
523 wakaba 1.3 !!!parse-error (type => 'nestc');
524 wakaba 1.1 }
525     ## Stay in the state
526     # next-input-character is already done
527     redo A;
528     } elsif ($self->{next_input_character} == 0x003C or # <
529     $self->{next_input_character} == -1) {
530 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
531 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
532     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
533     } elsif ($self->{current_token}->{type} eq 'end tag') {
534     $self->{content_model_flag} = 'PCDATA'; # MUST
535     if ($self->{current_token}->{attributes}) {
536 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
537 wakaba 1.1 }
538     } else {
539     die "$0: $self->{current_token}->{type}: Unknown token type";
540     }
541     $self->{state} = 'data';
542     # reconsume
543    
544     !!!emit ($self->{current_token}); # start tag or end tag
545     undef $self->{current_token};
546    
547     redo A;
548     } else {
549     $self->{current_attribute} = {name => chr ($self->{next_input_character}),
550     value => ''};
551     $self->{state} = 'attribute name';
552     !!!next-input-character;
553     redo A;
554     }
555     } elsif ($self->{state} eq 'attribute name') {
556     my $before_leave = sub {
557     if (exists $self->{current_token}->{attributes} # start tag or end tag
558     ->{$self->{current_attribute}->{name}}) { # MUST
559 wakaba 1.3 !!!parse-error (type => 'dupulicate attribute');
560 wakaba 1.1 ## Discard $self->{current_attribute} # MUST
561     } else {
562     $self->{current_token}->{attributes}->{$self->{current_attribute}->{name}}
563     = $self->{current_attribute};
564     }
565     }; # $before_leave
566    
567     if ($self->{next_input_character} == 0x0009 or # HT
568     $self->{next_input_character} == 0x000A or # LF
569     $self->{next_input_character} == 0x000B or # VT
570     $self->{next_input_character} == 0x000C or # FF
571     $self->{next_input_character} == 0x0020) { # SP
572     $before_leave->();
573     $self->{state} = 'after attribute name';
574     !!!next-input-character;
575     redo A;
576     } elsif ($self->{next_input_character} == 0x003D) { # =
577     $before_leave->();
578     $self->{state} = 'before attribute value';
579     !!!next-input-character;
580     redo A;
581     } elsif ($self->{next_input_character} == 0x003E) { # >
582     $before_leave->();
583     if ($self->{current_token}->{type} eq 'start tag') {
584     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
585     } elsif ($self->{current_token}->{type} eq 'end tag') {
586     $self->{content_model_flag} = 'PCDATA'; # MUST
587     if ($self->{current_token}->{attributes}) {
588 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
589 wakaba 1.1 }
590     } else {
591     die "$0: $self->{current_token}->{type}: Unknown token type";
592     }
593     $self->{state} = 'data';
594     !!!next-input-character;
595    
596     !!!emit ($self->{current_token}); # start tag or end tag
597     undef $self->{current_token};
598    
599     redo A;
600     } elsif (0x0041 <= $self->{next_input_character} and
601     $self->{next_input_character} <= 0x005A) { # A..Z
602     $self->{current_attribute}->{name} .= chr ($self->{next_input_character} + 0x0020);
603     ## Stay in the state
604     !!!next-input-character;
605     redo A;
606     } elsif ($self->{next_input_character} == 0x002F) { # /
607     $before_leave->();
608     !!!next-input-character;
609     if ($self->{next_input_character} == 0x003E and # >
610     $self->{current_token}->{type} eq 'start tag' and
611     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
612     # permitted slash
613     #
614     } else {
615 wakaba 1.3 !!!parse-error (type => 'nestc');
616 wakaba 1.1 }
617     $self->{state} = 'before attribute name';
618     # next-input-character is already done
619     redo A;
620     } elsif ($self->{next_input_character} == 0x003C or # <
621     $self->{next_input_character} == -1) {
622 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
623 wakaba 1.1 $before_leave->();
624     if ($self->{current_token}->{type} eq 'start tag') {
625     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
626     } elsif ($self->{current_token}->{type} eq 'end tag') {
627     $self->{content_model_flag} = 'PCDATA'; # MUST
628     if ($self->{current_token}->{attributes}) {
629 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
630 wakaba 1.1 }
631     } else {
632     die "$0: $self->{current_token}->{type}: Unknown token type";
633     }
634     $self->{state} = 'data';
635     # reconsume
636    
637     !!!emit ($self->{current_token}); # start tag or end tag
638     undef $self->{current_token};
639    
640     redo A;
641     } else {
642     $self->{current_attribute}->{name} .= chr ($self->{next_input_character});
643     ## Stay in the state
644     !!!next-input-character;
645     redo A;
646     }
647     } elsif ($self->{state} eq 'after attribute name') {
648     if ($self->{next_input_character} == 0x0009 or # HT
649     $self->{next_input_character} == 0x000A or # LF
650     $self->{next_input_character} == 0x000B or # VT
651     $self->{next_input_character} == 0x000C or # FF
652     $self->{next_input_character} == 0x0020) { # SP
653     ## Stay in the state
654     !!!next-input-character;
655     redo A;
656     } elsif ($self->{next_input_character} == 0x003D) { # =
657     $self->{state} = 'before attribute value';
658     !!!next-input-character;
659     redo A;
660     } elsif ($self->{next_input_character} == 0x003E) { # >
661     if ($self->{current_token}->{type} eq 'start tag') {
662     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
663     } elsif ($self->{current_token}->{type} eq 'end tag') {
664     $self->{content_model_flag} = 'PCDATA'; # MUST
665     if ($self->{current_token}->{attributes}) {
666 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
667 wakaba 1.1 }
668     } else {
669     die "$0: $self->{current_token}->{type}: Unknown token type";
670     }
671     $self->{state} = 'data';
672     !!!next-input-character;
673    
674     !!!emit ($self->{current_token}); # start tag or end tag
675     undef $self->{current_token};
676    
677     redo A;
678     } elsif (0x0041 <= $self->{next_input_character} and
679     $self->{next_input_character} <= 0x005A) { # A..Z
680     $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
681     value => ''};
682     $self->{state} = 'attribute name';
683     !!!next-input-character;
684     redo A;
685     } elsif ($self->{next_input_character} == 0x002F) { # /
686     !!!next-input-character;
687     if ($self->{next_input_character} == 0x003E and # >
688     $self->{current_token}->{type} eq 'start tag' and
689     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
690     # permitted slash
691     #
692     } else {
693 wakaba 1.3 !!!parse-error (type => 'nestc');
694 wakaba 1.1 }
695     $self->{state} = 'before attribute name';
696     # next-input-character is already done
697     redo A;
698     } elsif ($self->{next_input_character} == 0x003C or # <
699     $self->{next_input_character} == -1) {
700 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
701 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
702     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
703     } elsif ($self->{current_token}->{type} eq 'end tag') {
704     $self->{content_model_flag} = 'PCDATA'; # MUST
705     if ($self->{current_token}->{attributes}) {
706 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
707 wakaba 1.1 }
708     } else {
709     die "$0: $self->{current_token}->{type}: Unknown token type";
710     }
711     $self->{state} = 'data';
712     # reconsume
713    
714     !!!emit ($self->{current_token}); # start tag or end tag
715     undef $self->{current_token};
716    
717     redo A;
718     } else {
719     $self->{current_attribute} = {name => chr ($self->{next_input_character}),
720     value => ''};
721     $self->{state} = 'attribute name';
722     !!!next-input-character;
723     redo A;
724     }
725     } elsif ($self->{state} eq 'before attribute value') {
726     if ($self->{next_input_character} == 0x0009 or # HT
727     $self->{next_input_character} == 0x000A or # LF
728     $self->{next_input_character} == 0x000B or # VT
729     $self->{next_input_character} == 0x000C or # FF
730     $self->{next_input_character} == 0x0020) { # SP
731     ## Stay in the state
732     !!!next-input-character;
733     redo A;
734     } elsif ($self->{next_input_character} == 0x0022) { # "
735     $self->{state} = 'attribute value (double-quoted)';
736     !!!next-input-character;
737     redo A;
738     } elsif ($self->{next_input_character} == 0x0026) { # &
739     $self->{state} = 'attribute value (unquoted)';
740     ## reconsume
741     redo A;
742     } elsif ($self->{next_input_character} == 0x0027) { # '
743     $self->{state} = 'attribute value (single-quoted)';
744     !!!next-input-character;
745     redo A;
746     } elsif ($self->{next_input_character} == 0x003E) { # >
747     if ($self->{current_token}->{type} eq 'start tag') {
748     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
749     } elsif ($self->{current_token}->{type} eq 'end tag') {
750     $self->{content_model_flag} = 'PCDATA'; # MUST
751     if ($self->{current_token}->{attributes}) {
752 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
753 wakaba 1.1 }
754     } else {
755     die "$0: $self->{current_token}->{type}: Unknown token type";
756     }
757     $self->{state} = 'data';
758     !!!next-input-character;
759    
760     !!!emit ($self->{current_token}); # start tag or end tag
761     undef $self->{current_token};
762    
763     redo A;
764     } elsif ($self->{next_input_character} == 0x003C or # <
765     $self->{next_input_character} == -1) {
766 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
767 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
768     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
769     } elsif ($self->{current_token}->{type} eq 'end tag') {
770     $self->{content_model_flag} = 'PCDATA'; # MUST
771     if ($self->{current_token}->{attributes}) {
772 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
773 wakaba 1.1 }
774     } else {
775     die "$0: $self->{current_token}->{type}: Unknown token type";
776     }
777     $self->{state} = 'data';
778     ## reconsume
779    
780     !!!emit ($self->{current_token}); # start tag or end tag
781     undef $self->{current_token};
782    
783     redo A;
784     } else {
785     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
786     $self->{state} = 'attribute value (unquoted)';
787     !!!next-input-character;
788     redo A;
789     }
790     } elsif ($self->{state} eq 'attribute value (double-quoted)') {
791     if ($self->{next_input_character} == 0x0022) { # "
792     $self->{state} = 'before attribute name';
793     !!!next-input-character;
794     redo A;
795     } elsif ($self->{next_input_character} == 0x0026) { # &
796     $self->{last_attribute_value_state} = 'attribute value (double-quoted)';
797     $self->{state} = 'entity in attribute value';
798     !!!next-input-character;
799     redo A;
800     } elsif ($self->{next_input_character} == -1) {
801 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
802 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
803     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
804     } elsif ($self->{current_token}->{type} eq 'end tag') {
805     $self->{content_model_flag} = 'PCDATA'; # MUST
806     if ($self->{current_token}->{attributes}) {
807 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
808 wakaba 1.1 }
809     } else {
810     die "$0: $self->{current_token}->{type}: Unknown token type";
811     }
812     $self->{state} = 'data';
813     ## reconsume
814    
815     !!!emit ($self->{current_token}); # start tag or end tag
816     undef $self->{current_token};
817    
818     redo A;
819     } else {
820     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
821     ## Stay in the state
822     !!!next-input-character;
823     redo A;
824     }
825     } elsif ($self->{state} eq 'attribute value (single-quoted)') {
826     if ($self->{next_input_character} == 0x0027) { # '
827     $self->{state} = 'before attribute name';
828     !!!next-input-character;
829     redo A;
830     } elsif ($self->{next_input_character} == 0x0026) { # &
831     $self->{last_attribute_value_state} = 'attribute value (single-quoted)';
832     $self->{state} = 'entity in attribute value';
833     !!!next-input-character;
834     redo A;
835     } elsif ($self->{next_input_character} == -1) {
836 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
837 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
838     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
839     } elsif ($self->{current_token}->{type} eq 'end tag') {
840     $self->{content_model_flag} = 'PCDATA'; # MUST
841     if ($self->{current_token}->{attributes}) {
842 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
843 wakaba 1.1 }
844     } else {
845     die "$0: $self->{current_token}->{type}: Unknown token type";
846     }
847     $self->{state} = 'data';
848     ## reconsume
849    
850     !!!emit ($self->{current_token}); # start tag or end tag
851     undef $self->{current_token};
852    
853     redo A;
854     } else {
855     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
856     ## Stay in the state
857     !!!next-input-character;
858     redo A;
859     }
860     } elsif ($self->{state} eq 'attribute value (unquoted)') {
861     if ($self->{next_input_character} == 0x0009 or # HT
862     $self->{next_input_character} == 0x000A or # LF
863     $self->{next_input_character} == 0x000B or # HT
864     $self->{next_input_character} == 0x000C or # FF
865     $self->{next_input_character} == 0x0020) { # SP
866     $self->{state} = 'before attribute name';
867     !!!next-input-character;
868     redo A;
869     } elsif ($self->{next_input_character} == 0x0026) { # &
870     $self->{last_attribute_value_state} = 'attribute value (unquoted)';
871     $self->{state} = 'entity in attribute value';
872     !!!next-input-character;
873     redo A;
874     } elsif ($self->{next_input_character} == 0x003E) { # >
875     if ($self->{current_token}->{type} eq 'start tag') {
876     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
877     } elsif ($self->{current_token}->{type} eq 'end tag') {
878     $self->{content_model_flag} = 'PCDATA'; # MUST
879     if ($self->{current_token}->{attributes}) {
880 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
881 wakaba 1.1 }
882     } else {
883     die "$0: $self->{current_token}->{type}: Unknown token type";
884     }
885     $self->{state} = 'data';
886     !!!next-input-character;
887    
888     !!!emit ($self->{current_token}); # start tag or end tag
889     undef $self->{current_token};
890    
891     redo A;
892     } elsif ($self->{next_input_character} == 0x003C or # <
893     $self->{next_input_character} == -1) {
894 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
895 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
896     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
897     } elsif ($self->{current_token}->{type} eq 'end tag') {
898     $self->{content_model_flag} = 'PCDATA'; # MUST
899     if ($self->{current_token}->{attributes}) {
900 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
901 wakaba 1.1 }
902     } else {
903     die "$0: $self->{current_token}->{type}: Unknown token type";
904     }
905     $self->{state} = 'data';
906     ## reconsume
907    
908     !!!emit ($self->{current_token}); # start tag or end tag
909     undef $self->{current_token};
910    
911     redo A;
912     } else {
913     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
914     ## Stay in the state
915     !!!next-input-character;
916     redo A;
917     }
918     } elsif ($self->{state} eq 'entity in attribute value') {
919     my $token = $self->_tokenize_attempt_to_consume_an_entity;
920    
921     unless (defined $token) {
922     $self->{current_attribute}->{value} .= '&';
923     } else {
924     $self->{current_attribute}->{value} .= $token->{data};
925     ## ISSUE: spec says "append the returned character token to the current attribute's value"
926     }
927    
928     $self->{state} = $self->{last_attribute_value_state};
929     # next-input-character is already done
930     redo A;
931     } elsif ($self->{state} eq 'bogus comment') {
932     ## (only happen if PCDATA state)
933    
934     my $token = {type => 'comment', data => ''};
935    
936     BC: {
937     if ($self->{next_input_character} == 0x003E) { # >
938     $self->{state} = 'data';
939     !!!next-input-character;
940    
941     !!!emit ($token);
942    
943     redo A;
944     } elsif ($self->{next_input_character} == -1) {
945     $self->{state} = 'data';
946     ## reconsume
947    
948     !!!emit ($token);
949    
950     redo A;
951     } else {
952     $token->{data} .= chr ($self->{next_input_character});
953     !!!next-input-character;
954     redo BC;
955     }
956     } # BC
957     } elsif ($self->{state} eq 'markup declaration open') {
958     ## (only happen if PCDATA state)
959    
960     my @next_char;
961     push @next_char, $self->{next_input_character};
962    
963     if ($self->{next_input_character} == 0x002D) { # -
964     !!!next-input-character;
965     push @next_char, $self->{next_input_character};
966     if ($self->{next_input_character} == 0x002D) { # -
967     $self->{current_token} = {type => 'comment', data => ''};
968     $self->{state} = 'comment';
969     !!!next-input-character;
970     redo A;
971     }
972     } elsif ($self->{next_input_character} == 0x0044 or # D
973     $self->{next_input_character} == 0x0064) { # d
974     !!!next-input-character;
975     push @next_char, $self->{next_input_character};
976     if ($self->{next_input_character} == 0x004F or # O
977     $self->{next_input_character} == 0x006F) { # o
978     !!!next-input-character;
979     push @next_char, $self->{next_input_character};
980     if ($self->{next_input_character} == 0x0043 or # C
981     $self->{next_input_character} == 0x0063) { # c
982     !!!next-input-character;
983     push @next_char, $self->{next_input_character};
984     if ($self->{next_input_character} == 0x0054 or # T
985     $self->{next_input_character} == 0x0074) { # t
986     !!!next-input-character;
987     push @next_char, $self->{next_input_character};
988     if ($self->{next_input_character} == 0x0059 or # Y
989     $self->{next_input_character} == 0x0079) { # y
990     !!!next-input-character;
991     push @next_char, $self->{next_input_character};
992     if ($self->{next_input_character} == 0x0050 or # P
993     $self->{next_input_character} == 0x0070) { # p
994     !!!next-input-character;
995     push @next_char, $self->{next_input_character};
996     if ($self->{next_input_character} == 0x0045 or # E
997     $self->{next_input_character} == 0x0065) { # e
998     ## ISSUE: What a stupid code this is!
999     $self->{state} = 'DOCTYPE';
1000     !!!next-input-character;
1001     redo A;
1002     }
1003     }
1004     }
1005     }
1006     }
1007     }
1008     }
1009    
1010 wakaba 1.3 !!!parse-error (type => 'bogus comment open');
1011 wakaba 1.1 $self->{next_input_character} = shift @next_char;
1012     !!!back-next-input-character (@next_char);
1013     $self->{state} = 'bogus comment';
1014     redo A;
1015    
1016     ## ISSUE: typos in spec: chacacters, is is a parse error
1017     ## ISSUE: spec is somewhat unclear on "is the first character that will be in the comment"; what is "that will be in the comment" is what the algorithm defines, isn't it?
1018     } elsif ($self->{state} eq 'comment') {
1019     if ($self->{next_input_character} == 0x002D) { # -
1020     $self->{state} = 'comment dash';
1021     !!!next-input-character;
1022     redo A;
1023     } elsif ($self->{next_input_character} == -1) {
1024 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1025 wakaba 1.1 $self->{state} = 'data';
1026     ## reconsume
1027    
1028     !!!emit ($self->{current_token}); # comment
1029     undef $self->{current_token};
1030    
1031     redo A;
1032     } else {
1033     $self->{current_token}->{data} .= chr ($self->{next_input_character}); # comment
1034     ## Stay in the state
1035     !!!next-input-character;
1036     redo A;
1037     }
1038     } elsif ($self->{state} eq 'comment dash') {
1039     if ($self->{next_input_character} == 0x002D) { # -
1040     $self->{state} = 'comment end';
1041     !!!next-input-character;
1042     redo A;
1043     } elsif ($self->{next_input_character} == -1) {
1044 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1045 wakaba 1.1 $self->{state} = 'data';
1046     ## reconsume
1047    
1048     !!!emit ($self->{current_token}); # comment
1049     undef $self->{current_token};
1050    
1051     redo A;
1052     } else {
1053     $self->{current_token}->{data} .= '-' . chr ($self->{next_input_character}); # comment
1054     $self->{state} = 'comment';
1055     !!!next-input-character;
1056     redo A;
1057     }
1058     } elsif ($self->{state} eq 'comment end') {
1059     if ($self->{next_input_character} == 0x003E) { # >
1060     $self->{state} = 'data';
1061     !!!next-input-character;
1062    
1063     !!!emit ($self->{current_token}); # comment
1064     undef $self->{current_token};
1065    
1066     redo A;
1067     } elsif ($self->{next_input_character} == 0x002D) { # -
1068 wakaba 1.3 !!!parse-error (type => 'dash in comment');
1069 wakaba 1.1 $self->{current_token}->{data} .= '-'; # comment
1070     ## Stay in the state
1071     !!!next-input-character;
1072     redo A;
1073     } elsif ($self->{next_input_character} == -1) {
1074 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1075 wakaba 1.1 $self->{state} = 'data';
1076     ## reconsume
1077    
1078     !!!emit ($self->{current_token}); # comment
1079     undef $self->{current_token};
1080    
1081     redo A;
1082     } else {
1083 wakaba 1.3 !!!parse-error (type => 'dash in comment');
1084 wakaba 1.1 $self->{current_token}->{data} .= '--' . chr ($self->{next_input_character}); # comment
1085     $self->{state} = 'comment';
1086     !!!next-input-character;
1087     redo A;
1088     }
1089     } elsif ($self->{state} eq 'DOCTYPE') {
1090     if ($self->{next_input_character} == 0x0009 or # HT
1091     $self->{next_input_character} == 0x000A or # LF
1092     $self->{next_input_character} == 0x000B or # VT
1093     $self->{next_input_character} == 0x000C or # FF
1094     $self->{next_input_character} == 0x0020) { # SP
1095     $self->{state} = 'before DOCTYPE name';
1096     !!!next-input-character;
1097     redo A;
1098     } else {
1099 wakaba 1.3 !!!parse-error (type => 'no space before DOCTYPE name');
1100 wakaba 1.1 $self->{state} = 'before DOCTYPE name';
1101     ## reconsume
1102     redo A;
1103     }
1104     } elsif ($self->{state} eq 'before DOCTYPE name') {
1105     if ($self->{next_input_character} == 0x0009 or # HT
1106     $self->{next_input_character} == 0x000A or # LF
1107     $self->{next_input_character} == 0x000B or # VT
1108     $self->{next_input_character} == 0x000C or # FF
1109     $self->{next_input_character} == 0x0020) { # SP
1110     ## Stay in the state
1111     !!!next-input-character;
1112     redo A;
1113     } elsif (0x0061 <= $self->{next_input_character} and
1114     $self->{next_input_character} <= 0x007A) { # a..z
1115 wakaba 1.4 ## ISSUE: "Set the token's name name to the" in the spec
1116 wakaba 1.1 $self->{current_token} = {type => 'DOCTYPE',
1117     name => chr ($self->{next_input_character} - 0x0020),
1118     error => 1};
1119     $self->{state} = 'DOCTYPE name';
1120     !!!next-input-character;
1121     redo A;
1122     } elsif ($self->{next_input_character} == 0x003E) { # >
1123 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
1124 wakaba 1.1 $self->{state} = 'data';
1125     !!!next-input-character;
1126    
1127     !!!emit ({type => 'DOCTYPE', name => '', error => 1});
1128    
1129     redo A;
1130     } elsif ($self->{next_input_character} == -1) {
1131 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
1132 wakaba 1.1 $self->{state} = 'data';
1133     ## reconsume
1134    
1135     !!!emit ({type => 'DOCTYPE', name => '', error => 1});
1136    
1137     redo A;
1138     } else {
1139     $self->{current_token} = {type => 'DOCTYPE',
1140     name => chr ($self->{next_input_character}),
1141     error => 1};
1142 wakaba 1.4 ## ISSUE: "Set the token's name name to the" in the spec
1143 wakaba 1.1 $self->{state} = 'DOCTYPE name';
1144     !!!next-input-character;
1145     redo A;
1146     }
1147     } elsif ($self->{state} eq 'DOCTYPE name') {
1148     if ($self->{next_input_character} == 0x0009 or # HT
1149     $self->{next_input_character} == 0x000A or # LF
1150     $self->{next_input_character} == 0x000B or # VT
1151     $self->{next_input_character} == 0x000C or # FF
1152     $self->{next_input_character} == 0x0020) { # SP
1153     $self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML'); # DOCTYPE
1154     $self->{state} = 'after DOCTYPE name';
1155     !!!next-input-character;
1156     redo A;
1157     } elsif ($self->{next_input_character} == 0x003E) { # >
1158     $self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML'); # DOCTYPE
1159     $self->{state} = 'data';
1160     !!!next-input-character;
1161    
1162     !!!emit ($self->{current_token}); # DOCTYPE
1163     undef $self->{current_token};
1164    
1165     redo A;
1166     } elsif (0x0061 <= $self->{next_input_character} and
1167     $self->{next_input_character} <= 0x007A) { # a..z
1168     $self->{current_token}->{name} .= chr ($self->{next_input_character} - 0x0020); # DOCTYPE
1169     #$self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML');
1170     ## Stay in the state
1171     !!!next-input-character;
1172     redo A;
1173     } elsif ($self->{next_input_character} == -1) {
1174 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1175 wakaba 1.1 $self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML'); # DOCTYPE
1176     $self->{state} = 'data';
1177     ## reconsume
1178    
1179     !!!emit ($self->{current_token});
1180     undef $self->{current_token};
1181    
1182     redo A;
1183     } else {
1184     $self->{current_token}->{name}
1185     .= chr ($self->{next_input_character}); # DOCTYPE
1186     #$self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML');
1187     ## Stay in the state
1188     !!!next-input-character;
1189     redo A;
1190     }
1191     } elsif ($self->{state} eq 'after DOCTYPE name') {
1192     if ($self->{next_input_character} == 0x0009 or # HT
1193     $self->{next_input_character} == 0x000A or # LF
1194     $self->{next_input_character} == 0x000B or # VT
1195     $self->{next_input_character} == 0x000C or # FF
1196     $self->{next_input_character} == 0x0020) { # SP
1197     ## Stay in the state
1198     !!!next-input-character;
1199     redo A;
1200     } elsif ($self->{next_input_character} == 0x003E) { # >
1201     $self->{state} = 'data';
1202     !!!next-input-character;
1203    
1204     !!!emit ($self->{current_token}); # DOCTYPE
1205     undef $self->{current_token};
1206    
1207     redo A;
1208     } elsif ($self->{next_input_character} == -1) {
1209 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1210 wakaba 1.1 $self->{state} = 'data';
1211     ## reconsume
1212    
1213     !!!emit ($self->{current_token}); # DOCTYPE
1214     undef $self->{current_token};
1215    
1216     redo A;
1217     } else {
1218 wakaba 1.3 !!!parse-error (type => 'string after DOCTYPE name');
1219 wakaba 1.1 $self->{current_token}->{error} = 1; # DOCTYPE
1220     $self->{state} = 'bogus DOCTYPE';
1221     !!!next-input-character;
1222     redo A;
1223     }
1224     } elsif ($self->{state} eq 'bogus DOCTYPE') {
1225     if ($self->{next_input_character} == 0x003E) { # >
1226     $self->{state} = 'data';
1227     !!!next-input-character;
1228    
1229     !!!emit ($self->{current_token}); # DOCTYPE
1230     undef $self->{current_token};
1231    
1232     redo A;
1233     } elsif ($self->{next_input_character} == -1) {
1234 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1235 wakaba 1.1 $self->{state} = 'data';
1236     ## reconsume
1237    
1238     !!!emit ($self->{current_token}); # DOCTYPE
1239     undef $self->{current_token};
1240    
1241     redo A;
1242     } else {
1243     ## Stay in the state
1244     !!!next-input-character;
1245     redo A;
1246     }
1247     } else {
1248     die "$0: $self->{state}: Unknown state";
1249     }
1250     } # A
1251    
1252     die "$0: _get_next_token: unexpected case";
1253     } # _get_next_token
1254    
## Attempt to consume an entity (character reference).
##
## The caller has already consumed the |&|; the character following it
## is in |$self->{next_input_character}|.
##
## Returns a hash reference |{type => 'character', data => $string}|
## when a reference is recognised (a missing |;| is reported as a parse
## error but the reference is still accepted).  Returns |undef| when no
## reference is recognised; in that case whatever was read is handed
## back (via |!!!back-next-input-character| or |!!!back-token|) so that
## the caller only has to output the |&| itself.
##
## NOTE: The |!!!...| lines are preprocessor macros; their expansions
## are not defined in this part of the file.
sub _tokenize_attempt_to_consume_an_entity ($) {
  my $self = shift;

  if ($self->{next_input_character} == 0x0023) { # #
    !!!next-input-character;
    if ($self->{next_input_character} == 0x0078 or # x
        $self->{next_input_character} == 0x0058) { # X
      ## Hexadecimal numeric character reference.  Remember which of
      ## |x| / |X| was used so that it can be restored on failure.
      my $x_char = $self->{next_input_character};
      !!!next-input-character;

      ## |$num| stays undefined until at least one digit has been read.
      my $num;
      HEX: {
        my $digit;
        if (0x0030 <= $self->{next_input_character} and
            $self->{next_input_character} <= 0x0039) { # 0..9
          $digit = $self->{next_input_character} - 0x0030;
        } elsif (0x0061 <= $self->{next_input_character} and
                 $self->{next_input_character} <= 0x0066) { # a..f
          ## ISSUE: the spec says U+0078, which is apparently incorrect
          $digit = $self->{next_input_character} - 0x0061 + 10;
        } elsif (0x0041 <= $self->{next_input_character} and
                 $self->{next_input_character} <= 0x0046) { # A..F
          ## ISSUE: the spec says U+0058, which is apparently incorrect
          $digit = $self->{next_input_character} - 0x0041 + 10;
        }
        if (defined $digit) {
          $num = ($num || 0) * 0x10 + $digit;
          !!!next-input-character;
          redo HEX;
        }
      } # HEX

      unless (defined $num) { # no hexadecimal digit
        !!!parse-error (type => 'bare hcro');
        ## Hand back |x| *and* the character that followed it, then
        ## make |#| the next input character again.  (Pushing back only
        ## |x| would drop the character after it; compare the
        ## |bare nero| case below.)
        !!!back-next-input-character ($x_char, $self->{next_input_character});
        $self->{next_input_character} = 0x0023; # #
        return undef;
      }

      if ($self->{next_input_character} == 0x003B) { # ;
        !!!next-input-character;
      } else {
        !!!parse-error (type => 'no refc');
      }

      ## TODO: check the definition for |a valid Unicode character|.
      ## <http://lists.whatwg.org/pipermail/whatwg-whatwg.org/2006-December/thread.html#8189>
      if ($num > 1114111 or $num == 0) { # > U+10FFFF or U+0000
        $num = 0xFFFD; # REPLACEMENT CHARACTER
        ## ISSUE: Why this is not an error?
      } elsif (0x80 <= $num and $num <= 0x9F) {
        ## C1 code points are remapped through |$c1_entity_char|.
        !!!parse-error (type => sprintf 'c1 entity:U+%04X', $num);
        $num = $c1_entity_char->{$num};
      }

      return {type => 'character', data => chr $num};
    } elsif (0x0030 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x0039) { # 0..9
      ## Decimal numeric character reference.
      my $code = $self->{next_input_character} - 0x0030;
      !!!next-input-character;

      while (0x0030 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x0039) { # 0..9
        $code *= 10;
        $code += $self->{next_input_character} - 0x0030;

        !!!next-input-character;
      }

      if ($self->{next_input_character} == 0x003B) { # ;
        !!!next-input-character;
      } else {
        !!!parse-error (type => 'no refc');
      }

      ## TODO: check the definition for |a valid Unicode character|.
      if ($code > 1114111 or $code == 0) { # > U+10FFFF or U+0000
        $code = 0xFFFD; # REPLACEMENT CHARACTER
        ## ISSUE: Why this is not an error?
      } elsif (0x80 <= $code and $code <= 0x9F) {
        ## C1 code points are remapped through |$c1_entity_char|.
        !!!parse-error (type => sprintf 'c1 entity:U+%04X', $code);
        $code = $c1_entity_char->{$code};
      }

      return {type => 'character', data => chr $code};
    } else {
      ## |&#| followed by neither |x|/|X| nor a digit: restore the
      ## current character and make |#| the next input character.
      !!!parse-error (type => 'bare nero');
      !!!back-next-input-character ($self->{next_input_character});
      $self->{next_input_character} = 0x0023; # #
      return undef;
    }
  } elsif ((0x0041 <= $self->{next_input_character} and # A
            $self->{next_input_character} <= 0x005A) or # Z
           (0x0061 <= $self->{next_input_character} and # a
            $self->{next_input_character} <= 0x007A)) { # z
    ## Named entity reference.
    my $entity_name = chr $self->{next_input_character};
    !!!next-input-character;

    ## |$value| is what will be returned or handed back: the raw
    ## characters read so far, or, once a name has matched, the
    ## replacement text followed by any characters read after it.
    my $value = $entity_name;
    ## |$match|: 1 = a name terminated by |;| matched,
    ## -1 = a name matched but no |;| has been seen, 0 = no match.
    my $match = 0;
    require Whatpm::_NamedEntityList;
    our $EntityChar;

    while (length $entity_name < 10 and
           ## NOTE: Some number greater than the maximum length of entity name
           ((0x0041 <= $self->{next_input_character} and # A
             $self->{next_input_character} <= 0x005A) or # Z
            (0x0061 <= $self->{next_input_character} and # a
             $self->{next_input_character} <= 0x007A) or # z
            (0x0030 <= $self->{next_input_character} and # 0
             $self->{next_input_character} <= 0x0039) or # 9
            $self->{next_input_character} == 0x003B)) { # ;
      $entity_name .= chr $self->{next_input_character};
      if (defined $EntityChar->{$entity_name}) {
        $value = $EntityChar->{$entity_name};
        if ($self->{next_input_character} == 0x003B) { # ;
          $match = 1;
          !!!next-input-character;
          last;
        } else {
          ## Keep reading: a longer name may still match.
          $match = -1;
        }
      } else {
        $value .= chr $self->{next_input_character};
      }
      !!!next-input-character;
    }

    if ($match > 0) {
      return {type => 'character', data => $value};
    } elsif ($match < 0) {
      !!!parse-error (type => 'refc');
      return {type => 'character', data => $value};
    } else {
      !!!parse-error (type => 'bare ero');
      ## NOTE: No characters are consumed in the spec.
      !!!back-token ({type => 'character', data => $value});
      return undef;
    }
  } else {
    ## no characters are consumed
    !!!parse-error (type => 'bare ero');
    return undef;
  }
} # _tokenize_attempt_to_consume_an_entity
1398    
## Prepare the document for tree construction by switching its strict
## error checking off.
##
## NOTE: $self->{document} MUST be specified before this method is called
sub _initialize_tree_constructor ($) {
  my ($self) = @_;
  my $doc = $self->{document};
  ## TODO: Turn mutation events off # MUST
  ## TODO: Turn loose Document option (manakai extension) on
  ## TODO: Mark the Document as an HTML document # MUST
  $doc->strict_error_checking (0);
} # _initialize_tree_constructor
1407    
## Counterpart of |_initialize_tree_constructor|: switch the document's
## strict error checking back on once tree construction is finished.
sub _terminate_tree_constructor ($) {
  my ($self) = @_;
  my $doc = $self->{document};
  ## TODO: Turn mutation events on
  $doc->strict_error_checking (1);
} # _terminate_tree_constructor
1413    
1414     ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
1415    
1416 wakaba 1.3 { # tree construction stage
1417     my $token;
1418    
## Run the tree construction stage: fetch the first token, reset the
## tree construction state kept in |$self|, then run the initial, root
## element and main steps in that order.
sub _construct_tree ($) {
  my $self = shift;

  ## When an interactive UA render the $self->{document} available
  ## to the user, or when it begin accepting user input, are
  ## not defined.

  ## Append a character: collect it and all subsequent consecutive
  ## characters and insert one Text node whose data is concatenation
  ## of all those characters. # MUST

  !!!next-token;

  ## Start from a clean state: no form / head element pointers, an
  ## empty stack of open elements and no innerHTML context node.
  $self->{insertion_mode} = 'before head';
  $self->{form_element} = undef;
  $self->{head_element} = undef;
  $self->{open_elements} = [];
  $self->{inner_html_node} = undef;

  $self->_tree_construction_initial; # MUST
  $self->_tree_construction_root_element;
  $self->_tree_construction_main;
} # _construct_tree
1442    
## Initial step of tree construction: handle everything up to and
## including the DOCTYPE.
##
## A DOCTYPE token becomes a document type node on the document and
## the next token is fetched.  Leading white space characters are
## appended to the document as text.  Any other token is reported as
## a missing DOCTYPE and left in |$token| for the next step.
sub _tree_construction_initial ($) {
  my ($self) = @_;

  TOKEN: {
    my $type = $token->{type};

    if ($type eq 'DOCTYPE') {
      if ($token->{error}) {
        ## ISSUE: Spec currently left this case undefined.
        !!!parse-error (type => 'bogus DOCTYPE');
      }
      my $doctype = $self->{document}->create_document_type_definition
          ($token->{name});
      $self->{document}->append_child ($doctype);
      ## Go on to the root element step with the next token.
      !!!next-token;
      return;
    }

    if ($type eq 'character') {
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
        $self->{document}->manakai_append_text ($1);
        ## ISSUE: DOM3 Core does not allow Document > Text
        if (not length $token->{data}) {
          ## Only white space: stay in this step with the next token.
          !!!next-token;
          redo TOKEN;
        }
      }
      ## Non-white-space characters remain: missing DOCTYPE (below).
    } elsif (not ($type eq 'comment' or
                  $type eq 'start tag' or
                  $type eq 'end tag' or
                  $type eq 'end-of-file')) {
      die "$0: $token->{type}: Unknown token";
    }

    ## ISSUE: Spec currently left this case undefined.
    !!!parse-error (type => 'missing DOCTYPE');
    ## The current token is reprocessed by the root element step.
    return;
  } # TOKEN
} # _tree_construction_initial
1491    
## Root element step of tree construction.
##
## DOCTYPE tokens are reported and ignored, comments are appended to
## the document and leading white space is appended as text; each of
## these keeps the parser in this step.  The first other token causes
## an |html| element to be created, appended to the document and pushed
## onto the stack of open elements; that token is left in |$token| to
## be reprocessed by the caller's next step.
sub _tree_construction_root_element ($) {
  my ($self) = @_;

  SKIP: {
    my $type = $token->{type};

    if ($type eq 'DOCTYPE') {
      !!!parse-error (type => 'in html:#DOCTYPE');
      ## Ignore the token and stay in this step.
      !!!next-token;
      redo SKIP;
    }

    if ($type eq 'comment') {
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);
      ## Stay in this step.
      !!!next-token;
      redo SKIP;
    }

    if ($type eq 'character') {
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
        $self->{document}->manakai_append_text ($1);
        ## ISSUE: DOM3 Core does not allow Document > Text
        if (not length $token->{data}) {
          ## Only white space: stay in this step.
          !!!next-token;
          redo SKIP;
        }
      }
    } elsif (not ($type eq 'start tag' or
                  $type eq 'end tag' or
                  $type eq 'end-of-file')) {
      die "$0: $token->{type}: Unknown token";
    }
    ## ISSUE: There is an issue in the spec (start tag, end tag and
    ## end-of-file tokens).
  } # SKIP

  ## Create the root element; the current token is reprocessed later.
  my $root_element; !!!create-element ($root_element, 'html');
  $self->{document}->append_child ($root_element);
  push @{$self->{open_elements}}, [$root_element, 'html'];
  return;
} # _tree_construction_root_element
1538    
## Reset |$self->{insertion_mode}| according to the stack of open
## elements.
##
## Entries of |$self->{open_elements}| (and |$self->{inner_html_node}|,
## when defined) are array references whose element [0] is the node and
## whose element [1] is its tag name.  The stack is walked from the
## current node towards the first node; the first entry whose tag name
## selects a mode decides the result.
sub _reset_insertion_mode ($) {
  my $self = shift;

  ## Tag name -> insertion mode (Steps 4..13).
  my $mode_for = {
    select => 'in select',
    td => 'in cell',
    th => 'in cell',
    tr => 'in row',
    tbody => 'in table body',
    thead => 'in table head',
    tfoot => 'in table foot',
    caption => 'in caption',
    colgroup => 'in column group',
    table => 'in table',
    head => 'in body', # not in head!
    body => 'in body',
    frameset => 'in frameset',
  };

  ## Step 1
  my $last;

  ## Step 2
  my $i = -1;
  my $node = $self->{open_elements}->[$i];

  ## Step 3
  S3: {
    if ($self->{open_elements}->[0]->[0] eq $node->[0]) {
      $last = 1;
      ## The innerHTML context node replaces |node| only once the
      ## first node of the stack has been reached; otherwise the real
      ## open elements above it would never be examined.
      if (defined $self->{inner_html_node}) {
        if ($self->{inner_html_node}->[1] eq 'td' or
            $self->{inner_html_node}->[1] eq 'th') {
          #
        } else {
          $node = $self->{inner_html_node};
        }
      }
    }

    ## Step 4..13
    my $new_mode = $mode_for->{$node->[1]};
    if (defined $new_mode) {
      $self->{insertion_mode} = $new_mode;
      return;
    }

    ## Step 14
    if ($node->[1] eq 'html') {
      unless (defined $self->{head_element}) {
        $self->{insertion_mode} = 'before head';
      } else {
        $self->{insertion_mode} = 'after head';
      }
      return;
    }

    ## Step 15
    if ($last) {
      $self->{insertion_mode} = 'in body';
      return;
    }

    ## Step 16
    $i--;
    $node = $self->{open_elements}->[$i];

    ## Step 17
    redo S3;
  } # S3
} # _reset_insertion_mode
1600    
1601     sub _tree_construction_main ($) {
1602     my $self = shift;
1603    
1604     my $phase = 'main';
1605 wakaba 1.1
1606     my $active_formatting_elements = [];
1607    
1608     my $reconstruct_active_formatting_elements = sub { # MUST
         ## Re-opens formatting elements that are still listed in
         ## |$active_formatting_elements| but are no longer present in
         ## |$self->{open_elements}|.
         ##
         ## Walks back from the end of the list to the nearest entry that
         ## is a '#marker' or is still in |$self->{open_elements}|; then,
         ## for each entry after it, makes a clone, hands the cloned node
         ## to |$insert|, pushes the clone entry onto
         ## |$self->{open_elements}| and stores it in the list in place of
         ## the old entry.
         ##
         ## Argument: |$insert| - code reference called with the cloned
         ## node (callers in this file pass |$insert_to_current|).
         ## Entries are array references: |->[0]| is the node (or the
         ## string '#marker'), |->[1]| is copied unchanged into the clone
         ## entry.
         ## NOTE(review): the "Step N" labels presumably follow the HTML5
         ## "reconstruct the active formatting elements" algorithm --
         ## confirm against the spec revision this file targets.
1609     my $insert = shift;
1610    
1611     ## Step 1
1612     return unless @$active_formatting_elements;
1613    
1614     ## Step 3
         ## |$i| is a negative index, i.e. it counts from the end of the list.
1615     my $i = -1;
1616     my $entry = $active_formatting_elements->[$i];
1617    
1618     ## Step 2
         ## Nothing to do if the last entry is a marker or is still open.
1619     return if $entry->[0] eq '#marker';
1620 wakaba 1.3 for (@{$self->{open_elements}}) {
1621 wakaba 1.1 if ($entry->[0] eq $_->[0]) {
1622     return;
1623     }
1624     }
1625    
         ## S4 rewinds |$i| until the entry before it is a marker or an
         ## open element, or until the first entry of the list is reached.
1626     S4: {
1627     ## Step 4
         ## First entry reached: leave the block without the |$i++| of
         ## Step 7, so that cloning starts at this entry.
1628     last S4 if $active_formatting_elements->[0]->[0] eq $entry->[0];
1629    
1630     ## Step 5
1631     $i--;
1632     $entry = $active_formatting_elements->[$i];
1633    
1634     ## Step 6
         ## A marker or an open element ends the rewind (fall through to
         ## Step 7); any other entry repeats from Step 4.
1635     if ($entry->[0] eq '#marker') {
1636     #
1637     } else {
1638     my $in_open_elements;
1639 wakaba 1.3 OE: for (@{$self->{open_elements}}) {
1640 wakaba 1.1 if ($entry->[0] eq $_->[0]) {
1641     $in_open_elements = 1;
1642     last OE;
1643     }
1644     }
1645     if ($in_open_elements) {
1646     #
1647     } else {
1648     redo S4;
1649     }
1650     }
1651    
1652     ## Step 7
         ## Step forward again to the first entry that has to be re-opened.
1653     $i++;
1654     $entry = $active_formatting_elements->[$i];
1655     } # S4
1656    
         ## S7 clones |$entry| and every later entry, one per iteration.
1657     S7: {
1658     ## Step 8
         ## NOTE(review): the argument 0 presumably requests a shallow
         ## clone (no children) -- confirm against the DOM implementation.
1659     my $clone = [$entry->[0]->clone_node (0), $entry->[1]];
1660    
1661     ## Step 9
1662     $insert->($clone->[0]);
1663 wakaba 1.3 push @{$self->{open_elements}}, $clone;
1664 wakaba 1.1
1665     ## Step 10
         ## The list entry and the stack entry are the same array reference.
1666 wakaba 1.3 $active_formatting_elements->[$i] = $self->{open_elements}->[-1];
1667 wakaba 1.1
1668     ## Step 11
         ## Stop once the entry just replaced is the last one in the list.
1669     unless ($clone->[0] eq $active_formatting_elements->[-1]->[0]) {
1670     ## Step 7'
1671     $i++;
1672     $entry = $active_formatting_elements->[$i];
1673    
1674     redo S7;
1675     }
1676     } # S7
1677     }; # $reconstruct_active_formatting_elements
1678    
1679     my $clear_up_to_marker = sub {
           ## Removes the most recently added '#marker' entry, together with
           ## every entry after it, from |$active_formatting_elements|.  The
           ## list is left untouched when it contains no marker.
           my $position = @$active_formatting_elements;
           while ($position-- > 0) {
             next unless $active_formatting_elements->[$position]->[0] eq '#marker';
             ## Two-argument splice: drops everything from |$position| on.
             splice @$active_formatting_elements, $position;
             return;
           }
1686     }; # $clear_up_to_marker
1687    
1688     my $style_start_tag = sub {
         ## Handles a |style| start tag held in |$token| (a variable
         ## declared outside this closure).
         ##
         ## Creates a |style| element with the token's attributes, appends
         ## it to |$self->{head_element}| when the insertion mode is
         ## 'in head' and a head element is known (otherwise to the node of
         ## the last entry in |$self->{open_elements}|), collects all
         ## following character tokens into a single text node, and finally
         ## checks that the token that ended the text is the |style| end tag.
         ## Takes no arguments; the current token has been replaced by the
         ## next one when the closure returns.
         ## NOTE(review): |!!!create-element|, |!!!next-token| and
         ## |!!!parse-error| are expanded by a preprocessor that is not
         ## visible here; |!!!next-token| evidently replaces |$token|.
1689 wakaba 1.6 my $style_el; !!!create-element ($style_el, 'style', $token->{attributes});
1690 wakaba 1.3 ## $self->{insertion_mode} eq 'in head' and ... (always true)
1691     (($self->{insertion_mode} eq 'in head' and defined $self->{head_element})
1692     ? $self->{head_element} : $self->{open_elements}->[-1]->[0])
1693 wakaba 1.1 ->append_child ($style_el);
         ## Switch the content model to CDATA while the element's text is
         ## read; it is switched back to PCDATA below.
1694     $self->{content_model_flag} = 'CDATA';
         ## NOTE(review): presumably resets a tokenizer escape state left
         ## over from earlier input -- confirm in the tokenizer.
1695 wakaba 1.13 delete $self->{escape}; # MUST
1696 wakaba 1.1
         ## Concatenate consecutive character tokens so that at most one
         ## text node is appended.
1697     my $text = '';
1698     !!!next-token;
1699     while ($token->{type} eq 'character') {
1700     $text .= $token->{data};
1701     !!!next-token;
1702     } # stop if non-character token or tokenizer stops tokenising
1703     if (length $text) {
1704     $style_el->manakai_append_text ($text);
1705     }
1706    
1707     $self->{content_model_flag} = 'PCDATA';
1708    
         ## Any token other than the |style| end tag is reported; in both
         ## cases that token is then skipped by the |!!!next-token| below.
1709     if ($token->{type} eq 'end tag' and $token->{tag_name} eq 'style') {
1710     ## Ignore the token
1711     } else {
1712 wakaba 1.3 !!!parse-error (type => 'in CDATA:#'.$token->{type});
1713 wakaba 1.1 ## ISSUE: And ignore?
1714     }
1715     !!!next-token;
1716     }; # $style_start_tag
1717    
1718     my $script_start_tag = sub {
         ## Handles a |script| start tag held in |$token| (a variable
         ## declared outside this closure).
         ##
         ## Creates a |script| element with the token's attributes, collects
         ## all following character tokens into a single text node of that
         ## element, and checks that the token that ended the text is the
         ## |script| end tag.  The element is appended to the tree only
         ## when |$self->{inner_html_node}| is not defined; the script
         ## execution steps are still TODOs.
         ## Takes no arguments; the current token has been replaced by the
         ## next one when the closure returns.
         ## NOTE(review): the |!!!...| macros are expanded by a preprocessor
         ## that is not visible here; |!!!next-token| evidently replaces
         ## |$token|.
1719     my $script_el;
1720     !!!create-element ($script_el, 'script', $token->{attributes});
1721     ## TODO: mark as "parser-inserted"
1722    
         ## Switch the content model to CDATA while the script text is read;
         ## it is switched back to PCDATA below.
1723     $self->{content_model_flag} = 'CDATA';
         ## NOTE(review): presumably resets a tokenizer escape state left
         ## over from earlier input -- confirm in the tokenizer.
1724 wakaba 1.13 delete $self->{escape}; # MUST
1725 wakaba 1.1
         ## Concatenate consecutive character tokens so that at most one
         ## text node is appended.
1726     my $text = '';
1727     !!!next-token;
1728     while ($token->{type} eq 'character') {
1729     $text .= $token->{data};
1730     !!!next-token;
1731     } # stop if non-character token or tokenizer stops tokenising
1732     if (length $text) {
1733     $script_el->manakai_append_text ($text);
1734     }
1735    
1736     $self->{content_model_flag} = 'PCDATA';
1737    
         ## Any token other than the |script| end tag is reported; in both
         ## cases that token is skipped by the final |!!!next-token|.
1738     if ($token->{type} eq 'end tag' and
1739     $token->{tag_name} eq 'script') {
1740     ## Ignore the token
1741     } else {
1742 wakaba 1.3 !!!parse-error (type => 'in CDATA:#'.$token->{type});
1743 wakaba 1.1 ## ISSUE: And ignore?
1744     ## TODO: mark as "already executed"
1745     }
1746    
         ## When |inner_html_node| is defined the element is currently not
         ## inserted anywhere (this branch contains only a TODO).
1747 wakaba 1.3 if (defined $self->{inner_html_node}) {
1748     ## TODO: mark as "already executed"
1749     } else {
1750 wakaba 1.1 ## TODO: $old_insertion_point = current insertion point
1751     ## TODO: insertion point = just before the next input character
1752    
         ## Same parent selection as in |$style_start_tag|: the head element
         ## in 'in head' mode when known, otherwise the current node.
1753 wakaba 1.3 (($self->{insertion_mode} eq 'in head' and defined $self->{head_element})
1754     ? $self->{head_element} : $self->{open_elements}->[-1]->[0])->append_child ($script_el);
1755 wakaba 1.1
1756     ## TODO: insertion point = $old_insertion_point (might be "undefined")
1757    
1758     ## TODO: if there is a script that will execute as soon as the parser resume, then...
1759     }
1760    
1761     !!!next-token;
1762     }; # $script_start_tag
1763    
1764     my $formatting_end_tag = sub {
         ## Handles an end tag named |$tag_name| for a formatting element
         ## (called from the 'a' start tag handling in this file with 'a').
         ##
         ## Looks up the latest entry with that tag name in
         ## |$active_formatting_elements| (not looking past a '#marker').
         ## If there is none, or the element is not open or not in scope,
         ## a parse error is reported and the token is skipped.  Otherwise
         ## the elements between the formatting element and the "furthest
         ## block" below it in |$self->{open_elements}| are re-parented and
         ## cloned as needed, and the whole procedure is repeated
         ## (|redo FET|) until one of the returning branches is taken.
         ##
         ## Argument: |$tag_name| - tag name of the end tag.
         ## Every exit path runs |!!!next-token| and then returns nothing.
         ## Entries of both lists are array references: |->[0]| is the node
         ## (or '#marker'), |->[1]| the tag name.
         ## NOTE(review): the step labels presumably follow the HTML5
         ## "adoption agency" steps for formatting end tags -- confirm
         ## against the spec revision this file targets.
1765     my $tag_name = shift;
1766    
1767     FET: {
1768     ## Step 1
         ## Search backwards for the formatting element, stopping at the
         ## last marker.
1769     my $formatting_element;
1770     my $formatting_element_i_in_active;
1771     AFE: for (reverse 0..$#$active_formatting_elements) {
1772     if ($active_formatting_elements->[$_]->[1] eq $tag_name) {
1773     $formatting_element = $active_formatting_elements->[$_];
1774     $formatting_element_i_in_active = $_;
1775     last AFE;
1776     } elsif ($active_formatting_elements->[$_]->[0] eq '#marker') {
1777     last AFE;
1778     }
1779     } # AFE
1780     unless (defined $formatting_element) {
1781 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$tag_name);
1782 wakaba 1.1 ## Ignore the token
1783     !!!next-token;
1784     return;
1785     }
1786     ## has an element in scope
         ## |$in_scope| is cleared once a scope-limiting element has been
         ## passed while walking up the stack of open elements.
1787     my $in_scope = 1;
1788     my $formatting_element_i_in_open;
1789 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
1790     my $node = $self->{open_elements}->[$_];
1791 wakaba 1.1 if ($node->[0] eq $formatting_element->[0]) {
1792     if ($in_scope) {
1793     $formatting_element_i_in_open = $_;
1794     last INSCOPE;
1795     } else { # in open elements but not in scope
1796 wakaba 1.4 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
1797 wakaba 1.1 ## Ignore the token
1798     !!!next-token;
1799     return;
1800     }
1801     } elsif ({
1802     table => 1, caption => 1, td => 1, th => 1,
1803     button => 1, marquee => 1, object => 1, html => 1,
1804     }->{$node->[1]}) {
1805     $in_scope = 0;
1806     }
1807     } # INSCOPE
1808     unless (defined $formatting_element_i_in_open) {
1809 wakaba 1.4 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
         ## The formatting element is listed but no longer open: remove
         ## exactly that entry.  It is not necessarily the last entry of
         ## the list (entries with other tag names may follow it), so a
         ## plain |pop| could remove the wrong one.
1810 wakaba 1.1 splice @$active_formatting_elements, $formatting_element_i_in_active, 1; # $formatting_element
1811     !!!next-token; ## TODO: ok?
1812     return;
1813     }
         ## |not ... eq| compares the nodes with |eq|, as is done for
         ## nodes throughout this closure.
1814 wakaba 1.3 if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
1815 wakaba 1.4 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
1816 wakaba 1.1 }
1817    
1818     ## Step 2
         ## Walking from the current node up to the formatting element,
         ## the last match wins, i.e. the qualifying node closest to the
         ## formatting element.
1819     my $furthest_block;
1820     my $furthest_block_i_in_open;
1821 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
1822     my $node = $self->{open_elements}->[$_];
1823 wakaba 1.1 if (not $formatting_category->{$node->[1]} and
1824     #not $phrasing_category->{$node->[1]} and
1825     ($special_category->{$node->[1]} or
1826     $scoping_category->{$node->[1]})) {
1827     $furthest_block = $node;
1828     $furthest_block_i_in_open = $_;
1829     } elsif ($node->[0] eq $formatting_element->[0]) {
1830     last OE;
1831     }
1832     } # OE
1833    
1834     ## Step 3
         ## No furthest block: pop the stack down to and including the
         ## formatting element and drop its list entry.
1835     unless (defined $furthest_block) { # MUST
1836 wakaba 1.3 splice @{$self->{open_elements}}, $formatting_element_i_in_open;
1837 wakaba 1.1 splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
1838     !!!next-token;
1839     return;
1840     }
1841    
1842     ## Step 4
1843 wakaba 1.3 my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1];
1844 wakaba 1.1
1845     ## Step 5
1846     my $furthest_block_parent = $furthest_block->[0]->parent_node;
1847     if (defined $furthest_block_parent) {
1848     $furthest_block_parent->remove_child ($furthest_block->[0]);
1849     }
1850    
1851     ## Step 6
         ## The bookmark is remembered as the node of the list entry that
         ## precedes the formatting element.
         ## NOTE(review): when the formatting element is the first entry
         ## the index -1 wraps around to the last entry of the list, and
         ## Step 12 may then insert the clone at the wrong position (or
         ## use an undefined |$i|) -- left unchanged here, needs a
         ## dedicated fix with tests.
1852     my $bookmark_prev_el
1853     = $active_formatting_elements->[$formatting_element_i_in_active - 1]
1854     ->[0];
1855    
1856     ## Step 7
1857     my $node = $furthest_block;
1858     my $node_i_in_open = $furthest_block_i_in_open;
1859     my $last_node = $furthest_block;
1860     S7: {
1861     ## Step 1
1862     $node_i_in_open--;
1863 wakaba 1.3 $node = $self->{open_elements}->[$node_i_in_open];
1864 wakaba 1.1
1865     ## Step 2
         ## A node that is not in the list of active formatting elements
         ## is removed from the stack and the inner loop restarts.
1866     my $node_i_in_active;
1867     S7S2: {
1868     for (reverse 0..$#$active_formatting_elements) {
1869     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
1870     $node_i_in_active = $_;
1871     last S7S2;
1872     }
1873     }
1874 wakaba 1.3 splice @{$self->{open_elements}}, $node_i_in_open, 1;
1875 wakaba 1.1 redo S7;
1876     } # S7S2
1877    
1878     ## Step 3
1879     last S7 if $node->[0] eq $formatting_element->[0];
1880    
1881     ## Step 4
1882     if ($last_node->[0] eq $furthest_block->[0]) {
1883     $bookmark_prev_el = $node->[0];
1884     }
1885    
1886     ## Step 5
         ## A node that already has children is replaced, in both lists,
         ## by a clone made with |clone_node (0)|.
1887     if ($node->[0]->has_child_nodes ()) {
1888     my $clone = [$node->[0]->clone_node (0), $node->[1]];
1889     $active_formatting_elements->[$node_i_in_active] = $clone;
1890 wakaba 1.3 $self->{open_elements}->[$node_i_in_open] = $clone;
1891 wakaba 1.1 $node = $clone;
1892     }
1893    
1894     ## Step 6
1895     $node->[0]->append_child ($last_node->[0]);
1896    
1897     ## Step 7
1898     $last_node = $node;
1899    
1900     ## Step 8
1901     redo S7;
1902     } # S7
1903    
1904     ## Step 8
1905     $common_ancestor_node->[0]->append_child ($last_node->[0]);
1906    
1907     ## Step 9
1908     my $clone = [$formatting_element->[0]->clone_node (0),
1909     $formatting_element->[1]];
1910    
1911     ## Step 10
         ## Copy the child list first: appending to the clone changes the
         ## furthest block's children while they are being moved.
1912     my @cn = @{$furthest_block->[0]->child_nodes};
1913     $clone->[0]->append_child ($_) for @cn;
1914    
1915     ## Step 11
1916     $furthest_block->[0]->append_child ($clone->[0]);
1917    
1918     ## Step 12
         ## Remove the formatting element from the list and insert the
         ## clone after the bookmark entry; |$i--| compensates for a
         ## removal in front of the bookmark.
1919     my $i;
1920     AFE: for (reverse 0..$#$active_formatting_elements) {
1921     if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
1922     splice @$active_formatting_elements, $_, 1;
1923     $i-- and last AFE if defined $i;
1924     } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
1925     $i = $_;
1926     }
1927     } # AFE
1928     splice @$active_formatting_elements, $i + 1, 0, $clone;
1929    
1930     ## Step 13
         ## Remove the formatting element from the stack of open elements
         ## and insert the clone immediately after the furthest block.
1931     undef $i;
1932 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
1933     if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
1934     splice @{$self->{open_elements}}, $_, 1;
1935 wakaba 1.1 $i-- and last OE if defined $i;
1936 wakaba 1.3 } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
1937 wakaba 1.1 $i = $_;
1938     }
1939     } # OE
         ## LENGTH must be 0 (insert), as in Step 12.  With LENGTH 1 the
         ## stack entry following the furthest block, if any, was
         ## overwritten by the clone and silently dropped from the stack.
1940 wakaba 1.3 splice @{$self->{open_elements}}, $i + 1, 0, $clone;
1941 wakaba 1.1
1942     ## Step 14
1943     redo FET;
1944     } # FET
1945     }; # $formatting_end_tag
1946    
1947     my $insert_to_current = sub {
           ## Appends the node given as the first argument to the current
           ## node, i.e. the node of the last entry in
           ## |$self->{open_elements}|.
           my ($child) = @_;
           my $current_node = $self->{open_elements}->[-1]->[0];
           $current_node->append_child ($child);
1949 wakaba 1.1 }; # $insert_to_current
1950    
1951     my $insert_to_foster = sub {
           ## Inserts the node given as the first argument.  When the current
           ## node is a table, tbody, tfoot, thead or tr element the node is
           ## placed relative to the last |table| entry of the stack of open
           ## elements instead of inside the current node; otherwise it is
           ## simply appended to the current node.
           my ($child) = @_;
           my $open_elements = $self->{open_elements};

           ## Ordinary case: the current node takes the child directly.
           my $current_is_table_part = {
             table => 1, tbody => 1, tfoot => 1,
             thead => 1, tr => 1,
           }->{$open_elements->[-1]->[1]};
           return $open_elements->[-1]->[0]->append_child ($child)
               unless $current_is_table_part;

           # MUST
           ## Find the last |table| entry on the stack, if there is one.
           my $table_index = $#$open_elements;
           $table_index--
               until $table_index < 0
                   or $open_elements->[$table_index]->[1] eq 'table';

           my $foster_parent_element;
           my $next_sibling;
           if ($table_index >= 0) {
             my $parent = $open_elements->[$table_index]->[0]->parent_node;
             if (defined $parent and $parent->node_type == 1) {
               ## The table has a parent of node type 1: insert the child
               ## into that parent, just before the table.
               $foster_parent_element = $parent;
               $next_sibling = $open_elements->[$table_index]->[0];
             } else {
               ## Otherwise use the stack entry preceding the table.
               $foster_parent_element
                   = $open_elements->[$table_index - 1]->[0];
             }
           }

           ## No table entry, or no node found above: fall back to the node
           ## of the first stack entry.
           $foster_parent_element = $open_elements->[0]->[0]
               unless defined $foster_parent_element;

           ## |$next_sibling| is undefined unless the table's parent is used.
           $foster_parent_element->insert_before
               ($child, $next_sibling);
1980     }; # $insert_to_foster
1981    
1982     my $in_body = sub {
1983     my $insert = shift;
1984     if ($token->{type} eq 'start tag') {
1985     if ($token->{tag_name} eq 'script') {
1986     $script_start_tag->();
1987     return;
1988     } elsif ($token->{tag_name} eq 'style') {
1989     $style_start_tag->();
1990     return;
1991     } elsif ({
1992     base => 1, link => 1, meta => 1,
1993     }->{$token->{tag_name}}) {
1994 wakaba 1.3 !!!parse-error (type => 'in body:'.$token->{tag_name});
1995 wakaba 1.1 ## NOTE: This is an "as if in head" code clone
1996     my $el;
1997     !!!create-element ($el, $token->{tag_name}, $token->{attributes});
1998 wakaba 1.3 if (defined $self->{head_element}) {
1999     $self->{head_element}->append_child ($el);
2000 wakaba 1.1 } else {
2001     $insert->($el);
2002     }
2003    
2004     !!!next-token;
2005     return;
2006     } elsif ($token->{tag_name} eq 'title') {
2007 wakaba 1.3 !!!parse-error (type => 'in body:title');
2008 wakaba 1.1 ## NOTE: There is an "as if in head" code clone
2009     my $title_el;
2010     !!!create-element ($title_el, 'title', $token->{attributes});
2011 wakaba 1.3 (defined $self->{head_element} ? $self->{head_element} : $self->{open_elements}->[-1]->[0])
2012 wakaba 1.1 ->append_child ($title_el);
2013     $self->{content_model_flag} = 'RCDATA';
2014 wakaba 1.13 delete $self->{escape}; # MUST
2015 wakaba 1.1
2016     my $text = '';
2017     !!!next-token;
2018     while ($token->{type} eq 'character') {
2019     $text .= $token->{data};
2020     !!!next-token;
2021     }
2022     if (length $text) {
2023     $title_el->manakai_append_text ($text);
2024     }
2025    
2026     $self->{content_model_flag} = 'PCDATA';
2027    
2028     if ($token->{type} eq 'end tag' and
2029     $token->{tag_name} eq 'title') {
2030     ## Ignore the token
2031     } else {
2032 wakaba 1.3 !!!parse-error (type => 'in RCDATA:#'.$token->{type});
2033 wakaba 1.1 ## ISSUE: And ignore?
2034     }
2035     !!!next-token;
2036     return;
2037     } elsif ($token->{tag_name} eq 'body') {
2038 wakaba 1.3 !!!parse-error (type => 'in body:body');
2039 wakaba 1.1
2040 wakaba 1.3 if (@{$self->{open_elements}} == 1 or
2041     $self->{open_elements}->[1]->[1] ne 'body') {
2042 wakaba 1.1 ## Ignore the token
2043     } else {
2044 wakaba 1.3 my $body_el = $self->{open_elements}->[1]->[0];
2045 wakaba 1.1 for my $attr_name (keys %{$token->{attributes}}) {
2046     unless ($body_el->has_attribute_ns (undef, $attr_name)) {
2047     $body_el->set_attribute_ns
2048     (undef, [undef, $attr_name],
2049     $token->{attributes}->{$attr_name}->{value});
2050     }
2051     }
2052     }
2053     !!!next-token;
2054     return;
2055     } elsif ({
2056     address => 1, blockquote => 1, center => 1, dir => 1,
2057     div => 1, dl => 1, fieldset => 1, listing => 1,
2058     menu => 1, ol => 1, p => 1, ul => 1,
2059     pre => 1,
2060     }->{$token->{tag_name}}) {
2061     ## has a p element in scope
2062 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2063 wakaba 1.1 if ($_->[1] eq 'p') {
2064     !!!back-token;
2065     $token = {type => 'end tag', tag_name => 'p'};
2066     return;
2067     } elsif ({
2068     table => 1, caption => 1, td => 1, th => 1,
2069     button => 1, marquee => 1, object => 1, html => 1,
2070     }->{$_->[1]}) {
2071     last INSCOPE;
2072     }
2073     } # INSCOPE
2074    
2075     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2076     if ($token->{tag_name} eq 'pre') {
2077     !!!next-token;
2078     if ($token->{type} eq 'character') {
2079     $token->{data} =~ s/^\x0A//;
2080     unless (length $token->{data}) {
2081     !!!next-token;
2082     }
2083     }
2084     } else {
2085     !!!next-token;
2086     }
2087     return;
2088     } elsif ($token->{tag_name} eq 'form') {
2089 wakaba 1.3 if (defined $self->{form_element}) {
2090     !!!parse-error (type => 'in form:form');
2091 wakaba 1.1 ## Ignore the token
2092 wakaba 1.7 !!!next-token;
2093     return;
2094 wakaba 1.1 } else {
2095     ## has a p element in scope
2096 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2097 wakaba 1.1 if ($_->[1] eq 'p') {
2098     !!!back-token;
2099     $token = {type => 'end tag', tag_name => 'p'};
2100     return;
2101     } elsif ({
2102     table => 1, caption => 1, td => 1, th => 1,
2103     button => 1, marquee => 1, object => 1, html => 1,
2104     }->{$_->[1]}) {
2105     last INSCOPE;
2106     }
2107     } # INSCOPE
2108    
2109     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2110 wakaba 1.3 $self->{form_element} = $self->{open_elements}->[-1]->[0];
2111 wakaba 1.1 !!!next-token;
2112     return;
2113     }
2114     } elsif ($token->{tag_name} eq 'li') {
2115     ## has a p element in scope
2116 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2117 wakaba 1.1 if ($_->[1] eq 'p') {
2118     !!!back-token;
2119     $token = {type => 'end tag', tag_name => 'p'};
2120     return;
2121     } elsif ({
2122     table => 1, caption => 1, td => 1, th => 1,
2123     button => 1, marquee => 1, object => 1, html => 1,
2124     }->{$_->[1]}) {
2125     last INSCOPE;
2126     }
2127     } # INSCOPE
2128    
2129     ## Step 1
2130     my $i = -1;
2131 wakaba 1.3 my $node = $self->{open_elements}->[$i];
2132 wakaba 1.1 LI: {
2133     ## Step 2
2134     if ($node->[1] eq 'li') {
2135 wakaba 1.8 if ($i != -1) {
2136     !!!parse-error (type => 'end tag missing:'.
2137     $self->{open_elements}->[-1]->[1]);
2138     ## TODO: test
2139     }
2140 wakaba 1.3 splice @{$self->{open_elements}}, $i;
2141 wakaba 1.1 last LI;
2142     }
2143    
2144     ## Step 3
2145     if (not $formatting_category->{$node->[1]} and
2146     #not $phrasing_category->{$node->[1]} and
2147     ($special_category->{$node->[1]} or
2148     $scoping_category->{$node->[1]}) and
2149     $node->[1] ne 'address' and $node->[1] ne 'div') {
2150     last LI;
2151     }
2152    
2153     ## Step 4
2154     $i--;
2155 wakaba 1.3 $node = $self->{open_elements}->[$i];
2156 wakaba 1.1 redo LI;
2157     } # LI
2158    
2159     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2160     !!!next-token;
2161     return;
2162     } elsif ($token->{tag_name} eq 'dd' or $token->{tag_name} eq 'dt') {
2163     ## has a p element in scope
2164 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2165 wakaba 1.1 if ($_->[1] eq 'p') {
2166     !!!back-token;
2167     $token = {type => 'end tag', tag_name => 'p'};
2168     return;
2169     } elsif ({
2170     table => 1, caption => 1, td => 1, th => 1,
2171     button => 1, marquee => 1, object => 1, html => 1,
2172     }->{$_->[1]}) {
2173     last INSCOPE;
2174     }
2175     } # INSCOPE
2176    
2177     ## Step 1
2178     my $i = -1;
2179 wakaba 1.3 my $node = $self->{open_elements}->[$i];
2180 wakaba 1.1 LI: {
2181     ## Step 2
2182     if ($node->[1] eq 'dt' or $node->[1] eq 'dd') {
2183 wakaba 1.8 if ($i != -1) {
2184     !!!parse-error (type => 'end tag missing:'.
2185     $self->{open_elements}->[-1]->[1]);
2186     ## TODO: test
2187     }
2188 wakaba 1.3 splice @{$self->{open_elements}}, $i;
2189 wakaba 1.1 last LI;
2190     }
2191    
2192     ## Step 3
2193     if (not $formatting_category->{$node->[1]} and
2194     #not $phrasing_category->{$node->[1]} and
2195     ($special_category->{$node->[1]} or
2196     $scoping_category->{$node->[1]}) and
2197     $node->[1] ne 'address' and $node->[1] ne 'div') {
2198     last LI;
2199     }
2200    
2201     ## Step 4
2202     $i--;
2203 wakaba 1.3 $node = $self->{open_elements}->[$i];
2204 wakaba 1.1 redo LI;
2205     } # LI
2206    
2207     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2208     !!!next-token;
2209     return;
2210     } elsif ($token->{tag_name} eq 'plaintext') {
2211     ## has a p element in scope
2212 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2213 wakaba 1.1 if ($_->[1] eq 'p') {
2214     !!!back-token;
2215     $token = {type => 'end tag', tag_name => 'p'};
2216     return;
2217     } elsif ({
2218     table => 1, caption => 1, td => 1, th => 1,
2219     button => 1, marquee => 1, object => 1, html => 1,
2220     }->{$_->[1]}) {
2221     last INSCOPE;
2222     }
2223     } # INSCOPE
2224    
2225     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2226    
2227     $self->{content_model_flag} = 'PLAINTEXT';
2228    
2229     !!!next-token;
2230     return;
2231     } elsif ({
2232     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2233     }->{$token->{tag_name}}) {
2234     ## has a p element in scope
2235 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2236     my $node = $self->{open_elements}->[$_];
2237 wakaba 1.1 if ($node->[1] eq 'p') {
2238     !!!back-token;
2239     $token = {type => 'end tag', tag_name => 'p'};
2240     return;
2241     } elsif ({
2242     table => 1, caption => 1, td => 1, th => 1,
2243     button => 1, marquee => 1, object => 1, html => 1,
2244     }->{$node->[1]}) {
2245     last INSCOPE;
2246     }
2247     } # INSCOPE
2248    
2249     ## has an element in scope
2250     my $i;
2251 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2252     my $node = $self->{open_elements}->[$_];
2253 wakaba 1.1 if ({
2254     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2255     }->{$node->[1]}) {
2256     $i = $_;
2257     last INSCOPE;
2258     } elsif ({
2259     table => 1, caption => 1, td => 1, th => 1,
2260     button => 1, marquee => 1, object => 1, html => 1,
2261     }->{$node->[1]}) {
2262     last INSCOPE;
2263     }
2264     } # INSCOPE
2265    
2266     if (defined $i) {
2267 wakaba 1.3 !!!parse-error (type => 'in hn:hn');
2268     splice @{$self->{open_elements}}, $i;
2269 wakaba 1.1 }
2270    
2271     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2272    
2273     !!!next-token;
2274     return;
2275     } elsif ($token->{tag_name} eq 'a') {
2276     AFE: for my $i (reverse 0..$#$active_formatting_elements) {
2277     my $node = $active_formatting_elements->[$i];
2278     if ($node->[1] eq 'a') {
2279 wakaba 1.3 !!!parse-error (type => 'in a:a');
2280 wakaba 1.1
2281     !!!back-token;
2282     $token = {type => 'end tag', tag_name => 'a'};
2283     $formatting_end_tag->($token->{tag_name});
2284    
2285     AFE2: for (reverse 0..$#$active_formatting_elements) {
2286     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
2287     splice @$active_formatting_elements, $_, 1;
2288     last AFE2;
2289     }
2290     } # AFE2
2291 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
2292     if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
2293     splice @{$self->{open_elements}}, $_, 1;
2294 wakaba 1.1 last OE;
2295     }
2296     } # OE
2297     last AFE;
2298     } elsif ($node->[0] eq '#marker') {
2299     last AFE;
2300     }
2301     } # AFE
2302    
2303     $reconstruct_active_formatting_elements->($insert_to_current);
2304    
2305     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2306 wakaba 1.3 push @$active_formatting_elements, $self->{open_elements}->[-1];
2307 wakaba 1.1
2308     !!!next-token;
2309     return;
2310     } elsif ({
         ## Start tags of formatting elements: reconstruct the active
         ## formatting elements, insert the element and remember its stack
         ## entry in |$active_formatting_elements|.
         ## The key 'strike' was misspelled 'strile', so a |strike| start
         ## tag never reached this branch and was never recorded as an
         ## active formatting element.
2311     b => 1, big => 1, em => 1, font => 1, i => 1,
2312     nobr => 1, s => 1, small => 1, strike => 1,
2313     strong => 1, tt => 1, u => 1,
2314     }->{$token->{tag_name}}) {
2315     $reconstruct_active_formatting_elements->($insert_to_current);
2316    
2317     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
         ## The list entry is the same array reference as the stack entry
         ## just pushed for the new element.
2318 wakaba 1.3 push @$active_formatting_elements, $self->{open_elements}->[-1];
2319 wakaba 1.1
2320     !!!next-token;
2321     return;
2322     } elsif ($token->{tag_name} eq 'button') {
2323     ## has a button element in scope
2324 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2325     my $node = $self->{open_elements}->[$_];
2326 wakaba 1.1 if ($node->[1] eq 'button') {
2327 wakaba 1.3 !!!parse-error (type => 'in button:button');
2328 wakaba 1.1 !!!back-token;
2329     $token = {type => 'end tag', tag_name => 'button'};
2330     return;
2331     } elsif ({
2332     table => 1, caption => 1, td => 1, th => 1,
2333     button => 1, marquee => 1, object => 1, html => 1,
2334     }->{$node->[1]}) {
2335     last INSCOPE;
2336     }
2337     } # INSCOPE
2338    
2339     $reconstruct_active_formatting_elements->($insert_to_current);
2340    
2341     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2342     push @$active_formatting_elements, ['#marker', ''];
2343    
2344     !!!next-token;
2345     return;
2346     } elsif ($token->{tag_name} eq 'marquee' or
2347     $token->{tag_name} eq 'object') {
2348     $reconstruct_active_formatting_elements->($insert_to_current);
2349    
2350     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2351     push @$active_formatting_elements, ['#marker', ''];
2352    
2353     !!!next-token;
2354     return;
2355     } elsif ($token->{tag_name} eq 'xmp') {
2356     $reconstruct_active_formatting_elements->($insert_to_current);
2357    
2358     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2359    
2360     $self->{content_model_flag} = 'CDATA';
2361 wakaba 1.13 delete $self->{escape}; # MUST
2362 wakaba 1.1
2363     !!!next-token;
2364     return;
2365     } elsif ($token->{tag_name} eq 'table') {
2366     ## has a p element in scope
2367 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2368 wakaba 1.1 if ($_->[1] eq 'p') {
2369     !!!back-token;
2370     $token = {type => 'end tag', tag_name => 'p'};
2371     return;
2372     } elsif ({
2373     table => 1, caption => 1, td => 1, th => 1,
2374     button => 1, marquee => 1, object => 1, html => 1,
2375     }->{$_->[1]}) {
2376     last INSCOPE;
2377     }
2378     } # INSCOPE
2379    
2380     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2381    
2382 wakaba 1.3 $self->{insertion_mode} = 'in table';
2383 wakaba 1.1
2384     !!!next-token;
2385     return;
2386     } elsif ({
2387     area => 1, basefont => 1, bgsound => 1, br => 1,
2388     embed => 1, img => 1, param => 1, spacer => 1, wbr => 1,
2389     image => 1,
2390     }->{$token->{tag_name}}) {
2391     if ($token->{tag_name} eq 'image') {
2392 wakaba 1.3 !!!parse-error (type => 'image');
2393 wakaba 1.1 $token->{tag_name} = 'img';
2394     }
2395    
2396     $reconstruct_active_formatting_elements->($insert_to_current);
2397    
2398     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2399 wakaba 1.3 pop @{$self->{open_elements}};
2400 wakaba 1.1
2401     !!!next-token;
2402     return;
2403     } elsif ($token->{tag_name} eq 'hr') {
2404     ## has a p element in scope
2405 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2406 wakaba 1.1 if ($_->[1] eq 'p') {
2407     !!!back-token;
2408     $token = {type => 'end tag', tag_name => 'p'};
2409     return;
2410     } elsif ({
2411     table => 1, caption => 1, td => 1, th => 1,
2412     button => 1, marquee => 1, object => 1, html => 1,
2413     }->{$_->[1]}) {
2414     last INSCOPE;
2415     }
2416     } # INSCOPE
2417    
2418     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2419 wakaba 1.3 pop @{$self->{open_elements}};
2420 wakaba 1.1
2421     !!!next-token;
2422     return;
2423     } elsif ($token->{tag_name} eq 'input') {
2424     $reconstruct_active_formatting_elements->($insert_to_current);
2425    
2426     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2427 wakaba 1.3 ## TODO: associate with $self->{form_element} if defined
2428     pop @{$self->{open_elements}};
2429 wakaba 1.1
2430     !!!next-token;
2431     return;
2432     } elsif ($token->{tag_name} eq 'isindex') {
2433 wakaba 1.3 !!!parse-error (type => 'isindex');
2434 wakaba 1.1
2435 wakaba 1.3 if (defined $self->{form_element}) {
2436 wakaba 1.1 ## Ignore the token
2437     !!!next-token;
2438     return;
2439     } else {
2440     my $at = $token->{attributes};
2441     $at->{name} = {name => 'name', value => 'isindex'};
2442     my @tokens = (
2443     {type => 'start tag', tag_name => 'form'},
2444     {type => 'start tag', tag_name => 'hr'},
2445     {type => 'start tag', tag_name => 'p'},
2446     {type => 'start tag', tag_name => 'label'},
2447     {type => 'character',
2448     data => 'This is a searchable index. Insert your search keywords here: '}, # SHOULD
2449     ## TODO: make this configurable
2450     {type => 'start tag', tag_name => 'input', attributes => $at},
2451     #{type => 'character', data => ''}, # SHOULD
2452     {type => 'end tag', tag_name => 'label'},
2453     {type => 'end tag', tag_name => 'p'},
2454     {type => 'start tag', tag_name => 'hr'},
2455     {type => 'end tag', tag_name => 'form'},
2456     );
2457     $token = shift @tokens;
2458     !!!back-token (@tokens);
2459     return;
2460     }
2461     } elsif ({
         ## Start tags whose content is read as raw text: |textarea| in
         ## RCDATA mode, |iframe|, |noembed| and |noframes| in CDATA mode
         ## (|noscript| is listed with a false value and therefore does
         ## not reach this branch).  The element is created, inserted via
         ## |$insert|, and all following character tokens are collected
         ## into a single text node.
2462     textarea => 1,
2463 wakaba 1.5 iframe => 1,
2464 wakaba 1.1 noembed => 1,
2465     noframes => 1,
2466     noscript => 0, ## TODO: 1 if scripting is enabled
2467     }->{$token->{tag_name}}) {
         ## Remember the element's name: |$token| is replaced by the
         ## |!!!next-token| calls below.
2468     my $tag_name = $token->{tag_name};
2469     my $el;
2470     !!!create-element ($el, $token->{tag_name}, $token->{attributes});
2471    
2472     if ($token->{tag_name} eq 'textarea') {
2473 wakaba 1.3 ## TODO: $self->{form_element} if defined
2474 wakaba 1.1 $self->{content_model_flag} = 'RCDATA';
2475     } else {
2476     $self->{content_model_flag} = 'CDATA';
2477     }
2478 wakaba 1.13 delete $self->{escape}; # MUST
2479 wakaba 1.1
2480     $insert->($el);
2481    
2482     my $text = '';
         ## For |textarea| a single leading U+000A in the first character
         ## token is dropped; a token that becomes empty is skipped.
2483 wakaba 1.9 if ($token->{tag_name} eq 'textarea') {
2484     !!!next-token;
2485     if ($token->{type} eq 'character') {
2486     $token->{data} =~ s/^\x0A//;
2487     unless (length $token->{data}) {
2488     !!!next-token;
2489     }
2490     }
2491     } else {
2492     !!!next-token;
2493     }
2494 wakaba 1.1 while ($token->{type} eq 'character') {
2495     $text .= $token->{data};
2496     !!!next-token;
2497     }
2498     if (length $text) {
2499     $el->manakai_append_text ($text);
2500     }
2501    
2502     $self->{content_model_flag} = 'PCDATA';
2503    
2504     if ($token->{type} eq 'end tag' and
2505     $token->{tag_name} eq $tag_name) {
2506     ## Ignore the token
2507     } else {
         ## Choose the error type from the element being parsed
         ## (|$tag_name|).  |$token| is by now the unexpected token that
         ## ended the text: its |tag_name| is unrelated to the element and
         ## may be undefined for tokens that are not tags, so a |textarea|
         ## was reported as 'in CDATA' instead of 'in RCDATA'.
2508 wakaba 1.10 if ($tag_name eq 'textarea') {
2509     !!!parse-error (type => 'in RCDATA:#'.$token->{type});
2510     } else {
2511 wakaba 1.3 !!!parse-error (type => 'in CDATA:#'.$token->{type});
2512     }
2513 wakaba 1.1 ## ISSUE: And ignore?
2514     }
2515     !!!next-token;
2516     return;
2517     } elsif ($token->{tag_name} eq 'select') {
2518     $reconstruct_active_formatting_elements->($insert_to_current);
2519    
2520     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2521    
2522 wakaba 1.3 $self->{insertion_mode} = 'in select';
2523 wakaba 1.1 !!!next-token;
2524     return;
2525     } elsif ({
2526     caption => 1, col => 1, colgroup => 1, frame => 1,
2527     frameset => 1, head => 1, option => 1, optgroup => 1,
2528     tbody => 1, td => 1, tfoot => 1, th => 1,
2529     thead => 1, tr => 1,
2530     }->{$token->{tag_name}}) {
2531 wakaba 1.3 !!!parse-error (type => 'in body:'.$token->{tag_name});
2532 wakaba 1.1 ## Ignore the token
2533     !!!next-token;
2534     return;
2535    
2536     ## ISSUE: An issue on HTML5 new elements in the spec.
2537     } else {
2538     $reconstruct_active_formatting_elements->($insert_to_current);
2539    
2540     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2541    
2542     !!!next-token;
2543     return;
2544     }
2545     } elsif ($token->{type} eq 'end tag') {
2546     if ($token->{tag_name} eq 'body') {
2547 wakaba 1.3 if (@{$self->{open_elements}} > 1 and $self->{open_elements}->[1]->[1] eq 'body') {
2548 wakaba 1.1 ## ISSUE: There is an issue in the spec.
2549 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'body') {
2550     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2551 wakaba 1.1 }
2552 wakaba 1.3 $self->{insertion_mode} = 'after body';
2553 wakaba 1.1 !!!next-token;
2554     return;
2555     } else {
2556 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
2557 wakaba 1.1 ## Ignore the token
2558     !!!next-token;
2559     return;
2560     }
2561     } elsif ($token->{tag_name} eq 'html') {
         ## </html>: same checks as the </body> arm, except that the
         ## token is not consumed; the caller reprocesses it once the
         ## insertion mode has been switched to 'after body'.
2562 wakaba 1.3 if (@{$self->{open_elements}} > 1 and $self->{open_elements}->[1]->[1] eq 'body') {
2563 wakaba 1.1 ## ISSUE: There is an issue in the spec.
2564 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'body') {
         ## Report the current (innermost) node, as the </body> arm
         ## does; ->[1] is always |body| inside this branch.
2565     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2566 wakaba 1.1 }
2567 wakaba 1.3 $self->{insertion_mode} = 'after body';
2568 wakaba 1.1 ## reprocess
2569     return;
2570     } else {
         ## No |body| as the second open element: nothing to close.
2571 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
2572 wakaba 1.1 ## Ignore the token
2573     !!!next-token;
2574     return;
2575     }
2576     } elsif ({
2577     address => 1, blockquote => 1, center => 1, dir => 1,
2578     div => 1, dl => 1, fieldset => 1, listing => 1,
2579     menu => 1, ol => 1, pre => 1, ul => 1,
2580     p => 1,
2581     dd => 1, dt => 1, li => 1,
2582     button => 1, marquee => 1, object => 1,
2583     }->{$token->{tag_name}}) {
2584     ## has an element in scope
2585     my $i;
2586 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2587     my $node = $self->{open_elements}->[$_];
2588 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
2589     ## generate implied end tags
2590     if ({
2591     dd => ($token->{tag_name} ne 'dd'),
2592     dt => ($token->{tag_name} ne 'dt'),
2593     li => ($token->{tag_name} ne 'li'),
2594     p => ($token->{tag_name} ne 'p'),
2595     td => 1, th => 1, tr => 1,
2596 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
2597 wakaba 1.1 !!!back-token;
2598     $token = {type => 'end tag',
2599 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
2600 wakaba 1.1 return;
2601     }
2602     $i = $_;
2603     last INSCOPE unless $token->{tag_name} eq 'p';
2604     } elsif ({
2605     table => 1, caption => 1, td => 1, th => 1,
2606     button => 1, marquee => 1, object => 1, html => 1,
2607     }->{$node->[1]}) {
2608     last INSCOPE;
2609     }
2610     } # INSCOPE
2611    
2612 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
2613     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2614 wakaba 1.1 }
2615    
2616 wakaba 1.3 splice @{$self->{open_elements}}, $i if defined $i;
2617 wakaba 1.1 $clear_up_to_marker->()
2618     if {
2619     button => 1, marquee => 1, object => 1,
2620     }->{$token->{tag_name}};
2621     !!!next-token;
2622     return;
2623 wakaba 1.12 } elsif ($token->{tag_name} eq 'form') {
2624     ## has an element in scope
2625     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2626     my $node = $self->{open_elements}->[$_];
2627     if ($node->[1] eq $token->{tag_name}) {
2628     ## generate implied end tags
2629     if ({
2630     dd => 1, dt => 1, li => 1, p => 1,
2631     td => 1, th => 1, tr => 1,
2632     }->{$self->{open_elements}->[-1]->[1]}) {
2633     !!!back-token;
2634     $token = {type => 'end tag',
2635     tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
2636     return;
2637     }
2638     last INSCOPE;
2639     } elsif ({
2640     table => 1, caption => 1, td => 1, th => 1,
2641     button => 1, marquee => 1, object => 1, html => 1,
2642     }->{$node->[1]}) {
2643     last INSCOPE;
2644     }
2645     } # INSCOPE
2646    
2647     if ($self->{open_elements}->[-1]->[1] eq $token->{tag_name}) {
2648     pop @{$self->{open_elements}};
2649     } else {
2650     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2651     }
2652    
2653     undef $self->{form_element};
2654     !!!next-token;
2655     return;
2656 wakaba 1.1 } elsif ({
2657     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2658     }->{$token->{tag_name}}) {
2659     ## has an element in scope
2660     my $i;
2661 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2662     my $node = $self->{open_elements}->[$_];
2663 wakaba 1.1 if ({
2664     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2665     }->{$node->[1]}) {
2666     ## generate implied end tags
2667     if ({
2668     dd => 1, dt => 1, li => 1, p => 1,
2669     td => 1, th => 1, tr => 1,
2670 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
2671 wakaba 1.1 !!!back-token;
2672     $token = {type => 'end tag',
2673 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
2674 wakaba 1.1 return;
2675     }
2676     $i = $_;
2677     last INSCOPE;
2678     } elsif ({
2679     table => 1, caption => 1, td => 1, th => 1,
2680     button => 1, marquee => 1, object => 1, html => 1,
2681     }->{$node->[1]}) {
2682     last INSCOPE;
2683     }
2684     } # INSCOPE
2685    
2686 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
2687     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2688 wakaba 1.1 }
2689    
2690 wakaba 1.3 splice @{$self->{open_elements}}, $i if defined $i;
2691 wakaba 1.1 !!!next-token;
2692     return;
2693     } elsif ({
         ## End tags of formatting elements are delegated to
         ## $formatting_end_tag (defined elsewhere in this file).
         ## NOTE: |strike| was misspelled |strile|, which sent </strike>
         ## to the generic "any other end tag" branch below.
2694     a => 1,
2695     b => 1, big => 1, em => 1, font => 1, i => 1,
2696     nobr => 1, s => 1, small => 1, strike => 1,
2697     strong => 1, tt => 1, u => 1,
2698     }->{$token->{tag_name}}) {
2699     $formatting_end_tag->($token->{tag_name});
2700 wakaba 1.8 ## TODO: <http://html5.org/tools/web-apps-tracker?from=883&to=884>
2701 wakaba 1.1 return;
2702     } elsif ({
2703     caption => 1, col => 1, colgroup => 1, frame => 1,
2704     frameset => 1, head => 1, option => 1, optgroup => 1,
2705     tbody => 1, td => 1, tfoot => 1, th => 1,
2706     thead => 1, tr => 1,
2707     area => 1, basefont => 1, bgsound => 1, br => 1,
2708     embed => 1, hr => 1, iframe => 1, image => 1,
2709 wakaba 1.5 img => 1, input => 1, isindex => 1, noembed => 1,
2710 wakaba 1.1 noframes => 1, param => 1, select => 1, spacer => 1,
2711     table => 1, textarea => 1, wbr => 1,
2712     noscript => 0, ## TODO: if scripting is enabled
2713     }->{$token->{tag_name}}) {
2714 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
2715 wakaba 1.1 ## Ignore the token
2716     !!!next-token;
2717     return;
2718    
2719     ## ISSUE: Issue on HTML5 new elements in spec
2720    
2721     } else {
2722     ## Step 1
2723     my $node_i = -1;
2724 wakaba 1.3 my $node = $self->{open_elements}->[$node_i];
2725 wakaba 1.1
2726     ## Step 2
2727     S2: {
2728     if ($node->[1] eq $token->{tag_name}) {
2729     ## Step 1
2730     ## generate implied end tags
2731     if ({
2732     dd => 1, dt => 1, li => 1, p => 1,
2733     td => 1, th => 1, tr => 1,
2734 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
2735 wakaba 1.1 !!!back-token;
2736     $token = {type => 'end tag',
2737 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
2738 wakaba 1.1 return;
2739     }
2740    
2741     ## Step 2
2742 wakaba 1.3 if ($token->{tag_name} ne $self->{open_elements}->[-1]->[1]) {
2743     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2744 wakaba 1.1 }
2745    
2746     ## Step 3
2747 wakaba 1.3 splice @{$self->{open_elements}}, $node_i;
2748    
2749     !!!next-token;
2750 wakaba 1.1 last S2;
2751     } else {
2752     ## Step 3
2753     if (not $formatting_category->{$node->[1]} and
2754     #not $phrasing_category->{$node->[1]} and
2755     ($special_category->{$node->[1]} or
2756     $scoping_category->{$node->[1]})) {
2757 wakaba 1.3 !!!parse-error (type => 'not closed:'.$node->[1]);
2758 wakaba 1.1 ## Ignore the token
2759     !!!next-token;
2760     last S2;
2761     }
2762     }
2763    
2764     ## Step 4
2765     $node_i--;
2766 wakaba 1.3 $node = $self->{open_elements}->[$node_i];
2767 wakaba 1.1
2768     ## Step 5;
2769     redo S2;
2770     } # S2
2771 wakaba 1.3 return;
2772 wakaba 1.1 }
2773     }
2774     }; # $in_body
2775    
2776     B: {
2777 wakaba 1.3 if ($phase eq 'main') {
2778 wakaba 1.1 if ($token->{type} eq 'DOCTYPE') {
2779 wakaba 1.3 !!!parse-error (type => 'in html:#DOCTYPE');
2780 wakaba 1.1 ## Ignore the token
2781     ## Stay in the phase
2782     !!!next-token;
2783     redo B;
2784     } elsif ($token->{type} eq 'start tag' and
2785     $token->{tag_name} eq 'html') {
2786     ## TODO: unless it is the first start tag token, parse-error
2787 wakaba 1.3 my $top_el = $self->{open_elements}->[0]->[0];
2788 wakaba 1.1 for my $attr_name (keys %{$token->{attributes}}) {
2789     unless ($top_el->has_attribute_ns (undef, $attr_name)) {
2790     $top_el->set_attribute_ns
2791     (undef, [undef, $attr_name],
2792     $token->{attributes}->{$attr_name}->{value});
2793     }
2794     }
2795     !!!next-token;
2796     redo B;
2797     } elsif ($token->{type} eq 'end-of-file') {
2798     ## Generate implied end tags
2799     if ({
2800     dd => 1, dt => 1, li => 1, p => 1, td => 1, th => 1, tr => 1,
2801 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
2802 wakaba 1.1 !!!back-token;
2803 wakaba 1.3 $token = {type => 'end tag', tag_name => $self->{open_elements}->[-1]->[1]};
2804 wakaba 1.1 redo B;
2805     }
2806    
2807 wakaba 1.3 if (@{$self->{open_elements}} > 2 or
2808     (@{$self->{open_elements}} == 2 and $self->{open_elements}->[1]->[1] ne 'body')) {
2809     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2810     } elsif (defined $self->{inner_html_node} and
2811     @{$self->{open_elements}} > 1 and
2812     $self->{open_elements}->[1]->[1] ne 'body') {
2813     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2814 wakaba 1.1 }
2815    
2816     ## Stop parsing
2817     last B;
2818    
2819     ## ISSUE: There is an issue in the spec.
2820     } else {
2821 wakaba 1.3 if ($self->{insertion_mode} eq 'before head') {
2822 wakaba 1.1 if ($token->{type} eq 'character') {
2823     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
2824 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
2825 wakaba 1.1 unless (length $token->{data}) {
2826     !!!next-token;
2827     redo B;
2828     }
2829     }
2830     ## As if <head>
2831 wakaba 1.3 !!!create-element ($self->{head_element}, 'head');
2832     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2833     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
2834     $self->{insertion_mode} = 'in head';
2835 wakaba 1.1 ## reprocess
2836     redo B;
2837     } elsif ($token->{type} eq 'comment') {
2838     my $comment = $self->{document}->create_comment ($token->{data});
2839 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
2840 wakaba 1.1 !!!next-token;
2841     redo B;
2842     } elsif ($token->{type} eq 'start tag') {
2843     my $attr = $token->{tag_name} eq 'head' ? $token->{attributes} : {};
2844 wakaba 1.3 !!!create-element ($self->{head_element}, 'head', $attr);
2845     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2846     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
2847     $self->{insertion_mode} = 'in head';
2848 wakaba 1.1 if ($token->{tag_name} eq 'head') {
2849     !!!next-token;
2850     #} elsif ({
2851     # base => 1, link => 1, meta => 1,
2852     # script => 1, style => 1, title => 1,
2853     # }->{$token->{tag_name}}) {
2854     # ## reprocess
2855     } else {
2856     ## reprocess
2857     }
2858     redo B;
2859     } elsif ($token->{type} eq 'end tag') {
2860     if ($token->{tag_name} eq 'html') {
2861     ## As if <head>
2862 wakaba 1.3 !!!create-element ($self->{head_element}, 'head');
2863     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2864     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
2865     $self->{insertion_mode} = 'in head';
2866 wakaba 1.1 ## reprocess
2867     redo B;
2868     } else {
2869 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
2870 wakaba 1.1 ## Ignore the token
2871     !!!next-token;
2872     redo B;
2873     }
2874     } else {
2875     die "$0: $token->{type}: Unknown type";
2876     }
2877 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in head') {
2878 wakaba 1.1 if ($token->{type} eq 'character') {
2879     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
2880 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
2881 wakaba 1.1 unless (length $token->{data}) {
2882     !!!next-token;
2883     redo B;
2884     }
2885     }
2886    
2887     #
2888     } elsif ($token->{type} eq 'comment') {
2889     my $comment = $self->{document}->create_comment ($token->{data});
2890 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
2891 wakaba 1.1 !!!next-token;
2892     redo B;
2893     } elsif ($token->{type} eq 'start tag') {
2894     if ($token->{tag_name} eq 'title') {
2895     ## NOTE: There is an "as if in head" code clone
2896     my $title_el;
2897     !!!create-element ($title_el, 'title', $token->{attributes});
2898 wakaba 1.3 (defined $self->{head_element} ? $self->{head_element} : $self->{open_elements}->[-1]->[0])
2899 wakaba 1.1 ->append_child ($title_el);
2900     $self->{content_model_flag} = 'RCDATA';
2901 wakaba 1.13 delete $self->{escape}; # MUST
2902 wakaba 1.1
2903     my $text = '';
2904     !!!next-token;
2905     while ($token->{type} eq 'character') {
2906     $text .= $token->{data};
2907     !!!next-token;
2908     }
2909     if (length $text) {
2910     $title_el->manakai_append_text ($text);
2911     }
2912    
2913     $self->{content_model_flag} = 'PCDATA';
2914    
2915     if ($token->{type} eq 'end tag' and
2916     $token->{tag_name} eq 'title') {
2917     ## Ignore the token
2918     } else {
2919 wakaba 1.3 !!!parse-error (type => 'in RCDATA:#'.$token->{type});
2920 wakaba 1.1 ## ISSUE: And ignore?
2921     }
2922     !!!next-token;
2923     redo B;
2924     } elsif ($token->{tag_name} eq 'style') {
2925     $style_start_tag->();
2926     redo B;
2927     } elsif ($token->{tag_name} eq 'script') {
2928     $script_start_tag->();
2929     redo B;
2930     } elsif ({base => 1, link => 1, meta => 1}->{$token->{tag_name}}) {
2931     ## NOTE: There are "as if in head" code clones
2932     my $el;
2933     !!!create-element ($el, $token->{tag_name}, $token->{attributes});
2934 wakaba 1.3 (defined $self->{head_element} ? $self->{head_element} : $self->{open_elements}->[-1]->[0])
2935 wakaba 1.1 ->append_child ($el);
2936    
2937     !!!next-token;
2938     redo B;
2939     } elsif ($token->{tag_name} eq 'head') {
2940 wakaba 1.3 !!!parse-error (type => 'in head:head');
2941 wakaba 1.1 ## Ignore the token
2942     !!!next-token;
2943     redo B;
2944     } else {
2945     #
2946     }
2947     } elsif ($token->{type} eq 'end tag') {
2948     if ($token->{tag_name} eq 'head') {
2949 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'head') {
2950     pop @{$self->{open_elements}};
2951 wakaba 1.1 } else {
2952 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:head');
2953 wakaba 1.1 }
2954 wakaba 1.3 $self->{insertion_mode} = 'after head';
2955 wakaba 1.1 !!!next-token;
2956     redo B;
2957     } elsif ($token->{tag_name} eq 'html') {
2958     #
2959     } else {
2960 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
2961 wakaba 1.1 ## Ignore the token
2962     !!!next-token;
2963     redo B;
2964     }
2965     } else {
2966     #
2967     }
2968    
2969 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'head') {
2970 wakaba 1.1 ## As if </head>
2971 wakaba 1.3 pop @{$self->{open_elements}};
2972 wakaba 1.1 }
2973 wakaba 1.3 $self->{insertion_mode} = 'after head';
2974 wakaba 1.1 ## reprocess
2975     redo B;
2976    
2977     ## ISSUE: An issue in the spec.
2978 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after head') {
2979 wakaba 1.1 if ($token->{type} eq 'character') {
2980     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
2981 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
2982 wakaba 1.1 unless (length $token->{data}) {
2983     !!!next-token;
2984     redo B;
2985     }
2986     }
2987    
2988     #
2989     } elsif ($token->{type} eq 'comment') {
2990     my $comment = $self->{document}->create_comment ($token->{data});
2991 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
2992 wakaba 1.1 !!!next-token;
2993     redo B;
2994     } elsif ($token->{type} eq 'start tag') {
2995     if ($token->{tag_name} eq 'body') {
2996     !!!insert-element ('body', $token->{attributes});
2997 wakaba 1.3 $self->{insertion_mode} = 'in body';
2998 wakaba 1.1 !!!next-token;
2999     redo B;
3000     } elsif ($token->{tag_name} eq 'frameset') {
3001     !!!insert-element ('frameset', $token->{attributes});
3002 wakaba 1.3 $self->{insertion_mode} = 'in frameset';
3003 wakaba 1.1 !!!next-token;
3004     redo B;
3005     } elsif ({
3006     base => 1, link => 1, meta => 1,
3007 wakaba 1.3 script => 1, style => 1, title => 1,
3008 wakaba 1.1 }->{$token->{tag_name}}) {
3009 wakaba 1.3 !!!parse-error (type => 'after head:'.$token->{tag_name});
3010     $self->{insertion_mode} = 'in head';
3011 wakaba 1.1 ## reprocess
3012     redo B;
3013     } else {
3014     #
3015     }
3016     } else {
3017     #
3018     }
3019    
3020     ## As if <body>
3021     !!!insert-element ('body');
3022 wakaba 1.3 $self->{insertion_mode} = 'in body';
3023 wakaba 1.1 ## reprocess
3024     redo B;
3025 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in body') {
3026 wakaba 1.1 if ($token->{type} eq 'character') {
3027     ## NOTE: There is a code clone of "character in body".
3028     $reconstruct_active_formatting_elements->($insert_to_current);
3029    
3030 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3031 wakaba 1.1
3032     !!!next-token;
3033     redo B;
3034     } elsif ($token->{type} eq 'comment') {
3035     ## NOTE: There is a code clone of "comment in body".
3036     my $comment = $self->{document}->create_comment ($token->{data});
3037 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3038 wakaba 1.1 !!!next-token;
3039     redo B;
3040     } else {
3041     $in_body->($insert_to_current);
3042     redo B;
3043     }
3044 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in table') {
3045 wakaba 1.1 if ($token->{type} eq 'character') {
3046     ## NOTE: There are "character in table" code clones.
3047     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3048 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3049 wakaba 1.1
3050     unless (length $token->{data}) {
3051     !!!next-token;
3052     redo B;
3053     }
3054     }
3055    
3056 wakaba 1.3 !!!parse-error (type => 'in table:#character');
3057    
3058 wakaba 1.1 ## As if in body, but insert into foster parent element
3059     ## ISSUE: The spec says "whenever a node would be inserted
3060     ## into the current node", but appending characters might not
3061     ## result in a new Text node.
3062     $reconstruct_active_formatting_elements->($insert_to_foster);
3063    
3064     if ({
3065     table => 1, tbody => 1, tfoot => 1,
3066     thead => 1, tr => 1,
3067 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3068 wakaba 1.1 # MUST
3069     my $foster_parent_element;
3070     my $next_sibling;
3071     my $prev_sibling;
3072 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3073     if ($self->{open_elements}->[$_]->[1] eq 'table') {
3074     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
3075 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
3076     $foster_parent_element = $parent;
3077 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
3078 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
3079     } else {
3080 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
3081 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
3082     }
3083     last OE;
3084     }
3085     } # OE
3086 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
3087 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
3088     unless defined $foster_parent_element;
3089     if (defined $prev_sibling and
3090     $prev_sibling->node_type == 3) {
3091     $prev_sibling->manakai_append_text ($token->{data});
3092     } else {
3093     $foster_parent_element->insert_before
3094     ($self->{document}->create_text_node ($token->{data}),
3095     $next_sibling);
3096     }
3097     } else {
3098 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3099 wakaba 1.1 }
3100    
3101     !!!next-token;
3102     redo B;
3103     } elsif ($token->{type} eq 'comment') {
3104     my $comment = $self->{document}->create_comment ($token->{data});
3105 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3106 wakaba 1.1 !!!next-token;
3107     redo B;
3108     } elsif ($token->{type} eq 'start tag') {
3109     if ({
3110     caption => 1,
3111     colgroup => 1,
3112     tbody => 1, tfoot => 1, thead => 1,
3113     }->{$token->{tag_name}}) {
3114     ## Clear back to table context
3115 wakaba 1.3 while ($self->{open_elements}->[-1]->[1] ne 'table' and
3116     $self->{open_elements}->[-1]->[1] ne 'html') {
3117     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3118     pop @{$self->{open_elements}};
3119 wakaba 1.1 }
3120    
3121     push @$active_formatting_elements, ['#marker', '']
3122     if $token->{tag_name} eq 'caption';
3123    
3124     !!!insert-element ($token->{tag_name}, $token->{attributes});
3125 wakaba 1.3 $self->{insertion_mode} = {
3126 wakaba 1.1 caption => 'in caption',
3127     colgroup => 'in column group',
3128     tbody => 'in table body',
3129     tfoot => 'in table body',
3130     thead => 'in table body',
3131     }->{$token->{tag_name}};
3132     !!!next-token;
3133     redo B;
3134     } elsif ({
3135     col => 1,
3136     td => 1, th => 1, tr => 1,
3137     }->{$token->{tag_name}}) {
3138     ## Clear back to table context
3139 wakaba 1.3 while ($self->{open_elements}->[-1]->[1] ne 'table' and
3140     $self->{open_elements}->[-1]->[1] ne 'html') {
3141     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3142     pop @{$self->{open_elements}};
3143 wakaba 1.1 }
3144    
3145     !!!insert-element ($token->{tag_name} eq 'col' ? 'colgroup' : 'tbody');
3146 wakaba 1.3 $self->{insertion_mode} = $token->{tag_name} eq 'col'
3147 wakaba 1.1 ? 'in column group' : 'in table body';
3148     ## reprocess
3149     redo B;
3150     } elsif ($token->{tag_name} eq 'table') {
3151     ## NOTE: There are code clones for this "table in table"
3152 wakaba 1.3 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3153 wakaba 1.1
3154     ## As if </table>
3155     ## have a table element in table scope
3156     my $i;
3157 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3158     my $node = $self->{open_elements}->[$_];
3159 wakaba 1.1 if ($node->[1] eq 'table') {
3160     $i = $_;
3161     last INSCOPE;
3162     } elsif ({
3163     table => 1, html => 1,
3164     }->{$node->[1]}) {
3165     last INSCOPE;
3166     }
3167     } # INSCOPE
3168     unless (defined $i) {
3169 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:table');
3170 wakaba 1.1 ## Ignore tokens </table><table>
3171     !!!next-token;
3172     redo B;
3173     }
3174    
3175     ## generate implied end tags
3176     if ({
3177     dd => 1, dt => 1, li => 1, p => 1,
3178     td => 1, th => 1, tr => 1,
3179 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3180 wakaba 1.1 !!!back-token; # <table>
3181     $token = {type => 'end tag', tag_name => 'table'};
3182     !!!back-token;
3183     $token = {type => 'end tag',
3184 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3185 wakaba 1.1 redo B;
3186     }
3187    
3188 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
3189     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3190 wakaba 1.1 }
3191    
3192 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3193 wakaba 1.1
3194 wakaba 1.3 $self->_reset_insertion_mode;
3195 wakaba 1.1
3196     ## reprocess
3197     redo B;
3198     } else {
3199     #
3200     }
3201     } elsif ($token->{type} eq 'end tag') {
3202     if ($token->{tag_name} eq 'table') {
3203     ## have a table element in table scope
3204     my $i;
3205 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3206     my $node = $self->{open_elements}->[$_];
3207 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
3208     $i = $_;
3209     last INSCOPE;
3210     } elsif ({
3211     table => 1, html => 1,
3212     }->{$node->[1]}) {
3213     last INSCOPE;
3214     }
3215     } # INSCOPE
3216     unless (defined $i) {
3217 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3218 wakaba 1.1 ## Ignore the token
3219     !!!next-token;
3220     redo B;
3221     }
3222    
3223     ## generate implied end tags
3224     if ({
3225     dd => 1, dt => 1, li => 1, p => 1,
3226     td => 1, th => 1, tr => 1,
3227 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3228 wakaba 1.1 !!!back-token;
3229     $token = {type => 'end tag',
3230 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3231 wakaba 1.1 redo B;
3232     }
3233    
3234 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
3235     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3236 wakaba 1.1 }
3237    
3238 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3239 wakaba 1.1
3240 wakaba 1.3 $self->_reset_insertion_mode;
3241 wakaba 1.1
3242     !!!next-token;
3243     redo B;
3244     } elsif ({
3245     body => 1, caption => 1, col => 1, colgroup => 1,
3246     html => 1, tbody => 1, td => 1, tfoot => 1, th => 1,
3247     thead => 1, tr => 1,
3248     }->{$token->{tag_name}}) {
3249 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3250 wakaba 1.1 ## Ignore the token
3251     !!!next-token;
3252     redo B;
3253     } else {
3254     #
3255     }
3256     } else {
3257     #
3258     }
3259    
3260 wakaba 1.3 !!!parse-error (type => 'in table:'.$token->{tag_name});
3261 wakaba 1.1 $in_body->($insert_to_foster);
3262     redo B;
3263 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in caption') {
3264 wakaba 1.1 if ($token->{type} eq 'character') {
3265     ## NOTE: This is a code clone of "character in body".
3266     $reconstruct_active_formatting_elements->($insert_to_current);
3267    
3268 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3269 wakaba 1.1
3270     !!!next-token;
3271     redo B;
3272     } elsif ($token->{type} eq 'comment') {
3273     ## NOTE: This is a code clone of "comment in body".
3274     my $comment = $self->{document}->create_comment ($token->{data});
3275 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3276 wakaba 1.1 !!!next-token;
3277     redo B;
3278     } elsif ($token->{type} eq 'start tag') {
3279     if ({
3280     caption => 1, col => 1, colgroup => 1, tbody => 1,
3281     td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
3282     }->{$token->{tag_name}}) {
3283 wakaba 1.3 !!!parse-error (type => 'not closed:caption');
3284 wakaba 1.1
3285     ## As if </caption>
3286     ## have a table element in table scope
3287     my $i;
3288 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3289     my $node = $self->{open_elements}->[$_];
3290 wakaba 1.1 if ($node->[1] eq 'caption') {
3291     $i = $_;
3292     last INSCOPE;
3293     } elsif ({
3294     table => 1, html => 1,
3295     }->{$node->[1]}) {
3296     last INSCOPE;
3297     }
3298     } # INSCOPE
3299     unless (defined $i) {
3300 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:caption');
3301 wakaba 1.1 ## Ignore the token
3302     !!!next-token;
3303     redo B;
3304     }
3305    
3306     ## generate implied end tags
3307     if ({
3308     dd => 1, dt => 1, li => 1, p => 1,
3309     td => 1, th => 1, tr => 1,
3310 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3311 wakaba 1.1 !!!back-token; # <?>
3312     $token = {type => 'end tag', tag_name => 'caption'};
3313     !!!back-token;
3314     $token = {type => 'end tag',
3315 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3316 wakaba 1.1 redo B;
3317     }
3318    
3319 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
3320     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3321 wakaba 1.1 }
3322    
3323 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3324 wakaba 1.1
3325     $clear_up_to_marker->();
3326    
3327 wakaba 1.3 $self->{insertion_mode} = 'in table';
3328 wakaba 1.1
3329     ## reprocess
3330     redo B;
3331     } else {
3332     #
3333     }
3334     } elsif ($token->{type} eq 'end tag') {
3335     if ($token->{tag_name} eq 'caption') {
3336     ## have a table element in table scope
3337     my $i;
3338 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3339     my $node = $self->{open_elements}->[$_];
3340 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
3341     $i = $_;
3342     last INSCOPE;
3343     } elsif ({
3344     table => 1, html => 1,
3345     }->{$node->[1]}) {
3346     last INSCOPE;
3347     }
3348     } # INSCOPE
3349     unless (defined $i) {
3350 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3351 wakaba 1.1 ## Ignore the token
3352     !!!next-token;
3353     redo B;
3354     }
3355    
3356     ## generate implied end tags
3357     if ({
3358     dd => 1, dt => 1, li => 1, p => 1,
3359     td => 1, th => 1, tr => 1,
3360 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3361 wakaba 1.1 !!!back-token;
3362     $token = {type => 'end tag',
3363 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3364 wakaba 1.1 redo B;
3365     }
3366    
3367 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
3368     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3369 wakaba 1.1 }
3370    
3371 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3372 wakaba 1.1
3373     $clear_up_to_marker->();
3374    
3375 wakaba 1.3 $self->{insertion_mode} = 'in table';
3376 wakaba 1.1
3377     !!!next-token;
3378     redo B;
3379     } elsif ($token->{tag_name} eq 'table') {
3380 wakaba 1.3 !!!parse-error (type => 'not closed:caption');
3381 wakaba 1.1
3382     ## As if </caption>
3383     ## have a table element in table scope
3384     my $i;
3385 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3386     my $node = $self->{open_elements}->[$_];
3387 wakaba 1.1 if ($node->[1] eq 'caption') {
3388     $i = $_;
3389     last INSCOPE;
3390     } elsif ({
3391     table => 1, html => 1,
3392     }->{$node->[1]}) {
3393     last INSCOPE;
3394     }
3395     } # INSCOPE
3396     unless (defined $i) {
3397 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:caption');
3398 wakaba 1.1 ## Ignore the token
3399     !!!next-token;
3400     redo B;
3401     }
3402    
3403     ## generate implied end tags
3404     if ({
3405     dd => 1, dt => 1, li => 1, p => 1,
3406     td => 1, th => 1, tr => 1,
3407 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3408 wakaba 1.1 !!!back-token; # </table>
3409     $token = {type => 'end tag', tag_name => 'caption'};
3410     !!!back-token;
3411     $token = {type => 'end tag',
3412 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3413 wakaba 1.1 redo B;
3414     }
3415    
3416 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
3417     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3418 wakaba 1.1 }
3419    
3420 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3421 wakaba 1.1
3422     $clear_up_to_marker->();
3423    
3424 wakaba 1.3 $self->{insertion_mode} = 'in table';
3425 wakaba 1.1
3426     ## reprocess
3427     redo B;
3428     } elsif ({
3429     body => 1, col => 1, colgroup => 1,
3430     html => 1, tbody => 1, td => 1, tfoot => 1,
3431     th => 1, thead => 1, tr => 1,
3432     }->{$token->{tag_name}}) {
3433 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3434 wakaba 1.1 ## Ignore the token
3435     redo B;
3436     } else {
3437     #
3438     }
3439     } else {
3440     #
3441     }
3442    
3443     $in_body->($insert_to_current);
3444     redo B;
3445 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in column group') {
3446 wakaba 1.1 if ($token->{type} eq 'character') {
3447     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3448 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3449 wakaba 1.1 unless (length $token->{data}) {
3450     !!!next-token;
3451     redo B;
3452     }
3453     }
3454    
3455     #
3456     } elsif ($token->{type} eq 'comment') {
3457     my $comment = $self->{document}->create_comment ($token->{data});
3458 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3459 wakaba 1.1 !!!next-token;
3460     redo B;
3461     } elsif ($token->{type} eq 'start tag') {
3462     if ($token->{tag_name} eq 'col') {
3463     !!!insert-element ($token->{tag_name}, $token->{attributes});
3464 wakaba 1.3 pop @{$self->{open_elements}};
3465 wakaba 1.1 !!!next-token;
3466     redo B;
3467     } else {
3468     #
3469     }
3470     } elsif ($token->{type} eq 'end tag') {
3471     if ($token->{tag_name} eq 'colgroup') {
3472 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html') {
3473     !!!parse-error (type => 'unmatched end tag:colgroup');
3474 wakaba 1.1 ## Ignore the token
3475     !!!next-token;
3476     redo B;
3477     } else {
3478 wakaba 1.3 pop @{$self->{open_elements}}; # colgroup
3479     $self->{insertion_mode} = 'in table';
3480 wakaba 1.1 !!!next-token;
3481     redo B;
3482     }
3483     } elsif ($token->{tag_name} eq 'col') {
3484 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:col');
3485 wakaba 1.1 ## Ignore the token
3486     !!!next-token;
3487     redo B;
3488     } else {
3489     #
3490     }
3491     } else {
3492     #
3493     }
3494    
3495     ## As if </colgroup>
3496 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html') {
3497     !!!parse-error (type => 'unmatched end tag:colgroup');
3498 wakaba 1.1 ## Ignore the token
3499     !!!next-token;
3500     redo B;
3501     } else {
3502 wakaba 1.3 pop @{$self->{open_elements}}; # colgroup
3503     $self->{insertion_mode} = 'in table';
3504 wakaba 1.1 ## reprocess
3505     redo B;
3506     }
3507 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in table body') {
3508 wakaba 1.1 if ($token->{type} eq 'character') {
3509     ## NOTE: This is a "character in table" code clone.
3510     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3511 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3512 wakaba 1.1
3513     unless (length $token->{data}) {
3514     !!!next-token;
3515     redo B;
3516     }
3517     }
3518    
3519 wakaba 1.3 !!!parse-error (type => 'in table:#character');
3520    
3521 wakaba 1.1 ## As if in body, but insert into foster parent element
3522     ## ISSUE: Spec says that "whenever a node would be inserted
3523     ## into the current node", whereas appending characters might
3524     ## not result in a new Text node.
3525     $reconstruct_active_formatting_elements->($insert_to_foster);
3526    
3527     if ({
3528     table => 1, tbody => 1, tfoot => 1,
3529     thead => 1, tr => 1,
3530 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3531 wakaba 1.1 # MUST
3532     my $foster_parent_element;
3533     my $next_sibling;
3534     my $prev_sibling;
3535 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3536     if ($self->{open_elements}->[$_]->[1] eq 'table') {
3537     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
3538 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
3539     $foster_parent_element = $parent;
3540 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
3541 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
3542     } else {
3543 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
3544 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
3545     }
3546     last OE;
3547     }
3548     } # OE
3549 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
3550 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
3551     unless defined $foster_parent_element;
3552     if (defined $prev_sibling and
3553     $prev_sibling->node_type == 3) {
3554     $prev_sibling->manakai_append_text ($token->{data});
3555     } else {
3556     $foster_parent_element->insert_before
3557     ($self->{document}->create_text_node ($token->{data}),
3558     $next_sibling);
3559     }
3560     } else {
3561 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3562 wakaba 1.1 }
3563    
3564     !!!next-token;
3565     redo B;
3566     } elsif ($token->{type} eq 'comment') {
3567     ## Copied from 'in table'
3568     my $comment = $self->{document}->create_comment ($token->{data});
3569 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3570 wakaba 1.1 !!!next-token;
3571     redo B;
3572     } elsif ($token->{type} eq 'start tag') {
3573     if ({
3574     tr => 1,
3575     th => 1, td => 1,
3576     }->{$token->{tag_name}}) {
3577 wakaba 1.3 unless ($token->{tag_name} eq 'tr') {
3578     !!!parse-error (type => 'missing start tag:tr');
3579     }
3580    
3581 wakaba 1.1 ## Clear back to table body context
3582     while (not {
3583     tbody => 1, tfoot => 1, thead => 1, html => 1,
3584 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3585     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3586     pop @{$self->{open_elements}};
3587 wakaba 1.1 }
3588    
3589 wakaba 1.3 $self->{insertion_mode} = 'in row';
3590 wakaba 1.1 if ($token->{tag_name} eq 'tr') {
3591     !!!insert-element ($token->{tag_name}, $token->{attributes});
3592     !!!next-token;
3593     } else {
3594     !!!insert-element ('tr');
3595     ## reprocess
3596     }
3597     redo B;
3598     } elsif ({
3599     caption => 1, col => 1, colgroup => 1,
3600     tbody => 1, tfoot => 1, thead => 1,
3601     }->{$token->{tag_name}}) {
3602     ## have an element in table scope
3603     my $i;
3604 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3605     my $node = $self->{open_elements}->[$_];
3606 wakaba 1.1 if ({
3607     tbody => 1, thead => 1, tfoot => 1,
3608     }->{$node->[1]}) {
3609     $i = $_;
3610     last INSCOPE;
3611     } elsif ({
3612     table => 1, html => 1,
3613     }->{$node->[1]}) {
3614     last INSCOPE;
3615     }
3616     } # INSCOPE
3617     unless (defined $i) {
3618 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3619 wakaba 1.1 ## Ignore the token
3620     !!!next-token;
3621     redo B;
3622     }
3623    
3624     ## Clear back to table body context
3625     while (not {
3626     tbody => 1, tfoot => 1, thead => 1, html => 1,
3627 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3628     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3629     pop @{$self->{open_elements}};
3630 wakaba 1.1 }
3631    
3632     ## As if <{current node}>
3633     ## have an element in table scope
3634     ## true by definition
3635    
3636     ## Clear back to table body context
3637     ## nop by definition
3638    
3639 wakaba 1.3 pop @{$self->{open_elements}};
3640     $self->{insertion_mode} = 'in table';
3641 wakaba 1.1 ## reprocess
3642     redo B;
3643     } elsif ($token->{tag_name} eq 'table') {
3644     ## NOTE: This is a code clone of "table in table"
3645 wakaba 1.3 !!!parse-error (type => 'not closed:table');
3646 wakaba 1.1
3647     ## As if </table>
3648     ## have a table element in table scope
3649     my $i;
3650 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3651     my $node = $self->{open_elements}->[$_];
3652 wakaba 1.1 if ($node->[1] eq 'table') {
3653     $i = $_;
3654     last INSCOPE;
3655     } elsif ({
3656     table => 1, html => 1,
3657     }->{$node->[1]}) {
3658     last INSCOPE;
3659     }
3660     } # INSCOPE
3661     unless (defined $i) {
3662 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:table');
3663 wakaba 1.1 ## Ignore tokens </table><table>
3664     !!!next-token;
3665     redo B;
3666     }
3667    
3668     ## generate implied end tags
3669     if ({
3670     dd => 1, dt => 1, li => 1, p => 1,
3671     td => 1, th => 1, tr => 1,
3672 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3673 wakaba 1.1 !!!back-token; # <table>
3674     $token = {type => 'end tag', tag_name => 'table'};
3675     !!!back-token;
3676     $token = {type => 'end tag',
3677 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3678 wakaba 1.1 redo B;
3679     }
3680    
3681 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
3682     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3683 wakaba 1.1 }
3684    
3685 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3686 wakaba 1.1
3687 wakaba 1.3 $self->_reset_insertion_mode;
3688 wakaba 1.1
3689     ## reprocess
3690     redo B;
3691     } else {
3692     #
3693     }
3694     } elsif ($token->{type} eq 'end tag') {
3695     if ({
3696     tbody => 1, tfoot => 1, thead => 1,
3697     }->{$token->{tag_name}}) {
3698     ## have an element in table scope
3699     my $i;
3700 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3701     my $node = $self->{open_elements}->[$_];
3702 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
3703     $i = $_;
3704     last INSCOPE;
3705     } elsif ({
3706     table => 1, html => 1,
3707     }->{$node->[1]}) {
3708     last INSCOPE;
3709     }
3710     } # INSCOPE
3711     unless (defined $i) {
3712 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3713 wakaba 1.1 ## Ignore the token
3714     !!!next-token;
3715     redo B;
3716     }
3717    
3718     ## Clear back to table body context
3719     while (not {
3720     tbody => 1, tfoot => 1, thead => 1, html => 1,
3721 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3722     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3723     pop @{$self->{open_elements}};
3724 wakaba 1.1 }
3725    
3726 wakaba 1.3 pop @{$self->{open_elements}};
3727     $self->{insertion_mode} = 'in table';
3728 wakaba 1.1 !!!next-token;
3729     redo B;
3730     } elsif ($token->{tag_name} eq 'table') {
3731     ## have an element in table scope
3732     my $i;
3733 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3734     my $node = $self->{open_elements}->[$_];
3735 wakaba 1.1 if ({
3736     tbody => 1, thead => 1, tfoot => 1,
3737     }->{$node->[1]}) {
3738     $i = $_;
3739     last INSCOPE;
3740     } elsif ({
3741     table => 1, html => 1,
3742     }->{$node->[1]}) {
3743     last INSCOPE;
3744     }
3745     } # INSCOPE
3746     unless (defined $i) {
3747 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3748 wakaba 1.1 ## Ignore the token
3749     !!!next-token;
3750     redo B;
3751     }
3752    
3753     ## Clear back to table body context
3754     while (not {
3755     tbody => 1, tfoot => 1, thead => 1, html => 1,
3756 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3757     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3758     pop @{$self->{open_elements}};
3759 wakaba 1.1 }
3760    
3761     ## As if <{current node}>
3762     ## have an element in table scope
3763     ## true by definition
3764    
3765     ## Clear back to table body context
3766     ## nop by definition
3767    
3768 wakaba 1.3 pop @{$self->{open_elements}};
3769     $self->{insertion_mode} = 'in table';
3770 wakaba 1.1 ## reprocess
3771     redo B;
3772     } elsif ({
3773     body => 1, caption => 1, col => 1, colgroup => 1,
3774     html => 1, td => 1, th => 1, tr => 1,
3775     }->{$token->{tag_name}}) {
3776 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3777 wakaba 1.1 ## Ignore the token
3778     !!!next-token;
3779     redo B;
3780     } else {
3781     #
3782     }
3783     } else {
3784     #
3785     }
3786    
3787     ## As if in table
3788 wakaba 1.3 !!!parse-error (type => 'in table:'.$token->{tag_name});
3789 wakaba 1.1 $in_body->($insert_to_foster);
3790     redo B;
3791 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in row') {
3792 wakaba 1.1 if ($token->{type} eq 'character') {
3793     ## NOTE: This is a "character in table" code clone.
3794     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3795 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3796 wakaba 1.1
3797     unless (length $token->{data}) {
3798     !!!next-token;
3799     redo B;
3800     }
3801     }
3802    
3803 wakaba 1.3 !!!parse-error (type => 'in table:#character');
3804    
3805 wakaba 1.1 ## As if in body, but insert into foster parent element
3806     ## ISSUE: Spec says that "whenever a node would be inserted
3807     ## into the current node", whereas appending characters might
3808     ## not result in a new Text node.
3809     $reconstruct_active_formatting_elements->($insert_to_foster);
3810    
3811     if ({
3812     table => 1, tbody => 1, tfoot => 1,
3813     thead => 1, tr => 1,
3814 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3815 wakaba 1.1 # MUST
3816     my $foster_parent_element;
3817     my $next_sibling;
3818     my $prev_sibling;
3819 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3820     if ($self->{open_elements}->[$_]->[1] eq 'table') {
3821     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
3822 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
3823     $foster_parent_element = $parent;
3824 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
3825 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
3826     } else {
3827 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
3828 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
3829     }
3830     last OE;
3831     }
3832     } # OE
3833 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
3834 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
3835     unless defined $foster_parent_element;
3836     if (defined $prev_sibling and
3837     $prev_sibling->node_type == 3) {
3838     $prev_sibling->manakai_append_text ($token->{data});
3839     } else {
3840     $foster_parent_element->insert_before
3841     ($self->{document}->create_text_node ($token->{data}),
3842     $next_sibling);
3843     }
3844     } else {
3845 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3846 wakaba 1.1 }
3847    
3848     !!!next-token;
3849     redo B;
3850     } elsif ($token->{type} eq 'comment') {
3851     ## Copied from 'in table'
3852     my $comment = $self->{document}->create_comment ($token->{data});
3853 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3854 wakaba 1.1 !!!next-token;
3855     redo B;
3856     } elsif ($token->{type} eq 'start tag') {
3857     if ($token->{tag_name} eq 'th' or
3858     $token->{tag_name} eq 'td') {
3859     ## Clear back to table row context
3860     while (not {
3861     tr => 1, html => 1,
3862 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3863     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3864     pop @{$self->{open_elements}};
3865 wakaba 1.1 }
3866    
3867     !!!insert-element ($token->{tag_name}, $token->{attributes});
3868 wakaba 1.3 $self->{insertion_mode} = 'in cell';
3869 wakaba 1.1
3870     push @$active_formatting_elements, ['#marker', ''];
3871    
3872     !!!next-token;
3873     redo B;
3874     } elsif ({
3875     caption => 1, col => 1, colgroup => 1,
3876     tbody => 1, tfoot => 1, thead => 1, tr => 1,
3877     }->{$token->{tag_name}}) {
3878     ## As if </tr>
3879     ## have an element in table scope
3880     my $i;
3881 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3882     my $node = $self->{open_elements}->[$_];
3883 wakaba 1.1 if ($node->[1] eq 'tr') {
3884     $i = $_;
3885     last INSCOPE;
3886     } elsif ({
3887     table => 1, html => 1,
3888     }->{$node->[1]}) {
3889     last INSCOPE;
3890     }
3891     } # INSCOPE
3892     unless (defined $i) {
3893 wakaba 1.3 !!!parse-error (type => 'unmacthed end tag:'.$token->{tag_name});
3894 wakaba 1.1 ## Ignore the token
3895     !!!next-token;
3896     redo B;
3897     }
3898    
3899     ## Clear back to table row context
3900     while (not {
3901     tr => 1, html => 1,
3902 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3903     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3904     pop @{$self->{open_elements}};
3905 wakaba 1.1 }
3906    
3907 wakaba 1.3 pop @{$self->{open_elements}}; # tr
3908     $self->{insertion_mode} = 'in table body';
3909 wakaba 1.1 ## reprocess
3910     redo B;
3911     } elsif ($token->{tag_name} eq 'table') {
3912     ## NOTE: This is a code clone of "table in table"
3913 wakaba 1.3 !!!parse-error (type => 'not closed:table');
3914 wakaba 1.1
3915     ## As if </table>
3916     ## have a table element in table scope
3917     my $i;
3918 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3919     my $node = $self->{open_elements}->[$_];
3920 wakaba 1.1 if ($node->[1] eq 'table') {
3921     $i = $_;
3922     last INSCOPE;
3923     } elsif ({
3924     table => 1, html => 1,
3925     }->{$node->[1]}) {
3926     last INSCOPE;
3927     }
3928     } # INSCOPE
3929     unless (defined $i) {
3930 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:table');
3931 wakaba 1.1 ## Ignore tokens </table><table>
3932     !!!next-token;
3933     redo B;
3934     }
3935    
3936     ## generate implied end tags
3937     if ({
3938     dd => 1, dt => 1, li => 1, p => 1,
3939     td => 1, th => 1, tr => 1,
3940 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3941 wakaba 1.1 !!!back-token; # <table>
3942     $token = {type => 'end tag', tag_name => 'table'};
3943     !!!back-token;
3944     $token = {type => 'end tag',
3945 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3946 wakaba 1.1 redo B;
3947     }
3948    
3949 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
3950     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3951 wakaba 1.1 }
3952    
3953 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3954 wakaba 1.1
3955 wakaba 1.3 $self->_reset_insertion_mode;
3956 wakaba 1.1
3957     ## reprocess
3958     redo B;
3959     } else {
3960     #
3961     }
3962     } elsif ($token->{type} eq 'end tag') {
3963     if ($token->{tag_name} eq 'tr') {
3964     ## have an element in table scope
3965     my $i;
3966 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3967     my $node = $self->{open_elements}->[$_];
3968 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
3969     $i = $_;
3970     last INSCOPE;
3971     } elsif ({
3972     table => 1, html => 1,
3973     }->{$node->[1]}) {
3974     last INSCOPE;
3975     }
3976     } # INSCOPE
3977     unless (defined $i) {
3978 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3979 wakaba 1.1 ## Ignore the token
3980     !!!next-token;
3981     redo B;
3982     }
3983    
3984     ## Clear back to table row context
3985     while (not {
3986     tr => 1, html => 1,
3987 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3988     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3989     pop @{$self->{open_elements}};
3990 wakaba 1.1 }
3991    
3992 wakaba 1.3 pop @{$self->{open_elements}}; # tr
3993     $self->{insertion_mode} = 'in table body';
3994 wakaba 1.1 !!!next-token;
3995     redo B;
3996     } elsif ($token->{tag_name} eq 'table') {
3997     ## As if </tr>
3998     ## have an element in table scope
3999     my $i;
4000 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4001     my $node = $self->{open_elements}->[$_];
4002 wakaba 1.1 if ($node->[1] eq 'tr') {
4003     $i = $_;
4004     last INSCOPE;
4005     } elsif ({
4006     table => 1, html => 1,
4007     }->{$node->[1]}) {
4008     last INSCOPE;
4009     }
4010     } # INSCOPE
4011     unless (defined $i) {
4012 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{type});
4013 wakaba 1.1 ## Ignore the token
4014     !!!next-token;
4015     redo B;
4016     }
4017    
4018     ## Clear back to table row context
4019     while (not {
4020     tr => 1, html => 1,
4021 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4022     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4023     pop @{$self->{open_elements}};
4024 wakaba 1.1 }
4025    
4026 wakaba 1.3 pop @{$self->{open_elements}}; # tr
4027     $self->{insertion_mode} = 'in table body';
4028 wakaba 1.1 ## reprocess
4029     redo B;
4030     } elsif ({
4031     tbody => 1, tfoot => 1, thead => 1,
4032     }->{$token->{tag_name}}) {
4033     ## have an element in table scope
4034     my $i;
4035 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4036     my $node = $self->{open_elements}->[$_];
4037 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4038     $i = $_;
4039     last INSCOPE;
4040     } elsif ({
4041     table => 1, html => 1,
4042     }->{$node->[1]}) {
4043     last INSCOPE;
4044     }
4045     } # INSCOPE
4046     unless (defined $i) {
4047 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4048 wakaba 1.1 ## Ignore the token
4049     !!!next-token;
4050     redo B;
4051     }
4052    
4053     ## As if </tr>
4054     ## have an element in table scope
4055     my $i;
4056 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4057     my $node = $self->{open_elements}->[$_];
4058 wakaba 1.1 if ($node->[1] eq 'tr') {
4059     $i = $_;
4060     last INSCOPE;
4061     } elsif ({
4062     table => 1, html => 1,
4063     }->{$node->[1]}) {
4064     last INSCOPE;
4065     }
4066     } # INSCOPE
4067     unless (defined $i) {
4068 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:tr');
4069 wakaba 1.1 ## Ignore the token
4070     !!!next-token;
4071     redo B;
4072     }
4073    
4074     ## Clear back to table row context
4075     while (not {
4076     tr => 1, html => 1,
4077 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4078     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4079     pop @{$self->{open_elements}};
4080 wakaba 1.1 }
4081    
4082 wakaba 1.3 pop @{$self->{open_elements}}; # tr
4083     $self->{insertion_mode} = 'in table body';
4084 wakaba 1.1 ## reprocess
4085     redo B;
4086     } elsif ({
4087     body => 1, caption => 1, col => 1,
4088     colgroup => 1, html => 1, td => 1, th => 1,
4089     }->{$token->{tag_name}}) {
4090 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4091 wakaba 1.1 ## Ignore the token
4092     !!!next-token;
4093     redo B;
4094     } else {
4095     #
4096     }
4097     } else {
4098     #
4099     }
4100    
4101     ## As if in table
4102 wakaba 1.3 !!!parse-error (type => 'in table:'.$token->{tag_name});
4103 wakaba 1.1 $in_body->($insert_to_foster);
4104     redo B;
4105 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in cell') {
4106 wakaba 1.1 if ($token->{type} eq 'character') {
4107     ## NOTE: This is a code clone of "character in body".
4108     $reconstruct_active_formatting_elements->($insert_to_current);
4109    
4110 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4111 wakaba 1.1
4112     !!!next-token;
4113     redo B;
4114     } elsif ($token->{type} eq 'comment') {
4115     ## NOTE: This is a code clone of "comment in body".
4116     my $comment = $self->{document}->create_comment ($token->{data});
4117 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4118 wakaba 1.1 !!!next-token;
4119     redo B;
4120     } elsif ($token->{type} eq 'start tag') {
4121     if ({
4122     caption => 1, col => 1, colgroup => 1,
4123     tbody => 1, td => 1, tfoot => 1, th => 1,
4124     thead => 1, tr => 1,
4125     }->{$token->{tag_name}}) {
4126     ## have an element in table scope
4127     my $tn;
4128 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4129     my $node = $self->{open_elements}->[$_];
4130 wakaba 1.1 if ($node->[1] eq 'td' or $node->[1] eq 'th') {
4131     $tn = $node->[1];
4132     last INSCOPE;
4133     } elsif ({
4134     table => 1, html => 1,
4135     }->{$node->[1]}) {
4136     last INSCOPE;
4137     }
4138     } # INSCOPE
4139     unless (defined $tn) {
4140 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4141 wakaba 1.1 ## Ignore the token
4142     !!!next-token;
4143     redo B;
4144     }
4145    
4146     ## Close the cell
4147     !!!back-token; # <?>
4148     $token = {type => 'end tag', tag_name => $tn};
4149     redo B;
4150     } else {
4151     #
4152     }
4153     } elsif ($token->{type} eq 'end tag') {
4154     if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
4155     ## have an element in table scope
4156     my $i;
4157 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4158     my $node = $self->{open_elements}->[$_];
4159 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4160     $i = $_;
4161     last INSCOPE;
4162     } elsif ({
4163     table => 1, html => 1,
4164     }->{$node->[1]}) {
4165     last INSCOPE;
4166     }
4167     } # INSCOPE
4168     unless (defined $i) {
4169 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4170 wakaba 1.1 ## Ignore the token
4171     !!!next-token;
4172     redo B;
4173     }
4174    
4175     ## generate implied end tags
4176     if ({
4177     dd => 1, dt => 1, li => 1, p => 1,
4178     td => ($token->{tag_name} eq 'th'),
4179     th => ($token->{tag_name} eq 'td'),
4180     tr => 1,
4181 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4182 wakaba 1.1 !!!back-token;
4183     $token = {type => 'end tag',
4184 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4185 wakaba 1.1 redo B;
4186     }
4187    
4188 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
4189     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4190 wakaba 1.1 }
4191    
4192 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4193 wakaba 1.1
4194     $clear_up_to_marker->();
4195    
4196 wakaba 1.3 $self->{insertion_mode} = 'in row';
4197 wakaba 1.1
4198     !!!next-token;
4199     redo B;
4200     } elsif ({
4201     body => 1, caption => 1, col => 1,
4202     colgroup => 1, html => 1,
4203     }->{$token->{tag_name}}) {
4204 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4205 wakaba 1.1 ## Ignore the token
4206     !!!next-token;
4207     redo B;
4208     } elsif ({
4209     table => 1, tbody => 1, tfoot => 1,
4210     thead => 1, tr => 1,
4211     }->{$token->{tag_name}}) {
4212     ## have an element in table scope
4213     my $i;
4214     my $tn;
4215 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4216     my $node = $self->{open_elements}->[$_];
4217 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4218     $i = $_;
4219     last INSCOPE;
4220     } elsif ($node->[1] eq 'td' or $node->[1] eq 'th') {
4221     $tn = $node->[1];
4222     ## NOTE: There is exactly one |td| or |th| element
4223     ## in scope in the stack of open elements by definition.
4224     } elsif ({
4225     table => 1, html => 1,
4226     }->{$node->[1]}) {
4227     last INSCOPE;
4228     }
4229     } # INSCOPE
4230     unless (defined $i) {
4231 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4232 wakaba 1.1 ## Ignore the token
4233     !!!next-token;
4234     redo B;
4235     }
4236    
4237     ## Close the cell
4238     !!!back-token; # </?>
4239     $token = {type => 'end tag', tag_name => $tn};
4240     redo B;
4241     } else {
4242     #
4243     }
4244     } else {
4245     #
4246     }
4247    
4248     $in_body->($insert_to_current);
4249     redo B;
4250 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in select') {
4251 wakaba 1.1 if ($token->{type} eq 'character') {
4252 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4253 wakaba 1.1 !!!next-token;
4254     redo B;
4255     } elsif ($token->{type} eq 'comment') {
4256     my $comment = $self->{document}->create_comment ($token->{data});
4257 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4258 wakaba 1.1 !!!next-token;
4259     redo B;
4260     } elsif ($token->{type} eq 'start tag') {
4261     if ($token->{tag_name} eq 'option') {
4262 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
4263 wakaba 1.1 ## As if </option>
4264 wakaba 1.3 pop @{$self->{open_elements}};
4265 wakaba 1.1 }
4266    
4267     !!!insert-element ($token->{tag_name}, $token->{attributes});
4268     !!!next-token;
4269     redo B;
4270     } elsif ($token->{tag_name} eq 'optgroup') {
4271 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
4272 wakaba 1.1 ## As if </option>
4273 wakaba 1.3 pop @{$self->{open_elements}};
4274 wakaba 1.1 }
4275    
4276 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
4277 wakaba 1.1 ## As if </optgroup>
4278 wakaba 1.3 pop @{$self->{open_elements}};
4279 wakaba 1.1 }
4280    
4281     !!!insert-element ($token->{tag_name}, $token->{attributes});
4282     !!!next-token;
4283     redo B;
4284     } elsif ($token->{tag_name} eq 'select') {
4285 wakaba 1.3 !!!parse-error (type => 'not closed:select');
4286 wakaba 1.1 ## As if </select> instead
4287     ## have an element in table scope
4288     my $i;
4289 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4290     my $node = $self->{open_elements}->[$_];
4291 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4292     $i = $_;
4293     last INSCOPE;
4294     } elsif ({
4295     table => 1, html => 1,
4296     }->{$node->[1]}) {
4297     last INSCOPE;
4298     }
4299     } # INSCOPE
4300     unless (defined $i) {
4301 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:select');
4302 wakaba 1.1 ## Ignore the token
4303     !!!next-token;
4304     redo B;
4305     }
4306    
4307 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4308 wakaba 1.1
4309 wakaba 1.3 $self->_reset_insertion_mode;
4310 wakaba 1.1
4311     !!!next-token;
4312     redo B;
4313     } else {
4314     #
4315     }
4316     } elsif ($token->{type} eq 'end tag') {
4317     if ($token->{tag_name} eq 'optgroup') {
4318 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option' and
4319     $self->{open_elements}->[-2]->[1] eq 'optgroup') {
4320 wakaba 1.1 ## As if </option>
4321 wakaba 1.3 splice @{$self->{open_elements}}, -2;
4322     } elsif ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
4323     pop @{$self->{open_elements}};
4324 wakaba 1.1 } else {
4325 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4326 wakaba 1.1 ## Ignore the token
4327     }
4328     !!!next-token;
4329     redo B;
4330     } elsif ($token->{tag_name} eq 'option') {
4331 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
4332     pop @{$self->{open_elements}};
4333 wakaba 1.1 } else {
4334 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4335 wakaba 1.1 ## Ignore the token
4336     }
4337     !!!next-token;
4338     redo B;
4339     } elsif ($token->{tag_name} eq 'select') {
4340     ## have an element in table scope
4341     my $i;
4342 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4343     my $node = $self->{open_elements}->[$_];
4344 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4345     $i = $_;
4346     last INSCOPE;
4347     } elsif ({
4348     table => 1, html => 1,
4349     }->{$node->[1]}) {
4350     last INSCOPE;
4351     }
4352     } # INSCOPE
4353     unless (defined $i) {
4354 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4355 wakaba 1.1 ## Ignore the token
4356     !!!next-token;
4357     redo B;
4358     }
4359    
4360 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4361 wakaba 1.1
4362 wakaba 1.3 $self->_reset_insertion_mode;
4363 wakaba 1.1
4364     !!!next-token;
4365     redo B;
4366     } elsif ({
4367     caption => 1, table => 1, tbody => 1,
4368     tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
4369     }->{$token->{tag_name}}) {
4370 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4371 wakaba 1.1
4372     ## have an element in table scope
4373     my $i;
4374 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4375     my $node = $self->{open_elements}->[$_];
4376 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4377     $i = $_;
4378     last INSCOPE;
4379     } elsif ({
4380     table => 1, html => 1,
4381     }->{$node->[1]}) {
4382     last INSCOPE;
4383     }
4384     } # INSCOPE
4385     unless (defined $i) {
4386     ## Ignore the token
4387     !!!next-token;
4388     redo B;
4389     }
4390    
4391     ## As if </select>
4392     ## have an element in table scope
4393     undef $i;
4394 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4395     my $node = $self->{open_elements}->[$_];
4396 wakaba 1.1 if ($node->[1] eq 'select') {
4397     $i = $_;
4398     last INSCOPE;
4399     } elsif ({
4400     table => 1, html => 1,
4401     }->{$node->[1]}) {
4402     last INSCOPE;
4403     }
4404     } # INSCOPE
4405     unless (defined $i) {
4406 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:select');
4407 wakaba 1.1 ## Ignore the </select> token
4408     !!!next-token; ## TODO: ok?
4409     redo B;
4410     }
4411    
4412 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4413 wakaba 1.1
4414 wakaba 1.3 $self->_reset_insertion_mode;
4415 wakaba 1.1
4416     ## reprocess
4417     redo B;
4418     } else {
4419     #
4420     }
4421     } else {
4422     #
4423     }
4424    
4425 wakaba 1.3 !!!parse-error (type => 'in select:'.$token->{tag_name});
4426 wakaba 1.1 ## Ignore the token
4427     !!!next-token;
4428     redo B;
4429 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after body') {
4430 wakaba 1.1 if ($token->{type} eq 'character') {
4431     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4432     ## As if in body
4433     $reconstruct_active_formatting_elements->($insert_to_current);
4434    
4435 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4436 wakaba 1.1
4437     unless (length $token->{data}) {
4438     !!!next-token;
4439     redo B;
4440     }
4441     }
4442    
4443     #
4444 wakaba 1.3 !!!parse-error (type => 'after body:#'.$token->{type});
4445 wakaba 1.1 } elsif ($token->{type} eq 'comment') {
4446     my $comment = $self->{document}->create_comment ($token->{data});
4447 wakaba 1.3 $self->{open_elements}->[0]->[0]->append_child ($comment);
4448 wakaba 1.1 !!!next-token;
4449     redo B;
4450 wakaba 1.3 } elsif ($token->{type} eq 'start tag') {
4451     !!!parse-error (type => 'after body:'.$token->{tag_name});
4452     #
4453 wakaba 1.1 } elsif ($token->{type} eq 'end tag') {
4454     if ($token->{tag_name} eq 'html') {
4455 wakaba 1.3 if (defined $self->{inner_html_node}) {
4456     !!!parse-error (type => 'unmatched end tag:html');
4457     ## Ignore the token
4458     !!!next-token;
4459     redo B;
4460     } else {
4461     $phase = 'trailing end';
4462     !!!next-token;
4463     redo B;
4464     }
4465 wakaba 1.1 } else {
4466 wakaba 1.3 !!!parse-error (type => 'after body:/'.$token->{tag_name});
4467 wakaba 1.1 }
4468     } else {
4469 wakaba 1.3 !!!parse-error (type => 'after body:#'.$token->{type});
4470 wakaba 1.1 }
4471    
4472 wakaba 1.3 $self->{insertion_mode} = 'in body';
4473 wakaba 1.1 ## reprocess
4474     redo B;
4475 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in frameset') {
4476 wakaba 1.1 if ($token->{type} eq 'character') {
4477     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4478 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4479 wakaba 1.1
4480     unless (length $token->{data}) {
4481     !!!next-token;
4482     redo B;
4483     }
4484     }
4485    
4486     #
4487     } elsif ($token->{type} eq 'comment') {
4488     my $comment = $self->{document}->create_comment ($token->{data});
4489 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4490 wakaba 1.1 !!!next-token;
4491     redo B;
4492     } elsif ($token->{type} eq 'start tag') {
4493     if ($token->{tag_name} eq 'frameset') {
4494     !!!insert-element ($token->{tag_name}, $token->{attributes});
4495     !!!next-token;
4496     redo B;
4497     } elsif ($token->{tag_name} eq 'frame') {
4498     !!!insert-element ($token->{tag_name}, $token->{attributes});
4499 wakaba 1.3 pop @{$self->{open_elements}};
4500 wakaba 1.1 !!!next-token;
4501     redo B;
4502     } elsif ($token->{tag_name} eq 'noframes') {
4503     $in_body->($insert_to_current);
4504     redo B;
4505     } else {
4506     #
4507     }
4508     } elsif ($token->{type} eq 'end tag') {
4509     if ($token->{tag_name} eq 'frameset') {
4510 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html' and
4511     @{$self->{open_elements}} == 1) {
4512     !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4513 wakaba 1.1 ## Ignore the token
4514     !!!next-token;
4515     } else {
4516 wakaba 1.3 pop @{$self->{open_elements}};
4517 wakaba 1.1 !!!next-token;
4518     }
4519    
4520     ## if not inner_html and
4521 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'frameset') {
4522     $self->{insertion_mode} = 'after frameset';
4523 wakaba 1.1 }
4524     redo B;
4525     } else {
4526     #
4527     }
4528     } else {
4529     #
4530     }
4531    
4532 wakaba 1.3 if (defined $token->{tag_name}) {
4533     !!!parse-error (type => 'in frameset:'.$token->{tag_name});
4534     } else {
4535     !!!parse-error (type => 'in frameset:#'.$token->{type});
4536     }
4537 wakaba 1.1 ## Ignore the token
4538     !!!next-token;
4539     redo B;
4540 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after frameset') {
4541 wakaba 1.1 if ($token->{type} eq 'character') {
4542     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4543 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4544 wakaba 1.1
4545     unless (length $token->{data}) {
4546     !!!next-token;
4547     redo B;
4548     }
4549     }
4550    
4551     #
4552     } elsif ($token->{type} eq 'comment') {
4553     my $comment = $self->{document}->create_comment ($token->{data});
4554 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4555 wakaba 1.1 !!!next-token;
4556     redo B;
4557     } elsif ($token->{type} eq 'start tag') {
4558     if ($token->{tag_name} eq 'noframes') {
4559     $in_body->($insert_to_current);
4560     redo B;
4561     } else {
4562     #
4563     }
4564     } elsif ($token->{type} eq 'end tag') {
4565     if ($token->{tag_name} eq 'html') {
4566     $phase = 'trailing end';
4567     !!!next-token;
4568     redo B;
4569     } else {
4570     #
4571     }
4572     } else {
4573     #
4574     }
4575    
4576 wakaba 1.3 if (defined $token->{tag_name}) {
4577     !!!parse-error (type => 'after frameset:'.$token->{tag_name});
4578     } else {
4579     !!!parse-error (type => 'after frameset:#'.$token->{type});
4580     }
4581 wakaba 1.1 ## Ignore the token
4582     !!!next-token;
4583     redo B;
4584    
4585     ## ISSUE: An issue in spec there
4586     } else {
4587 wakaba 1.3 die "$0: $self->{insertion_mode}: Unknown insertion mode";
4588 wakaba 1.1 }
4589     }
4590     } elsif ($phase eq 'trailing end') {
4591     ## states in the main stage is preserved yet # MUST
4592    
4593     if ($token->{type} eq 'DOCTYPE') {
4594 wakaba 1.3 !!!parse-error (type => 'after html:#DOCTYPE');
4595 wakaba 1.1 ## Ignore the token
4596     !!!next-token;
4597     redo B;
4598     } elsif ($token->{type} eq 'comment') {
4599     my $comment = $self->{document}->create_comment ($token->{data});
4600     $self->{document}->append_child ($comment);
4601     !!!next-token;
4602     redo B;
4603     } elsif ($token->{type} eq 'character') {
4604     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4605     my $data = $1;
4606     ## As if in the main phase.
4607     ## NOTE: The insertion mode in the main phase
4608     ## just before the phase has been changed to the trailing
4609     ## end phase is either "after body" or "after frameset".
4610     $reconstruct_active_formatting_elements->($insert_to_current)
4611     if $phase eq 'main';
4612    
4613 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($data);
4614 wakaba 1.1
4615     unless (length $token->{data}) {
4616     !!!next-token;
4617     redo B;
4618     }
4619     }
4620    
4621 wakaba 1.3 !!!parse-error (type => 'after html:#character');
4622 wakaba 1.1 $phase = 'main';
4623     ## reprocess
4624     redo B;
4625     } elsif ($token->{type} eq 'start tag' or
4626     $token->{type} eq 'end tag') {
4627 wakaba 1.3 !!!parse-error (type => 'after html:'.$token->{tag_name});
4628 wakaba 1.1 $phase = 'main';
4629     ## reprocess
4630     redo B;
4631     } elsif ($token->{type} eq 'end-of-file') {
4632     ## Stop parsing
4633     last B;
4634     } else {
4635     die "$0: $token->{type}: Unknown token";
4636     }
4637     }
4638     } # B
4639    
4640     ## Stop parsing # MUST
4641    
4642     ## TODO: script stuffs
4643 wakaba 1.3 } # _tree_construct_main
4644    
## Replaces the content of |$node| with the result of parsing an HTML
## string (the |innerHTML| setter).
##
## Called as a class method with the arguments:
##   $node    - a Document node (node type 9) or an Element node
##              (node type 1); any other node type is fatal.
##   string   - the markup to parse.  It is accessed through a
##              reference to the caller's argument, so it is not copied.
##   $onerror - optional code reference invoked for each parse error.
##              For an element target, when it is omitted, errors are
##              reported with |warn|.
sub set_inner_html ($$$) {
  my ($class, $node) = @_[0, 1];
  my $s = \$_[2];
  my $onerror = $_[3];

  my $nt = $node->node_type;

  if ($nt == 9) { # Document
    # MUST

    ## Step 1 # MUST
    ## TODO: If the document has an active parser, ...
    ## ISSUE: There is an issue in the spec.

    ## Step 2 # MUST
    ## Snapshot the child list before removing anything from it.
    my @old_children = @{$node->child_nodes};
    for my $old_child (@old_children) {
      $node->remove_child ($old_child);
    }

    ## Step 3, 4, 5 # MUST
    return $class->parse_string ($$s => $node, $onerror);
  }

  unless ($nt == 1) { # neither Document nor Element
    die "$0: |set_inner_html| is not defined for node of type $nt";
  }

  ## Element
  ## TODO: If non-html element

  ## NOTE: Most of this code is copied from |parse_string|

  ## Step 1 # MUST
  ## The fragment is parsed into a scratch document created by the
  ## DOM implementation of the target node's owner document.
  my $this_doc = $node->owner_document;
  my $doc = $this_doc->implementation->create_document;
  ## TODO: Mark as HTML document
  my $p = $class->new;
  $p->{document} = $doc;

  ## Step 9 # MUST
  ## Input stream state shared by the two closures below.
  my $i = 0;       # offset of the next unread character in |$$s|
  my $line = 1;
  my $column = 0;
  $p->{set_next_input_character} = sub {
    my $self = shift;

    ## Slide the window of previously read characters.
    pop @{$self->{prev_input_character}};
    unshift @{$self->{prev_input_character}}, $self->{next_input_character};

    if ($i >= length $$s) {
      ## End of input
      $self->{next_input_character} = -1;
      return;
    }

    my $char = ord substr $$s, $i++, 1;
    $self->{next_input_character} = $char;
    $column++;

    if ($char == 0x000A) { # LF
      $line++;
      $column = 0;
    } elsif ($char == 0x000D) { # CR
      ## A CR, or a CR LF pair, is reported as a single LF.
      $i++ if substr ($$s, $i, 1) eq "\x0A";
      $self->{next_input_character} = 0x000A; # LF # MUST
      $line++;
      $column = 0;
    } elsif ($char > 0x10FFFF) {
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    } elsif ($char == 0x0000) { # NULL
      !!!parse-error (type => 'NULL');
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    }
  };
  $p->{prev_input_character} = [-1, -1, -1];
  $p->{next_input_character} = -1;

  ## Errors go to the caller's handler if there is one; otherwise
  ## they are printed as warnings.  Either way the current line and
  ## column are appended to the arguments.
  my $ponerror = $onerror;
  $ponerror ||= sub {
    my %opt = @_;
    warn "Parse error ($opt{type}) at line $opt{line} column $opt{column}\n";
  };
  $p->{parse_error} = sub {
    $ponerror->(@_, line => $line, column => $column);
  };

  $p->_initialize_tokenizer;
  $p->_initialize_tree_constructor;

  ## Step 2
  ## The initial content model flag depends on the context element.
  my %content_model_of = (
    title => 'RCDATA',
    textarea => 'RCDATA',
    style => 'CDATA',
    script => 'CDATA',
    xmp => 'CDATA',
    iframe => 'CDATA',
    noembed => 'CDATA',
    noframes => 'CDATA',
    noscript => 'CDATA',
    plaintext => 'PLAINTEXT',
  );
  my $node_ln = $node->local_name;
  $p->{content_model_flag} = $content_model_of{$node_ln} || 'PCDATA';
  ## ISSUE: What is "the name of the element"? local name?

  $p->{inner_html_node} = [$node, $node_ln];

  ## Step 4
  my $root = $doc->create_element_ns
      ('http://www.w3.org/1999/xhtml', [undef, 'html']);

  ## Step 5 # MUST
  $doc->append_child ($root);

  ## Step 6 # MUST
  push @{$p->{open_elements}}, [$root, 'html'];

  $p->{head_element} = undef;

  ## Step 7 # MUST
  $p->_reset_insertion_mode;

  ## Step 8 # MUST
  ## The nearest XHTML |form|, starting at the context node itself and
  ## walking up through its ancestors, becomes the form element pointer.
  AN: for (my $anode = $node; defined $anode; $anode = $anode->parent_node) {
    next AN unless $anode->node_type == 1;
    my $nsuri = $anode->namespace_uri;
    next AN unless (defined $nsuri and
                    $nsuri eq 'http://www.w3.org/1999/xhtml');
    if ($anode->local_name eq 'form') { ## TODO: case?
      $p->{form_element} = $anode;
      last AN;
    }
  } # AN

  ## Step 3 # MUST
  ## Step 10 # MUST
  {
    ## The macro below is written in terms of |$self|.
    my $self = $p;
    !!!next-token;
  }
  $p->_tree_construction_main;

  ## Step 11 # MUST
  my @old_children = @{$node->child_nodes};
  for my $old_child (@old_children) {
    $node->remove_child ($old_child);
  }
  ## ISSUE: mutation events? read-only?

  ## Step 12 # MUST
  ## Move the parsed nodes out of the scratch document into |$node|.
  my @new_children = @{$root->child_nodes};
  for my $new_child (@new_children) {
    $this_doc->adopt_node ($new_child);
    $node->append_child ($new_child);
  }
  ## ISSUE: mutation events?

  $p->_terminate_tree_constructor;
} # set_inner_html
4798    
4799     } # tree construction stage
4800 wakaba 1.1
## Serializes the child nodes of |$node| as an HTML fragment (the
## |innerHTML| getter).
##
## Arguments:
##   (first)   - ignored, so the sub can be called as a class method.
##   $node     - the node whose children are serialized.
##   $on_error - optional code reference; it is called with any child
##               node whose node type is not handled here.
##
## Returns a reference to the resulting string.
sub get_inner_html ($$$) {
  my (undef, $node, $on_error) = @_;

  ## Step 1
  my $s = '';

  ## Elements whose text content is written out without escaping.
  my $cdata_element = {
    style => 1, script => 1, xmp => 1, iframe => 1,
    noembed => 1, noframes => 1, noscript => 1,
  };

  ## Text is emitted unescaped from the start if |$node| itself or any
  ## of its ancestors is such an element in the XHTML namespace.
  my $in_cdata;
  my $parent = $node;
  while (defined $parent) {
    if ($parent->node_type == 1) {
      ## The namespace URI can be undefined (null namespace), so it
      ## is checked for definedness before the string comparison.
      my $nsuri = $parent->namespace_uri;
      if (defined $nsuri and
          $nsuri eq 'http://www.w3.org/1999/xhtml' and
          $cdata_element->{$parent->local_name}) { ## TODO: case thingy
        $in_cdata = 1;
      }
    }
    $parent = $parent->parent_node;
  }

  ## Step 2
  ## |@node| is a work queue processed from the front.  Besides node
  ## objects it holds plain strings: the marker 'cdata-out', which
  ## turns escaping back on, and end tags waiting to be appended
  ## after the children of their element.
  my @node = @{$node->child_nodes};
  C: while (@node) {
    my $child = shift @node;
    unless (ref $child) {
      if ($child eq 'cdata-out') {
        $in_cdata = 0;
      } else {
        $s .= $child; # end tag
      }
      next C;
    }

    my $nt = $child->node_type;
    if ($nt == 1) { # Element
      my $tag_name = lc $child->tag_name; ## ISSUE: Definition of "lowercase"
      $s .= '<' . $tag_name;

      ## ISSUE: Non-html elements

      my @attrs = @{$child->attributes}; # sort order MUST be stable
      for my $attr (@attrs) { # order is implementation dependent
        my $attr_name = lc $attr->name; ## ISSUE: Definition of "lowercase"
        $s .= ' ' . $attr_name . '="';
        my $attr_value = $attr->value;
        ## escape (|&| first, so that the other references are not
        ## escaped a second time)
        $attr_value =~ s/&/&amp;/g;
        $attr_value =~ s/</&lt;/g;
        $attr_value =~ s/>/&gt;/g;
        $attr_value =~ s/"/&quot;/g;
        $s .= $attr_value . '"';
      }
      $s .= '>';

      ## These elements get neither content nor an end tag.
      next C if {
        area => 1, base => 1, basefont => 1, bgsound => 1,
        br => 1, col => 1, embed => 1, frame => 1, hr => 1,
        img => 1, input => 1, link => 1, meta => 1, param => 1,
        spacer => 1, wbr => 1,
      }->{$tag_name};

      if (not $in_cdata and $cdata_element->{$tag_name}) {
        ## The marker ends up right after this element's end tag.
        unshift @node, 'cdata-out';
        $in_cdata = 1;
      }

      unshift @node, @{$child->child_nodes}, '</' . $tag_name . '>';
    } elsif ($nt == 3 or $nt == 4) { # Text or CDATA section
      if ($in_cdata) {
        $s .= $child->data;
      } else {
        my $value = $child->data;
        $value =~ s/&/&amp;/g;
        $value =~ s/</&lt;/g;
        $value =~ s/>/&gt;/g;
        $value =~ s/"/&quot;/g;
        $s .= $value;
      }
    } elsif ($nt == 8) { # Comment
      $s .= '<!--' . $child->data . '-->';
    } elsif ($nt == 10) { # Document type
      $s .= '<!DOCTYPE ' . $child->name . '>';
    } elsif ($nt == 5) { # entrefs
      ## The children take the place of the entity reference, so they
      ## go to the FRONT of the queue.  Appending them at the end
      ## would serialize them after the following siblings and after
      ## the end tags of the enclosing elements.
      unshift @node, @{$child->child_nodes};
    } else {
      $on_error->($child) if defined $on_error;
    }
    ## ISSUE: This code does not support PIs.
  } # C

  ## Step 3
  return \$s;
} # get_inner_html
4897    
4898     1;
4899 wakaba 1.16 # $Date: 2007/06/23 06:48:24 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24