/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Contents of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.30 - (hide annotations) (download) (as text)
Sat Jun 30 13:12:32 2007 UTC (17 years, 4 months ago) by wakaba
Branch: MAIN
Changes since 1.29: +12 -11 lines
File MIME type: application/x-wais-source
++ whatpm/t/ChangeLog	30 Jun 2007 12:28:52 -0000
2007-06-30  Wakaba  <wakaba@suika.fam.cx>

	* URIChecker.t: Error level names in test results have
	been changed.

	* tokenizer-test-1.test: A test for bogus SYSTEM identifier
	is added.

	* content-model-1.dat, content-model-2.dat, content-model-3.dat,
	content-model-4.dat: Error messages have been changed.

	* ContentChecker.t: Appends error level to the error
	message if any.

++ whatpm/Whatpm/ChangeLog	30 Jun 2007 13:03:50 -0000
2007-06-30  Wakaba  <wakaba@suika.fam.cx>

	* IMTChecker.pm: Report warning for unregistered
	and private types/subtypes.

	* ContentChecker.pm, HTML.pm.src, IMTChecker.pm,
	URIChecker.pm, HTMLTable.pm: Error messages are now
	consistent; they are all listed in
	<http://suika.fam.cx/gate/2005/sw/Whatpm%20Error%20Types>.

1 wakaba 1.2 package Whatpm::HTML;
2 wakaba 1.1 use strict;
3 wakaba 1.30 our $VERSION=do{my @r=(q$Revision: 1.29 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.1
5 wakaba 1.18 ## ISSUE:
6     ## var doc = implementation.createDocument (null, null, null);
7     ## doc.write ('');
8     ## alert (doc.compatMode);
9 wakaba 1.1
## Set of tag names (lowercase) whose start tag may end with a trailing
## "/" immediately before ">" without the tokenizer reporting a |nestc|
## parse error.
my $permitted_slash_tag_name = {
  map { $_ => 1 } qw(
    base link meta hr br img embed param area col input
  )
};
23    
## Maps the code points 0x80..0x9F to the code points stored for them
## here; 0xFFFD (REPLACEMENT CHARACTER) marks entries without a
## dedicated replacement.  The list below is ordered: element N holds
## the replacement for code point 0x80 + N.
my $c1_entity_char = do {
  my @replacement = (
    0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, # 80..87
    0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD, # 88..8F
    0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, # 90..97
    0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178, # 98..9F
  );
  +{ map { (0x80 + $_) => $replacement[$_] } 0 .. $#replacement };
}; # $c1_entity_char
58 wakaba 1.1
## Set of element names (lowercase) that belong to the "special"
## category.
my $special_category = {
  map { $_ => 1 } qw(
    address area base basefont bgsound blockquote body br center col
    colgroup dd dir div dl dt embed fieldset form frame frameset
    h1 h2 h3 h4 h5 h6 head hr iframe image img input isindex li link
    listing menu meta noembed noframes noscript ol optgroup option p
    param plaintext pre script select spacer style tbody textarea
    tfoot thead title tr ul wbr
  )
};
## Set of element names (lowercase) that belong to the "scoping"
## category.
my $scoping_category = {
  map { $_ => 1 } qw(button caption html marquee object table td th)
};
## Set of element names (lowercase) that belong to the "formatting"
## category.
##
## NOTE: The key for the |strike| element used to be misspelled as
## |strile|, so |strike| was never recognized as a formatting element.
my $formatting_category = {
  a => 1, b => 1, big => 1, em => 1, font => 1, i => 1, nobr => 1,
  s => 1, small => 1, strike => 1, strong => 1, tt => 1, u => 1,
};
79     # $phrasing_category: all other elements
80    
## Parses a string as HTML.
##
## Arguments (after the invocant, on which |new| is called):
##   1. The string to parse (accessed by reference, not copied).
##   2. The document object; stored as |$self->{document}|.
##   3. Optional error handler (code reference).  It is called with the
##      name/value pairs given at the error site followed by
##      |line => ...| and |column => ...|.  When omitted (or false),
##      errors are reported by |warn|.
##
## Returns |$self->{document}| after the tree has been constructed.
sub parse_string ($$$;$) {
  my $self = shift->new;
  my $input = \$_[0];
  $self->{document} = $_[1];

  ## NOTE: |set_inner_html| copies most of this method's code

  my $pos = 0;     # index of the next unread character in |$$input|
  my $line = 1;
  my $column = 0;

  $self->{set_next_input_character} = sub {
    my $self = shift;

    ## Slide the window holding the three previous input characters.
    pop @{$self->{prev_input_character}};
    unshift @{$self->{prev_input_character}}, $self->{next_input_character};

    ## -1 represents the end of the input.
    if ($pos >= length $$input) {
      $self->{next_input_character} = -1;
      return;
    }

    my $code = ord (substr ($$input, $pos++, 1));
    $self->{next_input_character} = $code;
    $column++;

    if ($code == 0x000A) { # LF
      $line++;
      $column = 0;
    } elsif ($code == 0x000D) { # CR
      ## CR and CR LF are both reported as a single LF.
      $pos++ if substr ($$input, $pos, 1) eq "\x0A";
      $self->{next_input_character} = 0x000A; # LF # MUST
      $line++;
      $column = 0;
    } elsif ($code > 0x10FFFF) {
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    } elsif ($code == 0x0000) { # NULL
      !!!parse-error (type => 'NULL');
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    }
  };
  $self->{prev_input_character} = [-1, -1, -1];
  $self->{next_input_character} = -1;

  my $onerror = $_[2];
  $onerror ||= sub {
    my (%opt) = @_;
    warn "Parse error ($opt{type}) at line $opt{line} column $opt{column}\n";
  };
  ## Append the current position to whatever the error site reports.
  $self->{parse_error} = sub {
    $onerror->(@_, line => $line, column => $column);
  };

  $self->_initialize_tokenizer;
  $self->_initialize_tree_constructor;
  $self->_construct_tree;
  $self->_terminate_tree_constructor;

  return $self->{document};
} # parse_string
134    
## Creates a new parser object blessed into the class given as the
## invocant and returns it.
##
## The object starts with two default callbacks:
##   |set_next_input_character| - sets |next_input_character| of the
##       parser passed as its first argument to -1 (end of input).
##   |parse_error| - does nothing.
sub new ($) {
  my $class = shift;
  my $self = bless {}, $class;
  ## The callback takes the parser from its first argument, like the
  ## callback installed by |parse_string|, instead of closing over
  ## |$self|.  A closure stored in |$self| that also captures |$self|
  ## forms a reference cycle, which would keep every parser object
  ## alive until the program exits.
  $self->{set_next_input_character} = sub {
    my $self = shift;
    $self->{next_input_character} = -1;
  };
  $self->{parse_error} = sub {
    #
  };
  return $self;
} # new
146    
147     ## Implementations MUST act as if they used the state machine in the spec.
148    
## Resets the tokenizer-related fields of the parser to their initial
## values and reads the first input character.
sub _initialize_tokenizer ($) {
  my ($self) = @_;

  $self->{state} = 'data'; # MUST
  $self->{content_model_flag} = 'PCDATA'; # be

  ## |current_token| is a start tag, end tag, comment, or DOCTYPE token.
  $self->{$_} = undef
      for qw(current_token current_attribute
             last_emitted_start_tag_name last_attribute_value_state);

  $self->{char} = [];
  # $self->{next_input_character}
  !!!next-input-character;
  $self->{token} = [];
  # $self->{escape}
} # _initialize_tokenizer
163    
164     ## A token has:
165     ## ->{type} eq 'DOCTYPE', 'start tag', 'end tag', 'comment',
166     ## 'character', or 'end-of-file'
167 wakaba 1.18 ## ->{name} (DOCTYPE, start tag (tag name), end tag (tag name))
168     ## ->{public_identifier} (DOCTYPE)
169     ## ->{system_identifier} (DOCTYPE)
170     ## ->{correct} == 1 or 0 (DOCTYPE)
171 wakaba 1.1 ## ->{attributes} isa HASH (start tag, end tag)
172     ## ->{data} (comment, character)
173    
174     ## Emitted token MUST immediately be handled by the tree construction state.
175    
176     ## Before each step, UA MAY check to see if either one of the scripts in
177     ## "list of scripts that will execute as soon as possible" or the first
178     ## script in the "list of scripts that will execute asynchronously",
179     ## has completed loading. If one has, then it MUST be executed
180     ## and removed from the list.
181    
182     sub _get_next_token ($) {
183     my $self = shift;
184     if (@{$self->{token}}) {
185     return shift @{$self->{token}};
186     }
187    
188     A: {
189     if ($self->{state} eq 'data') {
190     if ($self->{next_input_character} == 0x0026) { # &
191     if ($self->{content_model_flag} eq 'PCDATA' or
192     $self->{content_model_flag} eq 'RCDATA') {
193     $self->{state} = 'entity data';
194     !!!next-input-character;
195     redo A;
196     } else {
197     #
198     }
199 wakaba 1.13 } elsif ($self->{next_input_character} == 0x002D) { # -
200     if ($self->{content_model_flag} eq 'RCDATA' or
201     $self->{content_model_flag} eq 'CDATA') {
202     unless ($self->{escape}) {
203     if ($self->{prev_input_character}->[0] == 0x002D and # -
204     $self->{prev_input_character}->[1] == 0x0021 and # !
205     $self->{prev_input_character}->[2] == 0x003C) { # <
206     $self->{escape} = 1;
207     }
208     }
209     }
210    
211     #
212 wakaba 1.1 } elsif ($self->{next_input_character} == 0x003C) { # <
213 wakaba 1.13 if ($self->{content_model_flag} eq 'PCDATA' or
214     (($self->{content_model_flag} eq 'CDATA' or
215     $self->{content_model_flag} eq 'RCDATA') and
216     not $self->{escape})) {
217 wakaba 1.1 $self->{state} = 'tag open';
218     !!!next-input-character;
219     redo A;
220     } else {
221     #
222     }
223 wakaba 1.13 } elsif ($self->{next_input_character} == 0x003E) { # >
224     if ($self->{escape} and
225     ($self->{content_model_flag} eq 'RCDATA' or
226     $self->{content_model_flag} eq 'CDATA')) {
227     if ($self->{prev_input_character}->[0] == 0x002D and # -
228     $self->{prev_input_character}->[1] == 0x002D) { # -
229     delete $self->{escape};
230     }
231     }
232    
233     #
234 wakaba 1.1 } elsif ($self->{next_input_character} == -1) {
235     !!!emit ({type => 'end-of-file'});
236     last A; ## TODO: ok?
237     }
238     # Anything else
239     my $token = {type => 'character',
240     data => chr $self->{next_input_character}};
241     ## Stay in the data state
242     !!!next-input-character;
243    
244     !!!emit ($token);
245    
246     redo A;
247     } elsif ($self->{state} eq 'entity data') {
248     ## (cannot happen in CDATA state)
249    
250 wakaba 1.26 my $token = $self->_tokenize_attempt_to_consume_an_entity (0);
251 wakaba 1.1
252     $self->{state} = 'data';
253     # next-input-character is already done
254    
255     unless (defined $token) {
256     !!!emit ({type => 'character', data => '&'});
257     } else {
258     !!!emit ($token);
259     }
260    
261     redo A;
262     } elsif ($self->{state} eq 'tag open') {
263     if ($self->{content_model_flag} eq 'RCDATA' or
264     $self->{content_model_flag} eq 'CDATA') {
265     if ($self->{next_input_character} == 0x002F) { # /
266     !!!next-input-character;
267     $self->{state} = 'close tag open';
268     redo A;
269     } else {
270     ## reconsume
271     $self->{state} = 'data';
272    
273     !!!emit ({type => 'character', data => '<'});
274    
275     redo A;
276     }
277     } elsif ($self->{content_model_flag} eq 'PCDATA') {
278     if ($self->{next_input_character} == 0x0021) { # !
279     $self->{state} = 'markup declaration open';
280     !!!next-input-character;
281     redo A;
282     } elsif ($self->{next_input_character} == 0x002F) { # /
283     $self->{state} = 'close tag open';
284     !!!next-input-character;
285     redo A;
286     } elsif (0x0041 <= $self->{next_input_character} and
287     $self->{next_input_character} <= 0x005A) { # A..Z
288     $self->{current_token}
289     = {type => 'start tag',
290     tag_name => chr ($self->{next_input_character} + 0x0020)};
291     $self->{state} = 'tag name';
292     !!!next-input-character;
293     redo A;
294     } elsif (0x0061 <= $self->{next_input_character} and
295     $self->{next_input_character} <= 0x007A) { # a..z
296     $self->{current_token} = {type => 'start tag',
297     tag_name => chr ($self->{next_input_character})};
298     $self->{state} = 'tag name';
299     !!!next-input-character;
300     redo A;
301     } elsif ($self->{next_input_character} == 0x003E) { # >
302 wakaba 1.3 !!!parse-error (type => 'empty start tag');
303 wakaba 1.1 $self->{state} = 'data';
304     !!!next-input-character;
305    
306     !!!emit ({type => 'character', data => '<>'});
307    
308     redo A;
309     } elsif ($self->{next_input_character} == 0x003F) { # ?
310 wakaba 1.3 !!!parse-error (type => 'pio');
311 wakaba 1.1 $self->{state} = 'bogus comment';
312     ## $self->{next_input_character} is intentionally left as is
313     redo A;
314     } else {
315 wakaba 1.3 !!!parse-error (type => 'bare stago');
316 wakaba 1.1 $self->{state} = 'data';
317     ## reconsume
318    
319     !!!emit ({type => 'character', data => '<'});
320    
321     redo A;
322     }
323     } else {
324     die "$0: $self->{content_model_flag}: Unknown content model flag";
325     }
326     } elsif ($self->{state} eq 'close tag open') {
327     if ($self->{content_model_flag} eq 'RCDATA' or
328     $self->{content_model_flag} eq 'CDATA') {
329 wakaba 1.23 if (defined $self->{last_emitted_start_tag_name}) {
330 wakaba 1.30 ## NOTE: <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>
331 wakaba 1.23 my @next_char;
332     TAGNAME: for (my $i = 0; $i < length $self->{last_emitted_start_tag_name}; $i++) {
333     push @next_char, $self->{next_input_character};
334     my $c = ord substr ($self->{last_emitted_start_tag_name}, $i, 1);
335     my $C = 0x0061 <= $c && $c <= 0x007A ? $c - 0x0020 : $c;
336     if ($self->{next_input_character} == $c or $self->{next_input_character} == $C) {
337     !!!next-input-character;
338     next TAGNAME;
339     } else {
340     $self->{next_input_character} = shift @next_char; # reconsume
341     !!!back-next-input-character (@next_char);
342     $self->{state} = 'data';
343    
344     !!!emit ({type => 'character', data => '</'});
345    
346     redo A;
347     }
348     }
349 wakaba 1.1 push @next_char, $self->{next_input_character};
350 wakaba 1.23
351     unless ($self->{next_input_character} == 0x0009 or # HT
352     $self->{next_input_character} == 0x000A or # LF
353     $self->{next_input_character} == 0x000B or # VT
354     $self->{next_input_character} == 0x000C or # FF
355     $self->{next_input_character} == 0x0020 or # SP
356     $self->{next_input_character} == 0x003E or # >
357     $self->{next_input_character} == 0x002F or # /
358     $self->{next_input_character} == -1) {
359 wakaba 1.1 $self->{next_input_character} = shift @next_char; # reconsume
360     !!!back-next-input-character (@next_char);
361     $self->{state} = 'data';
362     !!!emit ({type => 'character', data => '</'});
363     redo A;
364 wakaba 1.23 } else {
365     $self->{next_input_character} = shift @next_char;
366     !!!back-next-input-character (@next_char);
367     # and consume...
368 wakaba 1.1 }
369 wakaba 1.23 } else {
370     ## No start tag token has ever been emitted
371     # next-input-character is already done
372 wakaba 1.1 $self->{state} = 'data';
373     !!!emit ({type => 'character', data => '</'});
374     redo A;
375     }
376     }
377    
378     if (0x0041 <= $self->{next_input_character} and
379     $self->{next_input_character} <= 0x005A) { # A..Z
380     $self->{current_token} = {type => 'end tag',
381     tag_name => chr ($self->{next_input_character} + 0x0020)};
382     $self->{state} = 'tag name';
383     !!!next-input-character;
384     redo A;
385     } elsif (0x0061 <= $self->{next_input_character} and
386     $self->{next_input_character} <= 0x007A) { # a..z
387     $self->{current_token} = {type => 'end tag',
388     tag_name => chr ($self->{next_input_character})};
389     $self->{state} = 'tag name';
390     !!!next-input-character;
391     redo A;
392     } elsif ($self->{next_input_character} == 0x003E) { # >
393 wakaba 1.3 !!!parse-error (type => 'empty end tag');
394 wakaba 1.1 $self->{state} = 'data';
395     !!!next-input-character;
396     redo A;
397     } elsif ($self->{next_input_character} == -1) {
398 wakaba 1.3 !!!parse-error (type => 'bare etago');
399 wakaba 1.1 $self->{state} = 'data';
400     # reconsume
401    
402     !!!emit ({type => 'character', data => '</'});
403    
404     redo A;
405     } else {
406 wakaba 1.3 !!!parse-error (type => 'bogus end tag');
407 wakaba 1.1 $self->{state} = 'bogus comment';
408     ## $self->{next_input_character} is intentionally left as is
409     redo A;
410     }
411     } elsif ($self->{state} eq 'tag name') {
412     if ($self->{next_input_character} == 0x0009 or # HT
413     $self->{next_input_character} == 0x000A or # LF
414     $self->{next_input_character} == 0x000B or # VT
415     $self->{next_input_character} == 0x000C or # FF
416     $self->{next_input_character} == 0x0020) { # SP
417     $self->{state} = 'before attribute name';
418     !!!next-input-character;
419     redo A;
420     } elsif ($self->{next_input_character} == 0x003E) { # >
421     if ($self->{current_token}->{type} eq 'start tag') {
422 wakaba 1.28 $self->{current_token}->{first_start_tag}
423     = not defined $self->{last_emitted_start_tag_name};
424 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
425     } elsif ($self->{current_token}->{type} eq 'end tag') {
426     $self->{content_model_flag} = 'PCDATA'; # MUST
427     if ($self->{current_token}->{attributes}) {
428 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
429 wakaba 1.1 }
430     } else {
431     die "$0: $self->{current_token}->{type}: Unknown token type";
432     }
433     $self->{state} = 'data';
434     !!!next-input-character;
435    
436     !!!emit ($self->{current_token}); # start tag or end tag
437    
438     redo A;
439     } elsif (0x0041 <= $self->{next_input_character} and
440     $self->{next_input_character} <= 0x005A) { # A..Z
441     $self->{current_token}->{tag_name} .= chr ($self->{next_input_character} + 0x0020);
442     # start tag or end tag
443     ## Stay in this state
444     !!!next-input-character;
445     redo A;
446 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
447 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
448 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
449 wakaba 1.28 $self->{current_token}->{first_start_tag}
450     = not defined $self->{last_emitted_start_tag_name};
451 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
452     } elsif ($self->{current_token}->{type} eq 'end tag') {
453     $self->{content_model_flag} = 'PCDATA'; # MUST
454     if ($self->{current_token}->{attributes}) {
455 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
456 wakaba 1.1 }
457     } else {
458     die "$0: $self->{current_token}->{type}: Unknown token type";
459     }
460     $self->{state} = 'data';
461     # reconsume
462    
463     !!!emit ($self->{current_token}); # start tag or end tag
464    
465     redo A;
466     } elsif ($self->{next_input_character} == 0x002F) { # /
467     !!!next-input-character;
468     if ($self->{next_input_character} == 0x003E and # >
469     $self->{current_token}->{type} eq 'start tag' and
470     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
471     # permitted slash
472     #
473     } else {
474 wakaba 1.3 !!!parse-error (type => 'nestc');
475 wakaba 1.1 }
476     $self->{state} = 'before attribute name';
477     # next-input-character is already done
478     redo A;
479     } else {
480     $self->{current_token}->{tag_name} .= chr $self->{next_input_character};
481     # start tag or end tag
482     ## Stay in the state
483     !!!next-input-character;
484     redo A;
485     }
486     } elsif ($self->{state} eq 'before attribute name') {
487     if ($self->{next_input_character} == 0x0009 or # HT
488     $self->{next_input_character} == 0x000A or # LF
489     $self->{next_input_character} == 0x000B or # VT
490     $self->{next_input_character} == 0x000C or # FF
491     $self->{next_input_character} == 0x0020) { # SP
492     ## Stay in the state
493     !!!next-input-character;
494     redo A;
495     } elsif ($self->{next_input_character} == 0x003E) { # >
496     if ($self->{current_token}->{type} eq 'start tag') {
497 wakaba 1.28 $self->{current_token}->{first_start_tag}
498     = not defined $self->{last_emitted_start_tag_name};
499 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
500     } elsif ($self->{current_token}->{type} eq 'end tag') {
501     $self->{content_model_flag} = 'PCDATA'; # MUST
502     if ($self->{current_token}->{attributes}) {
503 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
504 wakaba 1.1 }
505     } else {
506     die "$0: $self->{current_token}->{type}: Unknown token type";
507     }
508     $self->{state} = 'data';
509     !!!next-input-character;
510    
511     !!!emit ($self->{current_token}); # start tag or end tag
512    
513     redo A;
514     } elsif (0x0041 <= $self->{next_input_character} and
515     $self->{next_input_character} <= 0x005A) { # A..Z
516     $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
517     value => ''};
518     $self->{state} = 'attribute name';
519     !!!next-input-character;
520     redo A;
521     } elsif ($self->{next_input_character} == 0x002F) { # /
522     !!!next-input-character;
523     if ($self->{next_input_character} == 0x003E and # >
524     $self->{current_token}->{type} eq 'start tag' and
525     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
526     # permitted slash
527     #
528     } else {
529 wakaba 1.3 !!!parse-error (type => 'nestc');
530 wakaba 1.1 }
531     ## Stay in the state
532     # next-input-character is already done
533     redo A;
534 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
535 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
536 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
537 wakaba 1.28 $self->{current_token}->{first_start_tag}
538     = not defined $self->{last_emitted_start_tag_name};
539 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
540     } elsif ($self->{current_token}->{type} eq 'end tag') {
541     $self->{content_model_flag} = 'PCDATA'; # MUST
542     if ($self->{current_token}->{attributes}) {
543 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
544 wakaba 1.1 }
545     } else {
546     die "$0: $self->{current_token}->{type}: Unknown token type";
547     }
548     $self->{state} = 'data';
549     # reconsume
550    
551     !!!emit ($self->{current_token}); # start tag or end tag
552    
553     redo A;
554     } else {
555     $self->{current_attribute} = {name => chr ($self->{next_input_character}),
556     value => ''};
557     $self->{state} = 'attribute name';
558     !!!next-input-character;
559     redo A;
560     }
561     } elsif ($self->{state} eq 'attribute name') {
562     my $before_leave = sub {
563     if (exists $self->{current_token}->{attributes} # start tag or end tag
564     ->{$self->{current_attribute}->{name}}) { # MUST
565 wakaba 1.3 !!!parse-error (type => 'dupulicate attribute');
566 wakaba 1.1 ## Discard $self->{current_attribute} # MUST
567     } else {
568     $self->{current_token}->{attributes}->{$self->{current_attribute}->{name}}
569     = $self->{current_attribute};
570     }
571     }; # $before_leave
572    
573     if ($self->{next_input_character} == 0x0009 or # HT
574     $self->{next_input_character} == 0x000A or # LF
575     $self->{next_input_character} == 0x000B or # VT
576     $self->{next_input_character} == 0x000C or # FF
577     $self->{next_input_character} == 0x0020) { # SP
578     $before_leave->();
579     $self->{state} = 'after attribute name';
580     !!!next-input-character;
581     redo A;
582     } elsif ($self->{next_input_character} == 0x003D) { # =
583     $before_leave->();
584     $self->{state} = 'before attribute value';
585     !!!next-input-character;
586     redo A;
587     } elsif ($self->{next_input_character} == 0x003E) { # >
588     $before_leave->();
589     if ($self->{current_token}->{type} eq 'start tag') {
590 wakaba 1.28 $self->{current_token}->{first_start_tag}
591     = not defined $self->{last_emitted_start_tag_name};
592 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
593     } elsif ($self->{current_token}->{type} eq 'end tag') {
594     $self->{content_model_flag} = 'PCDATA'; # MUST
595     if ($self->{current_token}->{attributes}) {
596 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
597 wakaba 1.1 }
598     } else {
599     die "$0: $self->{current_token}->{type}: Unknown token type";
600     }
601     $self->{state} = 'data';
602     !!!next-input-character;
603    
604     !!!emit ($self->{current_token}); # start tag or end tag
605    
606     redo A;
607     } elsif (0x0041 <= $self->{next_input_character} and
608     $self->{next_input_character} <= 0x005A) { # A..Z
609     $self->{current_attribute}->{name} .= chr ($self->{next_input_character} + 0x0020);
610     ## Stay in the state
611     !!!next-input-character;
612     redo A;
613     } elsif ($self->{next_input_character} == 0x002F) { # /
614     $before_leave->();
615     !!!next-input-character;
616     if ($self->{next_input_character} == 0x003E and # >
617     $self->{current_token}->{type} eq 'start tag' and
618     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
619     # permitted slash
620     #
621     } else {
622 wakaba 1.3 !!!parse-error (type => 'nestc');
623 wakaba 1.1 }
624     $self->{state} = 'before attribute name';
625     # next-input-character is already done
626     redo A;
627 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
628 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
629 wakaba 1.1 $before_leave->();
630     if ($self->{current_token}->{type} eq 'start tag') {
631 wakaba 1.28 $self->{current_token}->{first_start_tag}
632     = not defined $self->{last_emitted_start_tag_name};
633 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
634     } elsif ($self->{current_token}->{type} eq 'end tag') {
635     $self->{content_model_flag} = 'PCDATA'; # MUST
636     if ($self->{current_token}->{attributes}) {
637 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
638 wakaba 1.1 }
639     } else {
640     die "$0: $self->{current_token}->{type}: Unknown token type";
641     }
642     $self->{state} = 'data';
643     # reconsume
644    
645     !!!emit ($self->{current_token}); # start tag or end tag
646    
647     redo A;
648     } else {
649     $self->{current_attribute}->{name} .= chr ($self->{next_input_character});
650     ## Stay in the state
651     !!!next-input-character;
652     redo A;
653     }
654     } elsif ($self->{state} eq 'after attribute name') {
655     if ($self->{next_input_character} == 0x0009 or # HT
656     $self->{next_input_character} == 0x000A or # LF
657     $self->{next_input_character} == 0x000B or # VT
658     $self->{next_input_character} == 0x000C or # FF
659     $self->{next_input_character} == 0x0020) { # SP
660     ## Stay in the state
661     !!!next-input-character;
662     redo A;
663     } elsif ($self->{next_input_character} == 0x003D) { # =
664     $self->{state} = 'before attribute value';
665     !!!next-input-character;
666     redo A;
667     } elsif ($self->{next_input_character} == 0x003E) { # >
668     if ($self->{current_token}->{type} eq 'start tag') {
669 wakaba 1.28 $self->{current_token}->{first_start_tag}
670     = not defined $self->{last_emitted_start_tag_name};
671 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
672     } elsif ($self->{current_token}->{type} eq 'end tag') {
673     $self->{content_model_flag} = 'PCDATA'; # MUST
674     if ($self->{current_token}->{attributes}) {
675 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
676 wakaba 1.1 }
677     } else {
678     die "$0: $self->{current_token}->{type}: Unknown token type";
679     }
680     $self->{state} = 'data';
681     !!!next-input-character;
682    
683     !!!emit ($self->{current_token}); # start tag or end tag
684    
685     redo A;
686     } elsif (0x0041 <= $self->{next_input_character} and
687     $self->{next_input_character} <= 0x005A) { # A..Z
688     $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
689     value => ''};
690     $self->{state} = 'attribute name';
691     !!!next-input-character;
692     redo A;
693     } elsif ($self->{next_input_character} == 0x002F) { # /
694     !!!next-input-character;
695     if ($self->{next_input_character} == 0x003E and # >
696     $self->{current_token}->{type} eq 'start tag' and
697     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
698     # permitted slash
699     #
700     } else {
701 wakaba 1.3 !!!parse-error (type => 'nestc');
702 wakaba 1.1 }
703     $self->{state} = 'before attribute name';
704     # next-input-character is already done
705     redo A;
706 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
707 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
708 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
709 wakaba 1.28 $self->{current_token}->{first_start_tag}
710     = not defined $self->{last_emitted_start_tag_name};
711 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
712     } elsif ($self->{current_token}->{type} eq 'end tag') {
713     $self->{content_model_flag} = 'PCDATA'; # MUST
714     if ($self->{current_token}->{attributes}) {
715 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
716 wakaba 1.1 }
717     } else {
718     die "$0: $self->{current_token}->{type}: Unknown token type";
719     }
720     $self->{state} = 'data';
721     # reconsume
722    
723     !!!emit ($self->{current_token}); # start tag or end tag
724    
725     redo A;
726     } else {
727     $self->{current_attribute} = {name => chr ($self->{next_input_character}),
728     value => ''};
729     $self->{state} = 'attribute name';
730     !!!next-input-character;
731     redo A;
732     }
733     } elsif ($self->{state} eq 'before attribute value') {
734     if ($self->{next_input_character} == 0x0009 or # HT
735     $self->{next_input_character} == 0x000A or # LF
736     $self->{next_input_character} == 0x000B or # VT
737     $self->{next_input_character} == 0x000C or # FF
738     $self->{next_input_character} == 0x0020) { # SP
739     ## Stay in the state
740     !!!next-input-character;
741     redo A;
742     } elsif ($self->{next_input_character} == 0x0022) { # "
743     $self->{state} = 'attribute value (double-quoted)';
744     !!!next-input-character;
745     redo A;
746     } elsif ($self->{next_input_character} == 0x0026) { # &
747     $self->{state} = 'attribute value (unquoted)';
748     ## reconsume
749     redo A;
750     } elsif ($self->{next_input_character} == 0x0027) { # '
751     $self->{state} = 'attribute value (single-quoted)';
752     !!!next-input-character;
753     redo A;
754     } elsif ($self->{next_input_character} == 0x003E) { # >
755     if ($self->{current_token}->{type} eq 'start tag') {
756 wakaba 1.28 $self->{current_token}->{first_start_tag}
757     = not defined $self->{last_emitted_start_tag_name};
758 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
759     } elsif ($self->{current_token}->{type} eq 'end tag') {
760     $self->{content_model_flag} = 'PCDATA'; # MUST
761     if ($self->{current_token}->{attributes}) {
762 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
763 wakaba 1.1 }
764     } else {
765     die "$0: $self->{current_token}->{type}: Unknown token type";
766     }
767     $self->{state} = 'data';
768     !!!next-input-character;
769    
770     !!!emit ($self->{current_token}); # start tag or end tag
771    
772     redo A;
773 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
774 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
775 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
776 wakaba 1.28 $self->{current_token}->{first_start_tag}
777     = not defined $self->{last_emitted_start_tag_name};
778 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
779     } elsif ($self->{current_token}->{type} eq 'end tag') {
780     $self->{content_model_flag} = 'PCDATA'; # MUST
781     if ($self->{current_token}->{attributes}) {
782 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
783 wakaba 1.1 }
784     } else {
785     die "$0: $self->{current_token}->{type}: Unknown token type";
786     }
787     $self->{state} = 'data';
788     ## reconsume
789    
790     !!!emit ($self->{current_token}); # start tag or end tag
791    
792     redo A;
793     } else {
794     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
795     $self->{state} = 'attribute value (unquoted)';
796     !!!next-input-character;
797     redo A;
798     }
799     } elsif ($self->{state} eq 'attribute value (double-quoted)') {
800     if ($self->{next_input_character} == 0x0022) { # "
801     $self->{state} = 'before attribute name';
802     !!!next-input-character;
803     redo A;
804     } elsif ($self->{next_input_character} == 0x0026) { # &
805     $self->{last_attribute_value_state} = 'attribute value (double-quoted)';
806     $self->{state} = 'entity in attribute value';
807     !!!next-input-character;
808     redo A;
809     } elsif ($self->{next_input_character} == -1) {
810 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
811 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
812 wakaba 1.28 $self->{current_token}->{first_start_tag}
813     = not defined $self->{last_emitted_start_tag_name};
814 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
815     } elsif ($self->{current_token}->{type} eq 'end tag') {
816     $self->{content_model_flag} = 'PCDATA'; # MUST
817     if ($self->{current_token}->{attributes}) {
818 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
819 wakaba 1.1 }
820     } else {
821     die "$0: $self->{current_token}->{type}: Unknown token type";
822     }
823     $self->{state} = 'data';
824     ## reconsume
825    
826     !!!emit ($self->{current_token}); # start tag or end tag
827    
828     redo A;
829     } else {
830     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
831     ## Stay in the state
832     !!!next-input-character;
833     redo A;
834     }
835     } elsif ($self->{state} eq 'attribute value (single-quoted)') {
836     if ($self->{next_input_character} == 0x0027) { # '
837     $self->{state} = 'before attribute name';
838     !!!next-input-character;
839     redo A;
840     } elsif ($self->{next_input_character} == 0x0026) { # &
841     $self->{last_attribute_value_state} = 'attribute value (single-quoted)';
842     $self->{state} = 'entity in attribute value';
843     !!!next-input-character;
844     redo A;
845     } elsif ($self->{next_input_character} == -1) {
846 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
847 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
848 wakaba 1.28 $self->{current_token}->{first_start_tag}
849     = not defined $self->{last_emitted_start_tag_name};
850 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
851     } elsif ($self->{current_token}->{type} eq 'end tag') {
852     $self->{content_model_flag} = 'PCDATA'; # MUST
853     if ($self->{current_token}->{attributes}) {
854 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
855 wakaba 1.1 }
856     } else {
857     die "$0: $self->{current_token}->{type}: Unknown token type";
858     }
859     $self->{state} = 'data';
860     ## reconsume
861    
862     !!!emit ($self->{current_token}); # start tag or end tag
863    
864     redo A;
865     } else {
866     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
867     ## Stay in the state
868     !!!next-input-character;
869     redo A;
870     }
871     } elsif ($self->{state} eq 'attribute value (unquoted)') {
872     if ($self->{next_input_character} == 0x0009 or # HT
873     $self->{next_input_character} == 0x000A or # LF
874     $self->{next_input_character} == 0x000B or # VT
875     $self->{next_input_character} == 0x000C or # FF
876     $self->{next_input_character} == 0x0020) { # SP
877     $self->{state} = 'before attribute name';
878     !!!next-input-character;
879     redo A;
880     } elsif ($self->{next_input_character} == 0x0026) { # &
881     $self->{last_attribute_value_state} = 'attribute value (unquoted)';
882     $self->{state} = 'entity in attribute value';
883     !!!next-input-character;
884     redo A;
885     } elsif ($self->{next_input_character} == 0x003E) { # >
886     if ($self->{current_token}->{type} eq 'start tag') {
887 wakaba 1.28 $self->{current_token}->{first_start_tag}
888     = not defined $self->{last_emitted_start_tag_name};
889 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
890     } elsif ($self->{current_token}->{type} eq 'end tag') {
891     $self->{content_model_flag} = 'PCDATA'; # MUST
892     if ($self->{current_token}->{attributes}) {
893 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
894 wakaba 1.1 }
895     } else {
896     die "$0: $self->{current_token}->{type}: Unknown token type";
897     }
898     $self->{state} = 'data';
899     !!!next-input-character;
900    
901     !!!emit ($self->{current_token}); # start tag or end tag
902    
903     redo A;
904 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
905 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
906 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
907 wakaba 1.28 $self->{current_token}->{first_start_tag}
908     = not defined $self->{last_emitted_start_tag_name};
909 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
910     } elsif ($self->{current_token}->{type} eq 'end tag') {
911     $self->{content_model_flag} = 'PCDATA'; # MUST
912     if ($self->{current_token}->{attributes}) {
913 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
914 wakaba 1.1 }
915     } else {
916     die "$0: $self->{current_token}->{type}: Unknown token type";
917     }
918     $self->{state} = 'data';
919     ## reconsume
920    
921     !!!emit ($self->{current_token}); # start tag or end tag
922    
923     redo A;
924     } else {
925     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
926     ## Stay in the state
927     !!!next-input-character;
928     redo A;
929     }
930     } elsif ($self->{state} eq 'entity in attribute value') {
931 wakaba 1.26 my $token = $self->_tokenize_attempt_to_consume_an_entity (1);
932 wakaba 1.1
933     unless (defined $token) {
934     $self->{current_attribute}->{value} .= '&';
935     } else {
936     $self->{current_attribute}->{value} .= $token->{data};
937     ## ISSUE: spec says "append the returned character token to the current attribute's value"
938     }
939    
940     $self->{state} = $self->{last_attribute_value_state};
941     # next-input-character is already done
942     redo A;
943     } elsif ($self->{state} eq 'bogus comment') {
944     ## (only happen if PCDATA state)
945    
946     my $token = {type => 'comment', data => ''};
947    
948     BC: {
949     if ($self->{next_input_character} == 0x003E) { # >
950     $self->{state} = 'data';
951     !!!next-input-character;
952    
953     !!!emit ($token);
954    
955     redo A;
956     } elsif ($self->{next_input_character} == -1) {
957     $self->{state} = 'data';
958     ## reconsume
959    
960     !!!emit ($token);
961    
962     redo A;
963     } else {
964     $token->{data} .= chr ($self->{next_input_character});
965     !!!next-input-character;
966     redo BC;
967     }
968     } # BC
969     } elsif ($self->{state} eq 'markup declaration open') {
970     ## (only happen if PCDATA state)
971    
972     my @next_char;
973     push @next_char, $self->{next_input_character};
974    
975     if ($self->{next_input_character} == 0x002D) { # -
976     !!!next-input-character;
977     push @next_char, $self->{next_input_character};
978     if ($self->{next_input_character} == 0x002D) { # -
979     $self->{current_token} = {type => 'comment', data => ''};
980 wakaba 1.23 $self->{state} = 'comment start';
981 wakaba 1.1 !!!next-input-character;
982     redo A;
983     }
984     } elsif ($self->{next_input_character} == 0x0044 or # D
985     $self->{next_input_character} == 0x0064) { # d
986     !!!next-input-character;
987     push @next_char, $self->{next_input_character};
988     if ($self->{next_input_character} == 0x004F or # O
989     $self->{next_input_character} == 0x006F) { # o
990     !!!next-input-character;
991     push @next_char, $self->{next_input_character};
992     if ($self->{next_input_character} == 0x0043 or # C
993     $self->{next_input_character} == 0x0063) { # c
994     !!!next-input-character;
995     push @next_char, $self->{next_input_character};
996     if ($self->{next_input_character} == 0x0054 or # T
997     $self->{next_input_character} == 0x0074) { # t
998     !!!next-input-character;
999     push @next_char, $self->{next_input_character};
1000     if ($self->{next_input_character} == 0x0059 or # Y
1001     $self->{next_input_character} == 0x0079) { # y
1002     !!!next-input-character;
1003     push @next_char, $self->{next_input_character};
1004     if ($self->{next_input_character} == 0x0050 or # P
1005     $self->{next_input_character} == 0x0070) { # p
1006     !!!next-input-character;
1007     push @next_char, $self->{next_input_character};
1008     if ($self->{next_input_character} == 0x0045 or # E
1009     $self->{next_input_character} == 0x0065) { # e
1010     ## ISSUE: What a stupid code this is!
1011     $self->{state} = 'DOCTYPE';
1012     !!!next-input-character;
1013     redo A;
1014     }
1015     }
1016     }
1017     }
1018     }
1019     }
1020     }
1021    
1022 wakaba 1.30 !!!parse-error (type => 'bogus comment');
1023 wakaba 1.1 $self->{next_input_character} = shift @next_char;
1024     !!!back-next-input-character (@next_char);
1025     $self->{state} = 'bogus comment';
1026     redo A;
1027    
1028     ## ISSUE: typos in spec: chacacters, is is a parse error
1029     ## ISSUE: spec is somewhat unclear on "is the first character that will be in the comment"; what is "that will be in the comment" is what the algorithm defines, isn't it?
1030 wakaba 1.23 } elsif ($self->{state} eq 'comment start') {
1031     if ($self->{next_input_character} == 0x002D) { # -
1032     $self->{state} = 'comment start dash';
1033     !!!next-input-character;
1034     redo A;
1035     } elsif ($self->{next_input_character} == 0x003E) { # >
1036     !!!parse-error (type => 'bogus comment');
1037     $self->{state} = 'data';
1038     !!!next-input-character;
1039    
1040     !!!emit ($self->{current_token}); # comment
1041    
1042     redo A;
1043     } elsif ($self->{next_input_character} == -1) {
1044     !!!parse-error (type => 'unclosed comment');
1045     $self->{state} = 'data';
1046     ## reconsume
1047    
1048     !!!emit ($self->{current_token}); # comment
1049    
1050     redo A;
1051     } else {
1052     $self->{current_token}->{data} # comment
1053     .= chr ($self->{next_input_character});
1054     $self->{state} = 'comment';
1055     !!!next-input-character;
1056     redo A;
1057     }
1058     } elsif ($self->{state} eq 'comment start dash') {
1059     if ($self->{next_input_character} == 0x002D) { # -
1060     $self->{state} = 'comment end';
1061     !!!next-input-character;
1062     redo A;
1063     } elsif ($self->{next_input_character} == 0x003E) { # >
1064     !!!parse-error (type => 'bogus comment');
1065     $self->{state} = 'data';
1066     !!!next-input-character;
1067    
1068     !!!emit ($self->{current_token}); # comment
1069    
1070     redo A;
1071     } elsif ($self->{next_input_character} == -1) {
1072     !!!parse-error (type => 'unclosed comment');
1073     $self->{state} = 'data';
1074     ## reconsume
1075    
1076     !!!emit ($self->{current_token}); # comment
1077    
1078     redo A;
1079     } else {
1080     $self->{current_token}->{data} # comment
1081     .= chr ($self->{next_input_character});
1082     $self->{state} = 'comment';
1083     !!!next-input-character;
1084     redo A;
1085     }
1086 wakaba 1.1 } elsif ($self->{state} eq 'comment') {
1087     if ($self->{next_input_character} == 0x002D) { # -
1088 wakaba 1.23 $self->{state} = 'comment end dash';
1089 wakaba 1.1 !!!next-input-character;
1090     redo A;
1091     } elsif ($self->{next_input_character} == -1) {
1092 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1093 wakaba 1.1 $self->{state} = 'data';
1094     ## reconsume
1095    
1096     !!!emit ($self->{current_token}); # comment
1097    
1098     redo A;
1099     } else {
1100     $self->{current_token}->{data} .= chr ($self->{next_input_character}); # comment
1101     ## Stay in the state
1102     !!!next-input-character;
1103     redo A;
1104     }
1105 wakaba 1.23 } elsif ($self->{state} eq 'comment end dash') {
1106 wakaba 1.1 if ($self->{next_input_character} == 0x002D) { # -
1107     $self->{state} = 'comment end';
1108     !!!next-input-character;
1109     redo A;
1110     } elsif ($self->{next_input_character} == -1) {
1111 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1112 wakaba 1.1 $self->{state} = 'data';
1113     ## reconsume
1114    
1115     !!!emit ($self->{current_token}); # comment
1116    
1117     redo A;
1118     } else {
1119     $self->{current_token}->{data} .= '-' . chr ($self->{next_input_character}); # comment
1120     $self->{state} = 'comment';
1121     !!!next-input-character;
1122     redo A;
1123     }
1124     } elsif ($self->{state} eq 'comment end') {
1125     if ($self->{next_input_character} == 0x003E) { # >
1126     $self->{state} = 'data';
1127     !!!next-input-character;
1128    
1129     !!!emit ($self->{current_token}); # comment
1130    
1131     redo A;
1132     } elsif ($self->{next_input_character} == 0x002D) { # -
1133 wakaba 1.3 !!!parse-error (type => 'dash in comment');
1134 wakaba 1.1 $self->{current_token}->{data} .= '-'; # comment
1135     ## Stay in the state
1136     !!!next-input-character;
1137     redo A;
1138     } elsif ($self->{next_input_character} == -1) {
1139 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1140 wakaba 1.1 $self->{state} = 'data';
1141     ## reconsume
1142    
1143     !!!emit ($self->{current_token}); # comment
1144    
1145     redo A;
1146     } else {
1147 wakaba 1.3 !!!parse-error (type => 'dash in comment');
1148 wakaba 1.1 $self->{current_token}->{data} .= '--' . chr ($self->{next_input_character}); # comment
1149     $self->{state} = 'comment';
1150     !!!next-input-character;
1151     redo A;
1152     }
1153     } elsif ($self->{state} eq 'DOCTYPE') {
1154     if ($self->{next_input_character} == 0x0009 or # HT
1155     $self->{next_input_character} == 0x000A or # LF
1156     $self->{next_input_character} == 0x000B or # VT
1157     $self->{next_input_character} == 0x000C or # FF
1158     $self->{next_input_character} == 0x0020) { # SP
1159     $self->{state} = 'before DOCTYPE name';
1160     !!!next-input-character;
1161     redo A;
1162     } else {
1163 wakaba 1.3 !!!parse-error (type => 'no space before DOCTYPE name');
1164 wakaba 1.1 $self->{state} = 'before DOCTYPE name';
1165     ## reconsume
1166     redo A;
1167     }
1168     } elsif ($self->{state} eq 'before DOCTYPE name') {
1169     if ($self->{next_input_character} == 0x0009 or # HT
1170     $self->{next_input_character} == 0x000A or # LF
1171     $self->{next_input_character} == 0x000B or # VT
1172     $self->{next_input_character} == 0x000C or # FF
1173     $self->{next_input_character} == 0x0020) { # SP
1174     ## Stay in the state
1175     !!!next-input-character;
1176     redo A;
1177     } elsif ($self->{next_input_character} == 0x003E) { # >
1178 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
1179 wakaba 1.1 $self->{state} = 'data';
1180     !!!next-input-character;
1181    
1182 wakaba 1.18 !!!emit ({type => 'DOCTYPE'}); # incorrect
1183 wakaba 1.1
1184     redo A;
1185     } elsif ($self->{next_input_character} == -1) {
1186 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
1187 wakaba 1.1 $self->{state} = 'data';
1188     ## reconsume
1189    
1190 wakaba 1.18 !!!emit ({type => 'DOCTYPE'}); # incorrect
1191 wakaba 1.1
1192     redo A;
1193     } else {
1194 wakaba 1.18 $self->{current_token}
1195     = {type => 'DOCTYPE',
1196     name => chr ($self->{next_input_character}),
1197     correct => 1};
1198 wakaba 1.4 ## ISSUE: "Set the token's name name to the" in the spec
1199 wakaba 1.1 $self->{state} = 'DOCTYPE name';
1200     !!!next-input-character;
1201     redo A;
1202     }
1203     } elsif ($self->{state} eq 'DOCTYPE name') {
1204 wakaba 1.18 ## ISSUE: Redundant "First," in the spec.
1205 wakaba 1.1 if ($self->{next_input_character} == 0x0009 or # HT
1206     $self->{next_input_character} == 0x000A or # LF
1207     $self->{next_input_character} == 0x000B or # VT
1208     $self->{next_input_character} == 0x000C or # FF
1209     $self->{next_input_character} == 0x0020) { # SP
1210     $self->{state} = 'after DOCTYPE name';
1211     !!!next-input-character;
1212     redo A;
1213     } elsif ($self->{next_input_character} == 0x003E) { # >
1214     $self->{state} = 'data';
1215     !!!next-input-character;
1216    
1217     !!!emit ($self->{current_token}); # DOCTYPE
1218    
1219     redo A;
1220     } elsif ($self->{next_input_character} == -1) {
1221 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1222 wakaba 1.1 $self->{state} = 'data';
1223     ## reconsume
1224    
1225 wakaba 1.18 delete $self->{current_token}->{correct};
1226     !!!emit ($self->{current_token}); # DOCTYPE
1227 wakaba 1.1
1228     redo A;
1229     } else {
1230     $self->{current_token}->{name}
1231     .= chr ($self->{next_input_character}); # DOCTYPE
1232     ## Stay in the state
1233     !!!next-input-character;
1234     redo A;
1235     }
1236     } elsif ($self->{state} eq 'after DOCTYPE name') {
1237     if ($self->{next_input_character} == 0x0009 or # HT
1238     $self->{next_input_character} == 0x000A or # LF
1239     $self->{next_input_character} == 0x000B or # VT
1240     $self->{next_input_character} == 0x000C or # FF
1241     $self->{next_input_character} == 0x0020) { # SP
1242     ## Stay in the state
1243     !!!next-input-character;
1244     redo A;
1245     } elsif ($self->{next_input_character} == 0x003E) { # >
1246     $self->{state} = 'data';
1247     !!!next-input-character;
1248    
1249     !!!emit ($self->{current_token}); # DOCTYPE
1250    
1251     redo A;
1252     } elsif ($self->{next_input_character} == -1) {
1253 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1254 wakaba 1.1 $self->{state} = 'data';
1255     ## reconsume
1256    
1257 wakaba 1.18 delete $self->{current_token}->{correct};
1258     !!!emit ($self->{current_token}); # DOCTYPE
1259    
1260     redo A;
1261     } elsif ($self->{next_input_character} == 0x0050 or # P
1262     $self->{next_input_character} == 0x0070) { # p
1263     !!!next-input-character;
1264     if ($self->{next_input_character} == 0x0055 or # U
1265     $self->{next_input_character} == 0x0075) { # u
1266     !!!next-input-character;
1267     if ($self->{next_input_character} == 0x0042 or # B
1268     $self->{next_input_character} == 0x0062) { # b
1269     !!!next-input-character;
1270     if ($self->{next_input_character} == 0x004C or # L
1271     $self->{next_input_character} == 0x006C) { # l
1272     !!!next-input-character;
1273     if ($self->{next_input_character} == 0x0049 or # I
1274     $self->{next_input_character} == 0x0069) { # i
1275     !!!next-input-character;
1276     if ($self->{next_input_character} == 0x0043 or # C
1277     $self->{next_input_character} == 0x0063) { # c
1278     $self->{state} = 'before DOCTYPE public identifier';
1279     !!!next-input-character;
1280     redo A;
1281     }
1282     }
1283     }
1284     }
1285     }
1286    
1287     #
1288     } elsif ($self->{next_input_character} == 0x0053 or # S
1289     $self->{next_input_character} == 0x0073) { # s
1290     !!!next-input-character;
1291     if ($self->{next_input_character} == 0x0059 or # Y
1292     $self->{next_input_character} == 0x0079) { # y
1293     !!!next-input-character;
1294     if ($self->{next_input_character} == 0x0053 or # S
1295     $self->{next_input_character} == 0x0073) { # s
1296     !!!next-input-character;
1297     if ($self->{next_input_character} == 0x0054 or # T
1298     $self->{next_input_character} == 0x0074) { # t
1299     !!!next-input-character;
1300     if ($self->{next_input_character} == 0x0045 or # E
1301     $self->{next_input_character} == 0x0065) { # e
1302     !!!next-input-character;
1303     if ($self->{next_input_character} == 0x004D or # M
1304     $self->{next_input_character} == 0x006D) { # m
1305     $self->{state} = 'before DOCTYPE system identifier';
1306     !!!next-input-character;
1307     redo A;
1308     }
1309     }
1310     }
1311     }
1312     }
1313    
1314     #
1315     } else {
1316     !!!next-input-character;
1317     #
1318     }
1319    
1320     !!!parse-error (type => 'string after DOCTYPE name');
1321     $self->{state} = 'bogus DOCTYPE';
1322     # next-input-character is already done
1323     redo A;
1324     } elsif ($self->{state} eq 'before DOCTYPE public identifier') {
1325     if ({
1326     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
1327     #0x000D => 1, # HT, LF, VT, FF, SP, CR
1328     }->{$self->{next_input_character}}) {
1329     ## Stay in the state
1330     !!!next-input-character;
1331     redo A;
1332     } elsif ($self->{next_input_character} eq 0x0022) { # "
1333     $self->{current_token}->{public_identifier} = ''; # DOCTYPE
1334     $self->{state} = 'DOCTYPE public identifier (double-quoted)';
1335     !!!next-input-character;
1336     redo A;
1337     } elsif ($self->{next_input_character} eq 0x0027) { # '
1338     $self->{current_token}->{public_identifier} = ''; # DOCTYPE
1339     $self->{state} = 'DOCTYPE public identifier (single-quoted)';
1340     !!!next-input-character;
1341     redo A;
1342     } elsif ($self->{next_input_character} eq 0x003E) { # >
1343     !!!parse-error (type => 'no PUBLIC literal');
1344    
1345     $self->{state} = 'data';
1346     !!!next-input-character;
1347    
1348     delete $self->{current_token}->{correct};
1349     !!!emit ($self->{current_token}); # DOCTYPE
1350    
1351     redo A;
1352     } elsif ($self->{next_input_character} == -1) {
1353     !!!parse-error (type => 'unclosed DOCTYPE');
1354    
1355     $self->{state} = 'data';
1356     ## reconsume
1357    
1358     delete $self->{current_token}->{correct};
1359     !!!emit ($self->{current_token}); # DOCTYPE
1360    
1361     redo A;
1362     } else {
1363     !!!parse-error (type => 'string after PUBLIC');
1364     $self->{state} = 'bogus DOCTYPE';
1365     !!!next-input-character;
1366     redo A;
1367     }
1368     } elsif ($self->{state} eq 'DOCTYPE public identifier (double-quoted)') {
1369     if ($self->{next_input_character} == 0x0022) { # "
1370     $self->{state} = 'after DOCTYPE public identifier';
1371     !!!next-input-character;
1372     redo A;
1373     } elsif ($self->{next_input_character} == -1) {
1374     !!!parse-error (type => 'unclosed PUBLIC literal');
1375    
1376     $self->{state} = 'data';
1377     ## reconsume
1378    
1379     delete $self->{current_token}->{correct};
1380     !!!emit ($self->{current_token}); # DOCTYPE
1381    
1382     redo A;
1383     } else {
1384     $self->{current_token}->{public_identifier} # DOCTYPE
1385     .= chr $self->{next_input_character};
1386     ## Stay in the state
1387     !!!next-input-character;
1388     redo A;
1389     }
1390     } elsif ($self->{state} eq 'DOCTYPE public identifier (single-quoted)') {
1391     if ($self->{next_input_character} == 0x0027) { # '
1392     $self->{state} = 'after DOCTYPE public identifier';
1393     !!!next-input-character;
1394     redo A;
1395     } elsif ($self->{next_input_character} == -1) {
1396     !!!parse-error (type => 'unclosed PUBLIC literal');
1397    
1398     $self->{state} = 'data';
1399     ## reconsume
1400    
1401     delete $self->{current_token}->{correct};
1402     !!!emit ($self->{current_token}); # DOCTYPE
1403    
1404     redo A;
1405     } else {
1406     $self->{current_token}->{public_identifier} # DOCTYPE
1407     .= chr $self->{next_input_character};
1408     ## Stay in the state
1409     !!!next-input-character;
1410     redo A;
1411     }
1412     } elsif ($self->{state} eq 'after DOCTYPE public identifier') {
1413     if ({
1414     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
1415     #0x000D => 1, # HT, LF, VT, FF, SP, CR
1416     }->{$self->{next_input_character}}) {
1417     ## Stay in the state
1418     !!!next-input-character;
1419     redo A;
1420     } elsif ($self->{next_input_character} == 0x0022) { # "
1421     $self->{current_token}->{system_identifier} = ''; # DOCTYPE
1422     $self->{state} = 'DOCTYPE system identifier (double-quoted)';
1423     !!!next-input-character;
1424     redo A;
1425     } elsif ($self->{next_input_character} == 0x0027) { # '
1426     $self->{current_token}->{system_identifier} = ''; # DOCTYPE
1427     $self->{state} = 'DOCTYPE system identifier (single-quoted)';
1428     !!!next-input-character;
1429     redo A;
1430     } elsif ($self->{next_input_character} == 0x003E) { # >
1431     $self->{state} = 'data';
1432     !!!next-input-character;
1433    
1434     !!!emit ($self->{current_token}); # DOCTYPE
1435    
1436     redo A;
1437     } elsif ($self->{next_input_character} == -1) {
1438     !!!parse-error (type => 'unclosed DOCTYPE');
1439    
1440     $self->{state} = 'data';
1441 wakaba 1.26 ## reconsume
1442 wakaba 1.18
1443     delete $self->{current_token}->{correct};
1444     !!!emit ($self->{current_token}); # DOCTYPE
1445    
1446     redo A;
1447     } else {
1448     !!!parse-error (type => 'string after PUBLIC literal');
1449     $self->{state} = 'bogus DOCTYPE';
1450     !!!next-input-character;
1451     redo A;
1452     }
1453     } elsif ($self->{state} eq 'before DOCTYPE system identifier') {
1454     if ({
1455     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
1456     #0x000D => 1, # HT, LF, VT, FF, SP, CR
1457     }->{$self->{next_input_character}}) {
1458     ## Stay in the state
1459     !!!next-input-character;
1460     redo A;
1461     } elsif ($self->{next_input_character} == 0x0022) { # "
1462     $self->{current_token}->{system_identifier} = ''; # DOCTYPE
1463     $self->{state} = 'DOCTYPE system identifier (double-quoted)';
1464     !!!next-input-character;
1465     redo A;
1466     } elsif ($self->{next_input_character} == 0x0027) { # '
1467     $self->{current_token}->{system_identifier} = ''; # DOCTYPE
1468     $self->{state} = 'DOCTYPE system identifier (single-quoted)';
1469     !!!next-input-character;
1470     redo A;
1471     } elsif ($self->{next_input_character} == 0x003E) { # >
1472     !!!parse-error (type => 'no SYSTEM literal');
1473     $self->{state} = 'data';
1474     !!!next-input-character;
1475    
1476     delete $self->{current_token}->{correct};
1477     !!!emit ($self->{current_token}); # DOCTYPE
1478    
1479     redo A;
1480     } elsif ($self->{next_input_character} == -1) {
1481     !!!parse-error (type => 'unclosed DOCTYPE');
1482    
1483     $self->{state} = 'data';
1484 wakaba 1.26 ## reconsume
1485 wakaba 1.18
1486     delete $self->{current_token}->{correct};
1487     !!!emit ($self->{current_token}); # DOCTYPE
1488    
1489     redo A;
1490     } else {
1491 wakaba 1.30 !!!parse-error (type => 'string after SYSTEM');
1492 wakaba 1.18 $self->{state} = 'bogus DOCTYPE';
1493     !!!next-input-character;
1494     redo A;
1495     }
1496     } elsif ($self->{state} eq 'DOCTYPE system identifier (double-quoted)') {
1497     if ($self->{next_input_character} == 0x0022) { # "
1498     $self->{state} = 'after DOCTYPE system identifier';
1499     !!!next-input-character;
1500     redo A;
1501     } elsif ($self->{next_input_character} == -1) {
1502     !!!parse-error (type => 'unclosed SYSTEM literal');
1503    
1504     $self->{state} = 'data';
1505     ## reconsume
1506    
1507     delete $self->{current_token}->{correct};
1508     !!!emit ($self->{current_token}); # DOCTYPE
1509    
1510     redo A;
1511     } else {
1512     $self->{current_token}->{system_identifier} # DOCTYPE
1513     .= chr $self->{next_input_character};
1514     ## Stay in the state
1515     !!!next-input-character;
1516     redo A;
1517     }
1518     } elsif ($self->{state} eq 'DOCTYPE system identifier (single-quoted)') {
1519     if ($self->{next_input_character} == 0x0027) { # '
1520     $self->{state} = 'after DOCTYPE system identifier';
1521     !!!next-input-character;
1522     redo A;
1523     } elsif ($self->{next_input_character} == -1) {
1524     !!!parse-error (type => 'unclosed SYSTEM literal');
1525    
1526     $self->{state} = 'data';
1527     ## reconsume
1528    
1529     delete $self->{current_token}->{correct};
1530 wakaba 1.1 !!!emit ($self->{current_token}); # DOCTYPE
1531    
1532     redo A;
1533     } else {
1534 wakaba 1.18 $self->{current_token}->{system_identifier} # DOCTYPE
1535     .= chr $self->{next_input_character};
1536     ## Stay in the state
1537     !!!next-input-character;
1538     redo A;
1539     }
1540     } elsif ($self->{state} eq 'after DOCTYPE system identifier') {
1541     if ({
1542     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
1543     #0x000D => 1, # HT, LF, VT, FF, SP, CR
1544     }->{$self->{next_input_character}}) {
1545     ## Stay in the state
1546     !!!next-input-character;
1547     redo A;
1548     } elsif ($self->{next_input_character} == 0x003E) { # >
1549     $self->{state} = 'data';
1550     !!!next-input-character;
1551    
1552     !!!emit ($self->{current_token}); # DOCTYPE
1553    
1554     redo A;
1555     } elsif ($self->{next_input_character} == -1) {
1556     !!!parse-error (type => 'unclosed DOCTYPE');
1557    
1558     $self->{state} = 'data';
1559 wakaba 1.26 ## reconsume
1560 wakaba 1.18
1561     delete $self->{current_token}->{correct};
1562     !!!emit ($self->{current_token}); # DOCTYPE
1563    
1564     redo A;
1565     } else {
1566     !!!parse-error (type => 'string after SYSTEM literal');
1567 wakaba 1.1 $self->{state} = 'bogus DOCTYPE';
1568     !!!next-input-character;
1569     redo A;
1570     }
1571     } elsif ($self->{state} eq 'bogus DOCTYPE') {
1572     if ($self->{next_input_character} == 0x003E) { # >
1573     $self->{state} = 'data';
1574     !!!next-input-character;
1575    
1576 wakaba 1.18 delete $self->{current_token}->{correct};
1577 wakaba 1.1 !!!emit ($self->{current_token}); # DOCTYPE
1578    
1579     redo A;
1580     } elsif ($self->{next_input_character} == -1) {
1581 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1582 wakaba 1.1 $self->{state} = 'data';
1583     ## reconsume
1584    
1585 wakaba 1.18 delete $self->{current_token}->{correct};
1586 wakaba 1.1 !!!emit ($self->{current_token}); # DOCTYPE
1587    
1588     redo A;
1589     } else {
1590     ## Stay in the state
1591     !!!next-input-character;
1592     redo A;
1593     }
1594     } else {
1595     die "$0: $self->{state}: Unknown state";
1596     }
1597     } # A
1598    
1599     die "$0: _get_next_token: unexpected case";
1600     } # _get_next_token
1601    
## Try to consume a character reference starting at the current input
## position (presumably just after an "&" consumed by the caller --
## TODO confirm against the call sites).
##
## Arguments:
##   $self    - the parser; |$self->{next_input_character}| is inspected
##              and advanced.
##   $in_attr - true when tokenizing an attribute value.  In that case a
##              named reference without the terminating ";" is not
##              expanded.
##
## Returns |undef| when nothing was consumed as a reference, otherwise a
## hash reference |{type => 'character', data => ...}|.
sub _tokenize_attempt_to_consume_an_entity ($$) {
  my ($self, $in_attr) = @_;

  if ({
       0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, # HT, LF, VT, FF,
       0x0020 => 1, 0x003C => 1, 0x0026 => 1, -1 => 1, # SP, <, & # 0x000D # CR
      }->{$self->{next_input_character}}) {
    ## Don't consume
    ## No error
    return undef;
  } elsif ($self->{next_input_character} == 0x0023) { # #
    !!!next-input-character;
    if ($self->{next_input_character} == 0x0078 or # x
        $self->{next_input_character} == 0x0058) { # X
      ## Hexadecimal numeric character reference.  |$code| stays
      ## undefined until the first hexadecimal digit has been seen.
      my $code;
      X: {
        ## On the first pass this is the "x"/"X"; it is only used by the
        ## "no digit" branch, which can only be taken on the first pass.
        my $x_char = $self->{next_input_character};
        !!!next-input-character;
        if (0x0030 <= $self->{next_input_character} and
            $self->{next_input_character} <= 0x0039) { # 0..9
          $code ||= 0;
          $code *= 0x10;
          $code += $self->{next_input_character} - 0x0030;
          redo X;
        } elsif (0x0061 <= $self->{next_input_character} and
                 $self->{next_input_character} <= 0x0066) { # a..f
          $code ||= 0;
          $code *= 0x10;
          $code += $self->{next_input_character} - 0x0060 + 9;
          redo X;
        } elsif (0x0041 <= $self->{next_input_character} and
                 $self->{next_input_character} <= 0x0046) { # A..F
          $code ||= 0;
          $code *= 0x10;
          $code += $self->{next_input_character} - 0x0040 + 9;
          redo X;
        } elsif (not defined $code) { # no hexadecimal digit
          !!!parse-error (type => 'bare hcro');
          ## Unconsume "#", "x" and the character that followed "x".
          ## The pending character has to be pushed back *before*
          ## |next_input_character| is overwritten with "#"; otherwise
          ## it would be silently dropped from the input.  The
          ## later-read character is pushed back first so that "x" is
          ## read again before it.
          !!!back-next-input-character ($self->{next_input_character});
          !!!back-next-input-character ($x_char);
          $self->{next_input_character} = 0x0023; # #
          return undef;
        } elsif ($self->{next_input_character} == 0x003B) { # ;
          !!!next-input-character;
        } else {
          !!!parse-error (type => 'no refc');
        }

        ## Replace code points that must not be produced by a reference.
        if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) {
          !!!parse-error (type => sprintf 'invalid character reference:U+%04X', $code);
          $code = 0xFFFD;
        } elsif ($code > 0x10FFFF) {
          !!!parse-error (type => sprintf 'invalid character reference:U-%08X', $code);
          $code = 0xFFFD;
        } elsif ($code == 0x000D) {
          !!!parse-error (type => 'CR character reference');
          $code = 0x000A;
        } elsif (0x80 <= $code and $code <= 0x9F) {
          !!!parse-error (type => sprintf 'C1 character reference:U+%04X', $code);
          $code = $c1_entity_char->{$code};
        }

        return {type => 'character', data => chr $code};
      } # X
    } elsif (0x0030 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x0039) { # 0..9
      ## Decimal numeric character reference.
      my $code = $self->{next_input_character} - 0x0030;
      !!!next-input-character;

      while (0x0030 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x0039) { # 0..9
        $code *= 10;
        $code += $self->{next_input_character} - 0x0030;

        !!!next-input-character;
      }

      if ($self->{next_input_character} == 0x003B) { # ;
        !!!next-input-character;
      } else {
        !!!parse-error (type => 'no refc');
      }

      ## Replace code points that must not be produced by a reference.
      if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) {
        !!!parse-error (type => sprintf 'invalid character reference:U+%04X', $code);
        $code = 0xFFFD;
      } elsif ($code > 0x10FFFF) {
        !!!parse-error (type => sprintf 'invalid character reference:U-%08X', $code);
        $code = 0xFFFD;
      } elsif ($code == 0x000D) {
        !!!parse-error (type => 'CR character reference');
        $code = 0x000A;
      } elsif (0x80 <= $code and $code <= 0x9F) {
        !!!parse-error (type => sprintf 'C1 character reference:U+%04X', $code);
        $code = $c1_entity_char->{$code};
      }

      return {type => 'character', data => chr $code};
    } else {
      ## "&#" followed by neither "x"/"X" nor a digit: unconsume the "#".
      !!!parse-error (type => 'bare nero');
      !!!back-next-input-character ($self->{next_input_character});
      $self->{next_input_character} = 0x0023; # #
      return undef;
    }
  } elsif ((0x0041 <= $self->{next_input_character} and
            $self->{next_input_character} <= 0x005A) or
           (0x0061 <= $self->{next_input_character} and
            $self->{next_input_character} <= 0x007A)) {
    ## Named character reference.
    my $entity_name = chr $self->{next_input_character};
    !!!next-input-character;

    ## |$value| is the text to emit; |$match| is 1 for a name terminated
    ## by ";", -1 for a known name without ";" (outside attributes
    ## only), and 0 when no known name has been found.
    my $value = $entity_name;
    my $match = 0;
    require Whatpm::_NamedEntityList;
    our $EntityChar;

    while (length $entity_name < 10 and
           ## NOTE: Some number greater than the maximum length of entity name
           ((0x0041 <= $self->{next_input_character} and # a
             $self->{next_input_character} <= 0x005A) or # x
            (0x0061 <= $self->{next_input_character} and # a
             $self->{next_input_character} <= 0x007A) or # z
            (0x0030 <= $self->{next_input_character} and # 0
             $self->{next_input_character} <= 0x0039) or # 9
            $self->{next_input_character} == 0x003B)) { # ;
      $entity_name .= chr $self->{next_input_character};
      if (defined $EntityChar->{$entity_name}) {
        if ($self->{next_input_character} == 0x003B) { # ;
          $value = $EntityChar->{$entity_name};
          $match = 1;
          !!!next-input-character;
          last;
        } elsif (not $in_attr) {
          $value = $EntityChar->{$entity_name};
          $match = -1;
        } else {
          $value .= chr $self->{next_input_character};
        }
      } else {
        $value .= chr $self->{next_input_character};
      }
      !!!next-input-character;
    }

    if ($match > 0) {
      return {type => 'character', data => $value};
    } elsif ($match < 0) {
      !!!parse-error (type => 'no refc');
      return {type => 'character', data => $value};
    } else {
      !!!parse-error (type => 'bare ero');
      ## NOTE: No characters are consumed in the spec.
      return {type => 'character', data => '&'.$value};
    }
  } else {
    ## no characters are consumed
    !!!parse-error (type => 'bare ero');
    return undef;
  }
} # _tokenize_attempt_to_consume_an_entity
1761    
## Prepare |$self->{document}| for tree construction: strict error
## checking is switched off and the document is flagged as HTML.
sub _initialize_tree_constructor ($) {
  my ($self) = @_;
  ## NOTE: $self->{document} MUST be specified before this method is called
  my $document = $self->{document};
  $document->strict_error_checking (0);
  ## TODO: Turn mutation events off # MUST
  ## TODO: Turn loose Document option (manakai extension) on
  $document->manakai_is_html (1); # MUST
} # _initialize_tree_constructor
1770    
## Undo |_initialize_tree_constructor|: switch strict error checking of
## |$self->{document}| back on.
sub _terminate_tree_constructor ($) {
  my ($self) = @_;
  my $document = $self->{document};
  $document->strict_error_checking (1);
  ## TODO: Turn mutation events on
} # _terminate_tree_constructor
1776    
1777     ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
1778    
1779 wakaba 1.3 { # tree construction stage
1780     my $token;
1781    
## Entry point of the tree construction stage: fetch the first token,
## reset the per-parse state and run the three construction phases in
## order.
sub _construct_tree ($) {
  my $self = shift;

  ## When an interactive UA render the $self->{document} available
  ## to the user, or when it begin accepting user input, are
  ## not defined.

  ## Append a character: collect it and all subsequent consecutive
  ## characters and insert one Text node whose data is concatenation
  ## of all those characters. # MUST

  !!!next-token;

  ## Fresh state for this parse.
  $self->{insertion_mode} = 'before head';
  $self->{form_element} = undef;
  $self->{head_element} = undef;
  $self->{open_elements} = [];
  $self->{inner_html_node} = undef;

  ## Run the phases: initial, root element, main.
  $self->_tree_construction_initial; # MUST
  $self->_tree_construction_root_element;
  $self->_tree_construction_main;
} # _construct_tree
1805    
## "Initial" phase of tree construction.
##
## Consumes tokens until the DOCTYPE (or its absence) has been dealt
## with:
##   - DOCTYPE token: a DocumentType node is appended to
##     |$self->{document}| and the document's compatibility mode
##     ('quirks' / 'limited quirks') is chosen from the DOCTYPE name,
##     public identifier and system identifier.
##   - leading white space characters are dropped, comments are appended
##     to the document.
##   - any other token: a 'no DOCTYPE' error is reported, the document
##     is put into quirks mode and the token is left for reprocessing.
##
## Reads and advances the file-scoped |$token|.  Dies on an unknown
## token type.
sub _tree_construction_initial ($) {
  my $self = shift;
  INITIAL: {
    if ($token->{type} eq 'DOCTYPE') {
      ## NOTE: Conformance checkers MAY, instead of reporting "not HTML5"
      ## error, switch to a conformance checking mode for another
      ## language.
      my $doctype_name = $token->{name};
      $doctype_name = '' unless defined $doctype_name;
      $doctype_name =~ tr/a-z/A-Z/;
      if (not defined $token->{name} or # <!DOCTYPE>
          defined $token->{public_identifier} or
          defined $token->{system_identifier} or
          ## ISSUE: ASCII case-insensitive? (in fact it does not matter)
          $doctype_name ne 'HTML') {
        !!!parse-error (type => 'not HTML5');
      }

      my $doctype = $self->{document}->create_document_type_definition
          ($token->{name}); ## ISSUE: If name is missing (e.g. <!DOCTYPE>)?
      $doctype->public_id ($token->{public_identifier})
          if defined $token->{public_identifier};
      $doctype->system_id ($token->{system_identifier})
          if defined $token->{system_identifier};
      ## NOTE: Other DocumentType attributes are null or empty lists.
      ## ISSUE: internalSubset = null??
      $self->{document}->append_child ($doctype);

      if (not $token->{correct} or $doctype_name ne 'HTML') {
        $self->{document}->manakai_compat_mode ('quirks');
      } elsif (defined $token->{public_identifier}) {
        ## Public identifiers are compared case-insensitively: the
        ## identifier is upper-cased, so every literal it is compared
        ## with below has to be written in upper case as well.
        my $pubid = $token->{public_identifier};
        $pubid =~ tr/a-z/A-Z/;
        if ({
          "+//SILMARIL//DTD HTML PRO V0R11 19970101//EN" => 1,
          "-//ADVASOFT LTD//DTD HTML 3.0 ASWEDIT + EXTENSIONS//EN" => 1,
          "-//AS//DTD HTML 3.0 ASWEDIT + EXTENSIONS//EN" => 1,
          "-//IETF//DTD HTML 2.0 LEVEL 1//EN" => 1,
          "-//IETF//DTD HTML 2.0 LEVEL 2//EN" => 1,
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 1//EN" => 1,
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 2//EN" => 1,
          "-//IETF//DTD HTML 2.0 STRICT//EN" => 1,
          "-//IETF//DTD HTML 2.0//EN" => 1,
          "-//IETF//DTD HTML 2.1E//EN" => 1,
          "-//IETF//DTD HTML 3.0//EN" => 1,
          "-//IETF//DTD HTML 3.0//EN//" => 1,
          "-//IETF//DTD HTML 3.2 FINAL//EN" => 1,
          "-//IETF//DTD HTML 3.2//EN" => 1,
          "-//IETF//DTD HTML 3//EN" => 1,
          "-//IETF//DTD HTML LEVEL 0//EN" => 1,
          "-//IETF//DTD HTML LEVEL 0//EN//2.0" => 1,
          "-//IETF//DTD HTML LEVEL 1//EN" => 1,
          "-//IETF//DTD HTML LEVEL 1//EN//2.0" => 1,
          "-//IETF//DTD HTML LEVEL 2//EN" => 1,
          "-//IETF//DTD HTML LEVEL 2//EN//2.0" => 1,
          "-//IETF//DTD HTML LEVEL 3//EN" => 1,
          "-//IETF//DTD HTML LEVEL 3//EN//3.0" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 0//EN" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 0//EN//2.0" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 1//EN" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 1//EN//2.0" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 2//EN" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 2//EN//2.0" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 3//EN" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 3//EN//3.0" => 1,
          "-//IETF//DTD HTML STRICT//EN" => 1,
          "-//IETF//DTD HTML STRICT//EN//2.0" => 1,
          "-//IETF//DTD HTML STRICT//EN//3.0" => 1,
          "-//IETF//DTD HTML//EN" => 1,
          "-//IETF//DTD HTML//EN//2.0" => 1,
          "-//IETF//DTD HTML//EN//3.0" => 1,
          "-//METRIUS//DTD METRIUS PRESENTATIONAL//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML STRICT//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 TABLES//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML STRICT//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 TABLES//EN" => 1,
          "-//NETSCAPE COMM. CORP.//DTD HTML//EN" => 1,
          "-//NETSCAPE COMM. CORP.//DTD STRICT HTML//EN" => 1,
          "-//O'REILLY AND ASSOCIATES//DTD HTML 2.0//EN" => 1,
          "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED 1.0//EN" => 1,
          "-//SPYGLASS//DTD HTML 2.0 EXTENDED//EN" => 1,
          "-//SQ//DTD HTML 2.0 HOTMETAL + EXTENSIONS//EN" => 1,
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA HTML//EN" => 1,
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA STRICT HTML//EN" => 1,
          "-//W3C//DTD HTML 3 1995-03-24//EN" => 1,
          "-//W3C//DTD HTML 3.2 DRAFT//EN" => 1,
          "-//W3C//DTD HTML 3.2 FINAL//EN" => 1,
          "-//W3C//DTD HTML 3.2//EN" => 1,
          "-//W3C//DTD HTML 3.2S DRAFT//EN" => 1,
          "-//W3C//DTD HTML 4.0 FRAMESET//EN" => 1,
          "-//W3C//DTD HTML 4.0 TRANSITIONAL//EN" => 1,
          ## NOTE(review): "EXPERIMETNAL" is kept as found; presumably it
          ## mirrors the identifier list this table was copied from --
          ## confirm before "correcting" the spelling.
          "-//W3C//DTD HTML EXPERIMETNAL 19960712//EN" => 1,
          "-//W3C//DTD HTML EXPERIMENTAL 970421//EN" => 1,
          "-//W3C//DTD W3 HTML//EN" => 1,
          "-//W3O//DTD W3 HTML 3.0//EN" => 1,
          "-//W3O//DTD W3 HTML 3.0//EN//" => 1,
          "-//W3O//DTD W3 HTML STRICT 3.0//EN//" => 1,
          "-//WEBTECHS//DTD MOZILLA HTML 2.0//EN" => 1,
          "-//WEBTECHS//DTD MOZILLA HTML//EN" => 1,
          "-/W3C/DTD HTML 4.0 TRANSITIONAL/EN" => 1,
          "HTML" => 1,
        }->{$pubid}) {
          $self->{document}->manakai_compat_mode ('quirks');
        } elsif ($pubid eq "-//W3C//DTD HTML 4.01 FRAMESET//EN" or
                 $pubid eq "-//W3C//DTD HTML 4.01 TRANSITIONAL//EN") {
          ## HTML 4.01 Frameset/Transitional: quirks mode when the system
          ## identifier is missing, limited quirks mode when it is
          ## present.
          if (defined $token->{system_identifier}) {
            $self->{document}->manakai_compat_mode ('limited quirks');
          } else {
            $self->{document}->manakai_compat_mode ('quirks');
          }
        } elsif ($pubid eq "-//W3C//DTD XHTML 1.0 FRAMESET//EN" or
                 $pubid eq "-//W3C//DTD XHTML 1.0 TRANSITIONAL//EN") {
          $self->{document}->manakai_compat_mode ('limited quirks');
        }
      }
      if (defined $token->{system_identifier}) {
        ## System identifiers are compared case-insensitively as well,
        ## this time in lower case.
        my $sysid = $token->{system_identifier};
        $sysid =~ tr/A-Z/a-z/;
        if ($sysid eq "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
          $self->{document}->manakai_compat_mode ('quirks');
        }
      }

      ## Go to the root element phase.
      !!!next-token;
      return;
    } elsif ({
              'start tag' => 1,
              'end tag' => 1,
              'end-of-file' => 1,
             }->{$token->{type}}) {
      !!!parse-error (type => 'no DOCTYPE');
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the root element phase
      ## reprocess
      return;
    } elsif ($token->{type} eq 'character') {
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D
        ## Ignore the token

        unless (length $token->{data}) {
          ## Stay in the phase
          !!!next-token;
          redo INITIAL;
        }
      }

      !!!parse-error (type => 'no DOCTYPE');
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the root element phase
      ## reprocess
      return;
    } elsif ($token->{type} eq 'comment') {
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);

      ## Stay in the phase.
      !!!next-token;
      redo INITIAL;
    } else {
      die "$0: $token->{type}: Unknown token";
    }
  } # INITIAL
} # _tree_construction_initial
1973    
## "Root element" phase of tree construction.
##
## DOCTYPE tokens are reported and ignored, comments are appended to the
## document and leading white space is dropped.  The first other token
## causes an |html| element to be created, appended to the document and
## pushed onto the stack of open elements; that token is left in the
## file-scoped |$token| for reprocessing.  Dies on an unknown token type.
sub _tree_construction_root_element ($) {
  my ($self) = @_;

  TOKEN: while (1) {
    my $type = $token->{type};

    if ($type eq 'DOCTYPE') {
      !!!parse-error (type => 'in html:#DOCTYPE');
      ## Ignore the token
      ## Stay in the phase
      !!!next-token;
      next TOKEN;
    }

    if ($type eq 'comment') {
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);
      ## Stay in the phase
      !!!next-token;
      next TOKEN;
    }

    if ($type eq 'character') {
      ## Leading white space is ignored; if nothing else is left in the
      ## token, fetch the next one.
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D
        if (not length $token->{data}) {
          ## Stay in the phase
          !!!next-token;
          next TOKEN;
        }
      }
    } elsif ($type eq 'start tag' or
             $type eq 'end tag' or
             $type eq 'end-of-file') {
      ## ISSUE: There is an issue in the spec
    } else {
      die "$0: $token->{type}: Unknown token";
    }

    ## Any remaining token implies the root element.
    my $root_element; !!!create-element ($root_element, 'html');
    $self->{document}->append_child ($root_element);
    push @{$self->{open_elements}}, [$root_element, 'html'];
    #$phase = 'main';
    ## reprocess
    return;
  } # TOKEN
} # _tree_construction_root_element
2020    
## Reset |$self->{insertion_mode}| according to the stack of open
## elements (|$self->{open_elements}|, a list of |[node, local name]|
## pairs), walking from the current node towards the root until an
## element that determines the mode is found.
##
## In the fragment case (|$self->{inner_html_node}| is defined) the
## context element is used in place of the first node in the stack,
## unless the context element is |td| or |th|.
sub _reset_insertion_mode ($) {
  my ($self) = @_;

  ## Step 4..13: element name -> insertion mode
  my %mode_for = (
    select => 'in select',
    td => 'in cell',
    th => 'in cell',
    tr => 'in row',
    tbody => 'in table body',
    thead => 'in table head',
    tfoot => 'in table foot',
    caption => 'in caption',
    colgroup => 'in column group',
    table => 'in table',
    head => 'in body', # not in head!
    body => 'in body',
    frameset => 'in frameset',
  );

  ## Step 1
  my $is_last = 0;

  ## Step 2
  my $depth = -1;
  my $current = $self->{open_elements}->[$depth];

  while (1) {
    ## Step 3
    ## ISSUE: Oops! "If node is the first node in the stack of open
    ## elements, then set last to true. If the context element of the
    ## HTML fragment parsing algorithm is neither a td element nor a
    ## th element, then set node to the context element. (fragment case)":
    ## The second "if" is in the scope of the first "if"!?
    if ($self->{open_elements}->[0]->[0] eq $current->[0]) {
      $is_last = 1;
      my $context = $self->{inner_html_node};
      if (defined $context and
          not ($context->[1] eq 'td' or $context->[1] eq 'th')) {
        $current = $context;
      }
    }

    ## Step 4..13
    my $mode = $mode_for{$current->[1]};
    if (defined $mode) {
      $self->{insertion_mode} = $mode;
      return;
    }

    ## Step 14
    if ($current->[1] eq 'html') {
      $self->{insertion_mode}
          = defined $self->{head_element} ? 'after head' : 'before head';
      return;
    }

    ## Step 15
    if ($is_last) {
      $self->{insertion_mode} = 'in body';
      return;
    }

    ## Step 16
    $depth--;
    $current = $self->{open_elements}->[$depth];

    ## Step 17: loop again
  }
} # _reset_insertion_mode
2089    
2090     sub _tree_construction_main ($) {
2091     my $self = shift;
2092    
2093     my $phase = 'main';
2094 wakaba 1.1
2095     my $active_formatting_elements = [];
2096    
## Reconstruct the active formatting elements: entries at the end of
## |$active_formatting_elements| whose elements are not in the stack of
## open elements are cloned; each clone is inserted with the |$insert|
## callback, pushed onto the stack of open elements and put into the
## list in place of the original entry.
my $reconstruct_active_formatting_elements = sub { # MUST
  my ($insert) = @_;

  ## True if the given node is in the stack of open elements.
  my $is_open = sub {
    my $candidate = shift;
    for my $open (@{$self->{open_elements}}) {
      return 1 if $candidate eq $open->[0];
    }
    return 0;
  };

  ## Step 1
  return unless @$active_formatting_elements;

  ## Step 3
  my $pos = -1;
  my $entry = $active_formatting_elements->[$pos];

  ## Step 2
  return if $entry->[0] eq '#marker';
  return if $is_open->($entry->[0]);

  REWIND: {
    ## Step 4
    last REWIND if $active_formatting_elements->[0]->[0] eq $entry->[0];

    ## Step 5
    $pos--;
    $entry = $active_formatting_elements->[$pos];

    ## Step 6: keep going back until a marker or an open element is hit
    redo REWIND
        unless $entry->[0] eq '#marker' or $is_open->($entry->[0]);

    ## Step 7
    $pos++;
    $entry = $active_formatting_elements->[$pos];
  } # REWIND

  CLONE: {
    ## Step 8
    my $copy = [$entry->[0]->clone_node (0), $entry->[1]];

    ## Step 9
    $insert->($copy->[0]);
    push @{$self->{open_elements}}, $copy;

    ## Step 10
    $active_formatting_elements->[$pos] = $self->{open_elements}->[-1];

    ## Step 11: done once the last entry of the list has been replaced
    if (not $copy->[0] eq $active_formatting_elements->[-1]->[0]) {
      ## Step 7'
      $pos++;
      $entry = $active_formatting_elements->[$pos];

      redo CLONE;
    }
  } # CLONE
}; # $reconstruct_active_formatting_elements
2167    
## Truncate |$active_formatting_elements| at its last '#marker' entry;
## the marker itself and everything after it are removed.  The list is
## left untouched if it contains no marker.
my $clear_up_to_marker = sub {
  my $idx = $#$active_formatting_elements;
  while ($idx >= 0) {
    if ($active_formatting_elements->[$idx]->[0] eq '#marker') {
      splice @$active_formatting_elements, $idx;
      return;
    }
    $idx--;
  }
}; # $clear_up_to_marker
2176    
## Generic CDATA/RCDATA element handling for the start tag held in
## |$token|.
##
## Arguments:
##   $content_model_flag - 'CDATA' or 'RCDATA'; the tokenizer is switched
##                         to it while the element content is read.
##   $insert             - callback that inserts the new element.
##
## All following character tokens become one Text child of the element.
## A matching end tag is then swallowed; any other token is reported as
## a parse error.
my $parse_rcdata = sub ($$) {
  my ($content_model_flag, $insert) = @_;

  ## Step 1
  my $start_tag_name = $token->{tag_name};
  my $element;
  !!!create-element ($element, $start_tag_name, $token->{attributes});

  ## Step 2
  $insert->($element); # /context node/->append_child ($element)

  ## Step 3
  $self->{content_model_flag} = $content_model_flag; # CDATA or RCDATA
  delete $self->{escape}; # MUST

  ## Step 4
  my $content = '';
  !!!next-token;
  until ($token->{type} ne 'character') { # or until stop tokenizing
    $content .= $token->{data};
    !!!next-token;
  }

  ## Step 5
  if (length $content) {
    my $text_node = $self->{document}->create_text_node ($content);
    $element->append_child ($text_node);
  }

  ## Step 6
  $self->{content_model_flag} = 'PCDATA';

  ## Step 7
  unless ($token->{type} eq 'end tag' and
          $token->{tag_name} eq $start_tag_name) {
    !!!parse-error (type => 'in '.$content_model_flag.':#'.$token->{type});
  } # otherwise: ignore the token
  !!!next-token;
}; # $parse_rcdata
2217 wakaba 1.1
## Handle a |script| start tag held in |$token|.
##
## The element is created, the following character tokens are read in
## CDATA mode and appended to it as text, and, unless a fragment is
## being parsed (|$self->{inner_html_node}| is defined), the element is
## inserted with the |$insert| callback.
my $script_start_tag = sub ($) {
  my ($insert) = @_;
  my $script_el;
  !!!create-element ($script_el, 'script', $token->{attributes});
  ## TODO: mark as "parser-inserted"

  $self->{content_model_flag} = 'CDATA';
  delete $self->{escape}; # MUST

  ## Collect the script text.
  my $source = '';
  !!!next-token;
  until ($token->{type} ne 'character') {
    $source .= $token->{data};
    !!!next-token;
  } # stop if non-character token or tokenizer stops tokenising
  $script_el->manakai_append_text ($source) if length $source;

  $self->{content_model_flag} = 'PCDATA';

  unless ($token->{type} eq 'end tag' and
          $token->{tag_name} eq 'script') {
    !!!parse-error (type => 'in CDATA:#'.$token->{type});
    ## ISSUE: And ignore?
    ## TODO: mark as "already executed"
  } # otherwise: ignore the token

  if (defined $self->{inner_html_node}) {
    ## TODO: mark as "already executed"
  } else {
    ## TODO: $old_insertion_point = current insertion point
    ## TODO: insertion point = just before the next input character

    $insert->($script_el);

    ## TODO: insertion point = $old_insertion_point (might be "undefined")

    ## TODO: if there is a script that will execute as soon as the parser resume, then...
  }

  !!!next-token;
}; # $script_start_tag
2263    
## Handle the end tag of a formatting element (the "adoption agency"
## steps).
##
## Argument:
##   $tag_name - local name of the end tag being processed.
##
## Works on |$active_formatting_elements| and |$self->{open_elements}|;
## both hold |[node, local name]| pairs.  The steps are repeated
## (|redo FET|) until one of the early exits consumes the token.
my $formatting_end_tag = sub {
  my $tag_name = shift;

  FET: {
    ## Step 1
    ## Find the last entry with this tag name after the last marker.
    my $formatting_element;
    my $formatting_element_i_in_active;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[1] eq $tag_name) {
        $formatting_element = $active_formatting_elements->[$_];
        $formatting_element_i_in_active = $_;
        last AFE;
      } elsif ($active_formatting_elements->[$_]->[0] eq '#marker') {
        last AFE;
      }
    } # AFE
    unless (defined $formatting_element) {
      !!!parse-error (type => 'unmatched end tag:'.$tag_name);
      ## Ignore the token
      !!!next-token;
      return;
    }
    ## has an element in scope
    my $in_scope = 1;
    my $formatting_element_i_in_open;
    INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if ($node->[0] eq $formatting_element->[0]) {
        if ($in_scope) {
          $formatting_element_i_in_open = $_;
          last INSCOPE;
        } else { # in open elements but not in scope
          !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
          ## Ignore the token
          !!!next-token;
          return;
        }
      } elsif ({
                table => 1, caption => 1, td => 1, th => 1,
                button => 1, marquee => 1, object => 1, html => 1,
               }->{$node->[1]}) {
        $in_scope = 0;
      }
    } # INSCOPE
    unless (defined $formatting_element_i_in_open) {
      !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
      ## Remove the formatting element itself from the list.  It is not
      ## necessarily the last entry, so |pop| would remove the wrong
      ## one.
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      !!!next-token; ## TODO: ok?
      return;
    }
    if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
      !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
    }

    ## Step 2
    ## The loop runs from the current node towards the formatting
    ## element, so the value left at the end is the qualifying node
    ## closest to the formatting element.
    my $furthest_block;
    my $furthest_block_i_in_open;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if (not $formatting_category->{$node->[1]} and
          #not $phrasing_category->{$node->[1]} and
          ($special_category->{$node->[1]} or
           $scoping_category->{$node->[1]})) {
        $furthest_block = $node;
        $furthest_block_i_in_open = $_;
      } elsif ($node->[0] eq $formatting_element->[0]) {
        last OE;
      }
    } # OE

    ## Step 3
    unless (defined $furthest_block) { # MUST
      splice @{$self->{open_elements}}, $formatting_element_i_in_open;
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      !!!next-token;
      return;
    }

    ## Step 4
    my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1];

    ## Step 5
    my $furthest_block_parent = $furthest_block->[0]->parent_node;
    if (defined $furthest_block_parent) {
      $furthest_block_parent->remove_child ($furthest_block->[0]);
    }

    ## Step 6
    my $bookmark_prev_el
        = $active_formatting_elements->[$formatting_element_i_in_active - 1]
            ->[0];

    ## Step 7
    my $node = $furthest_block;
    my $node_i_in_open = $furthest_block_i_in_open;
    my $last_node = $furthest_block;
    S7: {
      ## Step 1
      $node_i_in_open--;
      $node = $self->{open_elements}->[$node_i_in_open];

      ## Step 2
      ## A node that is not in the list of active formatting elements
      ## is dropped from the stack of open elements and skipped.
      my $node_i_in_active;
      S7S2: {
        for (reverse 0..$#$active_formatting_elements) {
          if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
            $node_i_in_active = $_;
            last S7S2;
          }
        }
        splice @{$self->{open_elements}}, $node_i_in_open, 1;
        redo S7;
      } # S7S2

      ## Step 3
      last S7 if $node->[0] eq $formatting_element->[0];

      ## Step 4
      if ($last_node->[0] eq $furthest_block->[0]) {
        $bookmark_prev_el = $node->[0];
      }

      ## Step 5
      if ($node->[0]->has_child_nodes ()) {
        my $clone = [$node->[0]->clone_node (0), $node->[1]];
        $active_formatting_elements->[$node_i_in_active] = $clone;
        $self->{open_elements}->[$node_i_in_open] = $clone;
        $node = $clone;
      }

      ## Step 6
      $node->[0]->append_child ($last_node->[0]);

      ## Step 7
      $last_node = $node;

      ## Step 8
      redo S7;
    } # S7

    ## Step 8
    $common_ancestor_node->[0]->append_child ($last_node->[0]);

    ## Step 9
    my $clone = [$formatting_element->[0]->clone_node (0),
                 $formatting_element->[1]];

    ## Step 10
    my @cn = @{$furthest_block->[0]->child_nodes};
    $clone->[0]->append_child ($_) for @cn;

    ## Step 11
    $furthest_block->[0]->append_child ($clone->[0]);

    ## Step 12
    ## Remove the formatting element from the list and insert the clone
    ## after the bookmark.  |$i| is adjusted when the removal shifts the
    ## bookmark's index.
    my $i;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
        splice @$active_formatting_elements, $_, 1;
        $i-- and last AFE if defined $i;
      } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
        $i = $_;
      }
    } # AFE
    splice @$active_formatting_elements, $i + 1, 0, $clone;

    ## Step 13
    ## Same for the stack of open elements: remove the formatting
    ## element and *insert* the clone immediately after the furthest
    ## block.  The splice length must be 0, as in step 12; a length of 1
    ## would replace (and thereby lose) whatever element follows the
    ## furthest block in the stack.
    undef $i;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
        splice @{$self->{open_elements}}, $_, 1;
        $i-- and last OE if defined $i;
      } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
        $i = $_;
      }
    } # OE
    splice @{$self->{open_elements}}, $i + 1, 0, $clone;

    ## Step 14
    redo FET;
  } # FET
}; # $formatting_end_tag
2446    
## Insertion callback: append the given node to the current node, i.e.
## the last entry of the stack of open elements.
my $insert_to_current = sub {
  my ($child) = @_;
  $self->{open_elements}->[-1]->[0]->append_child ($child);
}; # $insert_to_current
2450    
## Insertion callback with foster parenting.
##
## When the current node is |table|, |tbody|, |tfoot|, |thead| or |tr|,
## the child is not appended to it.  Instead it is inserted before the
## last |table| in the stack of open elements (if that table has an
## element parent), or appended to the element just before that table
## in the stack, or to the first element of the stack when there is no
## |table| at all.  Otherwise the child is appended to the current node.
my $insert_to_foster = sub {
  my ($child) = @_;

  unless ({
           table => 1, tbody => 1, tfoot => 1,
           thead => 1, tr => 1,
          }->{$self->{open_elements}->[-1]->[1]}) {
    return $self->{open_elements}->[-1]->[0]->append_child ($child);
  }

  # MUST
  my $foster_parent_element;
  my $next_sibling;
  for my $idx (reverse 0..$#{$self->{open_elements}}) {
    next unless $self->{open_elements}->[$idx]->[1] eq 'table';

    my $parent = $self->{open_elements}->[$idx]->[0]->parent_node;
    if (defined $parent and $parent->node_type == 1) {
      ## The table has an element parent: insert just before the table.
      $foster_parent_element = $parent;
      $next_sibling = $self->{open_elements}->[$idx]->[0];
    } else {
      $foster_parent_element = $self->{open_elements}->[$idx - 1]->[0];
    }
    last;
  }
  $foster_parent_element = $self->{open_elements}->[0]->[0]
      unless defined $foster_parent_element;
  $foster_parent_element->insert_before ($child, $next_sibling);
}; # $insert_to_foster
2481    
## Handles a start tag or end tag token according to the "in body"
## rules.
##
##   $insert - code reference used to insert a newly created node
##             (first argument); it is passed on to
##             |$script_start_tag| and |$parse_rcdata| and used
##             directly for |textarea|.
##
## The token being processed is the closed-over |$token|.  Tokens of
## any other type than 'start tag' / 'end tag' are not handled here
## (the closure falls through without doing anything).
##
## Every branch ends with |return| after doing one of the following:
##
##   - consuming the token (|!!!next-token|), or
##   - pushing the token back (|!!!back-token|) and replacing |$token|
##     with a synthesized end tag, so that the caller processes the
##     implied end tag first and then sees the original token again, or
##   - leaving |$token| untouched after changing the insertion mode
##     (marked "reprocess").
##
## NOTE: Lines starting with |!!!| are macros; presumably they are
## expanded by the preprocessor that turns this .src file into the
## real module, so their exact spelling is kept unchanged.
##
## Entries in |$self->{open_elements}| and
## |$active_formatting_elements| are pairs [node, tag name]; a marker
## in the list of active formatting elements is ['#marker', ''].
my $in_body = sub {
  my $insert = shift;
  if ($token->{type} eq 'start tag') {
    if ($token->{tag_name} eq 'script') {
      ## NOTE: This is an "as if in head" code clone
      $script_start_tag->($insert);
      return;
    } elsif ($token->{tag_name} eq 'style') {
      ## NOTE: This is an "as if in head" code clone
      $parse_rcdata->('CDATA', $insert);
      return;
    } elsif ({
      base => 1, link => 1, meta => 1,
    }->{$token->{tag_name}}) {
      ## NOTE: This is an "as if in head" code clone, only "-t" differs
      !!!insert-element-t ($token->{tag_name}, $token->{attributes});
      pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
      !!!next-token;
      ## TODO: Extracting |charset| from |meta|.
      return;
    } elsif ($token->{tag_name} eq 'title') {
      !!!parse-error (type => 'in body:title');
      ## NOTE: This is an "as if in head" code clone
      $parse_rcdata->('RCDATA', $insert);
      return;
    } elsif ($token->{tag_name} eq 'body') {
      !!!parse-error (type => 'in body:body');

      if (@{$self->{open_elements}} == 1 or
          $self->{open_elements}->[1]->[1] ne 'body') {
        ## Ignore the token
      } else {
        ## Copy onto the existing |body| element only those attributes
        ## it does not already have.
        my $body_el = $self->{open_elements}->[1]->[0];
        for my $attr_name (keys %{$token->{attributes}}) {
          unless ($body_el->has_attribute_ns (undef, $attr_name)) {
            $body_el->set_attribute_ns
                (undef, [undef, $attr_name],
                 $token->{attributes}->{$attr_name}->{value});
          }
        }
      }
      !!!next-token;
      return;
    } elsif ({
      address => 1, blockquote => 1, center => 1, dir => 1,
      div => 1, dl => 1, fieldset => 1, listing => 1,
      menu => 1, ol => 1, p => 1, ul => 1,
      pre => 1,
    }->{$token->{tag_name}}) {
      ## has a p element in scope
      INSCOPE: for (reverse @{$self->{open_elements}}) {
        if ($_->[1] eq 'p') {
          ## Act as if </p> had been seen, then reprocess this token
          !!!back-token;
          $token = {type => 'end tag', tag_name => 'p'};
          return;
        } elsif ({
          table => 1, caption => 1, td => 1, th => 1,
          button => 1, marquee => 1, object => 1, html => 1,
        }->{$_->[1]}) {
          last INSCOPE;
        }
      } # INSCOPE

      !!!insert-element-t ($token->{tag_name}, $token->{attributes});
      if ($token->{tag_name} eq 'pre') {
        !!!next-token;
        ## Drop a single U+000A directly after <pre>
        if ($token->{type} eq 'character') {
          $token->{data} =~ s/^\x0A//;
          unless (length $token->{data}) {
            !!!next-token;
          }
        }
      } else {
        !!!next-token;
      }
      return;
    } elsif ($token->{tag_name} eq 'form') {
      if (defined $self->{form_element}) {
        !!!parse-error (type => 'in form:form');
        ## Ignore the token
        !!!next-token;
        return;
      } else {
        ## has a p element in scope
        INSCOPE: for (reverse @{$self->{open_elements}}) {
          if ($_->[1] eq 'p') {
            !!!back-token;
            $token = {type => 'end tag', tag_name => 'p'};
            return;
          } elsif ({
            table => 1, caption => 1, td => 1, th => 1,
            button => 1, marquee => 1, object => 1, html => 1,
          }->{$_->[1]}) {
            last INSCOPE;
          }
        } # INSCOPE

        !!!insert-element-t ($token->{tag_name}, $token->{attributes});
        $self->{form_element} = $self->{open_elements}->[-1]->[0];
        !!!next-token;
        return;
      }
    } elsif ($token->{tag_name} eq 'li') {
      ## has a p element in scope
      INSCOPE: for (reverse @{$self->{open_elements}}) {
        if ($_->[1] eq 'p') {
          !!!back-token;
          $token = {type => 'end tag', tag_name => 'p'};
          return;
        } elsif ({
          table => 1, caption => 1, td => 1, th => 1,
          button => 1, marquee => 1, object => 1, html => 1,
        }->{$_->[1]}) {
          last INSCOPE;
        }
      } # INSCOPE

      ## Step 1
      my $i = -1;
      my $node = $self->{open_elements}->[$i];
      LI: {
        ## Step 2
        if ($node->[1] eq 'li') {
          if ($i != -1) {
            !!!parse-error (type => 'end tag missing:'.
                            $self->{open_elements}->[-1]->[1]);
          }
          ## Pop the open |li| and everything above it
          splice @{$self->{open_elements}}, $i;
          last LI;
        }

        ## Step 3
        if (not $formatting_category->{$node->[1]} and
            #not $phrasing_category->{$node->[1]} and
            ($special_category->{$node->[1]} or
             $scoping_category->{$node->[1]}) and
            $node->[1] ne 'address' and $node->[1] ne 'div') {
          last LI;
        }

        ## Step 4
        $i--;
        $node = $self->{open_elements}->[$i];
        redo LI;
      } # LI

      !!!insert-element-t ($token->{tag_name}, $token->{attributes});
      !!!next-token;
      return;
    } elsif ($token->{tag_name} eq 'dd' or $token->{tag_name} eq 'dt') {
      ## has a p element in scope
      INSCOPE: for (reverse @{$self->{open_elements}}) {
        if ($_->[1] eq 'p') {
          !!!back-token;
          $token = {type => 'end tag', tag_name => 'p'};
          return;
        } elsif ({
          table => 1, caption => 1, td => 1, th => 1,
          button => 1, marquee => 1, object => 1, html => 1,
        }->{$_->[1]}) {
          last INSCOPE;
        }
      } # INSCOPE

      ## Step 1
      my $i = -1;
      my $node = $self->{open_elements}->[$i];
      LI: {
        ## Step 2
        if ($node->[1] eq 'dt' or $node->[1] eq 'dd') {
          if ($i != -1) {
            !!!parse-error (type => 'end tag missing:'.
                            $self->{open_elements}->[-1]->[1]);
          }
          ## Pop the open |dt|/|dd| and everything above it
          splice @{$self->{open_elements}}, $i;
          last LI;
        }

        ## Step 3
        if (not $formatting_category->{$node->[1]} and
            #not $phrasing_category->{$node->[1]} and
            ($special_category->{$node->[1]} or
             $scoping_category->{$node->[1]}) and
            $node->[1] ne 'address' and $node->[1] ne 'div') {
          last LI;
        }

        ## Step 4
        $i--;
        $node = $self->{open_elements}->[$i];
        redo LI;
      } # LI

      !!!insert-element-t ($token->{tag_name}, $token->{attributes});
      !!!next-token;
      return;
    } elsif ($token->{tag_name} eq 'plaintext') {
      ## has a p element in scope
      INSCOPE: for (reverse @{$self->{open_elements}}) {
        if ($_->[1] eq 'p') {
          !!!back-token;
          $token = {type => 'end tag', tag_name => 'p'};
          return;
        } elsif ({
          table => 1, caption => 1, td => 1, th => 1,
          button => 1, marquee => 1, object => 1, html => 1,
        }->{$_->[1]}) {
          last INSCOPE;
        }
      } # INSCOPE

      !!!insert-element-t ($token->{tag_name}, $token->{attributes});

      $self->{content_model_flag} = 'PLAINTEXT';

      !!!next-token;
      return;
    } elsif ({
      h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
    }->{$token->{tag_name}}) {
      ## has a p element in scope
      INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
        my $node = $self->{open_elements}->[$_];
        if ($node->[1] eq 'p') {
          !!!back-token;
          $token = {type => 'end tag', tag_name => 'p'};
          return;
        } elsif ({
          table => 1, caption => 1, td => 1, th => 1,
          button => 1, marquee => 1, object => 1, html => 1,
        }->{$node->[1]}) {
          last INSCOPE;
        }
      } # INSCOPE

      ## NOTE: See <http://html5.org/tools/web-apps-tracker?from=925&to=926>
      ## has an element in scope
      #my $i;
      #INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
      #  my $node = $self->{open_elements}->[$_];
      #  if ({
      #       h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
      #      }->{$node->[1]}) {
      #    $i = $_;
      #    last INSCOPE;
      #  } elsif ({
      #            table => 1, caption => 1, td => 1, th => 1,
      #            button => 1, marquee => 1, object => 1, html => 1,
      #           }->{$node->[1]}) {
      #    last INSCOPE;
      #  }
      #} # INSCOPE
      #
      #if (defined $i) {
      #  !!! parse-error (type => 'in hn:hn');
      #  splice @{$self->{open_elements}}, $i;
      #}

      !!!insert-element-t ($token->{tag_name}, $token->{attributes});

      !!!next-token;
      return;
    } elsif ($token->{tag_name} eq 'a') {
      ## Look for an |a| in the list of active formatting elements
      ## after the last marker.
      AFE: for my $i (reverse 0..$#$active_formatting_elements) {
        my $node = $active_formatting_elements->[$i];
        if ($node->[1] eq 'a') {
          !!!parse-error (type => 'in a:a');

          ## Act as if </a> had been seen ...
          !!!back-token;
          $token = {type => 'end tag', tag_name => 'a'};
          $formatting_end_tag->($token->{tag_name});

          ## ... then make sure the old |a| is gone from both lists.
          AFE2: for (reverse 0..$#$active_formatting_elements) {
            if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
              splice @$active_formatting_elements, $_, 1;
              last AFE2;
            }
          } # AFE2
          OE: for (reverse 0..$#{$self->{open_elements}}) {
            if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
              splice @{$self->{open_elements}}, $_, 1;
              last OE;
            }
          } # OE
          last AFE;
        } elsif ($node->[0] eq '#marker') {
          last AFE;
        }
      } # AFE

      $reconstruct_active_formatting_elements->($insert_to_current);

      !!!insert-element-t ($token->{tag_name}, $token->{attributes});
      push @$active_formatting_elements, $self->{open_elements}->[-1];

      !!!next-token;
      return;
    } elsif ({
      b => 1, big => 1, em => 1, font => 1, i => 1,
      ## |strike| is the formatting element name (was misspelled
      ## "strile", so <strike> was never tracked as formatting).
      s => 1, small => 1, strike => 1,
      strong => 1, tt => 1, u => 1,
    }->{$token->{tag_name}}) {
      $reconstruct_active_formatting_elements->($insert_to_current);

      !!!insert-element-t ($token->{tag_name}, $token->{attributes});
      push @$active_formatting_elements, $self->{open_elements}->[-1];

      !!!next-token;
      return;
    } elsif ($token->{tag_name} eq 'nobr') {
      $reconstruct_active_formatting_elements->($insert_to_current);

      ## has a |nobr| element in scope
      INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
        my $node = $self->{open_elements}->[$_];
        if ($node->[1] eq 'nobr') {
          !!!back-token;
          $token = {type => 'end tag', tag_name => 'nobr'};
          return;
        } elsif ({
          table => 1, caption => 1, td => 1, th => 1,
          button => 1, marquee => 1, object => 1, html => 1,
        }->{$node->[1]}) {
          last INSCOPE;
        }
      } # INSCOPE

      !!!insert-element-t ($token->{tag_name}, $token->{attributes});
      push @$active_formatting_elements, $self->{open_elements}->[-1];

      !!!next-token;
      return;
    } elsif ($token->{tag_name} eq 'button') {
      ## has a button element in scope
      INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
        my $node = $self->{open_elements}->[$_];
        if ($node->[1] eq 'button') {
          !!!parse-error (type => 'in button:button');
          !!!back-token;
          $token = {type => 'end tag', tag_name => 'button'};
          return;
        } elsif ({
          table => 1, caption => 1, td => 1, th => 1,
          button => 1, marquee => 1, object => 1, html => 1,
        }->{$node->[1]}) {
          last INSCOPE;
        }
      } # INSCOPE

      $reconstruct_active_formatting_elements->($insert_to_current);

      !!!insert-element-t ($token->{tag_name}, $token->{attributes});
      push @$active_formatting_elements, ['#marker', ''];

      !!!next-token;
      return;
    } elsif ($token->{tag_name} eq 'marquee' or
             $token->{tag_name} eq 'object') {
      $reconstruct_active_formatting_elements->($insert_to_current);

      !!!insert-element-t ($token->{tag_name}, $token->{attributes});
      push @$active_formatting_elements, ['#marker', ''];

      !!!next-token;
      return;
    } elsif ($token->{tag_name} eq 'xmp') {
      $reconstruct_active_formatting_elements->($insert_to_current);
      $parse_rcdata->('CDATA', $insert);
      return;
    } elsif ($token->{tag_name} eq 'table') {
      ## has a p element in scope
      INSCOPE: for (reverse @{$self->{open_elements}}) {
        if ($_->[1] eq 'p') {
          !!!back-token;
          $token = {type => 'end tag', tag_name => 'p'};
          return;
        } elsif ({
          table => 1, caption => 1, td => 1, th => 1,
          button => 1, marquee => 1, object => 1, html => 1,
        }->{$_->[1]}) {
          last INSCOPE;
        }
      } # INSCOPE

      !!!insert-element-t ($token->{tag_name}, $token->{attributes});

      $self->{insertion_mode} = 'in table';

      !!!next-token;
      return;
    } elsif ({
      area => 1, basefont => 1, bgsound => 1, br => 1,
      embed => 1, img => 1, param => 1, spacer => 1, wbr => 1,
      image => 1,
    }->{$token->{tag_name}}) {
      if ($token->{tag_name} eq 'image') {
        !!!parse-error (type => 'image');
        $token->{tag_name} = 'img';
      }

      $reconstruct_active_formatting_elements->($insert_to_current);

      ## Insert, then pop immediately: these elements get no content
      !!!insert-element-t ($token->{tag_name}, $token->{attributes});
      pop @{$self->{open_elements}};

      !!!next-token;
      return;
    } elsif ($token->{tag_name} eq 'hr') {
      ## has a p element in scope
      INSCOPE: for (reverse @{$self->{open_elements}}) {
        if ($_->[1] eq 'p') {
          !!!back-token;
          $token = {type => 'end tag', tag_name => 'p'};
          return;
        } elsif ({
          table => 1, caption => 1, td => 1, th => 1,
          button => 1, marquee => 1, object => 1, html => 1,
        }->{$_->[1]}) {
          last INSCOPE;
        }
      } # INSCOPE

      !!!insert-element-t ($token->{tag_name}, $token->{attributes});
      pop @{$self->{open_elements}};

      !!!next-token;
      return;
    } elsif ($token->{tag_name} eq 'input') {
      $reconstruct_active_formatting_elements->($insert_to_current);

      !!!insert-element-t ($token->{tag_name}, $token->{attributes});
      ## TODO: associate with $self->{form_element} if defined
      pop @{$self->{open_elements}};

      !!!next-token;
      return;
    } elsif ($token->{tag_name} eq 'isindex') {
      !!!parse-error (type => 'isindex');

      if (defined $self->{form_element}) {
        ## Ignore the token
        !!!next-token;
        return;
      } else {
        ## Rewrite <isindex> into an equivalent token sequence:
        ## <form><hr><p><label>prompt<input></label></p><hr></form>
        ## |action| goes to the form, |prompt| becomes the label
        ## text, and the remaining attributes go to the input.
        my $at = $token->{attributes};
        my $form_attrs;
        $form_attrs->{action} = $at->{action} if $at->{action};
        my $prompt_attr = $at->{prompt};
        $at->{name} = {name => 'name', value => 'isindex'};
        delete $at->{action};
        delete $at->{prompt};
        my @tokens = (
          {type => 'start tag', tag_name => 'form',
           attributes => $form_attrs},
          {type => 'start tag', tag_name => 'hr'},
          {type => 'start tag', tag_name => 'p'},
          {type => 'start tag', tag_name => 'label'},
        );
        if ($prompt_attr) {
          push @tokens, {type => 'character', data => $prompt_attr->{value}};
        } else {
          push @tokens, {type => 'character',
                         data => 'This is a searchable index. Insert your search keywords here: '}; # SHOULD
          ## TODO: make this configurable
        }
        push @tokens,
            {type => 'start tag', tag_name => 'input', attributes => $at},
            #{type => 'character', data => ''}, # SHOULD
            {type => 'end tag', tag_name => 'label'},
            {type => 'end tag', tag_name => 'p'},
            {type => 'start tag', tag_name => 'hr'},
            {type => 'end tag', tag_name => 'form'};
        ## Process the first synthesized token now, queue the rest
        $token = shift @tokens;
        !!!back-token (@tokens);
        return;
      }
    } elsif ($token->{tag_name} eq 'textarea') {
      my $tag_name = $token->{tag_name};
      my $el;
      !!!create-element ($el, $token->{tag_name}, $token->{attributes});

      ## TODO: $self->{form_element} if defined
      $self->{content_model_flag} = 'RCDATA';
      delete $self->{escape}; # MUST

      $insert->($el);

      my $text = '';
      !!!next-token;
      ## Drop a single U+000A directly after <textarea>
      if ($token->{type} eq 'character') {
        $token->{data} =~ s/^\x0A//;
        unless (length $token->{data}) {
          !!!next-token;
        }
      }
      ## Collect all consecutive character tokens as the content
      while ($token->{type} eq 'character') {
        $text .= $token->{data};
        !!!next-token;
      }
      if (length $text) {
        $el->manakai_append_text ($text);
      }

      $self->{content_model_flag} = 'PCDATA';

      if ($token->{type} eq 'end tag' and
          $token->{tag_name} eq $tag_name) {
        ## Ignore the token
      } else {
        !!!parse-error (type => 'in RCDATA:#'.$token->{type});
      }
      !!!next-token;
      return;
    } elsif ({
      iframe => 1,
      noembed => 1,
      noframes => 1,
      noscript => 0, ## TODO: 1 if scripting is enabled
    }->{$token->{tag_name}}) {
      $parse_rcdata->('CDATA', $insert);
      return;
    } elsif ($token->{tag_name} eq 'select') {
      $reconstruct_active_formatting_elements->($insert_to_current);

      !!!insert-element-t ($token->{tag_name}, $token->{attributes});

      $self->{insertion_mode} = 'in select';
      !!!next-token;
      return;
    } elsif ({
      caption => 1, col => 1, colgroup => 1, frame => 1,
      frameset => 1, head => 1, option => 1, optgroup => 1,
      tbody => 1, td => 1, tfoot => 1, th => 1,
      thead => 1, tr => 1,
    }->{$token->{tag_name}}) {
      !!!parse-error (type => 'in body:'.$token->{tag_name});
      ## Ignore the token
      !!!next-token;
      return;

      ## ISSUE: An issue on HTML5 new elements in the spec.
    } else {
      ## Any other start tag: ordinary element
      $reconstruct_active_formatting_elements->($insert_to_current);

      !!!insert-element-t ($token->{tag_name}, $token->{attributes});

      !!!next-token;
      return;
    }
  } elsif ($token->{type} eq 'end tag') {
    if ($token->{tag_name} eq 'body') {
      if (@{$self->{open_elements}} > 1 and
          $self->{open_elements}->[1]->[1] eq 'body') {
        ## Report every open element that may not be left open here
        for (@{$self->{open_elements}}) {
          unless ({
            dd => 1, dt => 1, li => 1, p => 1, td => 1,
            th => 1, tr => 1, body => 1, html => 1,
          }->{$_->[1]}) {
            !!!parse-error (type => 'not closed:'.$_->[1]);
          }
        }

        $self->{insertion_mode} = 'after body';
        !!!next-token;
        return;
      } else {
        !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
        ## Ignore the token
        !!!next-token;
        return;
      }
    } elsif ($token->{tag_name} eq 'html') {
      if (@{$self->{open_elements}} > 1 and $self->{open_elements}->[1]->[1] eq 'body') {
        ## ISSUE: There is an issue in the spec.
        if ($self->{open_elements}->[-1]->[1] ne 'body') {
          ## Name the element that is actually still open (the
          ## current node), not |body| itself.
          !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
        }
        $self->{insertion_mode} = 'after body';
        ## reprocess
        return;
      } else {
        !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
        ## Ignore the token
        !!!next-token;
        return;
      }
    } elsif ({
      address => 1, blockquote => 1, center => 1, dir => 1,
      div => 1, dl => 1, fieldset => 1, listing => 1,
      menu => 1, ol => 1, pre => 1, ul => 1,
      p => 1,
      dd => 1, dt => 1, li => 1,
      button => 1, marquee => 1, object => 1,
    }->{$token->{tag_name}}) {
      ## has an element in scope
      my $i;
      INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
        my $node = $self->{open_elements}->[$_];
        if ($node->[1] eq $token->{tag_name}) {
          ## generate implied end tags
          ## (except for an element with the same name as the token)
          if ({
            dd => ($token->{tag_name} ne 'dd'),
            dt => ($token->{tag_name} ne 'dt'),
            li => ($token->{tag_name} ne 'li'),
            p => ($token->{tag_name} ne 'p'),
            td => 1, th => 1, tr => 1,
          }->{$self->{open_elements}->[-1]->[1]}) {
            !!!back-token;
            $token = {type => 'end tag',
                      tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
            return;
          }
          $i = $_;
          last INSCOPE unless $token->{tag_name} eq 'p';
        } elsif ({
          table => 1, caption => 1, td => 1, th => 1,
          button => 1, marquee => 1, object => 1, html => 1,
        }->{$node->[1]}) {
          last INSCOPE;
        }
      } # INSCOPE

      if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
        !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
      }

      ## Pop up to and including the matched element, if any
      splice @{$self->{open_elements}}, $i if defined $i;
      $clear_up_to_marker->()
          if {
            button => 1, marquee => 1, object => 1,
          }->{$token->{tag_name}};
      !!!next-token;
      return;
    } elsif ($token->{tag_name} eq 'form') {
      ## has an element in scope
      INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
        my $node = $self->{open_elements}->[$_];
        if ($node->[1] eq $token->{tag_name}) {
          ## generate implied end tags
          if ({
            dd => 1, dt => 1, li => 1, p => 1,
            td => 1, th => 1, tr => 1,
          }->{$self->{open_elements}->[-1]->[1]}) {
            !!!back-token;
            $token = {type => 'end tag',
                      tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
            return;
          }
          last INSCOPE;
        } elsif ({
          table => 1, caption => 1, td => 1, th => 1,
          button => 1, marquee => 1, object => 1, html => 1,
        }->{$node->[1]}) {
          last INSCOPE;
        }
      } # INSCOPE

      if ($self->{open_elements}->[-1]->[1] eq $token->{tag_name}) {
        pop @{$self->{open_elements}};
      } else {
        !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
      }

      undef $self->{form_element};
      !!!next-token;
      return;
    } elsif ({
      h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
    }->{$token->{tag_name}}) {
      ## has an element in scope
      ## (any of h1-h6 matches, not only the one named by the token)
      my $i;
      INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
        my $node = $self->{open_elements}->[$_];
        if ({
          h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
        }->{$node->[1]}) {
          ## generate implied end tags
          if ({
            dd => 1, dt => 1, li => 1, p => 1,
            td => 1, th => 1, tr => 1,
          }->{$self->{open_elements}->[-1]->[1]}) {
            !!!back-token;
            $token = {type => 'end tag',
                      tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
            return;
          }
          $i = $_;
          last INSCOPE;
        } elsif ({
          table => 1, caption => 1, td => 1, th => 1,
          button => 1, marquee => 1, object => 1, html => 1,
        }->{$node->[1]}) {
          last INSCOPE;
        }
      } # INSCOPE

      if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
        !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
      }

      splice @{$self->{open_elements}}, $i if defined $i;
      !!!next-token;
      return;
    } elsif ({
      a => 1,
      b => 1, big => 1, em => 1, font => 1, i => 1,
      ## |strike|, not "strile"; must match the start tag set above
      nobr => 1, s => 1, small => 1, strike => 1,
      strong => 1, tt => 1, u => 1,
    }->{$token->{tag_name}}) {
      $formatting_end_tag->($token->{tag_name});
      ## TODO: <http://html5.org/tools/web-apps-tracker?from=883&to=884>
      return;
    } elsif ({
      caption => 1, col => 1, colgroup => 1, frame => 1,
      frameset => 1, head => 1, option => 1, optgroup => 1,
      tbody => 1, td => 1, tfoot => 1, th => 1,
      thead => 1, tr => 1,
      area => 1, basefont => 1, bgsound => 1, br => 1,
      embed => 1, hr => 1, iframe => 1, image => 1,
      img => 1, input => 1, isindex => 1, noembed => 1,
      noframes => 1, param => 1, select => 1, spacer => 1,
      table => 1, textarea => 1, wbr => 1,
      noscript => 0, ## TODO: if scripting is enabled
    }->{$token->{tag_name}}) {
      !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
      ## Ignore the token
      !!!next-token;
      return;

      ## ISSUE: Issue on HTML5 new elements in spec

    } else {
      ## Any other end tag: walk down the stack of open elements
      ## from the current node looking for a matching element.

      ## Step 1
      my $node_i = -1;
      my $node = $self->{open_elements}->[$node_i];

      ## Step 2
      S2: {
        if ($node->[1] eq $token->{tag_name}) {
          ## Step 1
          ## generate implied end tags
          if ({
            dd => 1, dt => 1, li => 1, p => 1,
            td => 1, th => 1, tr => 1,
          }->{$self->{open_elements}->[-1]->[1]}) {
            !!!back-token;
            $token = {type => 'end tag',
                      tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
            return;
          }

          ## Step 2
          if ($token->{tag_name} ne $self->{open_elements}->[-1]->[1]) {
            !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
          }

          ## Step 3
          splice @{$self->{open_elements}}, $node_i;

          !!!next-token;
          last S2;
        } else {
          ## Step 3
          if (not $formatting_category->{$node->[1]} and
              #not $phrasing_category->{$node->[1]} and
              ($special_category->{$node->[1]} or
               $scoping_category->{$node->[1]})) {
            !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
            ## Ignore the token
            !!!next-token;
            last S2;
          }
        }

        ## Step 4
        $node_i--;
        $node = $self->{open_elements}->[$node_i];

        ## Step 5;
        redo S2;
      } # S2
      return;
    }
  }
}; # $in_body
3267    
3268     B: {
3269 wakaba 1.3 if ($phase eq 'main') {
3270 wakaba 1.1 if ($token->{type} eq 'DOCTYPE') {
3271 wakaba 1.3 !!!parse-error (type => 'in html:#DOCTYPE');
3272 wakaba 1.1 ## Ignore the token
3273     ## Stay in the phase
3274     !!!next-token;
3275     redo B;
3276     } elsif ($token->{type} eq 'start tag' and
3277     $token->{tag_name} eq 'html') {
3278 wakaba 1.28 ## ISSUE: "aa<html>" is not a parse error.
3279     ## ISSUE: "<html>" in fragment is not a parse error.
3280     unless ($token->{first_start_tag}) {
3281     !!!parse-error (type => 'not first start tag');
3282     }
3283 wakaba 1.3 my $top_el = $self->{open_elements}->[0]->[0];
3284 wakaba 1.1 for my $attr_name (keys %{$token->{attributes}}) {
3285     unless ($top_el->has_attribute_ns (undef, $attr_name)) {
3286     $top_el->set_attribute_ns
3287     (undef, [undef, $attr_name],
3288     $token->{attributes}->{$attr_name}->{value});
3289     }
3290     }
3291     !!!next-token;
3292     redo B;
3293     } elsif ($token->{type} eq 'end-of-file') {
3294     ## Generate implied end tags
3295     if ({
3296     dd => 1, dt => 1, li => 1, p => 1, td => 1, th => 1, tr => 1,
3297 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3298 wakaba 1.1 !!!back-token;
3299 wakaba 1.3 $token = {type => 'end tag', tag_name => $self->{open_elements}->[-1]->[1]};
3300 wakaba 1.1 redo B;
3301     }
3302    
3303 wakaba 1.3 if (@{$self->{open_elements}} > 2 or
3304     (@{$self->{open_elements}} == 2 and $self->{open_elements}->[1]->[1] ne 'body')) {
3305     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3306     } elsif (defined $self->{inner_html_node} and
3307     @{$self->{open_elements}} > 1 and
3308     $self->{open_elements}->[1]->[1] ne 'body') {
3309     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3310 wakaba 1.1 }
3311    
3312     ## Stop parsing
3313     last B;
3314    
3315     ## ISSUE: There is an issue in the spec.
3316     } else {
3317 wakaba 1.3 if ($self->{insertion_mode} eq 'before head') {
3318 wakaba 1.1 if ($token->{type} eq 'character') {
3319     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3320 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3321 wakaba 1.1 unless (length $token->{data}) {
3322     !!!next-token;
3323     redo B;
3324     }
3325     }
3326     ## As if <head>
3327 wakaba 1.3 !!!create-element ($self->{head_element}, 'head');
3328     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3329     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3330     $self->{insertion_mode} = 'in head';
3331 wakaba 1.1 ## reprocess
3332     redo B;
3333     } elsif ($token->{type} eq 'comment') {
3334     my $comment = $self->{document}->create_comment ($token->{data});
3335 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3336 wakaba 1.1 !!!next-token;
3337     redo B;
3338     } elsif ($token->{type} eq 'start tag') {
3339     my $attr = $token->{tag_name} eq 'head' ? $token->{attributes} : {};
3340 wakaba 1.3 !!!create-element ($self->{head_element}, 'head', $attr);
3341     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3342     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3343     $self->{insertion_mode} = 'in head';
3344 wakaba 1.1 if ($token->{tag_name} eq 'head') {
3345     !!!next-token;
3346     #} elsif ({
3347     # base => 1, link => 1, meta => 1,
3348     # script => 1, style => 1, title => 1,
3349     # }->{$token->{tag_name}}) {
3350     # ## reprocess
3351     } else {
3352     ## reprocess
3353     }
3354     redo B;
3355     } elsif ($token->{type} eq 'end tag') {
3356 wakaba 1.21 if ({head => 1, body => 1, html => 1}->{$token->{tag_name}}) {
3357 wakaba 1.1 ## As if <head>
3358 wakaba 1.3 !!!create-element ($self->{head_element}, 'head');
3359     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3360     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3361     $self->{insertion_mode} = 'in head';
3362 wakaba 1.1 ## reprocess
3363     redo B;
3364     } else {
3365 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3366 wakaba 1.21 ## Ignore the token ## ISSUE: An issue in the spec.
3367 wakaba 1.1 !!!next-token;
3368     redo B;
3369     }
3370     } else {
3371     die "$0: $token->{type}: Unknown type";
3372     }
3373 wakaba 1.25 } elsif ($self->{insertion_mode} eq 'in head' or
3374     $self->{insertion_mode} eq 'in head noscript' or
3375     $self->{insertion_mode} eq 'after head') {
3376 wakaba 1.1 if ($token->{type} eq 'character') {
3377     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3378 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3379 wakaba 1.1 unless (length $token->{data}) {
3380     !!!next-token;
3381     redo B;
3382     }
3383     }
3384    
3385     #
3386     } elsif ($token->{type} eq 'comment') {
3387     my $comment = $self->{document}->create_comment ($token->{data});
3388 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3389 wakaba 1.1 !!!next-token;
3390     redo B;
3391     } elsif ($token->{type} eq 'start tag') {
3392 wakaba 1.25 if ({base => ($self->{insertion_mode} eq 'in head' or
3393     $self->{insertion_mode} eq 'after head'),
3394     link => 1, meta => 1}->{$token->{tag_name}}) {
3395     ## NOTE: There is a "as if in head" code clone.
3396     if ($self->{insertion_mode} eq 'after head') {
3397     !!!parse-error (type => 'after head:'.$token->{tag_name});
3398     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3399     }
3400     !!!insert-element ($token->{tag_name}, $token->{attributes});
3401     pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
3402 wakaba 1.26 ## TODO: Extracting |charset| from |meta|.
3403 wakaba 1.25 pop @{$self->{open_elements}}
3404     if $self->{insertion_mode} eq 'after head';
3405 wakaba 1.1 !!!next-token;
3406 wakaba 1.25 redo B;
3407     } elsif ($token->{tag_name} eq 'title' and
         ## <title> start tag in 'in head' or 'after head'.  The guard used
         ## to require 'in head' only, which made both 'after head' checks
         ## below unreachable and sent a <title> seen after </head> down the
         ## implied-<body> path, unlike base/link/meta/style/script.
3408     $self->{insertion_mode} ne 'in head noscript') {
3409     ## NOTE: There is a "as if in head" code clone.
3410     if ($self->{insertion_mode} eq 'after head') {
         ## Parse error; temporarily put the head element back on the
         ## stack of open elements so that the title is inserted into it.
3411     !!!parse-error (type => 'after head:'.$token->{tag_name});
3412     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3413     }
3414     $parse_rcdata->('RCDATA', $insert_to_current);
         ## Undo the temporary push of the head element.
3415     pop @{$self->{open_elements}}
3416     if $self->{insertion_mode} eq 'after head';
3417     redo B;
3418     } elsif ($token->{tag_name} eq 'style') {
3419     ## NOTE: Or (scripting is enabled and tag_name eq 'noscript' and
3420     ## insertion mode 'in head')
3421     ## NOTE: There is a "as if in head" code clone.
3422     if ($self->{insertion_mode} eq 'after head') {
3423     !!!parse-error (type => 'after head:'.$token->{tag_name});
3424     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3425     }
3426     $parse_rcdata->('CDATA', $insert_to_current);
3427     pop @{$self->{open_elements}}
3428     if $self->{insertion_mode} eq 'after head';
3429     redo B;
3430     } elsif ($token->{tag_name} eq 'noscript') {
         ## <noscript> start tag in 'in head', 'in head noscript' or
         ## 'after head' (the three modes handled by the enclosing branch).
3431     if ($self->{insertion_mode} eq 'in head') {
3432     ## NOTE: and scripting is disabled
3433     !!!insert-element ($token->{tag_name}, $token->{attributes});
3434     $self->{insertion_mode} = 'in head noscript';
3435 wakaba 1.1 !!!next-token;
3436 wakaba 1.25 redo B;
3437     } elsif ($self->{insertion_mode} eq 'in head noscript') {
         ## Nested <noscript>: report the error and drop the token.
3438 wakaba 1.30 !!!parse-error (type => 'in noscript:noscript');
3439 wakaba 1.1 ## Ignore the token
         ## The token must be consumed here; |redo B| alone would dispatch
         ## the same <noscript> token in the same mode again, forever.
         !!!next-token;
3440 wakaba 1.25 redo B;
3441 wakaba 1.1 } else {
         ## 'after head': handled by the common fall-through code that
         ## follows this if-chain.
3442 wakaba 1.25 #
3443 wakaba 1.1 }
3444 wakaba 1.25 } elsif ($token->{tag_name} eq 'head' and
3445     $self->{insertion_mode} ne 'after head') {
3446     !!!parse-error (type => 'in head:head'); # or in head noscript
3447     ## Ignore the token
3448 wakaba 1.1 !!!next-token;
3449     redo B;
3450 wakaba 1.25 } elsif ($self->{insertion_mode} ne 'in head noscript' and
3451     $token->{tag_name} eq 'script') {
3452     if ($self->{insertion_mode} eq 'after head') {
3453     !!!parse-error (type => 'after head:'.$token->{tag_name});
3454     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3455     }
3456     ## NOTE: There is a "as if in head" code clone.
3457     $script_start_tag->($insert_to_current);
3458     pop @{$self->{open_elements}}
3459     if $self->{insertion_mode} eq 'after head';
3460 wakaba 1.1 redo B;
3461 wakaba 1.25 } elsif ($self->{insertion_mode} eq 'after head' and
3462     $token->{tag_name} eq 'body') {
3463     !!!insert-element ('body', $token->{attributes});
3464     $self->{insertion_mode} = 'in body';
3465 wakaba 1.1 !!!next-token;
3466     redo B;
3467 wakaba 1.25 } elsif ($self->{insertion_mode} eq 'after head' and
3468     $token->{tag_name} eq 'frameset') {
3469     !!!insert-element ('frameset', $token->{attributes});
3470     $self->{insertion_mode} = 'in frameset';
3471 wakaba 1.1 !!!next-token;
3472     redo B;
3473     } else {
3474     #
3475     }
3476     } elsif ($token->{type} eq 'end tag') {
3477 wakaba 1.25 if ($self->{insertion_mode} eq 'in head' and
3478     $token->{tag_name} eq 'head') {
3479     pop @{$self->{open_elements}};
3480 wakaba 1.3 $self->{insertion_mode} = 'after head';
3481 wakaba 1.1 !!!next-token;
3482     redo B;
3483 wakaba 1.25 } elsif ($self->{insertion_mode} eq 'in head noscript' and
3484     $token->{tag_name} eq 'noscript') {
3485     pop @{$self->{open_elements}};
3486     $self->{insertion_mode} = 'in head';
3487     !!!next-token;
3488     redo B;
3489     } elsif ($self->{insertion_mode} eq 'in head' and
3490     ($token->{tag_name} eq 'body' or
3491     $token->{tag_name} eq 'html')) {
3492 wakaba 1.1 #
3493 wakaba 1.25 } elsif ($self->{insertion_mode} ne 'after head') {
3494 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3495 wakaba 1.1 ## Ignore the token
3496     !!!next-token;
3497     redo B;
3498 wakaba 1.25 } else {
3499     #
3500 wakaba 1.1 }
3501     } else {
3502     #
3503     }
3504    
3505 wakaba 1.25 ## As if </head> or </noscript> or <body>
3506     if ($self->{insertion_mode} eq 'in head') {
3507 wakaba 1.3 pop @{$self->{open_elements}};
3508 wakaba 1.25 $self->{insertion_mode} = 'after head';
3509     } elsif ($self->{insertion_mode} eq 'in head noscript') {
3510     pop @{$self->{open_elements}};
3511     !!!parse-error (type => 'in noscript:'.(defined $token->{tag_name} ? ($token->{type} eq 'end tag' ? '/' : '') . $token->{tag_name} : '#' . $token->{type}));
3512     $self->{insertion_mode} = 'in head';
3513     } else { # 'after head'
3514     !!!insert-element ('body');
3515     $self->{insertion_mode} = 'in body';
3516 wakaba 1.1 }
3517     ## reprocess
3518     redo B;
3519    
3520     ## ISSUE: An issue in the spec.
3521 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in body') {
3522 wakaba 1.1 if ($token->{type} eq 'character') {
3523     ## NOTE: There is a code clone of "character in body".
3524     $reconstruct_active_formatting_elements->($insert_to_current);
3525    
3526 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3527 wakaba 1.1
3528     !!!next-token;
3529     redo B;
3530     } elsif ($token->{type} eq 'comment') {
3531     ## NOTE: There is a code clone of "comment in body".
3532     my $comment = $self->{document}->create_comment ($token->{data});
3533 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3534 wakaba 1.1 !!!next-token;
3535     redo B;
3536     } else {
3537     $in_body->($insert_to_current);
3538     redo B;
3539     }
3540 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in table') {
3541 wakaba 1.1 if ($token->{type} eq 'character') {
3542     ## NOTE: There are "character in table" code clones.
3543     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3544 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3545 wakaba 1.1
3546     unless (length $token->{data}) {
3547     !!!next-token;
3548     redo B;
3549     }
3550     }
3551    
3552 wakaba 1.3 !!!parse-error (type => 'in table:#character');
3553    
3554 wakaba 1.1 ## As if in body, but insert into foster parent element
3555     ## ISSUE: Spec says that "whenever a node would be inserted
3556     ## into the current node" while characters might not be
3557     ## result in a new Text node.
3558     $reconstruct_active_formatting_elements->($insert_to_foster);
3559    
3560     if ({
3561     table => 1, tbody => 1, tfoot => 1,
3562     thead => 1, tr => 1,
3563 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3564 wakaba 1.1 # MUST
3565     my $foster_parent_element;
3566     my $next_sibling;
3567     my $prev_sibling;
3568 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3569     if ($self->{open_elements}->[$_]->[1] eq 'table') {
3570     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
3571 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
3572     $foster_parent_element = $parent;
3573 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
3574 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
3575     } else {
3576 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
3577 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
3578     }
3579     last OE;
3580     }
3581     } # OE
3582 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
3583 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
3584     unless defined $foster_parent_element;
3585     if (defined $prev_sibling and
3586     $prev_sibling->node_type == 3) {
3587     $prev_sibling->manakai_append_text ($token->{data});
3588     } else {
3589     $foster_parent_element->insert_before
3590     ($self->{document}->create_text_node ($token->{data}),
3591     $next_sibling);
3592     }
3593     } else {
3594 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3595 wakaba 1.1 }
3596    
3597     !!!next-token;
3598     redo B;
3599     } elsif ($token->{type} eq 'comment') {
3600     my $comment = $self->{document}->create_comment ($token->{data});
3601 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3602 wakaba 1.1 !!!next-token;
3603     redo B;
3604     } elsif ($token->{type} eq 'start tag') {
3605     if ({
3606     caption => 1,
3607     colgroup => 1,
3608     tbody => 1, tfoot => 1, thead => 1,
3609     }->{$token->{tag_name}}) {
3610     ## Clear back to table context
3611 wakaba 1.3 while ($self->{open_elements}->[-1]->[1] ne 'table' and
3612     $self->{open_elements}->[-1]->[1] ne 'html') {
3613     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3614     pop @{$self->{open_elements}};
3615 wakaba 1.1 }
3616    
3617     push @$active_formatting_elements, ['#marker', '']
3618     if $token->{tag_name} eq 'caption';
3619    
3620     !!!insert-element ($token->{tag_name}, $token->{attributes});
3621 wakaba 1.3 $self->{insertion_mode} = {
3622 wakaba 1.1 caption => 'in caption',
3623     colgroup => 'in column group',
3624     tbody => 'in table body',
3625     tfoot => 'in table body',
3626     thead => 'in table body',
3627     }->{$token->{tag_name}};
3628     !!!next-token;
3629     redo B;
3630     } elsif ({
3631     col => 1,
3632     td => 1, th => 1, tr => 1,
3633     }->{$token->{tag_name}}) {
3634     ## Clear back to table context
3635 wakaba 1.3 while ($self->{open_elements}->[-1]->[1] ne 'table' and
3636     $self->{open_elements}->[-1]->[1] ne 'html') {
3637     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3638     pop @{$self->{open_elements}};
3639 wakaba 1.1 }
3640    
3641     !!!insert-element ($token->{tag_name} eq 'col' ? 'colgroup' : 'tbody');
3642 wakaba 1.3 $self->{insertion_mode} = $token->{tag_name} eq 'col'
3643 wakaba 1.1 ? 'in column group' : 'in table body';
3644     ## reprocess
3645     redo B;
3646     } elsif ($token->{tag_name} eq 'table') {
3647     ## NOTE: There are code clones for this "table in table"
3648 wakaba 1.3 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3649 wakaba 1.1
3650     ## As if </table>
3651     ## have a table element in table scope
3652     my $i;
3653 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3654     my $node = $self->{open_elements}->[$_];
3655 wakaba 1.1 if ($node->[1] eq 'table') {
3656     $i = $_;
3657     last INSCOPE;
3658     } elsif ({
3659     table => 1, html => 1,
3660     }->{$node->[1]}) {
3661     last INSCOPE;
3662     }
3663     } # INSCOPE
3664     unless (defined $i) {
3665 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:table');
3666 wakaba 1.1 ## Ignore tokens </table><table>
3667     !!!next-token;
3668     redo B;
3669     }
3670    
3671     ## generate implied end tags
3672     if ({
3673     dd => 1, dt => 1, li => 1, p => 1,
3674     td => 1, th => 1, tr => 1,
3675 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3676 wakaba 1.1 !!!back-token; # <table>
3677     $token = {type => 'end tag', tag_name => 'table'};
3678     !!!back-token;
3679     $token = {type => 'end tag',
3680 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3681 wakaba 1.1 redo B;
3682     }
3683    
3684 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
3685     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3686 wakaba 1.1 }
3687    
3688 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3689 wakaba 1.1
3690 wakaba 1.3 $self->_reset_insertion_mode;
3691 wakaba 1.1
3692     ## reprocess
3693     redo B;
3694     } else {
3695     #
3696     }
3697     } elsif ($token->{type} eq 'end tag') {
3698     if ($token->{tag_name} eq 'table') {
3699     ## have a table element in table scope
3700     my $i;
3701 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3702     my $node = $self->{open_elements}->[$_];
3703 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
3704     $i = $_;
3705     last INSCOPE;
3706     } elsif ({
3707     table => 1, html => 1,
3708     }->{$node->[1]}) {
3709     last INSCOPE;
3710     }
3711     } # INSCOPE
3712     unless (defined $i) {
3713 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3714 wakaba 1.1 ## Ignore the token
3715     !!!next-token;
3716     redo B;
3717     }
3718    
3719     ## generate implied end tags
3720     if ({
3721     dd => 1, dt => 1, li => 1, p => 1,
3722     td => 1, th => 1, tr => 1,
3723 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3724 wakaba 1.1 !!!back-token;
3725     $token = {type => 'end tag',
3726 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3727 wakaba 1.1 redo B;
3728     }
3729    
3730 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
3731     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3732 wakaba 1.1 }
3733    
3734 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3735 wakaba 1.1
3736 wakaba 1.3 $self->_reset_insertion_mode;
3737 wakaba 1.1
3738     !!!next-token;
3739     redo B;
3740     } elsif ({
3741     body => 1, caption => 1, col => 1, colgroup => 1,
3742     html => 1, tbody => 1, td => 1, tfoot => 1, th => 1,
3743     thead => 1, tr => 1,
3744     }->{$token->{tag_name}}) {
3745 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3746 wakaba 1.1 ## Ignore the token
3747     !!!next-token;
3748     redo B;
3749     } else {
3750     #
3751     }
3752     } else {
3753     #
3754     }
3755    
3756 wakaba 1.3 !!!parse-error (type => 'in table:'.$token->{tag_name});
3757 wakaba 1.1 $in_body->($insert_to_foster);
3758     redo B;
3759 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in caption') {
3760 wakaba 1.1 if ($token->{type} eq 'character') {
3761     ## NOTE: This is a code clone of "character in body".
3762     $reconstruct_active_formatting_elements->($insert_to_current);
3763    
3764 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3765 wakaba 1.1
3766     !!!next-token;
3767     redo B;
3768     } elsif ($token->{type} eq 'comment') {
3769     ## NOTE: This is a code clone of "comment in body".
3770     my $comment = $self->{document}->create_comment ($token->{data});
3771 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3772 wakaba 1.1 !!!next-token;
3773     redo B;
3774     } elsif ($token->{type} eq 'start tag') {
3775     if ({
3776     caption => 1, col => 1, colgroup => 1, tbody => 1,
3777     td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
3778     }->{$token->{tag_name}}) {
3779 wakaba 1.3 !!!parse-error (type => 'not closed:caption');
3780 wakaba 1.1
3781     ## As if </caption>
3782     ## have a table element in table scope
3783     my $i;
3784 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3785     my $node = $self->{open_elements}->[$_];
3786 wakaba 1.1 if ($node->[1] eq 'caption') {
3787     $i = $_;
3788     last INSCOPE;
3789     } elsif ({
3790     table => 1, html => 1,
3791     }->{$node->[1]}) {
3792     last INSCOPE;
3793     }
3794     } # INSCOPE
3795     unless (defined $i) {
3796 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:caption');
3797 wakaba 1.1 ## Ignore the token
3798     !!!next-token;
3799     redo B;
3800     }
3801    
3802     ## generate implied end tags
3803     if ({
3804     dd => 1, dt => 1, li => 1, p => 1,
3805     td => 1, th => 1, tr => 1,
3806 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3807 wakaba 1.1 !!!back-token; # <?>
3808     $token = {type => 'end tag', tag_name => 'caption'};
3809     !!!back-token;
3810     $token = {type => 'end tag',
3811 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3812 wakaba 1.1 redo B;
3813     }
3814    
3815 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
3816     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3817 wakaba 1.1 }
3818    
3819 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3820 wakaba 1.1
3821     $clear_up_to_marker->();
3822    
3823 wakaba 1.3 $self->{insertion_mode} = 'in table';
3824 wakaba 1.1
3825     ## reprocess
3826     redo B;
3827     } else {
3828     #
3829     }
3830     } elsif ($token->{type} eq 'end tag') {
3831     if ($token->{tag_name} eq 'caption') {
3832     ## have a table element in table scope
3833     my $i;
3834 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3835     my $node = $self->{open_elements}->[$_];
3836 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
3837     $i = $_;
3838     last INSCOPE;
3839     } elsif ({
3840     table => 1, html => 1,
3841     }->{$node->[1]}) {
3842     last INSCOPE;
3843     }
3844     } # INSCOPE
3845     unless (defined $i) {
3846 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3847 wakaba 1.1 ## Ignore the token
3848     !!!next-token;
3849     redo B;
3850     }
3851    
3852     ## generate implied end tags
3853     if ({
3854     dd => 1, dt => 1, li => 1, p => 1,
3855     td => 1, th => 1, tr => 1,
3856 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3857 wakaba 1.1 !!!back-token;
3858     $token = {type => 'end tag',
3859 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3860 wakaba 1.1 redo B;
3861     }
3862    
3863 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
3864     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3865 wakaba 1.1 }
3866    
3867 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3868 wakaba 1.1
3869     $clear_up_to_marker->();
3870    
3871 wakaba 1.3 $self->{insertion_mode} = 'in table';
3872 wakaba 1.1
3873     !!!next-token;
3874     redo B;
3875     } elsif ($token->{tag_name} eq 'table') {
3876 wakaba 1.3 !!!parse-error (type => 'not closed:caption');
3877 wakaba 1.1
3878     ## As if </caption>
3879     ## have a table element in table scope
3880     my $i;
3881 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3882     my $node = $self->{open_elements}->[$_];
3883 wakaba 1.1 if ($node->[1] eq 'caption') {
3884     $i = $_;
3885     last INSCOPE;
3886     } elsif ({
3887     table => 1, html => 1,
3888     }->{$node->[1]}) {
3889     last INSCOPE;
3890     }
3891     } # INSCOPE
3892     unless (defined $i) {
3893 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:caption');
3894 wakaba 1.1 ## Ignore the token
3895     !!!next-token;
3896     redo B;
3897     }
3898    
3899     ## generate implied end tags
3900     if ({
3901     dd => 1, dt => 1, li => 1, p => 1,
3902     td => 1, th => 1, tr => 1,
3903 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3904 wakaba 1.1 !!!back-token; # </table>
3905     $token = {type => 'end tag', tag_name => 'caption'};
3906     !!!back-token;
3907     $token = {type => 'end tag',
3908 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3909 wakaba 1.1 redo B;
3910     }
3911    
3912 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
3913     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3914 wakaba 1.1 }
3915    
3916 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3917 wakaba 1.1
3918     $clear_up_to_marker->();
3919    
3920 wakaba 1.3 $self->{insertion_mode} = 'in table';
3921 wakaba 1.1
3922     ## reprocess
3923     redo B;
3924     } elsif ({
         ## End tags that can never match anything while in 'in caption':
         ## report a parse error and drop the token.
3925     body => 1, col => 1, colgroup => 1,
3926     html => 1, tbody => 1, td => 1, tfoot => 1,
3927     th => 1, thead => 1, tr => 1,
3928     }->{$token->{tag_name}}) {
3929 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3930 wakaba 1.1 ## Ignore the token
         ## Consume the token before looping; without this, |redo B| would
         ## reprocess the same end tag in the same mode indefinitely.
         !!!next-token;
3931     redo B;
3932     } else {
3933     #
3934     }
3935     } else {
3936     #
3937     }
3938    
3939     $in_body->($insert_to_current);
3940     redo B;
3941 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in column group') {
3942 wakaba 1.1 if ($token->{type} eq 'character') {
3943     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3944 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3945 wakaba 1.1 unless (length $token->{data}) {
3946     !!!next-token;
3947     redo B;
3948     }
3949     }
3950    
3951     #
3952     } elsif ($token->{type} eq 'comment') {
3953     my $comment = $self->{document}->create_comment ($token->{data});
3954 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3955 wakaba 1.1 !!!next-token;
3956     redo B;
3957     } elsif ($token->{type} eq 'start tag') {
3958     if ($token->{tag_name} eq 'col') {
3959     !!!insert-element ($token->{tag_name}, $token->{attributes});
3960 wakaba 1.3 pop @{$self->{open_elements}};
3961 wakaba 1.1 !!!next-token;
3962     redo B;
3963     } else {
3964     #
3965     }
3966     } elsif ($token->{type} eq 'end tag') {
3967     if ($token->{tag_name} eq 'colgroup') {
3968 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html') {
3969     !!!parse-error (type => 'unmatched end tag:colgroup');
3970 wakaba 1.1 ## Ignore the token
3971     !!!next-token;
3972     redo B;
3973     } else {
3974 wakaba 1.3 pop @{$self->{open_elements}}; # colgroup
3975     $self->{insertion_mode} = 'in table';
3976 wakaba 1.1 !!!next-token;
3977     redo B;
3978     }
3979     } elsif ($token->{tag_name} eq 'col') {
3980 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:col');
3981 wakaba 1.1 ## Ignore the token
3982     !!!next-token;
3983     redo B;
3984     } else {
3985     #
3986     }
3987     } else {
3988     #
3989     }
3990    
3991     ## As if </colgroup>
3992 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html') {
3993     !!!parse-error (type => 'unmatched end tag:colgroup');
3994 wakaba 1.1 ## Ignore the token
3995     !!!next-token;
3996     redo B;
3997     } else {
3998 wakaba 1.3 pop @{$self->{open_elements}}; # colgroup
3999     $self->{insertion_mode} = 'in table';
4000 wakaba 1.1 ## reprocess
4001     redo B;
4002     }
4003 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in table body') {
4004 wakaba 1.1 if ($token->{type} eq 'character') {
4005     ## NOTE: This is a "character in table" code clone.
4006     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4007 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4008 wakaba 1.1
4009     unless (length $token->{data}) {
4010     !!!next-token;
4011     redo B;
4012     }
4013     }
4014    
4015 wakaba 1.3 !!!parse-error (type => 'in table:#character');
4016    
4017 wakaba 1.1 ## As if in body, but insert into foster parent element
4018     ## ISSUE: Spec says that "whenever a node would be inserted
4019     ## into the current node" while characters might not be
4020     ## result in a new Text node.
4021     $reconstruct_active_formatting_elements->($insert_to_foster);
4022    
4023     if ({
4024     table => 1, tbody => 1, tfoot => 1,
4025     thead => 1, tr => 1,
4026 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4027 wakaba 1.1 # MUST
4028     my $foster_parent_element;
4029     my $next_sibling;
4030     my $prev_sibling;
4031 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
4032     if ($self->{open_elements}->[$_]->[1] eq 'table') {
4033     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
4034 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
4035     $foster_parent_element = $parent;
4036 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
4037 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
4038     } else {
4039 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
4040 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
4041     }
4042     last OE;
4043     }
4044     } # OE
4045 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
4046 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
4047     unless defined $foster_parent_element;
4048     if (defined $prev_sibling and
4049     $prev_sibling->node_type == 3) {
4050     $prev_sibling->manakai_append_text ($token->{data});
4051     } else {
4052     $foster_parent_element->insert_before
4053     ($self->{document}->create_text_node ($token->{data}),
4054     $next_sibling);
4055     }
4056     } else {
4057 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4058 wakaba 1.1 }
4059    
4060     !!!next-token;
4061     redo B;
4062     } elsif ($token->{type} eq 'comment') {
4063     ## Copied from 'in table'
4064     my $comment = $self->{document}->create_comment ($token->{data});
4065 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4066 wakaba 1.1 !!!next-token;
4067     redo B;
4068     } elsif ($token->{type} eq 'start tag') {
4069     if ({
4070     tr => 1,
4071     th => 1, td => 1,
4072     }->{$token->{tag_name}}) {
4073 wakaba 1.3 unless ($token->{tag_name} eq 'tr') {
4074     !!!parse-error (type => 'missing start tag:tr');
4075     }
4076    
4077 wakaba 1.1 ## Clear back to table body context
4078     while (not {
4079     tbody => 1, tfoot => 1, thead => 1, html => 1,
4080 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4081     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4082     pop @{$self->{open_elements}};
4083 wakaba 1.1 }
4084    
4085 wakaba 1.3 $self->{insertion_mode} = 'in row';
4086 wakaba 1.1 if ($token->{tag_name} eq 'tr') {
4087     !!!insert-element ($token->{tag_name}, $token->{attributes});
4088     !!!next-token;
4089     } else {
4090     !!!insert-element ('tr');
4091     ## reprocess
4092     }
4093     redo B;
4094     } elsif ({
4095     caption => 1, col => 1, colgroup => 1,
4096     tbody => 1, tfoot => 1, thead => 1,
4097     }->{$token->{tag_name}}) {
4098     ## have an element in table scope
4099     my $i;
4100 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4101     my $node = $self->{open_elements}->[$_];
4102 wakaba 1.1 if ({
4103     tbody => 1, thead => 1, tfoot => 1,
4104     }->{$node->[1]}) {
4105     $i = $_;
4106     last INSCOPE;
4107     } elsif ({
4108     table => 1, html => 1,
4109     }->{$node->[1]}) {
4110     last INSCOPE;
4111     }
4112     } # INSCOPE
4113     unless (defined $i) {
4114 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4115 wakaba 1.1 ## Ignore the token
4116     !!!next-token;
4117     redo B;
4118     }
4119    
4120     ## Clear back to table body context
4121     while (not {
4122     tbody => 1, tfoot => 1, thead => 1, html => 1,
4123 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4124     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4125     pop @{$self->{open_elements}};
4126 wakaba 1.1 }
4127    
4128     ## As if <{current node}>
4129     ## have an element in table scope
4130     ## true by definition
4131    
4132     ## Clear back to table body context
4133     ## nop by definition
4134    
4135 wakaba 1.3 pop @{$self->{open_elements}};
4136     $self->{insertion_mode} = 'in table';
4137 wakaba 1.1 ## reprocess
4138     redo B;
4139     } elsif ($token->{tag_name} eq 'table') {
4140     ## NOTE: This is a code clone of "table in table"
4141 wakaba 1.3 !!!parse-error (type => 'not closed:table');
4142 wakaba 1.1
4143     ## As if </table>
4144     ## have a table element in table scope
4145     my $i;
4146 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4147     my $node = $self->{open_elements}->[$_];
4148 wakaba 1.1 if ($node->[1] eq 'table') {
4149     $i = $_;
4150     last INSCOPE;
4151     } elsif ({
4152     table => 1, html => 1,
4153     }->{$node->[1]}) {
4154     last INSCOPE;
4155     }
4156     } # INSCOPE
4157     unless (defined $i) {
4158 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:table');
4159 wakaba 1.1 ## Ignore tokens </table><table>
4160     !!!next-token;
4161     redo B;
4162     }
4163    
4164     ## generate implied end tags
4165     if ({
4166     dd => 1, dt => 1, li => 1, p => 1,
4167     td => 1, th => 1, tr => 1,
4168 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4169 wakaba 1.1 !!!back-token; # <table>
4170     $token = {type => 'end tag', tag_name => 'table'};
4171     !!!back-token;
4172     $token = {type => 'end tag',
4173 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4174 wakaba 1.1 redo B;
4175     }
4176    
4177 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
4178     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4179 wakaba 1.1 }
4180    
4181 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4182 wakaba 1.1
4183 wakaba 1.3 $self->_reset_insertion_mode;
4184 wakaba 1.1
4185     ## reprocess
4186     redo B;
4187     } else {
4188     #
4189     }
4190     } elsif ($token->{type} eq 'end tag') {
4191     if ({
4192     tbody => 1, tfoot => 1, thead => 1,
4193     }->{$token->{tag_name}}) {
4194     ## have an element in table scope
4195     my $i;
4196 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4197     my $node = $self->{open_elements}->[$_];
4198 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4199     $i = $_;
4200     last INSCOPE;
4201     } elsif ({
4202     table => 1, html => 1,
4203     }->{$node->[1]}) {
4204     last INSCOPE;
4205     }
4206     } # INSCOPE
4207     unless (defined $i) {
4208 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4209 wakaba 1.1 ## Ignore the token
4210     !!!next-token;
4211     redo B;
4212     }
4213    
4214     ## Clear back to table body context
4215     while (not {
4216     tbody => 1, tfoot => 1, thead => 1, html => 1,
4217 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4218     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4219     pop @{$self->{open_elements}};
4220 wakaba 1.1 }
4221    
4222 wakaba 1.3 pop @{$self->{open_elements}};
4223     $self->{insertion_mode} = 'in table';
4224 wakaba 1.1 !!!next-token;
4225     redo B;
4226     } elsif ($token->{tag_name} eq 'table') {
4227     ## have an element in table scope
4228     my $i;
4229 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4230     my $node = $self->{open_elements}->[$_];
4231 wakaba 1.1 if ({
4232     tbody => 1, thead => 1, tfoot => 1,
4233     }->{$node->[1]}) {
4234     $i = $_;
4235     last INSCOPE;
4236     } elsif ({
4237     table => 1, html => 1,
4238     }->{$node->[1]}) {
4239     last INSCOPE;
4240     }
4241     } # INSCOPE
4242     unless (defined $i) {
4243 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4244 wakaba 1.1 ## Ignore the token
4245     !!!next-token;
4246     redo B;
4247     }
4248    
4249     ## Clear back to table body context
4250     while (not {
4251     tbody => 1, tfoot => 1, thead => 1, html => 1,
4252 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4253     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4254     pop @{$self->{open_elements}};
4255 wakaba 1.1 }
4256    
4257     ## As if <{current node}>
4258     ## have an element in table scope
4259     ## true by definition
4260    
4261     ## Clear back to table body context
4262     ## nop by definition
4263    
4264 wakaba 1.3 pop @{$self->{open_elements}};
4265     $self->{insertion_mode} = 'in table';
4266 wakaba 1.1 ## reprocess
4267     redo B;
4268     } elsif ({
4269     body => 1, caption => 1, col => 1, colgroup => 1,
4270     html => 1, td => 1, th => 1, tr => 1,
4271     }->{$token->{tag_name}}) {
4272 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4273 wakaba 1.1 ## Ignore the token
4274     !!!next-token;
4275     redo B;
4276     } else {
4277     #
4278     }
4279     } else {
4280     #
4281     }
4282    
4283     ## As if in table
4284 wakaba 1.3 !!!parse-error (type => 'in table:'.$token->{tag_name});
4285 wakaba 1.1 $in_body->($insert_to_foster);
4286     redo B;
4287 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in row') {
4288 wakaba 1.1 if ($token->{type} eq 'character') {
4289     ## NOTE: This is a "character in table" code clone.
4290     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4291 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4292 wakaba 1.1
4293     unless (length $token->{data}) {
4294     !!!next-token;
4295     redo B;
4296     }
4297     }
4298    
4299 wakaba 1.3 !!!parse-error (type => 'in table:#character');
4300    
4301 wakaba 1.1 ## As if in body, but insert into foster parent element
4302     ## ISSUE: Spec says that "whenever a node would be inserted
4303     ## into the current node" while characters might not be
4304     ## result in a new Text node.
4305     $reconstruct_active_formatting_elements->($insert_to_foster);
4306    
4307     if ({
4308     table => 1, tbody => 1, tfoot => 1,
4309     thead => 1, tr => 1,
4310 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4311 wakaba 1.1 # MUST
4312     my $foster_parent_element;
4313     my $next_sibling;
4314     my $prev_sibling;
4315 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
4316     if ($self->{open_elements}->[$_]->[1] eq 'table') {
4317     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
4318 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
4319     $foster_parent_element = $parent;
4320 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
4321 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
4322     } else {
4323 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
4324 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
4325     }
4326     last OE;
4327     }
4328     } # OE
4329 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
4330 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
4331     unless defined $foster_parent_element;
4332     if (defined $prev_sibling and
4333     $prev_sibling->node_type == 3) {
4334     $prev_sibling->manakai_append_text ($token->{data});
4335     } else {
4336     $foster_parent_element->insert_before
4337     ($self->{document}->create_text_node ($token->{data}),
4338     $next_sibling);
4339     }
4340     } else {
4341 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4342 wakaba 1.1 }
4343    
4344     !!!next-token;
4345     redo B;
4346     } elsif ($token->{type} eq 'comment') {
4347     ## Copied from 'in table'
4348     my $comment = $self->{document}->create_comment ($token->{data});
4349 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4350 wakaba 1.1 !!!next-token;
4351     redo B;
4352     } elsif ($token->{type} eq 'start tag') {
4353     if ($token->{tag_name} eq 'th' or
4354     $token->{tag_name} eq 'td') {
4355     ## Clear back to table row context
4356     while (not {
4357     tr => 1, html => 1,
4358 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4359     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4360     pop @{$self->{open_elements}};
4361 wakaba 1.1 }
4362    
4363     !!!insert-element ($token->{tag_name}, $token->{attributes});
4364 wakaba 1.3 $self->{insertion_mode} = 'in cell';
4365 wakaba 1.1
4366     push @$active_formatting_elements, ['#marker', ''];
4367    
4368     !!!next-token;
4369     redo B;
4370     } elsif ({
4371     caption => 1, col => 1, colgroup => 1,
4372     tbody => 1, tfoot => 1, thead => 1, tr => 1,
4373     }->{$token->{tag_name}}) {
4374     ## As if </tr>
4375     ## have an element in table scope
4376     my $i;
4377 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4378     my $node = $self->{open_elements}->[$_];
4379 wakaba 1.1 if ($node->[1] eq 'tr') {
4380     $i = $_;
4381     last INSCOPE;
4382     } elsif ({
4383     table => 1, html => 1,
4384     }->{$node->[1]}) {
4385     last INSCOPE;
4386     }
4387     } # INSCOPE
4388     unless (defined $i) {
4389 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4390 wakaba 1.1 ## Ignore the token (no |tr| element is in table scope)
4391     !!!next-token;
4392     redo B;
4393     }
4394    
4395     ## Clear back to table row context
4396     while (not {
4397     tr => 1, html => 1,
4398 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4399     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4400     pop @{$self->{open_elements}};
4401 wakaba 1.1 }
4402    
4403 wakaba 1.3 pop @{$self->{open_elements}}; # tr
4404     $self->{insertion_mode} = 'in table body';
4405 wakaba 1.1 ## reprocess
4406     redo B;
4407     } elsif ($token->{tag_name} eq 'table') {
4408     ## NOTE: This is a code clone of "table in table"
4409 wakaba 1.3 !!!parse-error (type => 'not closed:table');
4410 wakaba 1.1
4411     ## As if </table>
4412     ## have a table element in table scope
4413     my $i;
4414 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4415     my $node = $self->{open_elements}->[$_];
4416 wakaba 1.1 if ($node->[1] eq 'table') {
4417     $i = $_;
4418     last INSCOPE;
4419     } elsif ({
4420     table => 1, html => 1,
4421     }->{$node->[1]}) {
4422     last INSCOPE;
4423     }
4424     } # INSCOPE
4425     unless (defined $i) {
4426 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:table');
4427 wakaba 1.1 ## Ignore tokens </table><table>
4428     !!!next-token;
4429     redo B;
4430     }
4431    
4432     ## generate implied end tags
4433     if ({
4434     dd => 1, dt => 1, li => 1, p => 1,
4435     td => 1, th => 1, tr => 1,
4436 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4437 wakaba 1.1 !!!back-token; # <table>
4438     $token = {type => 'end tag', tag_name => 'table'};
4439     !!!back-token;
4440     $token = {type => 'end tag',
4441 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4442 wakaba 1.1 redo B;
4443     }
4444    
4445 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
4446     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4447 wakaba 1.1 }
4448    
4449 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4450 wakaba 1.1
4451 wakaba 1.3 $self->_reset_insertion_mode;
4452 wakaba 1.1
4453     ## reprocess
4454     redo B;
4455     } else {
4456     #
4457     }
4458     } elsif ($token->{type} eq 'end tag') {
4459     if ($token->{tag_name} eq 'tr') {
4460     ## have an element in table scope
4461     my $i;
4462 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4463     my $node = $self->{open_elements}->[$_];
4464 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4465     $i = $_;
4466     last INSCOPE;
4467     } elsif ({
4468     table => 1, html => 1,
4469     }->{$node->[1]}) {
4470     last INSCOPE;
4471     }
4472     } # INSCOPE
4473     unless (defined $i) {
4474 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4475 wakaba 1.1 ## Ignore the token
4476     !!!next-token;
4477     redo B;
4478     }
4479    
4480     ## Clear back to table row context
4481     while (not {
4482     tr => 1, html => 1,
4483 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4484     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4485     pop @{$self->{open_elements}};
4486 wakaba 1.1 }
4487    
4488 wakaba 1.3 pop @{$self->{open_elements}}; # tr
4489     $self->{insertion_mode} = 'in table body';
4490 wakaba 1.1 !!!next-token;
4491     redo B;
4492     } elsif ($token->{tag_name} eq 'table') {
4493     ## As if </tr>
4494     ## have an element in table scope
4495     my $i;
4496 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4497     my $node = $self->{open_elements}->[$_];
4498 wakaba 1.1 if ($node->[1] eq 'tr') {
4499     $i = $_;
4500     last INSCOPE;
4501     } elsif ({
4502     table => 1, html => 1,
4503     }->{$node->[1]}) {
4504     last INSCOPE;
4505     }
4506     } # INSCOPE
4507     unless (defined $i) {
4508 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4509 wakaba 1.1 ## Ignore the token (the implied </tr> has no |tr| in table scope)
4510     !!!next-token;
4511     redo B;
4512     }
4513    
4514     ## Clear back to table row context
4515     while (not {
4516     tr => 1, html => 1,
4517 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4518     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4519     pop @{$self->{open_elements}};
4520 wakaba 1.1 }
4521    
4522 wakaba 1.3 pop @{$self->{open_elements}}; # tr
4523     $self->{insertion_mode} = 'in table body';
4524 wakaba 1.1 ## reprocess
4525     redo B;
4526     } elsif ({
4527     tbody => 1, tfoot => 1, thead => 1,
4528     }->{$token->{tag_name}}) {
4529     ## have an element in table scope
4530     my $i;
4531 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4532     my $node = $self->{open_elements}->[$_];
4533 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4534     $i = $_;
4535     last INSCOPE;
4536     } elsif ({
4537     table => 1, html => 1,
4538     }->{$node->[1]}) {
4539     last INSCOPE;
4540     }
4541     } # INSCOPE
4542     unless (defined $i) {
4543 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4544 wakaba 1.1 ## Ignore the token
4545     !!!next-token;
4546     redo B;
4547     }
4548    
4549     ## As if </tr>
4550     ## have an element in table scope
4551     undef $i; ## reset the index found by the previous scope check instead of redeclaring it
4552 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4553     my $node = $self->{open_elements}->[$_];
4554 wakaba 1.1 if ($node->[1] eq 'tr') {
4555     $i = $_;
4556     last INSCOPE;
4557     } elsif ({
4558     table => 1, html => 1,
4559     }->{$node->[1]}) {
4560     last INSCOPE;
4561     }
4562     } # INSCOPE
4563     unless (defined $i) {
4564 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:tr');
4565 wakaba 1.1 ## Ignore the token
4566     !!!next-token;
4567     redo B;
4568     }
4569    
4570     ## Clear back to table row context
4571     while (not {
4572     tr => 1, html => 1,
4573 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4574     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4575     pop @{$self->{open_elements}};
4576 wakaba 1.1 }
4577    
4578 wakaba 1.3 pop @{$self->{open_elements}}; # tr
4579     $self->{insertion_mode} = 'in table body';
4580 wakaba 1.1 ## reprocess
4581     redo B;
4582     } elsif ({
4583     body => 1, caption => 1, col => 1,
4584     colgroup => 1, html => 1, td => 1, th => 1,
4585     }->{$token->{tag_name}}) {
4586 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4587 wakaba 1.1 ## Ignore the token
4588     !!!next-token;
4589     redo B;
4590     } else {
4591     #
4592     }
4593     } else {
4594     #
4595     }
4596    
4597     ## As if in table
4598 wakaba 1.3 !!!parse-error (type => 'in table:'.$token->{tag_name});
4599 wakaba 1.1 $in_body->($insert_to_foster);
4600     redo B;
4601 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in cell') {
4602 wakaba 1.1 if ($token->{type} eq 'character') {
4603     ## NOTE: This is a code clone of "character in body".
4604     $reconstruct_active_formatting_elements->($insert_to_current);
4605    
4606 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4607 wakaba 1.1
4608     !!!next-token;
4609     redo B;
4610     } elsif ($token->{type} eq 'comment') {
4611     ## NOTE: This is a code clone of "comment in body".
4612     my $comment = $self->{document}->create_comment ($token->{data});
4613 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4614 wakaba 1.1 !!!next-token;
4615     redo B;
4616     } elsif ($token->{type} eq 'start tag') {
4617     if ({
4618     caption => 1, col => 1, colgroup => 1,
4619     tbody => 1, td => 1, tfoot => 1, th => 1,
4620     thead => 1, tr => 1,
4621     }->{$token->{tag_name}}) {
4622     ## have an element in table scope
4623     my $tn;
4624 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4625     my $node = $self->{open_elements}->[$_];
4626 wakaba 1.1 if ($node->[1] eq 'td' or $node->[1] eq 'th') {
4627     $tn = $node->[1];
4628     last INSCOPE;
4629     } elsif ({
4630     table => 1, html => 1,
4631     }->{$node->[1]}) {
4632     last INSCOPE;
4633     }
4634     } # INSCOPE
4635     unless (defined $tn) {
4636 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4637 wakaba 1.1 ## Ignore the token
4638     !!!next-token;
4639     redo B;
4640     }
4641    
4642     ## Close the cell
4643     !!!back-token; # <?>
4644     $token = {type => 'end tag', tag_name => $tn};
4645     redo B;
4646     } else {
4647     #
4648     }
4649     } elsif ($token->{type} eq 'end tag') {
4650     if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
4651     ## have an element in table scope
4652     my $i;
4653 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4654     my $node = $self->{open_elements}->[$_];
4655 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4656     $i = $_;
4657     last INSCOPE;
4658     } elsif ({
4659     table => 1, html => 1,
4660     }->{$node->[1]}) {
4661     last INSCOPE;
4662     }
4663     } # INSCOPE
4664     unless (defined $i) {
4665 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4666 wakaba 1.1 ## Ignore the token
4667     !!!next-token;
4668     redo B;
4669     }
4670    
4671     ## generate implied end tags
4672     if ({
4673     dd => 1, dt => 1, li => 1, p => 1,
4674     td => ($token->{tag_name} eq 'th'),
4675     th => ($token->{tag_name} eq 'td'),
4676     tr => 1,
4677 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4678 wakaba 1.1 !!!back-token;
4679     $token = {type => 'end tag',
4680 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4681 wakaba 1.1 redo B;
4682     }
4683    
4684 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
4685     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4686 wakaba 1.1 }
4687    
4688 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4689 wakaba 1.1
4690     $clear_up_to_marker->();
4691    
4692 wakaba 1.3 $self->{insertion_mode} = 'in row';
4693 wakaba 1.1
4694     !!!next-token;
4695     redo B;
4696     } elsif ({
4697     body => 1, caption => 1, col => 1,
4698     colgroup => 1, html => 1,
4699     }->{$token->{tag_name}}) {
4700 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4701 wakaba 1.1 ## Ignore the token
4702     !!!next-token;
4703     redo B;
4704     } elsif ({
4705     table => 1, tbody => 1, tfoot => 1,
4706     thead => 1, tr => 1,
4707     }->{$token->{tag_name}}) {
4708     ## have an element in table scope
4709     my $i;
4710     my $tn;
4711 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4712     my $node = $self->{open_elements}->[$_];
4713 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4714     $i = $_;
4715     last INSCOPE;
4716     } elsif ($node->[1] eq 'td' or $node->[1] eq 'th') {
4717     $tn = $node->[1];
4718     ## NOTE: There is exactly one |td| or |th| element
4719     ## in scope in the stack of open elements by definition.
4720     } elsif ({
4721     table => 1, html => 1,
4722     }->{$node->[1]}) {
4723     last INSCOPE;
4724     }
4725     } # INSCOPE
4726     unless (defined $i) {
4727 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4728 wakaba 1.1 ## Ignore the token
4729     !!!next-token;
4730     redo B;
4731     }
4732    
4733     ## Close the cell
4734     !!!back-token; # </?>
4735     $token = {type => 'end tag', tag_name => $tn};
4736     redo B;
4737     } else {
4738     #
4739     }
4740     } else {
4741     #
4742     }
4743    
4744     $in_body->($insert_to_current);
4745     redo B;
4746 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in select') {
4747 wakaba 1.1 if ($token->{type} eq 'character') {
4748 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4749 wakaba 1.1 !!!next-token;
4750     redo B;
4751     } elsif ($token->{type} eq 'comment') {
4752     my $comment = $self->{document}->create_comment ($token->{data});
4753 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4754 wakaba 1.1 !!!next-token;
4755     redo B;
4756     } elsif ($token->{type} eq 'start tag') {
4757     if ($token->{tag_name} eq 'option') {
4758 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
4759 wakaba 1.1 ## As if </option>
4760 wakaba 1.3 pop @{$self->{open_elements}};
4761 wakaba 1.1 }
4762    
4763     !!!insert-element ($token->{tag_name}, $token->{attributes});
4764     !!!next-token;
4765     redo B;
4766     } elsif ($token->{tag_name} eq 'optgroup') {
4767 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
4768 wakaba 1.1 ## As if </option>
4769 wakaba 1.3 pop @{$self->{open_elements}};
4770 wakaba 1.1 }
4771    
4772 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
4773 wakaba 1.1 ## As if </optgroup>
4774 wakaba 1.3 pop @{$self->{open_elements}};
4775 wakaba 1.1 }
4776    
4777     !!!insert-element ($token->{tag_name}, $token->{attributes});
4778     !!!next-token;
4779     redo B;
4780     } elsif ($token->{tag_name} eq 'select') {
4781 wakaba 1.3 !!!parse-error (type => 'not closed:select');
4782 wakaba 1.1 ## As if </select> instead
4783     ## have an element in table scope
4784     my $i;
4785 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4786     my $node = $self->{open_elements}->[$_];
4787 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4788     $i = $_;
4789     last INSCOPE;
4790     } elsif ({
4791     table => 1, html => 1,
4792     }->{$node->[1]}) {
4793     last INSCOPE;
4794     }
4795     } # INSCOPE
4796     unless (defined $i) {
4797 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:select');
4798 wakaba 1.1 ## Ignore the token
4799     !!!next-token;
4800     redo B;
4801     }
4802    
4803 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4804 wakaba 1.1
4805 wakaba 1.3 $self->_reset_insertion_mode;
4806 wakaba 1.1
4807     !!!next-token;
4808     redo B;
4809     } else {
4810     #
4811     }
4812     } elsif ($token->{type} eq 'end tag') {
4813     if ($token->{tag_name} eq 'optgroup') {
4814 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option' and
4815     $self->{open_elements}->[-2]->[1] eq 'optgroup') {
4816 wakaba 1.1 ## As if </option>
4817 wakaba 1.3 splice @{$self->{open_elements}}, -2;
4818     } elsif ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
4819     pop @{$self->{open_elements}};
4820 wakaba 1.1 } else {
4821 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4822 wakaba 1.1 ## Ignore the token
4823     }
4824     !!!next-token;
4825     redo B;
4826     } elsif ($token->{tag_name} eq 'option') {
4827 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
4828     pop @{$self->{open_elements}};
4829 wakaba 1.1 } else {
4830 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4831 wakaba 1.1 ## Ignore the token
4832     }
4833     !!!next-token;
4834     redo B;
4835     } elsif ($token->{tag_name} eq 'select') {
4836     ## have an element in table scope
4837     my $i;
4838 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4839     my $node = $self->{open_elements}->[$_];
4840 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4841     $i = $_;
4842     last INSCOPE;
4843     } elsif ({
4844     table => 1, html => 1,
4845     }->{$node->[1]}) {
4846     last INSCOPE;
4847     }
4848     } # INSCOPE
4849     unless (defined $i) {
4850 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4851 wakaba 1.1 ## Ignore the token
4852     !!!next-token;
4853     redo B;
4854     }
4855    
4856 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4857 wakaba 1.1
4858 wakaba 1.3 $self->_reset_insertion_mode;
4859 wakaba 1.1
4860     !!!next-token;
4861     redo B;
4862     } elsif ({
4863     caption => 1, table => 1, tbody => 1,
4864     tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
4865     }->{$token->{tag_name}}) {
4866 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4867 wakaba 1.1
4868     ## have an element in table scope
4869     my $i;
4870 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4871     my $node = $self->{open_elements}->[$_];
4872 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4873     $i = $_;
4874     last INSCOPE;
4875     } elsif ({
4876     table => 1, html => 1,
4877     }->{$node->[1]}) {
4878     last INSCOPE;
4879     }
4880     } # INSCOPE
4881     unless (defined $i) {
4882     ## Ignore the token
4883     !!!next-token;
4884     redo B;
4885     }
4886    
4887     ## As if </select>
4888     ## have an element in table scope
4889     undef $i;
4890 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4891     my $node = $self->{open_elements}->[$_];
4892 wakaba 1.1 if ($node->[1] eq 'select') {
4893     $i = $_;
4894     last INSCOPE;
4895     } elsif ({
4896     table => 1, html => 1,
4897     }->{$node->[1]}) {
4898     last INSCOPE;
4899     }
4900     } # INSCOPE
4901     unless (defined $i) {
4902 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:select');
4903 wakaba 1.1 ## Ignore the </select> token
4904     !!!next-token; ## TODO: ok?
4905     redo B;
4906     }
4907    
4908 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4909 wakaba 1.1
4910 wakaba 1.3 $self->_reset_insertion_mode;
4911 wakaba 1.1
4912     ## reprocess
4913     redo B;
4914     } else {
4915     #
4916     }
4917     } else {
4918     #
4919     }
4920    
4921 wakaba 1.3 !!!parse-error (type => 'in select:'.$token->{tag_name});
4922 wakaba 1.1 ## Ignore the token
4923     !!!next-token;
4924     redo B;
4925 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after body') {
4926 wakaba 1.1 if ($token->{type} eq 'character') {
4927     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4928     my $data = $1; ## White space stripped above; insert it as if in body
4929     $reconstruct_active_formatting_elements->($insert_to_current);
4930    
4931 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($data);
4932 wakaba 1.1
4933     unless (length $token->{data}) {
4934     !!!next-token;
4935     redo B;
4936     }
4937     }
4938    
4939     #
4940 wakaba 1.3 !!!parse-error (type => 'after body:#'.$token->{type});
4941 wakaba 1.1 } elsif ($token->{type} eq 'comment') {
4942     my $comment = $self->{document}->create_comment ($token->{data});
4943 wakaba 1.3 $self->{open_elements}->[0]->[0]->append_child ($comment);
4944 wakaba 1.1 !!!next-token;
4945     redo B;
4946 wakaba 1.3 } elsif ($token->{type} eq 'start tag') {
4947     !!!parse-error (type => 'after body:'.$token->{tag_name});
4948     #
4949 wakaba 1.1 } elsif ($token->{type} eq 'end tag') {
4950     if ($token->{tag_name} eq 'html') {
4951 wakaba 1.3 if (defined $self->{inner_html_node}) {
4952     !!!parse-error (type => 'unmatched end tag:html');
4953     ## Ignore the token
4954     !!!next-token;
4955     redo B;
4956     } else {
4957     $phase = 'trailing end';
4958     !!!next-token;
4959     redo B;
4960     }
4961 wakaba 1.1 } else {
4962 wakaba 1.3 !!!parse-error (type => 'after body:/'.$token->{tag_name});
4963 wakaba 1.1 }
4964     } else {
4965 wakaba 1.3 !!!parse-error (type => 'after body:#'.$token->{type});
4966 wakaba 1.1 }
4967    
4968 wakaba 1.3 $self->{insertion_mode} = 'in body';
4969 wakaba 1.1 ## reprocess
4970     redo B;
4971 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in frameset') {
4972 wakaba 1.1 if ($token->{type} eq 'character') {
4973     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4974 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1); ## append only the stripped white space
4975 wakaba 1.1
4976     unless (length $token->{data}) {
4977     !!!next-token;
4978     redo B;
4979     }
4980     }
4981    
4982     #
4983     } elsif ($token->{type} eq 'comment') {
4984     my $comment = $self->{document}->create_comment ($token->{data});
4985 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4986 wakaba 1.1 !!!next-token;
4987     redo B;
4988     } elsif ($token->{type} eq 'start tag') {
4989     if ($token->{tag_name} eq 'frameset') {
4990     !!!insert-element ($token->{tag_name}, $token->{attributes});
4991     !!!next-token;
4992     redo B;
4993     } elsif ($token->{tag_name} eq 'frame') {
4994     !!!insert-element ($token->{tag_name}, $token->{attributes});
4995 wakaba 1.3 pop @{$self->{open_elements}};
4996 wakaba 1.1 !!!next-token;
4997     redo B;
4998     } elsif ($token->{tag_name} eq 'noframes') {
4999     $in_body->($insert_to_current);
5000     redo B;
5001     } else {
5002     #
5003     }
5004     } elsif ($token->{type} eq 'end tag') {
5005     if ($token->{tag_name} eq 'frameset') {
5006 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html' and
5007     @{$self->{open_elements}} == 1) {
5008     !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
5009 wakaba 1.1 ## Ignore the token
5010     !!!next-token;
5011     } else {
5012 wakaba 1.3 pop @{$self->{open_elements}};
5013 wakaba 1.1 !!!next-token;
5014     }
5015    
5016     ## if not inner_html and
5017 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'frameset') {
5018     $self->{insertion_mode} = 'after frameset';
5019 wakaba 1.1 }
5020     redo B;
5021     } else {
5022     #
5023     }
5024     } else {
5025     #
5026     }
5027    
5028 wakaba 1.3 if (defined $token->{tag_name}) {
5029 wakaba 1.30 !!!parse-error (type => 'in frameset:'.($token->{type} eq 'end tag' ? '/' : '').$token->{tag_name});
5030 wakaba 1.3 } else {
5031     !!!parse-error (type => 'in frameset:#'.$token->{type});
5032     }
5033 wakaba 1.1 ## Ignore the token
5034     !!!next-token;
5035     redo B;
5036 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after frameset') {
5037 wakaba 1.1 if ($token->{type} eq 'character') {
5038     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5039 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1); ## append only the stripped white space
5040 wakaba 1.1
5041     unless (length $token->{data}) {
5042     !!!next-token;
5043     redo B;
5044     }
5045     }
5046    
5047     #
5048     } elsif ($token->{type} eq 'comment') {
5049     my $comment = $self->{document}->create_comment ($token->{data});
5050 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
5051 wakaba 1.1 !!!next-token;
5052     redo B;
5053     } elsif ($token->{type} eq 'start tag') {
5054     if ($token->{tag_name} eq 'noframes') {
5055     $in_body->($insert_to_current);
5056     redo B;
5057     } else {
5058     #
5059     }
5060     } elsif ($token->{type} eq 'end tag') {
5061     if ($token->{tag_name} eq 'html') {
5062     $phase = 'trailing end';
5063     !!!next-token;
5064     redo B;
5065     } else {
5066     #
5067     }
5068     } else {
5069     #
5070     }
5071    
5072 wakaba 1.3 if (defined $token->{tag_name}) { ## tag token: end tags are reported with a '/' prefix
5073 wakaba 1.30 !!!parse-error (type => 'after frameset:'.($token->{type} eq 'end tag' ? '/' : '').$token->{tag_name});
5074 wakaba 1.3 } else {
5075     !!!parse-error (type => 'after frameset:#'.$token->{type});
5076     }
5077 wakaba 1.1 ## Ignore the token
5078     !!!next-token;
5079     redo B;
5080    
5081     ## ISSUE: An issue in spec there
5082     } else {
5083 wakaba 1.3 die "$0: $self->{insertion_mode}: Unknown insertion mode";
5084 wakaba 1.1 }
5085     }
5086     } elsif ($phase eq 'trailing end') {
5087     ## states in the main stage is preserved yet # MUST
5088    
5089     if ($token->{type} eq 'DOCTYPE') {
5090 wakaba 1.3 !!!parse-error (type => 'after html:#DOCTYPE');
5091 wakaba 1.1 ## Ignore the token
5092     !!!next-token;
5093     redo B;
5094     } elsif ($token->{type} eq 'comment') {
5095     my $comment = $self->{document}->create_comment ($token->{data});
5096     $self->{document}->append_child ($comment);
5097     !!!next-token;
5098     redo B;
5099     } elsif ($token->{type} eq 'character') {
5100     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5101     my $data = $1;
5102     ## As if in the main phase.
5103     ## NOTE: The insertion mode in the main phase
5104     ## just before the phase has been changed to the trailing
5105     ## end phase is either "after body" or "after frameset".
5106     $reconstruct_active_formatting_elements->($insert_to_current)
5107     if $phase eq 'main';
5108    
5109 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($data);
5110 wakaba 1.1
5111     unless (length $token->{data}) {
5112     !!!next-token;
5113     redo B;
5114     }
5115     }
5116    
5117 wakaba 1.3 !!!parse-error (type => 'after html:#character');
5118 wakaba 1.1 $phase = 'main';
5119     ## reprocess
5120     redo B;
5121     } elsif ($token->{type} eq 'start tag' or
5122     $token->{type} eq 'end tag') {
5123 wakaba 1.30 !!!parse-error (type => 'after html:'.($token->{type} eq 'end tag' ? '/' : '').$token->{tag_name});
5124 wakaba 1.1 $phase = 'main';
5125     ## reprocess
5126     redo B;
5127     } elsif ($token->{type} eq 'end-of-file') {
5128     ## Stop parsing
5129     last B;
5130     } else {
5131     die "$0: $token->{type}: Unknown token";
5132     }
5133     }
5134     } # B
5135    
5136     ## Stop parsing # MUST
5137    
5138     ## TODO: script stuffs
5139 wakaba 1.3 } # _tree_construct_main
5140    
## Replaces the children of |$node| with the result of parsing a string
## as HTML.
##
## Arguments (after the invocant class):
##   $node    - a Document (node type 9) or an Element (node type 1).
##   string   - the markup to parse; it is read through a reference to
##              the caller's argument rather than copied.
##   $onerror - optional code reference; it receives each parse error as
##              a key/value list.  In the element case |line| and
##              |column| are appended to that list, and a default handler
##              that |warn|s is used when |$onerror| is false.
##
## Dies if |$node| is neither a Document nor an Element.
sub set_inner_html ($$$) {
  my $class = shift;
  my $node = shift;
  my $input = \$_[0];
  my $onerror = $_[1];

  my $node_type = $node->node_type;
  if ($node_type == 9) {
    # MUST

    ## Step 1 # MUST
    ## TODO: If the document has an active parser, ...
    ## ISSUE: There is an issue in the spec.

    ## Step 2 # MUST
    ## Take a snapshot first: children are removed while iterating.
    my @old_children = @{$node->child_nodes};
    $node->remove_child ($_) for @old_children;

    ## Step 3, 4, 5 # MUST
    $class->parse_string ($$input => $node, $onerror);
  } elsif ($node_type == 1) {
    ## TODO: If non-html element

    ## NOTE: Most of this code is copied from |parse_string|

    ## Step 1 # MUST
    ## The fragment is parsed into a scratch document created from the
    ## implementation of the element's owner document.
    my $this_doc = $node->owner_document;
    my $doc = $this_doc->implementation->create_document;
    $doc->manakai_is_html (1);
    my $parser = $class->new;
    $parser->{document} = $doc;

    ## Step 9 # MUST
    my $pos = 0;     # index of the next unread character in |$$input|
    my $line = 1;
    my $column = 0;
    $parser->{set_next_input_character} = sub {
      ## NOTE: The variable must be named |$self|; the |!!!| macros
      ## used below are presumably expanded in terms of it.
      my $self = shift;

      ## Shift the history of previously read characters by one.
      pop @{$self->{prev_input_character}};
      unshift @{$self->{prev_input_character}}, $self->{next_input_character};

      ## End of input is reported as -1.
      if ($pos >= length $$input) {
        $self->{next_input_character} = -1;
        return;
      }

      my $code = ord substr ($$input, $pos, 1);
      $pos++;
      $column++;
      $self->{next_input_character} = $code;

      if ($code == 0x000A) { # LF
        $line++;
        $column = 0;
      } elsif ($code == 0x000D) { # CR
        ## A CR LF pair is consumed as a single newline.
        $pos++ if substr ($$input, $pos, 1) eq "\x0A";
        $self->{next_input_character} = 0x000A; # LF # MUST
        $line++;
        $column = 0;
      } elsif ($code > 0x10FFFF) {
        $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
      } elsif ($code == 0x0000) { # NULL
        !!!parse-error (type => 'NULL');
        $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
      }
    };
    $parser->{prev_input_character} = [-1, -1, -1];
    $parser->{next_input_character} = -1;

    ## Fall back to a handler that |warn|s when the caller gave none.
    my $report = $onerror || sub {
      my (%opt) = @_;
      warn "Parse error ($opt{type}) at line $opt{line} column $opt{column}\n";
    };
    $parser->{parse_error} = sub {
      $report->(@_, line => $line, column => $column);
    };

    $parser->_initialize_tokenizer;
    $parser->_initialize_tree_constructor;

    ## Step 2
    ## The initial content model flag is chosen from the local name of
    ## the context element.
    my $node_ln = $node->local_name;
    my $content_model_of = {
      title => 'RCDATA',
      textarea => 'RCDATA',
      style => 'CDATA',
      script => 'CDATA',
      xmp => 'CDATA',
      iframe => 'CDATA',
      noembed => 'CDATA',
      noframes => 'CDATA',
      noscript => 'CDATA',
      plaintext => 'PLAINTEXT',
    };
    $parser->{content_model_flag} = $content_model_of->{$node_ln} || 'PCDATA';
        ## ISSUE: What is "the name of the element"? local name?

    $parser->{inner_html_node} = [$node, $node_ln];

    ## Step 4
    my $root = $doc->create_element_ns
        ('http://www.w3.org/1999/xhtml', [undef, 'html']);

    ## Step 5 # MUST
    $doc->append_child ($root);

    ## Step 6 # MUST
    push @{$parser->{open_elements}}, [$root, 'html'];

    undef $parser->{head_element};

    ## Step 7 # MUST
    $parser->_reset_insertion_mode;

    ## Step 8 # MUST
    ## The nearest XHTML-namespace |form| element among |$node| and its
    ## ancestors, if any, becomes the parser's form element.
    for (my $anode = $node; defined $anode; $anode = $anode->parent_node) {
      next unless $anode->node_type == 1;
      my $nsuri = $anode->namespace_uri;
      next unless defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml';
      next unless $anode->local_name eq 'form'; ## TODO: case?
      $parser->{form_element} = $anode;
      last;
    }

    ## Step 3 # MUST
    ## Step 10 # MUST
    {
      my $self = $parser;
      !!!next-token;
    }
    $parser->_tree_construction_main;

    ## Step 11 # MUST
    my @old_children = @{$node->child_nodes};
    $node->remove_child ($_) for @old_children;
    ## ISSUE: mutation events? read-only?

    ## Step 12 # MUST
    ## Move the parsed nodes from the scratch root into |$node|.
    my @new_children = @{$root->child_nodes};
    for my $child (@new_children) {
      $this_doc->adopt_node ($child);
      $node->append_child ($child);
    }
    ## ISSUE: mutation events?

    $parser->_terminate_tree_constructor;
  } else {
    die "$0: |set_inner_html| is not defined for node of type $node_type";
  }
} # set_inner_html
5294    
5295     } # tree construction stage
5296 wakaba 1.1
## Serializes the child nodes of |$node| as an HTML string.
##
## Arguments:
##   (invocant) - ignored.
##   $node      - the node whose children are serialized.
##   $on_error  - optional code reference; it is invoked with the
##                offending child node whenever a node of an unsupported
##                type is encountered.
##
## Returns a reference to the serialized string.
sub get_inner_html ($$$) {
  my (undef, $node, $on_error) = @_;

  ## Elements whose text content is emitted literally (unescaped).
  ## The same table is used for the ancestors of |$node| and for the
  ## elements found while walking its descendants.
  my $literal_text_element = {
    style => 1, script => 1, xmp => 1, iframe => 1,
    noembed => 1, noframes => 1, noscript => 1,
    plaintext => 1,
  };

  ## Elements that are serialized as a start tag only.
  my $void_element = {
    area => 1, base => 1, basefont => 1, bgsound => 1,
    br => 1, col => 1, embed => 1, frame => 1, hr => 1,
    img => 1, input => 1, link => 1, meta => 1, param => 1,
    spacer => 1, wbr => 1,
  };

  ## Step 1
  my $s = '';

  ## Text is emitted literally if |$node| itself or any of its ancestors
  ## is an XHTML-namespace literal-text element.
  my $in_cdata;
  my $parent = $node;
  while (defined $parent) {
    if ($parent->node_type == 1) {
      ## The namespace URI can be |undef| for null-namespace elements.
      my $nsuri = $parent->namespace_uri;
      if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml' and
          $literal_text_element->{$parent->local_name}) { ## TODO: case thingy
        $in_cdata = 1;
        last;
      }
    }
    $parent = $parent->parent_node;
  }

  ## Step 2
  ## |@node| is a work queue processed from the front.  Besides nodes
  ## it holds plain strings: end tags to be emitted, and the marker
  ## 'cdata-out' which ends the literal-text mode.
  my @node = @{$node->child_nodes};
  C: while (@node) {
    my $child = shift @node;
    unless (ref $child) {
      if ($child eq 'cdata-out') {
        $in_cdata = 0;
      } else {
        $s .= $child; # end tag
      }
      next C;
    }

    my $nt = $child->node_type;
    if ($nt == 1) { # Element
      my $tag_name = $child->tag_name; ## TODO: manakai_tag_name
      $s .= '<' . $tag_name;
      ## NOTE: Non-HTML case:
      ## <http://permalink.gmane.org/gmane.org.w3c.whatwg.discuss/11191>

      my @attrs = @{$child->attributes}; # sort order MUST be stable
      for my $attr (@attrs) { # order is implementation dependent
        my $attr_name = $attr->name; ## TODO: manakai_name
        $s .= ' ' . $attr_name . '="';
        my $attr_value = $attr->value;
        ## escape
        $attr_value =~ s/&/&amp;/g;
        $attr_value =~ s/</&lt;/g;
        $attr_value =~ s/>/&gt;/g;
        $attr_value =~ s/"/&quot;/g;
        $s .= $attr_value . '"';
      }
      $s .= '>';

      ## No children and no end tag for void elements.
      next C if $void_element->{$tag_name};

      $s .= "\x0A" if $tag_name eq 'pre' or $tag_name eq 'textarea';

      if (not $in_cdata and $literal_text_element->{$tag_name}) {
        ## The marker is queued after the end tag (see the |unshift|
        ## below), so the literal-text mode covers exactly this element.
        unshift @node, 'cdata-out';
        $in_cdata = 1;
      }

      unshift @node, @{$child->child_nodes}, '</' . $tag_name . '>';
    } elsif ($nt == 3 or $nt == 4) { # Text, CDATASection
      if ($in_cdata) {
        $s .= $child->data;
      } else {
        my $value = $child->data;
        $value =~ s/&/&amp;/g;
        $value =~ s/</&lt;/g;
        $value =~ s/>/&gt;/g;
        $value =~ s/"/&quot;/g;
        $s .= $value;
      }
    } elsif ($nt == 8) { # Comment
      $s .= '<!--' . $child->data . '-->';
    } elsif ($nt == 10) { # DocumentType
      $s .= '<!DOCTYPE ' . $child->name . '>';
    } elsif ($nt == 5) { # entrefs
      ## The children go to the FRONT of the queue so that they are
      ## serialized in place, before the following siblings and before
      ## the end tag of the parent element.
      unshift @node, @{$child->child_nodes};
    } else {
      $on_error->($child) if defined $on_error;
    }
    ## ISSUE: This code does not support PIs.
  } # C

  ## Step 3
  return \$s;
} # get_inner_html
5396    
5397     1;
5398 wakaba 1.30 # $Date: 2007/06/25 11:05:57 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24