/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Contents of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory | Revision Log


Revision 1.17 - (hide annotations) (download) (as text)
Sat Jun 23 08:15:21 2007 UTC (17 years, 4 months ago) by wakaba
Branch: MAIN
Changes since 1.16: +8 -15 lines
File MIME type: application/x-wais-source
++ whatpm/t/ChangeLog	23 Jun 2007 08:10:55 -0000
	* tokenizer-test-1.test: Tests for |<| in tags are
	added.  (HTML5 revisions 900, 901, 902, and 911.)

2007-06-23  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	23 Jun 2007 07:51:12 -0000
	* HTML.pm.src: HTML5 revisions 900, 901, 902, and 911 (<
	in tags).

2007-06-23  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.2 package Whatpm::HTML;
2 wakaba 1.1 use strict;
3 wakaba 1.17 our $VERSION=do{my @r=(q$Revision: 1.16 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.1
5     ## This is an early version of an HTML parser.
6    
## Names of elements whose start tag may end with "/>".  The tokenizer
## consults this set when it meets a "/" inside a tag: for a start tag
## with one of these names, immediately followed by ">", no |nestc|
## parse error is reported.
my $permitted_slash_tag_name = {
  map { $_ => 1 } qw(
    base link meta hr br img embed param area col input
  )
};
20    
## Replacement table for the code points 0x80..0x9F.  Each key is one of
## those code points (as a number); the value is the code point to use
## in its place.  0xFFFD (REPLACEMENT CHARACTER) is used for entries
## that have no dedicated replacement.
## NOTE(review): the values look like the Windows-1252 assignments for
## these bytes; the code that consults this table is outside this part
## of the file -- confirm there.
my $c1_entity_char = do {
  my @replacement = (
    0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, # 0x80..0x87
    0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD, # 0x88..0x8F
    0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, # 0x90..0x97
    0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178, # 0x98..0x9F
  );
  my %char_for;
  @char_for{0x80 .. 0x9F} = @replacement;
  \%char_for;
}; # $c1_entity_char
55 wakaba 1.1
## Set of element names belonging to the "special" category; a name is
## a member when it is present as a key (the value is always 1).
my $special_category = {
  map { $_ => 1 } qw(
    address area base basefont bgsound
    blockquote body br center col colgroup
    dd dir div dl dt embed fieldset
    form frame frameset h1 h2 h3
    h4 h5 h6 head hr iframe image
    img input isindex li link listing
    menu meta noembed noframes noscript
    ol optgroup option p param plaintext
    pre script select spacer style tbody
    textarea tfoot thead title tr ul wbr
  )
};
## Set of element names belonging to the "scoping" category; a name is
## a member when it is present as a key (the value is always 1).
my $scoping_category = {
  map { $_ => 1 } qw(
    button caption html marquee object
    table td th
  )
};
## Set of element names belonging to the "formatting" category; a name
## is a member when it is present as a key (the value is always 1).
## The entry for the |strike| element used to be misspelt "strile",
## which left |strike| out of this set.
my $formatting_category = {
  a => 1, b => 1, big => 1, em => 1, font => 1, i => 1, nobr => 1,
  s => 1, small => 1, strike => 1, strong => 1, tt => 1, u => 1,
};
# $phrasing_category: all other elements
77    
## parse_string ($invocant, $string, $document [, $onerror])
##
## Creates a fresh parser with |new|, installs a character reader over
## $string and an error reporter, runs the tokenizer and the tree
## constructor, and returns $document (stored as |$self->{document}|).
##
## $onerror, if given, is called for every parse error with a key/value
## list: the error's own arguments followed by |line| and |column|.
## Without $onerror, errors are reported with |warn|.
sub parse_string ($$$;$) {
  my $self = shift->new;
  my $input_ref = \$_[0];   # refer to the caller's string, don't copy it
  $self->{document} = $_[1];

  ## NOTE: |set_inner_html| copies most of this method's code

  my $pos = 0;      # index of the next unread character of the input
  my $line = 1;     # current line, as reported in parse errors
  my $column = 0;   # characters read since the last line break

  ## History of the three previous input characters (newest first) and
  ## the current input character; -1 means "none".
  $self->{prev_input_character} = [-1, -1, -1];
  $self->{next_input_character} = -1;

  ## The reader: advances |next_input_character| by one input character.
  $self->{set_next_input_character} = sub {
    my $self = shift;

    ## Drop the oldest history entry and remember the current character.
    pop @{$self->{prev_input_character}};
    unshift @{$self->{prev_input_character}}, $self->{next_input_character};

    if ($pos >= length $$input_ref) {
      $self->{next_input_character} = -1;   # input exhausted
      return;
    }

    my $char = $self->{next_input_character}
        = ord substr $$input_ref, $pos++, 1;
    $column++;

    if ($char == 0x000A) { # LF
      $line++;
      $column = 0;
    } elsif ($char == 0x000D) { # CR
      ## CR and CR LF are both delivered as a single LF.
      $pos++ if substr ($$input_ref, $pos, 1) eq "\x0A";
      $self->{next_input_character} = 0x000A; # LF # MUST
      $line++;
      $column = 0;
    } elsif ($char > 0x10FFFF) {
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    } elsif ($char == 0x0000) { # NULL
      !!!parse-error (type => 'NULL');
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    }
  };

  ## Error reporting: the caller's handler, or a |warn| by default.
  my $onerror = $_[2] || sub {
    my (%opt) = @_;
    warn "Parse error ($opt{type}) at line $opt{line} column $opt{column}\n";
  };
  $self->{parse_error} = sub {
    ## Append the current position to whatever the parser reported.
    $onerror->(@_, line => $line, column => $column);
  };

  $self->_initialize_tokenizer;
  $self->_initialize_tree_constructor;
  $self->_construct_tree;
  $self->_terminate_tree_constructor;

  return $self->{document};
} # parse_string
131    
## new ($class)
##
## Creates a parser object with default hooks and returns it:
##
##   set_next_input_character - a reader for an empty input; it always
##       sets |next_input_character| to -1, the value the tokenizer
##       treats as end of input.
##   parse_error - an error handler that ignores every error.
sub new ($) {
  my $class = shift;
  my $self = bless {}, $class;

  ## The reader takes the parser from its argument list, which is the
  ## calling convention the reader installed by |parse_string| relies
  ## on as well.  It must not capture $self from this scope: the
  ## closure is stored inside $self, so capturing it would create a
  ## circular reference and the object would never be freed.
  $self->{set_next_input_character} = sub {
    my $parser = shift;
    $parser->{next_input_character} = -1;
  };

  $self->{parse_error} = sub {
    #
  };

  return $self;
} # new
143    
144     ## Implementations MUST act as if they used the state machine in the spec
145    
## _initialize_tokenizer ($self)
##
## Resets the tokenizer fields of the parser to their starting values
## and reads the first input character.
sub _initialize_tokenizer ($) {
  my $self = shift;

  ## Tokenizing starts in the data state with the PCDATA content model.
  $self->{state} = 'data'; # MUST
  $self->{content_model_flag} = 'PCDATA'; # MUST

  ## Nothing is being built or remembered yet.  The keys are kept, with
  ## undefined values.  |current_token| holds a start tag, end tag,
  ## comment, or DOCTYPE token while one is being built.
  for my $slot (qw(
    current_token
    current_attribute
    last_emitted_start_tag_name
    last_attribute_value_state
  )) {
    $self->{$slot} = undef;
  }

  $self->{char} = [];

  ## Load the first character into |$self->{next_input_character}|.
  !!!next-input-character;

  ## Queue of tokens that are ready to be returned by |_get_next_token|.
  $self->{token} = [];
} # _initialize_tokenizer
159    
160     ## A token has:
161     ## ->{type} eq 'DOCTYPE', 'start tag', 'end tag', 'comment',
162     ## 'character', or 'end-of-file'
163     ## ->{name} (DOCTYPE, start tag (tagname), end tag (tagname))
164     ## ISSUE: the spec needs s/tagname/tag name/
165     ## ->{error} == 1 or 0 (DOCTYPE)
166     ## ->{attributes} isa HASH (start tag, end tag)
167     ## ->{data} (comment, character)
168    
169     ## Macros
170     ## Macros MUST be preceded by three EXCLAMATION MARKs.
171     ## emit ($token)
172     ## Emits the specified token.
173    
174     ## Emitted token MUST immediately be handled by the tree construction state.
175    
176     ## Before each step, UA MAY check to see if either one of the scripts in
177     ## "list of scripts that will execute as soon as possible" or the first
178     ## script in the "list of scripts that will execute asynchronously",
179     ## has completed loading. If one has, then it MUST be executed
180     ## and removed from the list.
181    
182     sub _get_next_token ($) {
183     my $self = shift;
184     if (@{$self->{token}}) {
185     return shift @{$self->{token}};
186     }
187    
188     A: {
189     if ($self->{state} eq 'data') {
190     if ($self->{next_input_character} == 0x0026) { # &
191     if ($self->{content_model_flag} eq 'PCDATA' or
192     $self->{content_model_flag} eq 'RCDATA') {
193     $self->{state} = 'entity data';
194     !!!next-input-character;
195     redo A;
196     } else {
197     #
198     }
199 wakaba 1.13 } elsif ($self->{next_input_character} == 0x002D) { # -
200     if ($self->{content_model_flag} eq 'RCDATA' or
201     $self->{content_model_flag} eq 'CDATA') {
202     unless ($self->{escape}) {
203     if ($self->{prev_input_character}->[0] == 0x002D and # -
204     $self->{prev_input_character}->[1] == 0x0021 and # !
205     $self->{prev_input_character}->[2] == 0x003C) { # <
206     $self->{escape} = 1;
207     }
208     }
209     }
210    
211     #
212 wakaba 1.1 } elsif ($self->{next_input_character} == 0x003C) { # <
213 wakaba 1.13 if ($self->{content_model_flag} eq 'PCDATA' or
214     (($self->{content_model_flag} eq 'CDATA' or
215     $self->{content_model_flag} eq 'RCDATA') and
216     not $self->{escape})) {
217 wakaba 1.1 $self->{state} = 'tag open';
218     !!!next-input-character;
219     redo A;
220     } else {
221     #
222     }
223 wakaba 1.13 } elsif ($self->{next_input_character} == 0x003E) { # >
224     if ($self->{escape} and
225     ($self->{content_model_flag} eq 'RCDATA' or
226     $self->{content_model_flag} eq 'CDATA')) {
227     if ($self->{prev_input_character}->[0] == 0x002D and # -
228     $self->{prev_input_character}->[1] == 0x002D) { # -
229     delete $self->{escape};
230     }
231     }
232    
233     #
234 wakaba 1.1 } elsif ($self->{next_input_character} == -1) {
235     !!!emit ({type => 'end-of-file'});
236     last A; ## TODO: ok?
237     }
238     # Anything else
239     my $token = {type => 'character',
240     data => chr $self->{next_input_character}};
241     ## Stay in the data state
242     !!!next-input-character;
243    
244     !!!emit ($token);
245    
246     redo A;
247     } elsif ($self->{state} eq 'entity data') {
248     ## (cannot happen in CDATA state)
249    
250     my $token = $self->_tokenize_attempt_to_consume_an_entity;
251    
252     $self->{state} = 'data';
253     # next-input-character is already done
254    
255     unless (defined $token) {
256     !!!emit ({type => 'character', data => '&'});
257     } else {
258     !!!emit ($token);
259     }
260    
261     redo A;
262     } elsif ($self->{state} eq 'tag open') {
263     if ($self->{content_model_flag} eq 'RCDATA' or
264     $self->{content_model_flag} eq 'CDATA') {
265     if ($self->{next_input_character} == 0x002F) { # /
266     !!!next-input-character;
267     $self->{state} = 'close tag open';
268     redo A;
269     } else {
270     ## reconsume
271     $self->{state} = 'data';
272    
273     !!!emit ({type => 'character', data => '<'});
274    
275     redo A;
276     }
277     } elsif ($self->{content_model_flag} eq 'PCDATA') {
278     if ($self->{next_input_character} == 0x0021) { # !
279     $self->{state} = 'markup declaration open';
280     !!!next-input-character;
281     redo A;
282     } elsif ($self->{next_input_character} == 0x002F) { # /
283     $self->{state} = 'close tag open';
284     !!!next-input-character;
285     redo A;
286     } elsif (0x0041 <= $self->{next_input_character} and
287     $self->{next_input_character} <= 0x005A) { # A..Z
288     $self->{current_token}
289     = {type => 'start tag',
290     tag_name => chr ($self->{next_input_character} + 0x0020)};
291     $self->{state} = 'tag name';
292     !!!next-input-character;
293     redo A;
294     } elsif (0x0061 <= $self->{next_input_character} and
295     $self->{next_input_character} <= 0x007A) { # a..z
296     $self->{current_token} = {type => 'start tag',
297     tag_name => chr ($self->{next_input_character})};
298     $self->{state} = 'tag name';
299     !!!next-input-character;
300     redo A;
301     } elsif ($self->{next_input_character} == 0x003E) { # >
302 wakaba 1.3 !!!parse-error (type => 'empty start tag');
303 wakaba 1.1 $self->{state} = 'data';
304     !!!next-input-character;
305    
306     !!!emit ({type => 'character', data => '<>'});
307    
308     redo A;
309     } elsif ($self->{next_input_character} == 0x003F) { # ?
310 wakaba 1.3 !!!parse-error (type => 'pio');
311 wakaba 1.1 $self->{state} = 'bogus comment';
312     ## $self->{next_input_character} is intentionally left as is
313     redo A;
314     } else {
315 wakaba 1.3 !!!parse-error (type => 'bare stago');
316 wakaba 1.1 $self->{state} = 'data';
317     ## reconsume
318    
319     !!!emit ({type => 'character', data => '<'});
320    
321     redo A;
322     }
323     } else {
324     die "$0: $self->{content_model_flag}: Unknown content model flag";
325     }
326     } elsif ($self->{state} eq 'close tag open') {
327     if ($self->{content_model_flag} eq 'RCDATA' or
328     $self->{content_model_flag} eq 'CDATA') {
329     my @next_char;
330     TAGNAME: for (my $i = 0; $i < length $self->{last_emitted_start_tag_name}; $i++) {
331     push @next_char, $self->{next_input_character};
332     my $c = ord substr ($self->{last_emitted_start_tag_name}, $i, 1);
333     my $C = 0x0061 <= $c && $c <= 0x007A ? $c - 0x0020 : $c;
334     if ($self->{next_input_character} == $c or $self->{next_input_character} == $C) {
335     !!!next-input-character;
336     next TAGNAME;
337     } else {
338 wakaba 1.3 !!!parse-error (type => 'unmatched end tag');
339 wakaba 1.1 $self->{next_input_character} = shift @next_char; # reconsume
340     !!!back-next-input-character (@next_char);
341     $self->{state} = 'data';
342    
343     !!!emit ({type => 'character', data => '</'});
344    
345     redo A;
346     }
347     }
348     push @next_char, $self->{next_input_character};
349    
350     unless ($self->{next_input_character} == 0x0009 or # HT
351     $self->{next_input_character} == 0x000A or # LF
352     $self->{next_input_character} == 0x000B or # VT
353     $self->{next_input_character} == 0x000C or # FF
354     $self->{next_input_character} == 0x0020 or # SP
355     $self->{next_input_character} == 0x003E or # >
356     $self->{next_input_character} == 0x002F or # /
357     $self->{next_input_character} == -1) {
358 wakaba 1.3 !!!parse-error (type => 'unmatched end tag');
359 wakaba 1.1 $self->{next_input_character} = shift @next_char; # reconsume
360     !!!back-next-input-character (@next_char);
361     $self->{state} = 'data';
362    
363     !!!emit ({type => 'character', data => '</'});
364    
365     redo A;
366     } else {
367     $self->{next_input_character} = shift @next_char;
368     !!!back-next-input-character (@next_char);
369     # and consume...
370     }
371     }
372    
373     if (0x0041 <= $self->{next_input_character} and
374     $self->{next_input_character} <= 0x005A) { # A..Z
375     $self->{current_token} = {type => 'end tag',
376     tag_name => chr ($self->{next_input_character} + 0x0020)};
377     $self->{state} = 'tag name';
378     !!!next-input-character;
379     redo A;
380     } elsif (0x0061 <= $self->{next_input_character} and
381     $self->{next_input_character} <= 0x007A) { # a..z
382     $self->{current_token} = {type => 'end tag',
383     tag_name => chr ($self->{next_input_character})};
384     $self->{state} = 'tag name';
385     !!!next-input-character;
386     redo A;
387     } elsif ($self->{next_input_character} == 0x003E) { # >
388 wakaba 1.3 !!!parse-error (type => 'empty end tag');
389 wakaba 1.1 $self->{state} = 'data';
390     !!!next-input-character;
391     redo A;
392     } elsif ($self->{next_input_character} == -1) {
393 wakaba 1.3 !!!parse-error (type => 'bare etago');
394 wakaba 1.1 $self->{state} = 'data';
395     # reconsume
396    
397     !!!emit ({type => 'character', data => '</'});
398    
399     redo A;
400     } else {
401 wakaba 1.3 !!!parse-error (type => 'bogus end tag');
402 wakaba 1.1 $self->{state} = 'bogus comment';
403     ## $self->{next_input_character} is intentionally left as is
404     redo A;
405     }
406     } elsif ($self->{state} eq 'tag name') {
407     if ($self->{next_input_character} == 0x0009 or # HT
408     $self->{next_input_character} == 0x000A or # LF
409     $self->{next_input_character} == 0x000B or # VT
410     $self->{next_input_character} == 0x000C or # FF
411     $self->{next_input_character} == 0x0020) { # SP
412     $self->{state} = 'before attribute name';
413     !!!next-input-character;
414     redo A;
415     } elsif ($self->{next_input_character} == 0x003E) { # >
416     if ($self->{current_token}->{type} eq 'start tag') {
417     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
418     } elsif ($self->{current_token}->{type} eq 'end tag') {
419     $self->{content_model_flag} = 'PCDATA'; # MUST
420     if ($self->{current_token}->{attributes}) {
421 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
422 wakaba 1.1 }
423     } else {
424     die "$0: $self->{current_token}->{type}: Unknown token type";
425     }
426     $self->{state} = 'data';
427     !!!next-input-character;
428    
429     !!!emit ($self->{current_token}); # start tag or end tag
430     undef $self->{current_token};
431    
432     redo A;
433     } elsif (0x0041 <= $self->{next_input_character} and
434     $self->{next_input_character} <= 0x005A) { # A..Z
435     $self->{current_token}->{tag_name} .= chr ($self->{next_input_character} + 0x0020);
436     # start tag or end tag
437     ## Stay in this state
438     !!!next-input-character;
439     redo A;
440 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
441 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
442 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
443     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
444     } elsif ($self->{current_token}->{type} eq 'end tag') {
445     $self->{content_model_flag} = 'PCDATA'; # MUST
446     if ($self->{current_token}->{attributes}) {
447 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
448 wakaba 1.1 }
449     } else {
450     die "$0: $self->{current_token}->{type}: Unknown token type";
451     }
452     $self->{state} = 'data';
453     # reconsume
454    
455     !!!emit ($self->{current_token}); # start tag or end tag
456     undef $self->{current_token};
457    
458     redo A;
459     } elsif ($self->{next_input_character} == 0x002F) { # /
460     !!!next-input-character;
461     if ($self->{next_input_character} == 0x003E and # >
462     $self->{current_token}->{type} eq 'start tag' and
463     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
464     # permitted slash
465     #
466     } else {
467 wakaba 1.3 !!!parse-error (type => 'nestc');
468 wakaba 1.1 }
469     $self->{state} = 'before attribute name';
470     # next-input-character is already done
471     redo A;
472     } else {
473     $self->{current_token}->{tag_name} .= chr $self->{next_input_character};
474     # start tag or end tag
475     ## Stay in the state
476     !!!next-input-character;
477     redo A;
478     }
479     } elsif ($self->{state} eq 'before attribute name') {
480     if ($self->{next_input_character} == 0x0009 or # HT
481     $self->{next_input_character} == 0x000A or # LF
482     $self->{next_input_character} == 0x000B or # VT
483     $self->{next_input_character} == 0x000C or # FF
484     $self->{next_input_character} == 0x0020) { # SP
485     ## Stay in the state
486     !!!next-input-character;
487     redo A;
488     } elsif ($self->{next_input_character} == 0x003E) { # >
489     if ($self->{current_token}->{type} eq 'start tag') {
490     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
491     } elsif ($self->{current_token}->{type} eq 'end tag') {
492     $self->{content_model_flag} = 'PCDATA'; # MUST
493     if ($self->{current_token}->{attributes}) {
494 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
495 wakaba 1.1 }
496     } else {
497     die "$0: $self->{current_token}->{type}: Unknown token type";
498     }
499     $self->{state} = 'data';
500     !!!next-input-character;
501    
502     !!!emit ($self->{current_token}); # start tag or end tag
503     undef $self->{current_token};
504    
505     redo A;
506     } elsif (0x0041 <= $self->{next_input_character} and
507     $self->{next_input_character} <= 0x005A) { # A..Z
508     $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
509     value => ''};
510     $self->{state} = 'attribute name';
511     !!!next-input-character;
512     redo A;
513     } elsif ($self->{next_input_character} == 0x002F) { # /
514     !!!next-input-character;
515     if ($self->{next_input_character} == 0x003E and # >
516     $self->{current_token}->{type} eq 'start tag' and
517     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
518     # permitted slash
519     #
520     } else {
521 wakaba 1.3 !!!parse-error (type => 'nestc');
522 wakaba 1.1 }
523     ## Stay in the state
524     # next-input-character is already done
525     redo A;
526 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
527 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
528 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
529     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
530     } elsif ($self->{current_token}->{type} eq 'end tag') {
531     $self->{content_model_flag} = 'PCDATA'; # MUST
532     if ($self->{current_token}->{attributes}) {
533 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
534 wakaba 1.1 }
535     } else {
536     die "$0: $self->{current_token}->{type}: Unknown token type";
537     }
538     $self->{state} = 'data';
539     # reconsume
540    
541     !!!emit ($self->{current_token}); # start tag or end tag
542     undef $self->{current_token};
543    
544     redo A;
545     } else {
546     $self->{current_attribute} = {name => chr ($self->{next_input_character}),
547     value => ''};
548     $self->{state} = 'attribute name';
549     !!!next-input-character;
550     redo A;
551     }
552     } elsif ($self->{state} eq 'attribute name') {
553     my $before_leave = sub {
554     if (exists $self->{current_token}->{attributes} # start tag or end tag
555     ->{$self->{current_attribute}->{name}}) { # MUST
556 wakaba 1.3 !!!parse-error (type => 'dupulicate attribute');
557 wakaba 1.1 ## Discard $self->{current_attribute} # MUST
558     } else {
559     $self->{current_token}->{attributes}->{$self->{current_attribute}->{name}}
560     = $self->{current_attribute};
561     }
562     }; # $before_leave
563    
564     if ($self->{next_input_character} == 0x0009 or # HT
565     $self->{next_input_character} == 0x000A or # LF
566     $self->{next_input_character} == 0x000B or # VT
567     $self->{next_input_character} == 0x000C or # FF
568     $self->{next_input_character} == 0x0020) { # SP
569     $before_leave->();
570     $self->{state} = 'after attribute name';
571     !!!next-input-character;
572     redo A;
573     } elsif ($self->{next_input_character} == 0x003D) { # =
574     $before_leave->();
575     $self->{state} = 'before attribute value';
576     !!!next-input-character;
577     redo A;
578     } elsif ($self->{next_input_character} == 0x003E) { # >
579     $before_leave->();
580     if ($self->{current_token}->{type} eq 'start tag') {
581     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
582     } elsif ($self->{current_token}->{type} eq 'end tag') {
583     $self->{content_model_flag} = 'PCDATA'; # MUST
584     if ($self->{current_token}->{attributes}) {
585 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
586 wakaba 1.1 }
587     } else {
588     die "$0: $self->{current_token}->{type}: Unknown token type";
589     }
590     $self->{state} = 'data';
591     !!!next-input-character;
592    
593     !!!emit ($self->{current_token}); # start tag or end tag
594     undef $self->{current_token};
595    
596     redo A;
597     } elsif (0x0041 <= $self->{next_input_character} and
598     $self->{next_input_character} <= 0x005A) { # A..Z
599     $self->{current_attribute}->{name} .= chr ($self->{next_input_character} + 0x0020);
600     ## Stay in the state
601     !!!next-input-character;
602     redo A;
603     } elsif ($self->{next_input_character} == 0x002F) { # /
604     $before_leave->();
605     !!!next-input-character;
606     if ($self->{next_input_character} == 0x003E and # >
607     $self->{current_token}->{type} eq 'start tag' and
608     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
609     # permitted slash
610     #
611     } else {
612 wakaba 1.3 !!!parse-error (type => 'nestc');
613 wakaba 1.1 }
614     $self->{state} = 'before attribute name';
615     # next-input-character is already done
616     redo A;
617 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
618 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
619 wakaba 1.1 $before_leave->();
620     if ($self->{current_token}->{type} eq 'start tag') {
621     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
622     } elsif ($self->{current_token}->{type} eq 'end tag') {
623     $self->{content_model_flag} = 'PCDATA'; # MUST
624     if ($self->{current_token}->{attributes}) {
625 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
626 wakaba 1.1 }
627     } else {
628     die "$0: $self->{current_token}->{type}: Unknown token type";
629     }
630     $self->{state} = 'data';
631     # reconsume
632    
633     !!!emit ($self->{current_token}); # start tag or end tag
634     undef $self->{current_token};
635    
636     redo A;
637     } else {
638     $self->{current_attribute}->{name} .= chr ($self->{next_input_character});
639     ## Stay in the state
640     !!!next-input-character;
641     redo A;
642     }
643     } elsif ($self->{state} eq 'after attribute name') {
644     if ($self->{next_input_character} == 0x0009 or # HT
645     $self->{next_input_character} == 0x000A or # LF
646     $self->{next_input_character} == 0x000B or # VT
647     $self->{next_input_character} == 0x000C or # FF
648     $self->{next_input_character} == 0x0020) { # SP
649     ## Stay in the state
650     !!!next-input-character;
651     redo A;
652     } elsif ($self->{next_input_character} == 0x003D) { # =
653     $self->{state} = 'before attribute value';
654     !!!next-input-character;
655     redo A;
656     } elsif ($self->{next_input_character} == 0x003E) { # >
657     if ($self->{current_token}->{type} eq 'start tag') {
658     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
659     } elsif ($self->{current_token}->{type} eq 'end tag') {
660     $self->{content_model_flag} = 'PCDATA'; # MUST
661     if ($self->{current_token}->{attributes}) {
662 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
663 wakaba 1.1 }
664     } else {
665     die "$0: $self->{current_token}->{type}: Unknown token type";
666     }
667     $self->{state} = 'data';
668     !!!next-input-character;
669    
670     !!!emit ($self->{current_token}); # start tag or end tag
671     undef $self->{current_token};
672    
673     redo A;
674     } elsif (0x0041 <= $self->{next_input_character} and
675     $self->{next_input_character} <= 0x005A) { # A..Z
676     $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
677     value => ''};
678     $self->{state} = 'attribute name';
679     !!!next-input-character;
680     redo A;
681     } elsif ($self->{next_input_character} == 0x002F) { # /
682     !!!next-input-character;
683     if ($self->{next_input_character} == 0x003E and # >
684     $self->{current_token}->{type} eq 'start tag' and
685     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
686     # permitted slash
687     #
688     } else {
689 wakaba 1.3 !!!parse-error (type => 'nestc');
690 wakaba 1.1 }
691     $self->{state} = 'before attribute name';
692     # next-input-character is already done
693     redo A;
694 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
695 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
696 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
697     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
698     } elsif ($self->{current_token}->{type} eq 'end tag') {
699     $self->{content_model_flag} = 'PCDATA'; # MUST
700     if ($self->{current_token}->{attributes}) {
701 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
702 wakaba 1.1 }
703     } else {
704     die "$0: $self->{current_token}->{type}: Unknown token type";
705     }
706     $self->{state} = 'data';
707     # reconsume
708    
709     !!!emit ($self->{current_token}); # start tag or end tag
710     undef $self->{current_token};
711    
712     redo A;
713     } else {
714     $self->{current_attribute} = {name => chr ($self->{next_input_character}),
715     value => ''};
716     $self->{state} = 'attribute name';
717     !!!next-input-character;
718     redo A;
719     }
720     } elsif ($self->{state} eq 'before attribute value') {
721     if ($self->{next_input_character} == 0x0009 or # HT
722     $self->{next_input_character} == 0x000A or # LF
723     $self->{next_input_character} == 0x000B or # VT
724     $self->{next_input_character} == 0x000C or # FF
725     $self->{next_input_character} == 0x0020) { # SP
726     ## Stay in the state
727     !!!next-input-character;
728     redo A;
729     } elsif ($self->{next_input_character} == 0x0022) { # "
730     $self->{state} = 'attribute value (double-quoted)';
731     !!!next-input-character;
732     redo A;
733     } elsif ($self->{next_input_character} == 0x0026) { # &
734     $self->{state} = 'attribute value (unquoted)';
735     ## reconsume
736     redo A;
737     } elsif ($self->{next_input_character} == 0x0027) { # '
738     $self->{state} = 'attribute value (single-quoted)';
739     !!!next-input-character;
740     redo A;
741     } elsif ($self->{next_input_character} == 0x003E) { # >
742     if ($self->{current_token}->{type} eq 'start tag') {
743     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
744     } elsif ($self->{current_token}->{type} eq 'end tag') {
745     $self->{content_model_flag} = 'PCDATA'; # MUST
746     if ($self->{current_token}->{attributes}) {
747 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
748 wakaba 1.1 }
749     } else {
750     die "$0: $self->{current_token}->{type}: Unknown token type";
751     }
752     $self->{state} = 'data';
753     !!!next-input-character;
754    
755     !!!emit ($self->{current_token}); # start tag or end tag
756     undef $self->{current_token};
757    
758     redo A;
759 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
760 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
761 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
762     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
763     } elsif ($self->{current_token}->{type} eq 'end tag') {
764     $self->{content_model_flag} = 'PCDATA'; # MUST
765     if ($self->{current_token}->{attributes}) {
766 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
767 wakaba 1.1 }
768     } else {
769     die "$0: $self->{current_token}->{type}: Unknown token type";
770     }
771     $self->{state} = 'data';
772     ## reconsume
773    
774     !!!emit ($self->{current_token}); # start tag or end tag
775     undef $self->{current_token};
776    
777     redo A;
778     } else {
779     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
780     $self->{state} = 'attribute value (unquoted)';
781     !!!next-input-character;
782     redo A;
783     }
784     } elsif ($self->{state} eq 'attribute value (double-quoted)') {
785     if ($self->{next_input_character} == 0x0022) { # "
786     $self->{state} = 'before attribute name';
787     !!!next-input-character;
788     redo A;
789     } elsif ($self->{next_input_character} == 0x0026) { # &
790     $self->{last_attribute_value_state} = 'attribute value (double-quoted)';
791     $self->{state} = 'entity in attribute value';
792     !!!next-input-character;
793     redo A;
794     } elsif ($self->{next_input_character} == -1) {
795 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
796 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
797     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
798     } elsif ($self->{current_token}->{type} eq 'end tag') {
799     $self->{content_model_flag} = 'PCDATA'; # MUST
800     if ($self->{current_token}->{attributes}) {
801 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
802 wakaba 1.1 }
803     } else {
804     die "$0: $self->{current_token}->{type}: Unknown token type";
805     }
806     $self->{state} = 'data';
807     ## reconsume
808    
809     !!!emit ($self->{current_token}); # start tag or end tag
810     undef $self->{current_token};
811    
812     redo A;
813     } else {
814     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
815     ## Stay in the state
816     !!!next-input-character;
817     redo A;
818     }
819     } elsif ($self->{state} eq 'attribute value (single-quoted)') {
820     if ($self->{next_input_character} == 0x0027) { # '
821     $self->{state} = 'before attribute name';
822     !!!next-input-character;
823     redo A;
824     } elsif ($self->{next_input_character} == 0x0026) { # &
825     $self->{last_attribute_value_state} = 'attribute value (single-quoted)';
826     $self->{state} = 'entity in attribute value';
827     !!!next-input-character;
828     redo A;
829     } elsif ($self->{next_input_character} == -1) {
830 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
831 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
832     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
833     } elsif ($self->{current_token}->{type} eq 'end tag') {
834     $self->{content_model_flag} = 'PCDATA'; # MUST
835     if ($self->{current_token}->{attributes}) {
836 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
837 wakaba 1.1 }
838     } else {
839     die "$0: $self->{current_token}->{type}: Unknown token type";
840     }
841     $self->{state} = 'data';
842     ## reconsume
843    
844     !!!emit ($self->{current_token}); # start tag or end tag
845     undef $self->{current_token};
846    
847     redo A;
848     } else {
849     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
850     ## Stay in the state
851     !!!next-input-character;
852     redo A;
853     }
854     } elsif ($self->{state} eq 'attribute value (unquoted)') {
855     if ($self->{next_input_character} == 0x0009 or # HT
856     $self->{next_input_character} == 0x000A or # LF
857     $self->{next_input_character} == 0x000B or # VT
858     $self->{next_input_character} == 0x000C or # FF
859     $self->{next_input_character} == 0x0020) { # SP
860     $self->{state} = 'before attribute name';
861     !!!next-input-character;
862     redo A;
863     } elsif ($self->{next_input_character} == 0x0026) { # &
864     $self->{last_attribute_value_state} = 'attribute value (unquoted)';
865     $self->{state} = 'entity in attribute value';
866     !!!next-input-character;
867     redo A;
868     } elsif ($self->{next_input_character} == 0x003E) { # >
869     if ($self->{current_token}->{type} eq 'start tag') {
870     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
871     } elsif ($self->{current_token}->{type} eq 'end tag') {
872     $self->{content_model_flag} = 'PCDATA'; # MUST
873     if ($self->{current_token}->{attributes}) {
874 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
875 wakaba 1.1 }
876     } else {
877     die "$0: $self->{current_token}->{type}: Unknown token type";
878     }
879     $self->{state} = 'data';
880     !!!next-input-character;
881    
882     !!!emit ($self->{current_token}); # start tag or end tag
883     undef $self->{current_token};
884    
885     redo A;
886 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
887 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
888 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
889     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
890     } elsif ($self->{current_token}->{type} eq 'end tag') {
891     $self->{content_model_flag} = 'PCDATA'; # MUST
892     if ($self->{current_token}->{attributes}) {
893 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
894 wakaba 1.1 }
895     } else {
896     die "$0: $self->{current_token}->{type}: Unknown token type";
897     }
898     $self->{state} = 'data';
899     ## reconsume
900    
901     !!!emit ($self->{current_token}); # start tag or end tag
902     undef $self->{current_token};
903    
904     redo A;
905     } else {
906     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
907     ## Stay in the state
908     !!!next-input-character;
909     redo A;
910     }
911     } elsif ($self->{state} eq 'entity in attribute value') {
912     my $token = $self->_tokenize_attempt_to_consume_an_entity;
913    
914     unless (defined $token) {
915     $self->{current_attribute}->{value} .= '&';
916     } else {
917     $self->{current_attribute}->{value} .= $token->{data};
918     ## ISSUE: spec says "append the returned character token to the current attribute's value"
919     }
920    
921     $self->{state} = $self->{last_attribute_value_state};
922     # next-input-character is already done
923     redo A;
924     } elsif ($self->{state} eq 'bogus comment') {
925     ## (only happen if PCDATA state)
926    
927     my $token = {type => 'comment', data => ''};
928    
929     BC: {
930     if ($self->{next_input_character} == 0x003E) { # >
931     $self->{state} = 'data';
932     !!!next-input-character;
933    
934     !!!emit ($token);
935    
936     redo A;
937     } elsif ($self->{next_input_character} == -1) {
938     $self->{state} = 'data';
939     ## reconsume
940    
941     !!!emit ($token);
942    
943     redo A;
944     } else {
945     $token->{data} .= chr ($self->{next_input_character});
946     !!!next-input-character;
947     redo BC;
948     }
949     } # BC
950     } elsif ($self->{state} eq 'markup declaration open') {
951     ## (only happen if PCDATA state)
952    
953     my @next_char;
954     push @next_char, $self->{next_input_character};
955    
956     if ($self->{next_input_character} == 0x002D) { # -
957     !!!next-input-character;
958     push @next_char, $self->{next_input_character};
959     if ($self->{next_input_character} == 0x002D) { # -
960     $self->{current_token} = {type => 'comment', data => ''};
961     $self->{state} = 'comment';
962     !!!next-input-character;
963     redo A;
964     }
965     } elsif ($self->{next_input_character} == 0x0044 or # D
966     $self->{next_input_character} == 0x0064) { # d
967     !!!next-input-character;
968     push @next_char, $self->{next_input_character};
969     if ($self->{next_input_character} == 0x004F or # O
970     $self->{next_input_character} == 0x006F) { # o
971     !!!next-input-character;
972     push @next_char, $self->{next_input_character};
973     if ($self->{next_input_character} == 0x0043 or # C
974     $self->{next_input_character} == 0x0063) { # c
975     !!!next-input-character;
976     push @next_char, $self->{next_input_character};
977     if ($self->{next_input_character} == 0x0054 or # T
978     $self->{next_input_character} == 0x0074) { # t
979     !!!next-input-character;
980     push @next_char, $self->{next_input_character};
981     if ($self->{next_input_character} == 0x0059 or # Y
982     $self->{next_input_character} == 0x0079) { # y
983     !!!next-input-character;
984     push @next_char, $self->{next_input_character};
985     if ($self->{next_input_character} == 0x0050 or # P
986     $self->{next_input_character} == 0x0070) { # p
987     !!!next-input-character;
988     push @next_char, $self->{next_input_character};
989     if ($self->{next_input_character} == 0x0045 or # E
990     $self->{next_input_character} == 0x0065) { # e
991     ## ISSUE: What a stupid code this is!
992     $self->{state} = 'DOCTYPE';
993     !!!next-input-character;
994     redo A;
995     }
996     }
997     }
998     }
999     }
1000     }
1001     }
1002    
1003 wakaba 1.3 !!!parse-error (type => 'bogus comment open');
1004 wakaba 1.1 $self->{next_input_character} = shift @next_char;
1005     !!!back-next-input-character (@next_char);
1006     $self->{state} = 'bogus comment';
1007     redo A;
1008    
1009     ## ISSUE: typos in spec: chacacters, is is a parse error
1010     ## ISSUE: spec is somewhat unclear on "is the first character that will be in the comment"; what is "that will be in the comment" is what the algorithm defines, isn't it?
1011     } elsif ($self->{state} eq 'comment') {
1012     if ($self->{next_input_character} == 0x002D) { # -
1013     $self->{state} = 'comment dash';
1014     !!!next-input-character;
1015     redo A;
1016     } elsif ($self->{next_input_character} == -1) {
1017 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1018 wakaba 1.1 $self->{state} = 'data';
1019     ## reconsume
1020    
1021     !!!emit ($self->{current_token}); # comment
1022     undef $self->{current_token};
1023    
1024     redo A;
1025     } else {
1026     $self->{current_token}->{data} .= chr ($self->{next_input_character}); # comment
1027     ## Stay in the state
1028     !!!next-input-character;
1029     redo A;
1030     }
1031     } elsif ($self->{state} eq 'comment dash') {
1032     if ($self->{next_input_character} == 0x002D) { # -
1033     $self->{state} = 'comment end';
1034     !!!next-input-character;
1035     redo A;
1036     } elsif ($self->{next_input_character} == -1) {
1037 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1038 wakaba 1.1 $self->{state} = 'data';
1039     ## reconsume
1040    
1041     !!!emit ($self->{current_token}); # comment
1042     undef $self->{current_token};
1043    
1044     redo A;
1045     } else {
1046     $self->{current_token}->{data} .= '-' . chr ($self->{next_input_character}); # comment
1047     $self->{state} = 'comment';
1048     !!!next-input-character;
1049     redo A;
1050     }
1051     } elsif ($self->{state} eq 'comment end') {
1052     if ($self->{next_input_character} == 0x003E) { # >
1053     $self->{state} = 'data';
1054     !!!next-input-character;
1055    
1056     !!!emit ($self->{current_token}); # comment
1057     undef $self->{current_token};
1058    
1059     redo A;
1060     } elsif ($self->{next_input_character} == 0x002D) { # -
1061 wakaba 1.3 !!!parse-error (type => 'dash in comment');
1062 wakaba 1.1 $self->{current_token}->{data} .= '-'; # comment
1063     ## Stay in the state
1064     !!!next-input-character;
1065     redo A;
1066     } elsif ($self->{next_input_character} == -1) {
1067 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1068 wakaba 1.1 $self->{state} = 'data';
1069     ## reconsume
1070    
1071     !!!emit ($self->{current_token}); # comment
1072     undef $self->{current_token};
1073    
1074     redo A;
1075     } else {
1076 wakaba 1.3 !!!parse-error (type => 'dash in comment');
1077 wakaba 1.1 $self->{current_token}->{data} .= '--' . chr ($self->{next_input_character}); # comment
1078     $self->{state} = 'comment';
1079     !!!next-input-character;
1080     redo A;
1081     }
1082     } elsif ($self->{state} eq 'DOCTYPE') {
1083     if ($self->{next_input_character} == 0x0009 or # HT
1084     $self->{next_input_character} == 0x000A or # LF
1085     $self->{next_input_character} == 0x000B or # VT
1086     $self->{next_input_character} == 0x000C or # FF
1087     $self->{next_input_character} == 0x0020) { # SP
1088     $self->{state} = 'before DOCTYPE name';
1089     !!!next-input-character;
1090     redo A;
1091     } else {
1092 wakaba 1.3 !!!parse-error (type => 'no space before DOCTYPE name');
1093 wakaba 1.1 $self->{state} = 'before DOCTYPE name';
1094     ## reconsume
1095     redo A;
1096     }
1097     } elsif ($self->{state} eq 'before DOCTYPE name') {
1098     if ($self->{next_input_character} == 0x0009 or # HT
1099     $self->{next_input_character} == 0x000A or # LF
1100     $self->{next_input_character} == 0x000B or # VT
1101     $self->{next_input_character} == 0x000C or # FF
1102     $self->{next_input_character} == 0x0020) { # SP
1103     ## Stay in the state
1104     !!!next-input-character;
1105     redo A;
1106     } elsif (0x0061 <= $self->{next_input_character} and
1107     $self->{next_input_character} <= 0x007A) { # a..z
1108 wakaba 1.4 ## ISSUE: "Set the token's name name to the" in the spec
1109 wakaba 1.1 $self->{current_token} = {type => 'DOCTYPE',
1110     name => chr ($self->{next_input_character} - 0x0020),
1111     error => 1};
1112     $self->{state} = 'DOCTYPE name';
1113     !!!next-input-character;
1114     redo A;
1115     } elsif ($self->{next_input_character} == 0x003E) { # >
1116 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
1117 wakaba 1.1 $self->{state} = 'data';
1118     !!!next-input-character;
1119    
1120     !!!emit ({type => 'DOCTYPE', name => '', error => 1});
1121    
1122     redo A;
1123     } elsif ($self->{next_input_character} == -1) {
1124 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
1125 wakaba 1.1 $self->{state} = 'data';
1126     ## reconsume
1127    
1128     !!!emit ({type => 'DOCTYPE', name => '', error => 1});
1129    
1130     redo A;
1131     } else {
1132     $self->{current_token} = {type => 'DOCTYPE',
1133     name => chr ($self->{next_input_character}),
1134     error => 1};
1135 wakaba 1.4 ## ISSUE: "Set the token's name name to the" in the spec
1136 wakaba 1.1 $self->{state} = 'DOCTYPE name';
1137     !!!next-input-character;
1138     redo A;
1139     }
1140     } elsif ($self->{state} eq 'DOCTYPE name') {
1141     if ($self->{next_input_character} == 0x0009 or # HT
1142     $self->{next_input_character} == 0x000A or # LF
1143     $self->{next_input_character} == 0x000B or # VT
1144     $self->{next_input_character} == 0x000C or # FF
1145     $self->{next_input_character} == 0x0020) { # SP
1146     $self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML'); # DOCTYPE
1147     $self->{state} = 'after DOCTYPE name';
1148     !!!next-input-character;
1149     redo A;
1150     } elsif ($self->{next_input_character} == 0x003E) { # >
1151     $self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML'); # DOCTYPE
1152     $self->{state} = 'data';
1153     !!!next-input-character;
1154    
1155     !!!emit ($self->{current_token}); # DOCTYPE
1156     undef $self->{current_token};
1157    
1158     redo A;
1159     } elsif (0x0061 <= $self->{next_input_character} and
1160     $self->{next_input_character} <= 0x007A) { # a..z
1161     $self->{current_token}->{name} .= chr ($self->{next_input_character} - 0x0020); # DOCTYPE
1162     #$self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML');
1163     ## Stay in the state
1164     !!!next-input-character;
1165     redo A;
1166     } elsif ($self->{next_input_character} == -1) {
1167 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1168 wakaba 1.1 $self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML'); # DOCTYPE
1169     $self->{state} = 'data';
1170     ## reconsume
1171    
1172     !!!emit ($self->{current_token});
1173     undef $self->{current_token};
1174    
1175     redo A;
1176     } else {
1177     $self->{current_token}->{name}
1178     .= chr ($self->{next_input_character}); # DOCTYPE
1179     #$self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML');
1180     ## Stay in the state
1181     !!!next-input-character;
1182     redo A;
1183     }
1184     } elsif ($self->{state} eq 'after DOCTYPE name') {
1185     if ($self->{next_input_character} == 0x0009 or # HT
1186     $self->{next_input_character} == 0x000A or # LF
1187     $self->{next_input_character} == 0x000B or # VT
1188     $self->{next_input_character} == 0x000C or # FF
1189     $self->{next_input_character} == 0x0020) { # SP
1190     ## Stay in the state
1191     !!!next-input-character;
1192     redo A;
1193     } elsif ($self->{next_input_character} == 0x003E) { # >
1194     $self->{state} = 'data';
1195     !!!next-input-character;
1196    
1197     !!!emit ($self->{current_token}); # DOCTYPE
1198     undef $self->{current_token};
1199    
1200     redo A;
1201     } elsif ($self->{next_input_character} == -1) {
1202 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1203 wakaba 1.1 $self->{state} = 'data';
1204     ## reconsume
1205    
1206     !!!emit ($self->{current_token}); # DOCTYPE
1207     undef $self->{current_token};
1208    
1209     redo A;
1210     } else {
1211 wakaba 1.3 !!!parse-error (type => 'string after DOCTYPE name');
1212 wakaba 1.1 $self->{current_token}->{error} = 1; # DOCTYPE
1213     $self->{state} = 'bogus DOCTYPE';
1214     !!!next-input-character;
1215     redo A;
1216     }
1217     } elsif ($self->{state} eq 'bogus DOCTYPE') {
1218     if ($self->{next_input_character} == 0x003E) { # >
1219     $self->{state} = 'data';
1220     !!!next-input-character;
1221    
1222     !!!emit ($self->{current_token}); # DOCTYPE
1223     undef $self->{current_token};
1224    
1225     redo A;
1226     } elsif ($self->{next_input_character} == -1) {
1227 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1228 wakaba 1.1 $self->{state} = 'data';
1229     ## reconsume
1230    
1231     !!!emit ($self->{current_token}); # DOCTYPE
1232     undef $self->{current_token};
1233    
1234     redo A;
1235     } else {
1236     ## Stay in the state
1237     !!!next-input-character;
1238     redo A;
1239     }
1240     } else {
1241     die "$0: $self->{state}: Unknown state";
1242     }
1243     } # A
1244    
1245     die "$0: _get_next_token: unexpected case";
1246     } # _get_next_token
1247    
## Attempts to consume an entity (character reference).  On entry
## |$self->{next_input_character}| is the character that follows the
## |&|; the method advances the input as it looks ahead.
##
## Returns a |character| token (a hash reference with |type| and
## |data|) when a reference is recognized.  Returns |undef| when it is
## not; the caller is then expected to treat the |&| literally.  In the
## |undef| case every character that was looked at is handed back:
##
##   - |&#| / |&#x| without digits: the characters are pushed back to
##     the input so that they are tokenized again;
##   - a name that matches no entity: the consumed name is queued as a
##     |character| token by |!!!back-token|;
##   - anything else: nothing has been consumed.
sub _tokenize_attempt_to_consume_an_entity ($) {
  my $self = shift;

  if ($self->{next_input_character} == 0x0023) { # #
    !!!next-input-character;
    if ($self->{next_input_character} == 0x0078 or # x
        $self->{next_input_character} == 0x0058) { # X
      ## Remember which of |x| and |X| was used so that it can be
      ## handed back unchanged if no hexadecimal digit follows.
      my $x_char = $self->{next_input_character};
      !!!next-input-character;

      my $num;
      HEX: {
        my $c = $self->{next_input_character};
        my $digit;
        if (0x0030 <= $c and $c <= 0x0039) { # 0..9
          $digit = $c - 0x0030;
        } elsif (0x0061 <= $c and $c <= 0x0066) { # a..f
          ## ISSUE: the spec says U+0078, which is apparently incorrect
          $digit = $c - 0x0061 + 10;
        } elsif (0x0041 <= $c and $c <= 0x0046) { # A..F
          ## ISSUE: the spec says U+0058, which is apparently incorrect
          $digit = $c - 0x0041 + 10;
        } else {
          last HEX;
        }
        $num = ($num || 0) * 0x10 + $digit;
        !!!next-input-character;
        redo HEX;
      } # HEX

      unless (defined $num) { # no hexadecimal digit
        !!!parse-error (type => 'bare hcro');
        ## Hand back |x| *and* the character that followed it, then make
        ## |#| the current character, so that the input is re-read as
        ## |#|, |x|, <following character>.  (Pushing back only |x|
        ## would silently drop the following character.)
        my @unread = ($x_char, $self->{next_input_character});
        !!!back-next-input-character (@unread);
        $self->{next_input_character} = 0x0023; # #
        return undef;
      }

      if ($self->{next_input_character} == 0x003B) { # ;
        !!!next-input-character;
      } else {
        !!!parse-error (type => 'no refc');
      }

      ## TODO: check the definition for |a valid Unicode character|.
      ## <http://lists.whatwg.org/pipermail/whatwg-whatwg.org/2006-December/thread.html#8189>
      if ($num > 1114111 or $num == 0) {
        $num = 0xFFFD; # REPLACEMENT CHARACTER
        ## ISSUE: Why this is not an error?
      } elsif (0x80 <= $num and $num <= 0x9F) {
        ## References into the C1 range are remapped through
        ## |$c1_entity_char|.
        !!!parse-error (type => sprintf 'c1 entity:U+%04X', $num);
        $num = $c1_entity_char->{$num};
      }

      return {type => 'character', data => chr $num};
    } elsif (0x0030 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x0039) { # 0..9
      my $code = $self->{next_input_character} - 0x0030;
      !!!next-input-character;

      while (0x0030 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x0039) { # 0..9
        $code *= 10;
        $code += $self->{next_input_character} - 0x0030;

        !!!next-input-character;
      }

      if ($self->{next_input_character} == 0x003B) { # ;
        !!!next-input-character;
      } else {
        !!!parse-error (type => 'no refc');
      }

      ## TODO: check the definition for |a valid Unicode character|.
      if ($code > 1114111 or $code == 0) {
        $code = 0xFFFD; # REPLACEMENT CHARACTER
        ## ISSUE: Why this is not an error?
      } elsif (0x80 <= $code and $code <= 0x9F) {
        !!!parse-error (type => sprintf 'c1 entity:U+%04X', $code);
        $code = $c1_entity_char->{$code};
      }

      return {type => 'character', data => chr $code};
    } else {
      ## |&#| followed by neither |x| nor a digit: hand back the
      ## current character and make |#| current again.
      !!!parse-error (type => 'bare nero');
      !!!back-next-input-character ($self->{next_input_character});
      $self->{next_input_character} = 0x0023; # #
      return undef;
    }
  } elsif ((0x0041 <= $self->{next_input_character} and
            $self->{next_input_character} <= 0x005A) or
           (0x0061 <= $self->{next_input_character} and
            $self->{next_input_character} <= 0x007A)) {
    my $entity_name = chr $self->{next_input_character};
    !!!next-input-character;

    ## |$value| is the text to emit: the replacement of the longest
    ## entity name matched so far followed by any characters consumed
    ## after it, or simply the consumed name if nothing has matched.
    my $value = $entity_name;
    ## 0: no match; 1: matched with |;|; -1: matched without |;|.
    my $match = 0;
    require Whatpm::_NamedEntityList;
    our $EntityChar;

    while ((length $entity_name) < 10 and
           ## NOTE: Some number greater than the maximum length of entity name
           ((0x0041 <= $self->{next_input_character} and # A
             $self->{next_input_character} <= 0x005A) or # Z
            (0x0061 <= $self->{next_input_character} and # a
             $self->{next_input_character} <= 0x007A) or # z
            (0x0030 <= $self->{next_input_character} and # 0
             $self->{next_input_character} <= 0x0039) or # 9
            $self->{next_input_character} == 0x003B)) { # ;
      $entity_name .= chr $self->{next_input_character};
      if (defined $EntityChar->{$entity_name}) {
        $value = $EntityChar->{$entity_name};
        if ($self->{next_input_character} == 0x003B) { # ;
          $match = 1;
          !!!next-input-character;
          last;
        } else {
          $match = -1;
        }
      } else {
        $value .= chr $self->{next_input_character};
      }
      !!!next-input-character;
    }

    if ($match > 0) {
      return {type => 'character', data => $value};
    } elsif ($match < 0) {
      ## Matched an entity name that was not terminated by |;|.
      !!!parse-error (type => 'refc');
      return {type => 'character', data => $value};
    } else {
      !!!parse-error (type => 'bare ero');
      ## NOTE: No characters are consumed in the spec.
      !!!back-token ({type => 'character', data => $value});
      return undef;
    }
  } else {
    ## no characters are consumed
    !!!parse-error (type => 'bare ero');
    return undef;
  }
} # _tokenize_attempt_to_consume_an_entity
1391    
## Prepares the document for tree construction by switching its strict
## error checking off.
sub _initialize_tree_constructor ($) {
  my ($self) = @_;

  ## NOTE: $self->{document} MUST be specified before this method is called
  my $document = $self->{document};

  ## TODO: Turn mutation events off # MUST
  ## TODO: Turn loose Document option (manakai extension) on
  ## TODO: Mark the Document as an HTML document # MUST
  $document->strict_error_checking (0);
} # _initialize_tree_constructor
1400    
## Finishes tree construction by switching the document's strict error
## checking back on.
sub _terminate_tree_constructor ($) {
  my ($self) = @_;

  ## TODO: Turn mutation events on
  my $document = $self->{document};
  $document->strict_error_checking (1);
} # _terminate_tree_constructor
1406    
1407     ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
1408    
1409 wakaba 1.3 { # tree construction stage
1410     my $token;
1411    
## Runs the tree construction stage: resets the per-parse tree
## construction state and then runs the initial, root element and main
## phases in that order.
sub _construct_tree ($) {
  my $self = shift;

  ## NOTE: When an interactive UA makes $self->{document} available to
  ## the user, or when it begins accepting user input, is not defined.

  ## NOTE: "Append a character" means: collect it and all subsequent
  ## consecutive characters and insert one Text node whose data is the
  ## concatenation of all those characters. # MUST

  ## Presumably loads the first token into |$token|, which the phase
  ## methods below read.
  !!!next-token;

  ## Start every parse from a clean state.
  $self->{insertion_mode} = 'before head';
  $self->{open_elements} = [];
  $self->{$_} = undef for qw(form_element head_element inner_html_node);

  $self->_tree_construction_initial; # MUST
  $self->_tree_construction_root_element;
  $self->_tree_construction_main;
} # _construct_tree
1435    
## Initial phase of tree construction: handles what precedes the root
## element.  A DOCTYPE token becomes a document type node; leading
## white space is appended to the document as text; anything else is
## reported as a missing DOCTYPE and left in |$token| for the next
## phase to reprocess.
sub _tree_construction_initial ($) {
  my $self = shift;

  TOKEN: while (1) {
    my $type = $token->{type};

    if ($type eq 'character') {
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
        $self->{document}->manakai_append_text ($1);
        ## ISSUE: DOM3 Core does not allow Document > Text
        if (not length $token->{data}) {
          ## The token was white space only: stay in the phase and
          ## look at the next token.
          !!!next-token;
          next TOKEN;
        }
      }
      ## ISSUE: Spec currently left this case undefined.
      !!!parse-error (type => 'missing DOCTYPE');
      ## The rest of the token is reprocessed in the root element phase.
      return;
    }

    if ($type eq 'DOCTYPE') {
      if ($token->{error}) {
        ## ISSUE: Spec currently left this case undefined.
        !!!parse-error (type => 'bogus DOCTYPE');
      }
      my $document = $self->{document};
      my $doctype = $document->create_document_type_definition
          ($token->{name});
      $document->append_child ($doctype);
      ## The DOCTYPE has been consumed; go to the root element phase.
      !!!next-token;
      return;
    }

    if ($type eq 'comment' or
        $type eq 'start tag' or
        $type eq 'end tag' or
        $type eq 'end-of-file') {
      ## ISSUE: Spec currently left this case undefined.
      !!!parse-error (type => 'missing DOCTYPE');
      ## The token is reprocessed in the root element phase.
      return;
    }

    die "$0: $token->{type}: Unknown token";
  } # TOKEN
} # _tree_construction_initial
1484    
## Root element phase of tree construction: skips stray DOCTYPE tokens,
## appends comments and leading white space to the document, and then
## creates the |html| root element and pushes it onto the stack of open
## elements.  The token that ended the phase stays in |$token| so that
## the main phase reprocesses it.
sub _tree_construction_root_element ($) {
  my $self = shift;

  TOKEN: while (1) {
    my $type = $token->{type};

    if ($type eq 'DOCTYPE') {
      !!!parse-error (type => 'in html:#DOCTYPE');
      ## Ignore the token and stay in the phase
      !!!next-token;
      next TOKEN;
    }

    if ($type eq 'comment') {
      my $document = $self->{document};
      $document->append_child ($document->create_comment ($token->{data}));
      ## Stay in the phase
      !!!next-token;
      next TOKEN;
    }

    if ($type eq 'character') {
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
        $self->{document}->manakai_append_text ($1);
        ## ISSUE: DOM3 Core does not allow Document > Text
        if (not length $token->{data}) {
          ## White space only: stay in the phase
          !!!next-token;
          next TOKEN;
        }
      }
      ## Non-white-space text remains: the root element is needed.
    } elsif ($type ne 'start tag' and
             $type ne 'end tag' and
             $type ne 'end-of-file') {
      die "$0: $token->{type}: Unknown token";
    }
    ## ISSUE: There is an issue in the spec (start tag, end tag and
    ## end-of-file cases)

    last TOKEN;
  } # TOKEN

  my $root_element;
  !!!create-element ($root_element, 'html');
  $self->{document}->append_child ($root_element);
  push @{$self->{open_elements}}, [$root_element, 'html'];
  ## The current token is reprocessed in the main phase.
  return;
} # _tree_construction_root_element
1531    
## Resets the insertion mode appropriately: walks the stack of open
## elements from the most recently opened element towards the root and
## sets |$self->{insertion_mode}| according to the first element that
## determines it.
##
## Reads |$self->{open_elements}| (array of |[$element, $tag_name]|
## pairs), |$self->{inner_html_node}| (same pair shape, defined only in
## the innerHTML case) and |$self->{head_element}|.  Returns nothing.
sub _reset_insertion_mode ($) {
  my $self = shift;

  ## Insertion mode selected by each element type (steps 4..13).
  my %mode_for = (
    select => 'in select',
    td => 'in cell',
    th => 'in cell',
    tr => 'in row',
    ## NOTE: |tbody|, |thead| and |tfoot| all select "in table body";
    ## the spec defines no separate head or foot insertion modes.
    tbody => 'in table body',
    thead => 'in table body',
    tfoot => 'in table body',
    caption => 'in caption',
    colgroup => 'in column group',
    table => 'in table',
    head => 'in body', # not in head!
    body => 'in body',
    frameset => 'in frameset',
  );

  ## Step 1
  my $last;

  ## Step 2
  my $i = -1;
  my $node = $self->{open_elements}->[$i];

  ## Step 3
  S3: {
    if ($self->{open_elements}->[0]->[0] eq $node->[0]) {
      $last = 1;
      ## innerHTML case: only once the first element of the stack has
      ## been reached is the context element examined in its place
      ## (unless the context element is |td| or |th|).  Elements
      ## further down the stack are examined as they are.
      my $context = $self->{inner_html_node};
      if (defined $context and
          $context->[1] ne 'td' and $context->[1] ne 'th') {
        $node = $context;
      }
    }

    ## Step 4..13
    my $new_mode = $mode_for{$node->[1]};
    if (defined $new_mode) {
      $self->{insertion_mode} = $new_mode;
      return;
    }

    ## Step 14
    if ($node->[1] eq 'html') {
      $self->{insertion_mode}
          = defined $self->{head_element} ? 'after head' : 'before head';
      return;
    }

    ## Step 15
    if ($last) {
      $self->{insertion_mode} = 'in body';
      return;
    }

    ## Step 16
    $i--;
    $node = $self->{open_elements}->[$i];

    ## Step 17
    redo S3;
  } # S3
} # _reset_insertion_mode
1593    
1594     sub _tree_construction_main ($) {
1595     my $self = shift;
1596    
1597     my $phase = 'main';
1598 wakaba 1.1
1599     my $active_formatting_elements = [];
1600    
## Reconstruct the active formatting elements.
##
## Takes one argument, |$insert|: a code reference that is called with
## each freshly cloned element node so that the caller decides where the
## clone is placed in the tree.  Every list entry from the rewind point
## up to the end of |$active_formatting_elements| is cloned, pushed onto
## |$self->{open_elements}| and swapped into the list in place of the
## entry it was cloned from.
my $reconstruct_active_formatting_elements = sub { # MUST
  my $insert = shift;

  ## Step 1: nothing to do for an empty list.
  return unless @$active_formatting_elements;

  ## True if the node of the given list entry is also the node of some
  ## entry in the stack of open elements.
  my $is_open = sub {
    my $candidate = shift;
    for my $open (@{$self->{open_elements}}) {
      return 1 if $candidate->[0] eq $open->[0];
    }
    return 0;
  };

  ## Step 3: start from the last entry (negative index, counted from
  ## the end of the list).
  my $pos = -1;
  my $entry = $active_formatting_elements->[$pos];

  ## Step 2: a trailing marker or an entry that is still open means
  ## there is nothing to reconstruct.
  return if $entry->[0] eq '#marker';
  return if $is_open->($entry);

  ## Steps 4-7: rewind until the first entry of the list is reached, or
  ## until a marker / still-open entry is found, in which case advance
  ## back by one entry to the first one that needs to be recreated.
  until ($active_formatting_elements->[0]->[0] eq $entry->[0]) {
    ## Step 5
    $pos--;
    $entry = $active_formatting_elements->[$pos];

    ## Step 6
    if ($entry->[0] eq '#marker' or $is_open->($entry)) {
      ## Step 7
      $pos++;
      $entry = $active_formatting_elements->[$pos];
      last;
    }
  }

  ## Steps 8-11: clone forward until the last list entry was replaced.
  while (1) {
    ## Step 8: shallow clone, same tag name
    my $clone = [$entry->[0]->clone_node (0), $entry->[1]];

    ## Step 9
    $insert->($clone->[0]);
    push @{$self->{open_elements}}, $clone;

    ## Step 10
    $active_formatting_elements->[$pos] = $self->{open_elements}->[-1];

    ## Step 11: done once the clone is the last entry of the list
    last if $clone->[0] eq $active_formatting_elements->[-1]->[0];

    ## Step 7'
    $pos++;
    $entry = $active_formatting_elements->[$pos];
  }
}; # $reconstruct_active_formatting_elements
1671    
## Clear the list of active formatting elements up to the last marker:
## the last |#marker| entry and every entry after it are removed.  The
## list is left untouched when it contains no marker.
my $clear_up_to_marker = sub {
  my $idx = $#$active_formatting_elements;
  while ($idx >= 0) {
    if ($active_formatting_elements->[$idx]->[0] eq '#marker') {
      ## Two-argument splice: drops everything from $idx to the end.
      splice @$active_formatting_elements, $idx;
      return;
    }
    $idx--;
  }
}; # $clear_up_to_marker
1680    
## Process a |style| start tag held in |$token|: create the element,
## attach it, switch the tokenizer to CDATA, collect the following
## character tokens as the element's text, then switch back to PCDATA
## and consume the token that ended the text run.
my $style_start_tag = sub {
  my $style_el;
  !!!create-element ($style_el, 'style', $token->{attributes});

  ## $self->{insertion_mode} eq 'in head' and ... (always true)
  my $parent
    = ($self->{insertion_mode} eq 'in head' and defined $self->{head_element})
      ? $self->{head_element}
      : $self->{open_elements}->[-1]->[0];
  $parent->append_child ($style_el);

  $self->{content_model_flag} = 'CDATA';
  delete $self->{escape}; # MUST

  ## Gather consecutive character tokens; the loop stops at the first
  ## non-character token or when the tokenizer stops tokenising.
  my @chunk;
  !!!next-token;
  while ($token->{type} eq 'character') {
    push @chunk, $token->{data};
    !!!next-token;
  }
  my $text = join '', @chunk;
  $style_el->manakai_append_text ($text) if length $text;

  $self->{content_model_flag} = 'PCDATA';

  ## A matching </style> is simply ignored; anything else is an error.
  unless ($token->{type} eq 'end tag' and $token->{tag_name} eq 'style') {
    !!!parse-error (type => 'in CDATA:#'.$token->{type});
    ## ISSUE: And ignore?
  }
  !!!next-token;
}; # $style_start_tag
1710    
## Process a |script| start tag held in |$token|: create the element,
## switch the tokenizer to CDATA, collect the following character
## tokens as the element's text, switch back to PCDATA and, unless an
## inner-HTML context node is set, attach the element to the tree.
## The token that ended the text run is consumed before returning.
my $script_start_tag = sub {
  my $script_el;
  !!!create-element ($script_el, 'script', $token->{attributes});
  ## TODO: mark as "parser-inserted"

  $self->{content_model_flag} = 'CDATA';
  delete $self->{escape}; # MUST

  ## Gather consecutive character tokens; the loop stops at the first
  ## non-character token or when the tokenizer stops tokenising.
  my @chunk;
  !!!next-token;
  while ($token->{type} eq 'character') {
    push @chunk, $token->{data};
    !!!next-token;
  }
  my $text = join '', @chunk;
  $script_el->manakai_append_text ($text) if length $text;

  $self->{content_model_flag} = 'PCDATA';

  ## A matching </script> is simply ignored; anything else is an error.
  unless ($token->{type} eq 'end tag' and $token->{tag_name} eq 'script') {
    !!!parse-error (type => 'in CDATA:#'.$token->{type});
    ## ISSUE: And ignore?
    ## TODO: mark as "already executed"
  }

  ## With an inner-HTML context node the element is not inserted.
  ## TODO: mark as "already executed" in that case
  unless (defined $self->{inner_html_node}) {
    ## TODO: $old_insertion_point = current insertion point
    ## TODO: insertion point = just before the next input character

    my $parent
      = ($self->{insertion_mode} eq 'in head' and defined $self->{head_element})
        ? $self->{head_element}
        : $self->{open_elements}->[-1]->[0];
    $parent->append_child ($script_el);

    ## TODO: insertion point = $old_insertion_point (might be "undefined")

    ## TODO: if there is a script that will execute as soon as the parser resume, then...
  }

  !!!next-token;
}; # $script_start_tag
1756    
## Process an end tag of a formatting element.
##
## Takes one argument, |$tag_name|: the tag name of the end tag being
## processed.  |$token| is read for some error messages and is replaced
## (via |!!!next-token|) before every |return|.
##
## Entries of |$active_formatting_elements| and of
## |$self->{open_elements}| are array references of the form
## |[$node, $tag_name]|; markers in the former are |['#marker', '']|.
##
## The whole procedure is wrapped in the |FET| block and restarted by
## |redo FET| (Step 14) until one of the early |return|s is taken.
my $formatting_end_tag = sub {
  my $tag_name = shift;

  FET: {
    ## Step 1: find the last entry with this tag name that comes after
    ## the last marker of the list of active formatting elements.
    my $formatting_element;
    my $formatting_element_i_in_active;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[1] eq $tag_name) {
        $formatting_element = $active_formatting_elements->[$_];
        $formatting_element_i_in_active = $_;
        last AFE;
      } elsif ($active_formatting_elements->[$_]->[0] eq '#marker') {
        last AFE;
      }
    } # AFE
    unless (defined $formatting_element) {
      !!!parse-error (type => 'unmatched end tag:'.$tag_name);
      ## Ignore the token
      !!!next-token;
      return;
    }
    ## has an element in scope
    my $in_scope = 1;
    my $formatting_element_i_in_open;
    INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if ($node->[0] eq $formatting_element->[0]) {
        if ($in_scope) {
          $formatting_element_i_in_open = $_;
          last INSCOPE;
        } else { # in open elements but not in scope
          !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
          ## Ignore the token
          !!!next-token;
          return;
        }
      } elsif ({
                table => 1, caption => 1, td => 1, th => 1,
                button => 1, marquee => 1, object => 1, html => 1,
               }->{$node->[1]}) {
        ## A scoping element was passed; keep scanning only to find out
        ## whether the formatting element is on the stack at all.
        $in_scope = 0;
      }
    } # INSCOPE
    unless (defined $formatting_element_i_in_open) {
      !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
      ## Remove exactly the entry found in Step 1.  It need not be the
      ## last entry of the list, so a plain |pop| could drop an
      ## unrelated formatting element instead.
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      !!!next-token; ## TODO: ok?
      return;
    }
    if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
      !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
    }

    ## Step 2: scanning from the current node down to the formatting
    ## element, remember the last (i.e. topmost) special or scoping
    ## element that is not a formatting element.
    my $furthest_block;
    my $furthest_block_i_in_open;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if (not $formatting_category->{$node->[1]} and
          #not $phrasing_category->{$node->[1]} and
          ($special_category->{$node->[1]} or
           $scoping_category->{$node->[1]})) {
        $furthest_block = $node;
        $furthest_block_i_in_open = $_;
      } elsif ($node->[0] eq $formatting_element->[0]) {
        last OE;
      }
    } # OE

    ## Step 3: no furthest block -- pop the stack up to and including
    ## the formatting element and drop it from the list.
    unless (defined $furthest_block) { # MUST
      splice @{$self->{open_elements}}, $formatting_element_i_in_open;
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      !!!next-token;
      return;
    }

    ## Step 4
    my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1];

    ## Step 5
    my $furthest_block_parent = $furthest_block->[0]->parent_node;
    if (defined $furthest_block_parent) {
      $furthest_block_parent->remove_child ($furthest_block->[0]);
    }

    ## Step 6
    ## NOTE(review): when the formatting element is the first entry of
    ## the list, index -1 selects the last entry instead -- confirm
    ## whether that case needs separate handling.
    my $bookmark_prev_el
      = $active_formatting_elements->[$formatting_element_i_in_active - 1]
        ->[0];

    ## Step 7
    my $node = $furthest_block;
    my $node_i_in_open = $furthest_block_i_in_open;
    my $last_node = $furthest_block;
    S7: {
      ## Step 1
      $node_i_in_open--;
      $node = $self->{open_elements}->[$node_i_in_open];

      ## Step 2: a node that is not in the list of active formatting
      ## elements is removed from the stack and the loop restarts.
      my $node_i_in_active;
      S7S2: {
        for (reverse 0..$#$active_formatting_elements) {
          if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
            $node_i_in_active = $_;
            last S7S2;
          }
        }
        splice @{$self->{open_elements}}, $node_i_in_open, 1;
        redo S7;
      } # S7S2

      ## Step 3
      last S7 if $node->[0] eq $formatting_element->[0];

      ## Step 4
      if ($last_node->[0] eq $furthest_block->[0]) {
        $bookmark_prev_el = $node->[0];
      }

      ## Step 5: a node that already has children is replaced, in both
      ## the list and the stack, by a shallow clone of itself.
      if ($node->[0]->has_child_nodes ()) {
        my $clone = [$node->[0]->clone_node (0), $node->[1]];
        $active_formatting_elements->[$node_i_in_active] = $clone;
        $self->{open_elements}->[$node_i_in_open] = $clone;
        $node = $clone;
      }

      ## Step 6
      $node->[0]->append_child ($last_node->[0]);

      ## Step 7
      $last_node = $node;

      ## Step 8
      redo S7;
    } # S7

    ## Step 8
    $common_ancestor_node->[0]->append_child ($last_node->[0]);

    ## Step 9
    my $clone = [$formatting_element->[0]->clone_node (0),
                 $formatting_element->[1]];

    ## Step 10: copy the child list first, since moving the children
    ## changes the furthest block's own child list.
    my @cn = @{$furthest_block->[0]->child_nodes};
    $clone->[0]->append_child ($_) for @cn;

    ## Step 11
    $furthest_block->[0]->append_child ($clone->[0]);

    ## Step 12: remove the formatting element from the list and insert
    ## the clone right after the bookmark.  |$i--| compensates for the
    ## index shift when the bookmark was seen before the removal.
    my $i;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
        splice @$active_formatting_elements, $_, 1;
        $i-- and last AFE if defined $i;
      } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
        $i = $_;
      }
    } # AFE
    splice @$active_formatting_elements, $i + 1, 0, $clone;

    ## Step 13: remove the formatting element from the stack and insert
    ## the clone immediately after the furthest block.  The splice
    ## length is 0 so that the clone is inserted; a length of 1 would
    ## overwrite whatever open element follows the furthest block.
    undef $i;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
        splice @{$self->{open_elements}}, $_, 1;
        $i-- and last OE if defined $i;
      } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
        $i = $_;
      }
    } # OE
    splice @{$self->{open_elements}}, $i + 1, 0, $clone;

    ## Step 14
    redo FET;
  } # FET
}; # $formatting_end_tag
1939    
## Insertion callback: append the given node as the last child of the
## current node (the last entry of the stack of open elements).
my $insert_to_current = sub {
  my $child = shift;
  return $self->{open_elements}->[-1]->[0]->append_child ($child);
}; # $insert_to_current
1943    
## Insertion callback with foster parenting.
##
## When the current node is one of |table|, |tbody|, |tfoot|, |thead|
## or |tr|, the given node is inserted relative to the last |table| on
## the stack of open elements instead of into the current node;
## otherwise it is appended to the current node.
my $insert_to_foster = sub {
  my $child = shift;
  my $stack = $self->{open_elements};

  my %table_context = (
    table => 1, tbody => 1, tfoot => 1,
    thead => 1, tr => 1,
  );

  ## Ordinary case: append to the current node.
  unless ($table_context{$stack->[-1]->[1]}) {
    return $stack->[-1]->[0]->append_child ($child);
  }

  # MUST
  my $foster_parent_element;
  my $next_sibling;
  for my $idx (reverse 0..$#$stack) {
    next unless $stack->[$idx]->[1] eq 'table';

    my $table_el = $stack->[$idx]->[0];
    my $parent = $table_el->parent_node;
    if (defined $parent and $parent->node_type == 1) {
      ## The table has an element parent: insert just before the table.
      $foster_parent_element = $parent;
      $next_sibling = $table_el;
    } else {
      ## Otherwise use the stack entry just below the table; with
      ## $next_sibling left undefined the node goes to the end.
      $foster_parent_element = $stack->[$idx - 1]->[0];
    }
    last;
  }

  ## No |table| on the stack: fall back to the first stack entry.
  $foster_parent_element = $stack->[0]->[0]
    unless defined $foster_parent_element;

  return $foster_parent_element->insert_before ($child, $next_sibling);
}; # $insert_to_foster
1974    
1975     my $in_body = sub {
1976     my $insert = shift;
1977     if ($token->{type} eq 'start tag') {
1978     if ($token->{tag_name} eq 'script') {
1979     $script_start_tag->();
1980     return;
1981     } elsif ($token->{tag_name} eq 'style') {
1982     $style_start_tag->();
1983     return;
1984     } elsif ({
1985     base => 1, link => 1, meta => 1,
1986     }->{$token->{tag_name}}) {
1987 wakaba 1.3 !!!parse-error (type => 'in body:'.$token->{tag_name});
1988 wakaba 1.1 ## NOTE: This is an "as if in head" code clone
1989     my $el;
1990     !!!create-element ($el, $token->{tag_name}, $token->{attributes});
1991 wakaba 1.3 if (defined $self->{head_element}) {
1992     $self->{head_element}->append_child ($el);
1993 wakaba 1.1 } else {
1994     $insert->($el);
1995     }
1996    
1997     !!!next-token;
1998     return;
1999     } elsif ($token->{tag_name} eq 'title') {
2000 wakaba 1.3 !!!parse-error (type => 'in body:title');
2001 wakaba 1.1 ## NOTE: There is an "as if in head" code clone
2002     my $title_el;
2003     !!!create-element ($title_el, 'title', $token->{attributes});
2004 wakaba 1.3 (defined $self->{head_element} ? $self->{head_element} : $self->{open_elements}->[-1]->[0])
2005 wakaba 1.1 ->append_child ($title_el);
2006     $self->{content_model_flag} = 'RCDATA';
2007 wakaba 1.13 delete $self->{escape}; # MUST
2008 wakaba 1.1
2009     my $text = '';
2010     !!!next-token;
2011     while ($token->{type} eq 'character') {
2012     $text .= $token->{data};
2013     !!!next-token;
2014     }
2015     if (length $text) {
2016     $title_el->manakai_append_text ($text);
2017     }
2018    
2019     $self->{content_model_flag} = 'PCDATA';
2020    
2021     if ($token->{type} eq 'end tag' and
2022     $token->{tag_name} eq 'title') {
2023     ## Ignore the token
2024     } else {
2025 wakaba 1.3 !!!parse-error (type => 'in RCDATA:#'.$token->{type});
2026 wakaba 1.1 ## ISSUE: And ignore?
2027     }
2028     !!!next-token;
2029     return;
2030     } elsif ($token->{tag_name} eq 'body') {
2031 wakaba 1.3 !!!parse-error (type => 'in body:body');
2032 wakaba 1.1
2033 wakaba 1.3 if (@{$self->{open_elements}} == 1 or
2034     $self->{open_elements}->[1]->[1] ne 'body') {
2035 wakaba 1.1 ## Ignore the token
2036     } else {
2037 wakaba 1.3 my $body_el = $self->{open_elements}->[1]->[0];
2038 wakaba 1.1 for my $attr_name (keys %{$token->{attributes}}) {
2039     unless ($body_el->has_attribute_ns (undef, $attr_name)) {
2040     $body_el->set_attribute_ns
2041     (undef, [undef, $attr_name],
2042     $token->{attributes}->{$attr_name}->{value});
2043     }
2044     }
2045     }
2046     !!!next-token;
2047     return;
2048     } elsif ({
2049     address => 1, blockquote => 1, center => 1, dir => 1,
2050     div => 1, dl => 1, fieldset => 1, listing => 1,
2051     menu => 1, ol => 1, p => 1, ul => 1,
2052     pre => 1,
2053     }->{$token->{tag_name}}) {
2054     ## has a p element in scope
2055 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2056 wakaba 1.1 if ($_->[1] eq 'p') {
2057     !!!back-token;
2058     $token = {type => 'end tag', tag_name => 'p'};
2059     return;
2060     } elsif ({
2061     table => 1, caption => 1, td => 1, th => 1,
2062     button => 1, marquee => 1, object => 1, html => 1,
2063     }->{$_->[1]}) {
2064     last INSCOPE;
2065     }
2066     } # INSCOPE
2067    
2068     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2069     if ($token->{tag_name} eq 'pre') {
2070     !!!next-token;
2071     if ($token->{type} eq 'character') {
2072     $token->{data} =~ s/^\x0A//;
2073     unless (length $token->{data}) {
2074     !!!next-token;
2075     }
2076     }
2077     } else {
2078     !!!next-token;
2079     }
2080     return;
2081     } elsif ($token->{tag_name} eq 'form') {
2082 wakaba 1.3 if (defined $self->{form_element}) {
2083     !!!parse-error (type => 'in form:form');
2084 wakaba 1.1 ## Ignore the token
2085 wakaba 1.7 !!!next-token;
2086     return;
2087 wakaba 1.1 } else {
2088     ## has a p element in scope
2089 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2090 wakaba 1.1 if ($_->[1] eq 'p') {
2091     !!!back-token;
2092     $token = {type => 'end tag', tag_name => 'p'};
2093     return;
2094     } elsif ({
2095     table => 1, caption => 1, td => 1, th => 1,
2096     button => 1, marquee => 1, object => 1, html => 1,
2097     }->{$_->[1]}) {
2098     last INSCOPE;
2099     }
2100     } # INSCOPE
2101    
2102     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2103 wakaba 1.3 $self->{form_element} = $self->{open_elements}->[-1]->[0];
2104 wakaba 1.1 !!!next-token;
2105     return;
2106     }
2107     } elsif ($token->{tag_name} eq 'li') {
2108     ## has a p element in scope
2109 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2110 wakaba 1.1 if ($_->[1] eq 'p') {
2111     !!!back-token;
2112     $token = {type => 'end tag', tag_name => 'p'};
2113     return;
2114     } elsif ({
2115     table => 1, caption => 1, td => 1, th => 1,
2116     button => 1, marquee => 1, object => 1, html => 1,
2117     }->{$_->[1]}) {
2118     last INSCOPE;
2119     }
2120     } # INSCOPE
2121    
2122     ## Step 1
2123     my $i = -1;
2124 wakaba 1.3 my $node = $self->{open_elements}->[$i];
2125 wakaba 1.1 LI: {
2126     ## Step 2
2127     if ($node->[1] eq 'li') {
2128 wakaba 1.8 if ($i != -1) {
2129     !!!parse-error (type => 'end tag missing:'.
2130     $self->{open_elements}->[-1]->[1]);
2131     ## TODO: test
2132     }
2133 wakaba 1.3 splice @{$self->{open_elements}}, $i;
2134 wakaba 1.1 last LI;
2135     }
2136    
2137     ## Step 3
2138     if (not $formatting_category->{$node->[1]} and
2139     #not $phrasing_category->{$node->[1]} and
2140     ($special_category->{$node->[1]} or
2141     $scoping_category->{$node->[1]}) and
2142     $node->[1] ne 'address' and $node->[1] ne 'div') {
2143     last LI;
2144     }
2145    
2146     ## Step 4
2147     $i--;
2148 wakaba 1.3 $node = $self->{open_elements}->[$i];
2149 wakaba 1.1 redo LI;
2150     } # LI
2151    
2152     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2153     !!!next-token;
2154     return;
2155     } elsif ($token->{tag_name} eq 'dd' or $token->{tag_name} eq 'dt') {
2156     ## has a p element in scope
2157 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2158 wakaba 1.1 if ($_->[1] eq 'p') {
2159     !!!back-token;
2160     $token = {type => 'end tag', tag_name => 'p'};
2161     return;
2162     } elsif ({
2163     table => 1, caption => 1, td => 1, th => 1,
2164     button => 1, marquee => 1, object => 1, html => 1,
2165     }->{$_->[1]}) {
2166     last INSCOPE;
2167     }
2168     } # INSCOPE
2169    
2170     ## Step 1
2171     my $i = -1;
2172 wakaba 1.3 my $node = $self->{open_elements}->[$i];
2173 wakaba 1.1 LI: {
2174     ## Step 2
2175     if ($node->[1] eq 'dt' or $node->[1] eq 'dd') {
2176 wakaba 1.8 if ($i != -1) {
2177     !!!parse-error (type => 'end tag missing:'.
2178     $self->{open_elements}->[-1]->[1]);
2179     ## TODO: test
2180     }
2181 wakaba 1.3 splice @{$self->{open_elements}}, $i;
2182 wakaba 1.1 last LI;
2183     }
2184    
2185     ## Step 3
2186     if (not $formatting_category->{$node->[1]} and
2187     #not $phrasing_category->{$node->[1]} and
2188     ($special_category->{$node->[1]} or
2189     $scoping_category->{$node->[1]}) and
2190     $node->[1] ne 'address' and $node->[1] ne 'div') {
2191     last LI;
2192     }
2193    
2194     ## Step 4
2195     $i--;
2196 wakaba 1.3 $node = $self->{open_elements}->[$i];
2197 wakaba 1.1 redo LI;
2198     } # LI
2199    
2200     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2201     !!!next-token;
2202     return;
2203     } elsif ($token->{tag_name} eq 'plaintext') {
2204     ## has a p element in scope
2205 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2206 wakaba 1.1 if ($_->[1] eq 'p') {
2207     !!!back-token;
2208     $token = {type => 'end tag', tag_name => 'p'};
2209     return;
2210     } elsif ({
2211     table => 1, caption => 1, td => 1, th => 1,
2212     button => 1, marquee => 1, object => 1, html => 1,
2213     }->{$_->[1]}) {
2214     last INSCOPE;
2215     }
2216     } # INSCOPE
2217    
2218     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2219    
2220     $self->{content_model_flag} = 'PLAINTEXT';
2221    
2222     !!!next-token;
2223     return;
2224     } elsif ({
2225     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2226     }->{$token->{tag_name}}) {
2227     ## has a p element in scope
2228 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2229     my $node = $self->{open_elements}->[$_];
2230 wakaba 1.1 if ($node->[1] eq 'p') {
2231     !!!back-token;
2232     $token = {type => 'end tag', tag_name => 'p'};
2233     return;
2234     } elsif ({
2235     table => 1, caption => 1, td => 1, th => 1,
2236     button => 1, marquee => 1, object => 1, html => 1,
2237     }->{$node->[1]}) {
2238     last INSCOPE;
2239     }
2240     } # INSCOPE
2241    
2242     ## has an element in scope
2243     my $i;
2244 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2245     my $node = $self->{open_elements}->[$_];
2246 wakaba 1.1 if ({
2247     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2248     }->{$node->[1]}) {
2249     $i = $_;
2250     last INSCOPE;
2251     } elsif ({
2252     table => 1, caption => 1, td => 1, th => 1,
2253     button => 1, marquee => 1, object => 1, html => 1,
2254     }->{$node->[1]}) {
2255     last INSCOPE;
2256     }
2257     } # INSCOPE
2258    
2259     if (defined $i) {
2260 wakaba 1.3 !!!parse-error (type => 'in hn:hn');
2261     splice @{$self->{open_elements}}, $i;
2262 wakaba 1.1 }
2263    
2264     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2265    
2266     !!!next-token;
2267     return;
2268     } elsif ($token->{tag_name} eq 'a') {
2269     AFE: for my $i (reverse 0..$#$active_formatting_elements) {
2270     my $node = $active_formatting_elements->[$i];
2271     if ($node->[1] eq 'a') {
2272 wakaba 1.3 !!!parse-error (type => 'in a:a');
2273 wakaba 1.1
2274     !!!back-token;
2275     $token = {type => 'end tag', tag_name => 'a'};
2276     $formatting_end_tag->($token->{tag_name});
2277    
2278     AFE2: for (reverse 0..$#$active_formatting_elements) {
2279     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
2280     splice @$active_formatting_elements, $_, 1;
2281     last AFE2;
2282     }
2283     } # AFE2
2284 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
2285     if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
2286     splice @{$self->{open_elements}}, $_, 1;
2287 wakaba 1.1 last OE;
2288     }
2289     } # OE
2290     last AFE;
2291     } elsif ($node->[0] eq '#marker') {
2292     last AFE;
2293     }
2294     } # AFE
2295    
2296     $reconstruct_active_formatting_elements->($insert_to_current);
2297    
2298     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2299 wakaba 1.3 push @$active_formatting_elements, $self->{open_elements}->[-1];
2300 wakaba 1.1
2301     !!!next-token;
2302     return;
2303     } elsif ({
2304     b => 1, big => 1, em => 1, font => 1, i => 1,
2305     nobr => 1, s => 1, small => 1, strile => 1,
2306     strong => 1, tt => 1, u => 1,
2307     }->{$token->{tag_name}}) {
2308     $reconstruct_active_formatting_elements->($insert_to_current);
2309    
2310     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2311 wakaba 1.3 push @$active_formatting_elements, $self->{open_elements}->[-1];
2312 wakaba 1.1
2313     !!!next-token;
2314     return;
2315     } elsif ($token->{tag_name} eq 'button') {
2316     ## has a button element in scope
2317 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2318     my $node = $self->{open_elements}->[$_];
2319 wakaba 1.1 if ($node->[1] eq 'button') {
2320 wakaba 1.3 !!!parse-error (type => 'in button:button');
2321 wakaba 1.1 !!!back-token;
2322     $token = {type => 'end tag', tag_name => 'button'};
2323     return;
2324     } elsif ({
2325     table => 1, caption => 1, td => 1, th => 1,
2326     button => 1, marquee => 1, object => 1, html => 1,
2327     }->{$node->[1]}) {
2328     last INSCOPE;
2329     }
2330     } # INSCOPE
2331    
2332     $reconstruct_active_formatting_elements->($insert_to_current);
2333    
2334     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2335     push @$active_formatting_elements, ['#marker', ''];
2336    
2337     !!!next-token;
2338     return;
2339     } elsif ($token->{tag_name} eq 'marquee' or
2340     $token->{tag_name} eq 'object') {
2341     $reconstruct_active_formatting_elements->($insert_to_current);
2342    
2343     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2344     push @$active_formatting_elements, ['#marker', ''];
2345    
2346     !!!next-token;
2347     return;
2348     } elsif ($token->{tag_name} eq 'xmp') {
2349     $reconstruct_active_formatting_elements->($insert_to_current);
2350    
2351     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2352    
2353     $self->{content_model_flag} = 'CDATA';
2354 wakaba 1.13 delete $self->{escape}; # MUST
2355 wakaba 1.1
2356     !!!next-token;
2357     return;
2358     } elsif ($token->{tag_name} eq 'table') {
2359     ## has a p element in scope
2360 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2361 wakaba 1.1 if ($_->[1] eq 'p') {
2362     !!!back-token;
2363     $token = {type => 'end tag', tag_name => 'p'};
2364     return;
2365     } elsif ({
2366     table => 1, caption => 1, td => 1, th => 1,
2367     button => 1, marquee => 1, object => 1, html => 1,
2368     }->{$_->[1]}) {
2369     last INSCOPE;
2370     }
2371     } # INSCOPE
2372    
2373     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2374    
2375 wakaba 1.3 $self->{insertion_mode} = 'in table';
2376 wakaba 1.1
2377     !!!next-token;
2378     return;
2379     } elsif ({
2380     area => 1, basefont => 1, bgsound => 1, br => 1,
2381     embed => 1, img => 1, param => 1, spacer => 1, wbr => 1,
2382     image => 1,
2383     }->{$token->{tag_name}}) {
2384     if ($token->{tag_name} eq 'image') {
2385 wakaba 1.3 !!!parse-error (type => 'image');
2386 wakaba 1.1 $token->{tag_name} = 'img';
2387     }
2388    
2389     $reconstruct_active_formatting_elements->($insert_to_current);
2390    
2391     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2392 wakaba 1.3 pop @{$self->{open_elements}};
2393 wakaba 1.1
2394     !!!next-token;
2395     return;
2396     } elsif ($token->{tag_name} eq 'hr') {
2397     ## has a p element in scope
2398 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2399 wakaba 1.1 if ($_->[1] eq 'p') {
2400     !!!back-token;
2401     $token = {type => 'end tag', tag_name => 'p'};
2402     return;
2403     } elsif ({
2404     table => 1, caption => 1, td => 1, th => 1,
2405     button => 1, marquee => 1, object => 1, html => 1,
2406     }->{$_->[1]}) {
2407     last INSCOPE;
2408     }
2409     } # INSCOPE
2410    
2411     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2412 wakaba 1.3 pop @{$self->{open_elements}};
2413 wakaba 1.1
2414     !!!next-token;
2415     return;
2416     } elsif ($token->{tag_name} eq 'input') {
2417     $reconstruct_active_formatting_elements->($insert_to_current);
2418    
2419     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2420 wakaba 1.3 ## TODO: associate with $self->{form_element} if defined
2421     pop @{$self->{open_elements}};
2422 wakaba 1.1
2423     !!!next-token;
2424     return;
2425     } elsif ($token->{tag_name} eq 'isindex') {
2426 wakaba 1.3 !!!parse-error (type => 'isindex');
2427 wakaba 1.1
2428 wakaba 1.3 if (defined $self->{form_element}) {
2429 wakaba 1.1 ## Ignore the token
2430     !!!next-token;
2431     return;
2432     } else {
2433     my $at = $token->{attributes};
2434     $at->{name} = {name => 'name', value => 'isindex'};
2435     my @tokens = (
2436     {type => 'start tag', tag_name => 'form'},
2437     {type => 'start tag', tag_name => 'hr'},
2438     {type => 'start tag', tag_name => 'p'},
2439     {type => 'start tag', tag_name => 'label'},
2440     {type => 'character',
2441     data => 'This is a searchable index. Insert your search keywords here: '}, # SHOULD
2442     ## TODO: make this configurable
2443     {type => 'start tag', tag_name => 'input', attributes => $at},
2444     #{type => 'character', data => ''}, # SHOULD
2445     {type => 'end tag', tag_name => 'label'},
2446     {type => 'end tag', tag_name => 'p'},
2447     {type => 'start tag', tag_name => 'hr'},
2448     {type => 'end tag', tag_name => 'form'},
2449     );
2450     $token = shift @tokens;
2451     !!!back-token (@tokens);
2452     return;
2453     }
2454     } elsif ({
2455     textarea => 1,
2456 wakaba 1.5 iframe => 1,
2457 wakaba 1.1 noembed => 1,
2458     noframes => 1,
2459     noscript => 0, ## TODO: 1 if scripting is enabled
2460     }->{$token->{tag_name}}) {
2461     my $tag_name = $token->{tag_name};
2462     my $el;
2463     !!!create-element ($el, $token->{tag_name}, $token->{attributes});
2464    
2465     if ($token->{tag_name} eq 'textarea') {
2466 wakaba 1.3 ## TODO: $self->{form_element} if defined
2467 wakaba 1.1 $self->{content_model_flag} = 'RCDATA';
2468     } else {
2469     $self->{content_model_flag} = 'CDATA';
2470     }
2471 wakaba 1.13 delete $self->{escape}; # MUST
2472 wakaba 1.1
2473     $insert->($el);
2474    
2475     my $text = '';
2476 wakaba 1.9 if ($token->{tag_name} eq 'textarea') {
2477     !!!next-token;
2478     if ($token->{type} eq 'character') {
2479     $token->{data} =~ s/^\x0A//;
2480     unless (length $token->{data}) {
2481     !!!next-token;
2482     }
2483     }
2484     } else {
2485     !!!next-token;
2486     }
2487 wakaba 1.1 while ($token->{type} eq 'character') {
2488     $text .= $token->{data};
2489     !!!next-token;
2490     }
2491     if (length $text) {
2492     $el->manakai_append_text ($text);
2493     }
2494    
2495     $self->{content_model_flag} = 'PCDATA';
2496    
2497     if ($token->{type} eq 'end tag' and
2498     $token->{tag_name} eq $tag_name) {
2499     ## Ignore the token
2500     } else {
2501 wakaba 1.10 if ($token->{tag_name} eq 'textarea') {
2502     !!!parse-error (type => 'in RCDATA:#'.$token->{type});
2503     } else {
2504 wakaba 1.3 !!!parse-error (type => 'in CDATA:#'.$token->{type});
2505     }
2506 wakaba 1.1 ## ISSUE: And ignore?
2507     }
2508     !!!next-token;
2509     return;
2510     } elsif ($token->{tag_name} eq 'select') {
2511     $reconstruct_active_formatting_elements->($insert_to_current);
2512    
2513     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2514    
2515 wakaba 1.3 $self->{insertion_mode} = 'in select';
2516 wakaba 1.1 !!!next-token;
2517     return;
2518     } elsif ({
2519     caption => 1, col => 1, colgroup => 1, frame => 1,
2520     frameset => 1, head => 1, option => 1, optgroup => 1,
2521     tbody => 1, td => 1, tfoot => 1, th => 1,
2522     thead => 1, tr => 1,
2523     }->{$token->{tag_name}}) {
2524 wakaba 1.3 !!!parse-error (type => 'in body:'.$token->{tag_name});
2525 wakaba 1.1 ## Ignore the token
2526     !!!next-token;
2527     return;
2528    
2529     ## ISSUE: An issue on HTML5 new elements in the spec.
2530     } else {
2531     $reconstruct_active_formatting_elements->($insert_to_current);
2532    
2533     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2534    
2535     !!!next-token;
2536     return;
2537     }
2538     } elsif ($token->{type} eq 'end tag') {
2539     if ($token->{tag_name} eq 'body') {
2540 wakaba 1.3 if (@{$self->{open_elements}} > 1 and $self->{open_elements}->[1]->[1] eq 'body') {
2541 wakaba 1.1 ## ISSUE: There is an issue in the spec.
2542 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'body') {
2543     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2544 wakaba 1.1 }
2545 wakaba 1.3 $self->{insertion_mode} = 'after body';
2546 wakaba 1.1 !!!next-token;
2547     return;
2548     } else {
2549 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
2550 wakaba 1.1 ## Ignore the token
2551     !!!next-token;
2552     return;
2553     }
2554     } elsif ($token->{tag_name} eq 'html') { ## </html>: handled like </body>, but the token is reprocessed instead of consumed
2555 wakaba 1.3 if (@{$self->{open_elements}} > 1 and $self->{open_elements}->[1]->[1] eq 'body') {
2556 wakaba 1.1 ## ISSUE: There is an issue in the spec.
2557 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'body') { ## current node is something other than body: report that node, as the </body> branch does
2558     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2559 wakaba 1.1 }
2560 wakaba 1.3 $self->{insertion_mode} = 'after body';
2561 wakaba 1.1 ## reprocess (no next-token here: the caller sees the same </html> token again in 'after body')
2562     return;
2563     } else {
2564 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
2565 wakaba 1.1 ## Ignore the token
2566     !!!next-token;
2567     return;
2568     }
2569     } elsif ({
2570     address => 1, blockquote => 1, center => 1, dir => 1,
2571     div => 1, dl => 1, fieldset => 1, listing => 1,
2572     menu => 1, ol => 1, pre => 1, ul => 1,
2573     p => 1,
2574     dd => 1, dt => 1, li => 1,
2575     button => 1, marquee => 1, object => 1,
2576     }->{$token->{tag_name}}) {
2577     ## has an element in scope
2578     my $i;
2579 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2580     my $node = $self->{open_elements}->[$_];
2581 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
2582     ## generate implied end tags
2583     if ({
2584     dd => ($token->{tag_name} ne 'dd'),
2585     dt => ($token->{tag_name} ne 'dt'),
2586     li => ($token->{tag_name} ne 'li'),
2587     p => ($token->{tag_name} ne 'p'),
2588     td => 1, th => 1, tr => 1,
2589 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
2590 wakaba 1.1 !!!back-token;
2591     $token = {type => 'end tag',
2592 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
2593 wakaba 1.1 return;
2594     }
2595     $i = $_;
2596     last INSCOPE unless $token->{tag_name} eq 'p';
2597     } elsif ({
2598     table => 1, caption => 1, td => 1, th => 1,
2599     button => 1, marquee => 1, object => 1, html => 1,
2600     }->{$node->[1]}) {
2601     last INSCOPE;
2602     }
2603     } # INSCOPE
2604    
2605 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
2606     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2607 wakaba 1.1 }
2608    
2609 wakaba 1.3 splice @{$self->{open_elements}}, $i if defined $i;
2610 wakaba 1.1 $clear_up_to_marker->()
2611     if {
2612     button => 1, marquee => 1, object => 1,
2613     }->{$token->{tag_name}};
2614     !!!next-token;
2615     return;
2616 wakaba 1.12 } elsif ($token->{tag_name} eq 'form') {
2617     ## has an element in scope
2618     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2619     my $node = $self->{open_elements}->[$_];
2620     if ($node->[1] eq $token->{tag_name}) {
2621     ## generate implied end tags
2622     if ({
2623     dd => 1, dt => 1, li => 1, p => 1,
2624     td => 1, th => 1, tr => 1,
2625     }->{$self->{open_elements}->[-1]->[1]}) {
2626     !!!back-token;
2627     $token = {type => 'end tag',
2628     tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
2629     return;
2630     }
2631     last INSCOPE;
2632     } elsif ({
2633     table => 1, caption => 1, td => 1, th => 1,
2634     button => 1, marquee => 1, object => 1, html => 1,
2635     }->{$node->[1]}) {
2636     last INSCOPE;
2637     }
2638     } # INSCOPE
2639    
2640     if ($self->{open_elements}->[-1]->[1] eq $token->{tag_name}) {
2641     pop @{$self->{open_elements}};
2642     } else {
2643     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2644     }
2645    
2646     undef $self->{form_element};
2647     !!!next-token;
2648     return;
2649 wakaba 1.1 } elsif ({
2650     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2651     }->{$token->{tag_name}}) {
2652     ## has an element in scope
2653     my $i;
2654 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2655     my $node = $self->{open_elements}->[$_];
2656 wakaba 1.1 if ({
2657     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2658     }->{$node->[1]}) {
2659     ## generate implied end tags
2660     if ({
2661     dd => 1, dt => 1, li => 1, p => 1,
2662     td => 1, th => 1, tr => 1,
2663 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
2664 wakaba 1.1 !!!back-token;
2665     $token = {type => 'end tag',
2666 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
2667 wakaba 1.1 return;
2668     }
2669     $i = $_;
2670     last INSCOPE;
2671     } elsif ({
2672     table => 1, caption => 1, td => 1, th => 1,
2673     button => 1, marquee => 1, object => 1, html => 1,
2674     }->{$node->[1]}) {
2675     last INSCOPE;
2676     }
2677     } # INSCOPE
2678    
2679 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
2680     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2681 wakaba 1.1 }
2682    
2683 wakaba 1.3 splice @{$self->{open_elements}}, $i if defined $i;
2684 wakaba 1.1 !!!next-token;
2685     return;
2686     } elsif ({ ## end tags of formatting elements: delegated to $formatting_end_tag
2687     a => 1,
2688     b => 1, big => 1, em => 1, font => 1, i => 1,
2689     nobr => 1, s => 1, small => 1, strike => 1,
2690     strong => 1, tt => 1, u => 1,
2691     }->{$token->{tag_name}}) {
2692     $formatting_end_tag->($token->{tag_name});
2693 wakaba 1.8 ## TODO: <http://html5.org/tools/web-apps-tracker?from=883&to=884>
2694 wakaba 1.1 return;
2695     } elsif ({
2696     caption => 1, col => 1, colgroup => 1, frame => 1,
2697     frameset => 1, head => 1, option => 1, optgroup => 1,
2698     tbody => 1, td => 1, tfoot => 1, th => 1,
2699     thead => 1, tr => 1,
2700     area => 1, basefont => 1, bgsound => 1, br => 1,
2701     embed => 1, hr => 1, iframe => 1, image => 1,
2702 wakaba 1.5 img => 1, input => 1, isindex => 1, noembed => 1,
2703 wakaba 1.1 noframes => 1, param => 1, select => 1, spacer => 1,
2704     table => 1, textarea => 1, wbr => 1,
2705     noscript => 0, ## TODO: if scripting is enabled
2706     }->{$token->{tag_name}}) {
2707 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
2708 wakaba 1.1 ## Ignore the token
2709     !!!next-token;
2710     return;
2711    
2712     ## ISSUE: Issue on HTML5 new elements in spec
2713    
2714     } else {
2715     ## Step 1
2716     my $node_i = -1;
2717 wakaba 1.3 my $node = $self->{open_elements}->[$node_i];
2718 wakaba 1.1
2719     ## Step 2
2720     S2: {
2721     if ($node->[1] eq $token->{tag_name}) {
2722     ## Step 1
2723     ## generate implied end tags
2724     if ({
2725     dd => 1, dt => 1, li => 1, p => 1,
2726     td => 1, th => 1, tr => 1,
2727 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
2728 wakaba 1.1 !!!back-token;
2729     $token = {type => 'end tag',
2730 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
2731 wakaba 1.1 return;
2732     }
2733    
2734     ## Step 2
2735 wakaba 1.3 if ($token->{tag_name} ne $self->{open_elements}->[-1]->[1]) {
2736     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2737 wakaba 1.1 }
2738    
2739     ## Step 3
2740 wakaba 1.3 splice @{$self->{open_elements}}, $node_i;
2741    
2742     !!!next-token;
2743 wakaba 1.1 last S2;
2744     } else {
2745     ## Step 3
2746     if (not $formatting_category->{$node->[1]} and
2747     #not $phrasing_category->{$node->[1]} and
2748     ($special_category->{$node->[1]} or
2749     $scoping_category->{$node->[1]})) {
2750 wakaba 1.3 !!!parse-error (type => 'not closed:'.$node->[1]);
2751 wakaba 1.1 ## Ignore the token
2752     !!!next-token;
2753     last S2;
2754     }
2755     }
2756    
2757     ## Step 4
2758     $node_i--;
2759 wakaba 1.3 $node = $self->{open_elements}->[$node_i];
2760 wakaba 1.1
2761     ## Step 5;
2762     redo S2;
2763     } # S2
2764 wakaba 1.3 return;
2765 wakaba 1.1 }
2766     }
2767     }; # $in_body
2768    
2769     B: {
2770 wakaba 1.3 if ($phase eq 'main') {
2771 wakaba 1.1 if ($token->{type} eq 'DOCTYPE') {
2772 wakaba 1.3 !!!parse-error (type => 'in html:#DOCTYPE');
2773 wakaba 1.1 ## Ignore the token
2774     ## Stay in the phase
2775     !!!next-token;
2776     redo B;
2777     } elsif ($token->{type} eq 'start tag' and
2778     $token->{tag_name} eq 'html') {
2779     ## TODO: unless it is the first start tag token, parse-error
2780 wakaba 1.3 my $top_el = $self->{open_elements}->[0]->[0];
2781 wakaba 1.1 for my $attr_name (keys %{$token->{attributes}}) {
2782     unless ($top_el->has_attribute_ns (undef, $attr_name)) {
2783     $top_el->set_attribute_ns
2784     (undef, [undef, $attr_name],
2785     $token->{attributes}->{$attr_name}->{value});
2786     }
2787     }
2788     !!!next-token;
2789     redo B;
2790     } elsif ($token->{type} eq 'end-of-file') {
2791     ## Generate implied end tags
2792     if ({
2793     dd => 1, dt => 1, li => 1, p => 1, td => 1, th => 1, tr => 1,
2794 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
2795 wakaba 1.1 !!!back-token;
2796 wakaba 1.3 $token = {type => 'end tag', tag_name => $self->{open_elements}->[-1]->[1]};
2797 wakaba 1.1 redo B;
2798     }
2799    
2800 wakaba 1.3 if (@{$self->{open_elements}} > 2 or
2801     (@{$self->{open_elements}} == 2 and $self->{open_elements}->[1]->[1] ne 'body')) {
2802     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2803     } elsif (defined $self->{inner_html_node} and
2804     @{$self->{open_elements}} > 1 and
2805     $self->{open_elements}->[1]->[1] ne 'body') {
2806     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2807 wakaba 1.1 }
2808    
2809     ## Stop parsing
2810     last B;
2811    
2812     ## ISSUE: There is an issue in the spec.
2813     } else {
2814 wakaba 1.3 if ($self->{insertion_mode} eq 'before head') {
2815 wakaba 1.1 if ($token->{type} eq 'character') {
2816     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
2817 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
2818 wakaba 1.1 unless (length $token->{data}) {
2819     !!!next-token;
2820     redo B;
2821     }
2822     }
2823     ## As if <head>
2824 wakaba 1.3 !!!create-element ($self->{head_element}, 'head');
2825     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2826     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
2827     $self->{insertion_mode} = 'in head';
2828 wakaba 1.1 ## reprocess
2829     redo B;
2830     } elsif ($token->{type} eq 'comment') {
2831     my $comment = $self->{document}->create_comment ($token->{data});
2832 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
2833 wakaba 1.1 !!!next-token;
2834     redo B;
2835     } elsif ($token->{type} eq 'start tag') {
2836     my $attr = $token->{tag_name} eq 'head' ? $token->{attributes} : {};
2837 wakaba 1.3 !!!create-element ($self->{head_element}, 'head', $attr);
2838     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2839     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
2840     $self->{insertion_mode} = 'in head';
2841 wakaba 1.1 if ($token->{tag_name} eq 'head') {
2842     !!!next-token;
2843     #} elsif ({
2844     # base => 1, link => 1, meta => 1,
2845     # script => 1, style => 1, title => 1,
2846     # }->{$token->{tag_name}}) {
2847     # ## reprocess
2848     } else {
2849     ## reprocess
2850     }
2851     redo B;
2852     } elsif ($token->{type} eq 'end tag') {
2853     if ($token->{tag_name} eq 'html') {
2854     ## As if <head>
2855 wakaba 1.3 !!!create-element ($self->{head_element}, 'head');
2856     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2857     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
2858     $self->{insertion_mode} = 'in head';
2859 wakaba 1.1 ## reprocess
2860     redo B;
2861     } else {
2862 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
2863 wakaba 1.1 ## Ignore the token
2864     !!!next-token;
2865     redo B;
2866     }
2867     } else {
2868     die "$0: $token->{type}: Unknown type";
2869     }
2870 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in head') {
2871 wakaba 1.1 if ($token->{type} eq 'character') {
2872     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
2873 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
2874 wakaba 1.1 unless (length $token->{data}) {
2875     !!!next-token;
2876     redo B;
2877     }
2878     }
2879    
2880     #
2881     } elsif ($token->{type} eq 'comment') {
2882     my $comment = $self->{document}->create_comment ($token->{data});
2883 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
2884 wakaba 1.1 !!!next-token;
2885     redo B;
2886     } elsif ($token->{type} eq 'start tag') {
2887     if ($token->{tag_name} eq 'title') {
2888     ## NOTE: There is an "as if in head" code clone
2889     my $title_el;
2890     !!!create-element ($title_el, 'title', $token->{attributes});
2891 wakaba 1.3 (defined $self->{head_element} ? $self->{head_element} : $self->{open_elements}->[-1]->[0])
2892 wakaba 1.1 ->append_child ($title_el);
2893     $self->{content_model_flag} = 'RCDATA';
2894 wakaba 1.13 delete $self->{escape}; # MUST
2895 wakaba 1.1
2896     my $text = '';
2897     !!!next-token;
2898     while ($token->{type} eq 'character') {
2899     $text .= $token->{data};
2900     !!!next-token;
2901     }
2902     if (length $text) {
2903     $title_el->manakai_append_text ($text);
2904     }
2905    
2906     $self->{content_model_flag} = 'PCDATA';
2907    
2908     if ($token->{type} eq 'end tag' and
2909     $token->{tag_name} eq 'title') {
2910     ## Ignore the token
2911     } else {
2912 wakaba 1.3 !!!parse-error (type => 'in RCDATA:#'.$token->{type});
2913 wakaba 1.1 ## ISSUE: And ignore?
2914     }
2915     !!!next-token;
2916     redo B;
2917     } elsif ($token->{tag_name} eq 'style') {
2918     $style_start_tag->();
2919     redo B;
2920     } elsif ($token->{tag_name} eq 'script') {
2921     $script_start_tag->();
2922     redo B;
2923     } elsif ({base => 1, link => 1, meta => 1}->{$token->{tag_name}}) {
2924     ## NOTE: There are "as if in head" code clones
2925     my $el;
2926     !!!create-element ($el, $token->{tag_name}, $token->{attributes});
2927 wakaba 1.3 (defined $self->{head_element} ? $self->{head_element} : $self->{open_elements}->[-1]->[0])
2928 wakaba 1.1 ->append_child ($el);
2929    
2930     !!!next-token;
2931     redo B;
2932     } elsif ($token->{tag_name} eq 'head') {
2933 wakaba 1.3 !!!parse-error (type => 'in head:head');
2934 wakaba 1.1 ## Ignore the token
2935     !!!next-token;
2936     redo B;
2937     } else {
2938     #
2939     }
2940     } elsif ($token->{type} eq 'end tag') {
2941     if ($token->{tag_name} eq 'head') {
2942 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'head') {
2943     pop @{$self->{open_elements}};
2944 wakaba 1.1 } else {
2945 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:head');
2946 wakaba 1.1 }
2947 wakaba 1.3 $self->{insertion_mode} = 'after head';
2948 wakaba 1.1 !!!next-token;
2949     redo B;
2950     } elsif ($token->{tag_name} eq 'html') {
2951     #
2952     } else {
2953 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
2954 wakaba 1.1 ## Ignore the token
2955     !!!next-token;
2956     redo B;
2957     }
2958     } else {
2959     #
2960     }
2961    
2962 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'head') {
2963 wakaba 1.1 ## As if </head>
2964 wakaba 1.3 pop @{$self->{open_elements}};
2965 wakaba 1.1 }
2966 wakaba 1.3 $self->{insertion_mode} = 'after head';
2967 wakaba 1.1 ## reprocess
2968     redo B;
2969    
2970     ## ISSUE: An issue in the spec.
2971 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after head') {
2972 wakaba 1.1 if ($token->{type} eq 'character') {
2973     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
2974 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
2975 wakaba 1.1 unless (length $token->{data}) {
2976     !!!next-token;
2977     redo B;
2978     }
2979     }
2980    
2981     #
2982     } elsif ($token->{type} eq 'comment') {
2983     my $comment = $self->{document}->create_comment ($token->{data});
2984 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
2985 wakaba 1.1 !!!next-token;
2986     redo B;
2987     } elsif ($token->{type} eq 'start tag') {
2988     if ($token->{tag_name} eq 'body') {
2989     !!!insert-element ('body', $token->{attributes});
2990 wakaba 1.3 $self->{insertion_mode} = 'in body';
2991 wakaba 1.1 !!!next-token;
2992     redo B;
2993     } elsif ($token->{tag_name} eq 'frameset') {
2994     !!!insert-element ('frameset', $token->{attributes});
2995 wakaba 1.3 $self->{insertion_mode} = 'in frameset';
2996 wakaba 1.1 !!!next-token;
2997     redo B;
2998     } elsif ({
2999     base => 1, link => 1, meta => 1,
3000 wakaba 1.3 script => 1, style => 1, title => 1,
3001 wakaba 1.1 }->{$token->{tag_name}}) {
3002 wakaba 1.3 !!!parse-error (type => 'after head:'.$token->{tag_name});
3003     $self->{insertion_mode} = 'in head';
3004 wakaba 1.1 ## reprocess
3005     redo B;
3006     } else {
3007     #
3008     }
3009     } else {
3010     #
3011     }
3012    
3013     ## As if <body>
3014     !!!insert-element ('body');
3015 wakaba 1.3 $self->{insertion_mode} = 'in body';
3016 wakaba 1.1 ## reprocess
3017     redo B;
3018 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in body') {
3019 wakaba 1.1 if ($token->{type} eq 'character') {
3020     ## NOTE: There is a code clone of "character in body".
3021     $reconstruct_active_formatting_elements->($insert_to_current);
3022    
3023 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3024 wakaba 1.1
3025     !!!next-token;
3026     redo B;
3027     } elsif ($token->{type} eq 'comment') {
3028     ## NOTE: There is a code clone of "comment in body".
3029     my $comment = $self->{document}->create_comment ($token->{data});
3030 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3031 wakaba 1.1 !!!next-token;
3032     redo B;
3033     } else {
3034     $in_body->($insert_to_current);
3035     redo B;
3036     }
3037 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in table') {
3038 wakaba 1.1 if ($token->{type} eq 'character') {
3039     ## NOTE: There are "character in table" code clones.
3040     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3041 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3042 wakaba 1.1
3043     unless (length $token->{data}) {
3044     !!!next-token;
3045     redo B;
3046     }
3047     }
3048    
3049 wakaba 1.3 !!!parse-error (type => 'in table:#character');
3050    
3051 wakaba 1.1 ## As if in body, but insert into foster parent element
3052     ## ISSUE: Spec says that "whenever a node would be inserted
3053     ## into the current node" while characters might not
3054     ## result in a new Text node.
3055     $reconstruct_active_formatting_elements->($insert_to_foster);
3056    
3057     if ({
3058     table => 1, tbody => 1, tfoot => 1,
3059     thead => 1, tr => 1,
3060 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3061 wakaba 1.1 # MUST
3062     my $foster_parent_element;
3063     my $next_sibling;
3064     my $prev_sibling;
3065 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3066     if ($self->{open_elements}->[$_]->[1] eq 'table') {
3067     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
3068 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
3069     $foster_parent_element = $parent;
3070 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
3071 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
3072     } else {
3073 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
3074 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
3075     }
3076     last OE;
3077     }
3078     } # OE
3079 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
3080 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
3081     unless defined $foster_parent_element;
3082     if (defined $prev_sibling and
3083     $prev_sibling->node_type == 3) {
3084     $prev_sibling->manakai_append_text ($token->{data});
3085     } else {
3086     $foster_parent_element->insert_before
3087     ($self->{document}->create_text_node ($token->{data}),
3088     $next_sibling);
3089     }
3090     } else {
3091 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3092 wakaba 1.1 }
3093    
3094     !!!next-token;
3095     redo B;
3096     } elsif ($token->{type} eq 'comment') {
3097     my $comment = $self->{document}->create_comment ($token->{data});
3098 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3099 wakaba 1.1 !!!next-token;
3100     redo B;
3101     } elsif ($token->{type} eq 'start tag') {
3102     if ({
3103     caption => 1,
3104     colgroup => 1,
3105     tbody => 1, tfoot => 1, thead => 1,
3106     }->{$token->{tag_name}}) {
3107     ## Clear back to table context
3108 wakaba 1.3 while ($self->{open_elements}->[-1]->[1] ne 'table' and
3109     $self->{open_elements}->[-1]->[1] ne 'html') {
3110     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3111     pop @{$self->{open_elements}};
3112 wakaba 1.1 }
3113    
3114     push @$active_formatting_elements, ['#marker', '']
3115     if $token->{tag_name} eq 'caption';
3116    
3117     !!!insert-element ($token->{tag_name}, $token->{attributes});
3118 wakaba 1.3 $self->{insertion_mode} = {
3119 wakaba 1.1 caption => 'in caption',
3120     colgroup => 'in column group',
3121     tbody => 'in table body',
3122     tfoot => 'in table body',
3123     thead => 'in table body',
3124     }->{$token->{tag_name}};
3125     !!!next-token;
3126     redo B;
3127     } elsif ({
3128     col => 1,
3129     td => 1, th => 1, tr => 1,
3130     }->{$token->{tag_name}}) {
3131     ## Clear back to table context
3132 wakaba 1.3 while ($self->{open_elements}->[-1]->[1] ne 'table' and
3133     $self->{open_elements}->[-1]->[1] ne 'html') {
3134     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3135     pop @{$self->{open_elements}};
3136 wakaba 1.1 }
3137    
3138     !!!insert-element ($token->{tag_name} eq 'col' ? 'colgroup' : 'tbody');
3139 wakaba 1.3 $self->{insertion_mode} = $token->{tag_name} eq 'col'
3140 wakaba 1.1 ? 'in column group' : 'in table body';
3141     ## reprocess
3142     redo B;
3143     } elsif ($token->{tag_name} eq 'table') {
3144     ## NOTE: There are code clones for this "table in table"
3145 wakaba 1.3 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3146 wakaba 1.1
3147     ## As if </table>
3148     ## have a table element in table scope
3149     my $i;
3150 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3151     my $node = $self->{open_elements}->[$_];
3152 wakaba 1.1 if ($node->[1] eq 'table') {
3153     $i = $_;
3154     last INSCOPE;
3155     } elsif ({
3156     table => 1, html => 1,
3157     }->{$node->[1]}) {
3158     last INSCOPE;
3159     }
3160     } # INSCOPE
3161     unless (defined $i) {
3162 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:table');
3163 wakaba 1.1 ## Ignore tokens </table><table>
3164     !!!next-token;
3165     redo B;
3166     }
3167    
3168     ## generate implied end tags
3169     if ({
3170     dd => 1, dt => 1, li => 1, p => 1,
3171     td => 1, th => 1, tr => 1,
3172 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3173 wakaba 1.1 !!!back-token; # <table>
3174     $token = {type => 'end tag', tag_name => 'table'};
3175     !!!back-token;
3176     $token = {type => 'end tag',
3177 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3178 wakaba 1.1 redo B;
3179     }
3180    
3181 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
3182     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3183 wakaba 1.1 }
3184    
3185 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3186 wakaba 1.1
3187 wakaba 1.3 $self->_reset_insertion_mode;
3188 wakaba 1.1
3189     ## reprocess
3190     redo B;
3191     } else {
3192     #
3193     }
3194     } elsif ($token->{type} eq 'end tag') {
3195     if ($token->{tag_name} eq 'table') {
3196     ## have a table element in table scope
3197     my $i;
3198 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3199     my $node = $self->{open_elements}->[$_];
3200 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
3201     $i = $_;
3202     last INSCOPE;
3203     } elsif ({
3204     table => 1, html => 1,
3205     }->{$node->[1]}) {
3206     last INSCOPE;
3207     }
3208     } # INSCOPE
3209     unless (defined $i) {
3210 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3211 wakaba 1.1 ## Ignore the token
3212     !!!next-token;
3213     redo B;
3214     }
3215    
3216     ## generate implied end tags
3217     if ({
3218     dd => 1, dt => 1, li => 1, p => 1,
3219     td => 1, th => 1, tr => 1,
3220 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3221 wakaba 1.1 !!!back-token;
3222     $token = {type => 'end tag',
3223 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3224 wakaba 1.1 redo B;
3225     }
3226    
3227 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
3228     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3229 wakaba 1.1 }
3230    
3231 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3232 wakaba 1.1
3233 wakaba 1.3 $self->_reset_insertion_mode;
3234 wakaba 1.1
3235     !!!next-token;
3236     redo B;
3237     } elsif ({
3238     body => 1, caption => 1, col => 1, colgroup => 1,
3239     html => 1, tbody => 1, td => 1, tfoot => 1, th => 1,
3240     thead => 1, tr => 1,
3241     }->{$token->{tag_name}}) {
3242 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3243 wakaba 1.1 ## Ignore the token
3244     !!!next-token;
3245     redo B;
3246     } else {
3247     #
3248     }
3249     } else {
3250     #
3251     }
3252    
3253 wakaba 1.3 !!!parse-error (type => 'in table:'.$token->{tag_name});
3254 wakaba 1.1 $in_body->($insert_to_foster);
3255     redo B;
3256 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in caption') {
3257 wakaba 1.1 if ($token->{type} eq 'character') {
3258     ## NOTE: This is a code clone of "character in body".
3259     $reconstruct_active_formatting_elements->($insert_to_current);
3260    
3261 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3262 wakaba 1.1
3263     !!!next-token;
3264     redo B;
3265     } elsif ($token->{type} eq 'comment') {
3266     ## NOTE: This is a code clone of "comment in body".
3267     my $comment = $self->{document}->create_comment ($token->{data});
3268 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3269 wakaba 1.1 !!!next-token;
3270     redo B;
3271     } elsif ($token->{type} eq 'start tag') {
3272     if ({
3273     caption => 1, col => 1, colgroup => 1, tbody => 1,
3274     td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
3275     }->{$token->{tag_name}}) {
3276 wakaba 1.3 !!!parse-error (type => 'not closed:caption');
3277 wakaba 1.1
3278     ## As if </caption>
3279     ## have a table element in table scope
3280     my $i;
3281 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3282     my $node = $self->{open_elements}->[$_];
3283 wakaba 1.1 if ($node->[1] eq 'caption') {
3284     $i = $_;
3285     last INSCOPE;
3286     } elsif ({
3287     table => 1, html => 1,
3288     }->{$node->[1]}) {
3289     last INSCOPE;
3290     }
3291     } # INSCOPE
3292     unless (defined $i) {
3293 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:caption');
3294 wakaba 1.1 ## Ignore the token
3295     !!!next-token;
3296     redo B;
3297     }
3298    
3299     ## generate implied end tags
3300     if ({
3301     dd => 1, dt => 1, li => 1, p => 1,
3302     td => 1, th => 1, tr => 1,
3303 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3304 wakaba 1.1 !!!back-token; # <?>
3305     $token = {type => 'end tag', tag_name => 'caption'};
3306     !!!back-token;
3307     $token = {type => 'end tag',
3308 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3309 wakaba 1.1 redo B;
3310     }
3311    
3312 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
3313     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3314 wakaba 1.1 }
3315    
3316 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3317 wakaba 1.1
3318     $clear_up_to_marker->();
3319    
3320 wakaba 1.3 $self->{insertion_mode} = 'in table';
3321 wakaba 1.1
3322     ## reprocess
3323     redo B;
3324     } else {
3325     #
3326     }
3327     } elsif ($token->{type} eq 'end tag') {
3328     if ($token->{tag_name} eq 'caption') {
3329     ## have a table element in table scope
3330     my $i;
3331 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3332     my $node = $self->{open_elements}->[$_];
3333 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
3334     $i = $_;
3335     last INSCOPE;
3336     } elsif ({
3337     table => 1, html => 1,
3338     }->{$node->[1]}) {
3339     last INSCOPE;
3340     }
3341     } # INSCOPE
3342     unless (defined $i) {
3343 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3344 wakaba 1.1 ## Ignore the token
3345     !!!next-token;
3346     redo B;
3347     }
3348    
3349     ## generate implied end tags
3350     if ({
3351     dd => 1, dt => 1, li => 1, p => 1,
3352     td => 1, th => 1, tr => 1,
3353 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3354 wakaba 1.1 !!!back-token;
3355     $token = {type => 'end tag',
3356 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3357 wakaba 1.1 redo B;
3358     }
3359    
3360 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
3361     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3362 wakaba 1.1 }
3363    
3364 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3365 wakaba 1.1
3366     $clear_up_to_marker->();
3367    
3368 wakaba 1.3 $self->{insertion_mode} = 'in table';
3369 wakaba 1.1
3370     !!!next-token;
3371     redo B;
3372     } elsif ($token->{tag_name} eq 'table') {
3373 wakaba 1.3 !!!parse-error (type => 'not closed:caption');
3374 wakaba 1.1
3375     ## As if </caption>
3376     ## have a table element in table scope
3377     my $i;
3378 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3379     my $node = $self->{open_elements}->[$_];
3380 wakaba 1.1 if ($node->[1] eq 'caption') {
3381     $i = $_;
3382     last INSCOPE;
3383     } elsif ({
3384     table => 1, html => 1,
3385     }->{$node->[1]}) {
3386     last INSCOPE;
3387     }
3388     } # INSCOPE
3389     unless (defined $i) {
3390 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:caption');
3391 wakaba 1.1 ## Ignore the token
3392     !!!next-token;
3393     redo B;
3394     }
3395    
3396     ## generate implied end tags
3397     if ({
3398     dd => 1, dt => 1, li => 1, p => 1,
3399     td => 1, th => 1, tr => 1,
3400 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3401 wakaba 1.1 !!!back-token; # </table>
3402     $token = {type => 'end tag', tag_name => 'caption'};
3403     !!!back-token;
3404     $token = {type => 'end tag',
3405 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3406 wakaba 1.1 redo B;
3407     }
3408    
3409 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
3410     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3411 wakaba 1.1 }
3412    
3413 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3414 wakaba 1.1
3415     $clear_up_to_marker->();
3416    
3417 wakaba 1.3 $self->{insertion_mode} = 'in table';
3418 wakaba 1.1
3419     ## reprocess
3420     redo B;
3421     } elsif ({ ## end tags that cannot be matched while in a caption: report and drop them
3422     body => 1, col => 1, colgroup => 1,
3423     html => 1, tbody => 1, td => 1, tfoot => 1,
3424     th => 1, thead => 1, tr => 1,
3425     }->{$token->{tag_name}}) {
3426 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3427 wakaba 1.1 ## Ignore the token: it must be consumed before |redo B|, otherwise the same token is reprocessed in this mode forever
         !!!next-token;
3428     redo B;
3429     } else {
3430     #
3431     }
3432     } else {
3433     #
3434     }
3435    
3436     $in_body->($insert_to_current);
3437     redo B;
3438 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in column group') {
3439 wakaba 1.1 if ($token->{type} eq 'character') {
3440     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3441 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3442 wakaba 1.1 unless (length $token->{data}) {
3443     !!!next-token;
3444     redo B;
3445     }
3446     }
3447    
3448     #
3449     } elsif ($token->{type} eq 'comment') {
3450     my $comment = $self->{document}->create_comment ($token->{data});
3451 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3452 wakaba 1.1 !!!next-token;
3453     redo B;
3454     } elsif ($token->{type} eq 'start tag') {
3455     if ($token->{tag_name} eq 'col') {
3456     !!!insert-element ($token->{tag_name}, $token->{attributes});
3457 wakaba 1.3 pop @{$self->{open_elements}};
3458 wakaba 1.1 !!!next-token;
3459     redo B;
3460     } else {
3461     #
3462     }
3463     } elsif ($token->{type} eq 'end tag') {
3464     if ($token->{tag_name} eq 'colgroup') {
3465 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html') {
3466     !!!parse-error (type => 'unmatched end tag:colgroup');
3467 wakaba 1.1 ## Ignore the token
3468     !!!next-token;
3469     redo B;
3470     } else {
3471 wakaba 1.3 pop @{$self->{open_elements}}; # colgroup
3472     $self->{insertion_mode} = 'in table';
3473 wakaba 1.1 !!!next-token;
3474     redo B;
3475     }
3476     } elsif ($token->{tag_name} eq 'col') {
3477 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:col');
3478 wakaba 1.1 ## Ignore the token
3479     !!!next-token;
3480     redo B;
3481     } else {
3482     #
3483     }
3484     } else {
3485     #
3486     }
3487    
3488     ## As if </colgroup>
3489 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html') {
3490     !!!parse-error (type => 'unmatched end tag:colgroup');
3491 wakaba 1.1 ## Ignore the token
3492     !!!next-token;
3493     redo B;
3494     } else {
3495 wakaba 1.3 pop @{$self->{open_elements}}; # colgroup
3496     $self->{insertion_mode} = 'in table';
3497 wakaba 1.1 ## reprocess
3498     redo B;
3499     }
3500 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in table body') {
3501 wakaba 1.1 if ($token->{type} eq 'character') {
3502     ## NOTE: This is a "character in table" code clone.
3503     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3504 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3505 wakaba 1.1
3506     unless (length $token->{data}) {
3507     !!!next-token;
3508     redo B;
3509     }
3510     }
3511    
3512 wakaba 1.3 !!!parse-error (type => 'in table:#character');
3513    
3514 wakaba 1.1 ## As if in body, but insert into foster parent element
3515     ## ISSUE: Spec says that "whenever a node would be inserted
3516     ## into the current node" while characters might not be
3517     ## result in a new Text node.
3518     $reconstruct_active_formatting_elements->($insert_to_foster);
3519    
3520     if ({
3521     table => 1, tbody => 1, tfoot => 1,
3522     thead => 1, tr => 1,
3523 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3524 wakaba 1.1 # MUST
3525     my $foster_parent_element;
3526     my $next_sibling;
3527     my $prev_sibling;
3528 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3529     if ($self->{open_elements}->[$_]->[1] eq 'table') {
3530     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
3531 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
3532     $foster_parent_element = $parent;
3533 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
3534 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
3535     } else {
3536 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
3537 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
3538     }
3539     last OE;
3540     }
3541     } # OE
3542 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
3543 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
3544     unless defined $foster_parent_element;
3545     if (defined $prev_sibling and
3546     $prev_sibling->node_type == 3) {
3547     $prev_sibling->manakai_append_text ($token->{data});
3548     } else {
3549     $foster_parent_element->insert_before
3550     ($self->{document}->create_text_node ($token->{data}),
3551     $next_sibling);
3552     }
3553     } else {
3554 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3555 wakaba 1.1 }
3556    
3557     !!!next-token;
3558     redo B;
3559     } elsif ($token->{type} eq 'comment') {
3560     ## Copied from 'in table'
3561     my $comment = $self->{document}->create_comment ($token->{data});
3562 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3563 wakaba 1.1 !!!next-token;
3564     redo B;
3565     } elsif ($token->{type} eq 'start tag') {
3566     if ({
3567     tr => 1,
3568     th => 1, td => 1,
3569     }->{$token->{tag_name}}) {
3570 wakaba 1.3 unless ($token->{tag_name} eq 'tr') {
3571     !!!parse-error (type => 'missing start tag:tr');
3572     }
3573    
3574 wakaba 1.1 ## Clear back to table body context
3575     while (not {
3576     tbody => 1, tfoot => 1, thead => 1, html => 1,
3577 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3578     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3579     pop @{$self->{open_elements}};
3580 wakaba 1.1 }
3581    
3582 wakaba 1.3 $self->{insertion_mode} = 'in row';
3583 wakaba 1.1 if ($token->{tag_name} eq 'tr') {
3584     !!!insert-element ($token->{tag_name}, $token->{attributes});
3585     !!!next-token;
3586     } else {
3587     !!!insert-element ('tr');
3588     ## reprocess
3589     }
3590     redo B;
3591     } elsif ({
3592     caption => 1, col => 1, colgroup => 1,
3593     tbody => 1, tfoot => 1, thead => 1,
3594     }->{$token->{tag_name}}) {
3595     ## have an element in table scope
3596     my $i;
3597 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3598     my $node = $self->{open_elements}->[$_];
3599 wakaba 1.1 if ({
3600     tbody => 1, thead => 1, tfoot => 1,
3601     }->{$node->[1]}) {
3602     $i = $_;
3603     last INSCOPE;
3604     } elsif ({
3605     table => 1, html => 1,
3606     }->{$node->[1]}) {
3607     last INSCOPE;
3608     }
3609     } # INSCOPE
3610     unless (defined $i) {
3611 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3612 wakaba 1.1 ## Ignore the token
3613     !!!next-token;
3614     redo B;
3615     }
3616    
3617     ## Clear back to table body context
3618     while (not {
3619     tbody => 1, tfoot => 1, thead => 1, html => 1,
3620 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3621     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3622     pop @{$self->{open_elements}};
3623 wakaba 1.1 }
3624    
3625     ## As if <{current node}>
3626     ## have an element in table scope
3627     ## true by definition
3628    
3629     ## Clear back to table body context
3630     ## nop by definition
3631    
3632 wakaba 1.3 pop @{$self->{open_elements}};
3633     $self->{insertion_mode} = 'in table';
3634 wakaba 1.1 ## reprocess
3635     redo B;
3636     } elsif ($token->{tag_name} eq 'table') {
3637     ## NOTE: This is a code clone of "table in table"
3638 wakaba 1.3 !!!parse-error (type => 'not closed:table');
3639 wakaba 1.1
3640     ## As if </table>
3641     ## have a table element in table scope
3642     my $i;
3643 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3644     my $node = $self->{open_elements}->[$_];
3645 wakaba 1.1 if ($node->[1] eq 'table') {
3646     $i = $_;
3647     last INSCOPE;
3648     } elsif ({
3649     table => 1, html => 1,
3650     }->{$node->[1]}) {
3651     last INSCOPE;
3652     }
3653     } # INSCOPE
3654     unless (defined $i) {
3655 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:table');
3656 wakaba 1.1 ## Ignore tokens </table><table>
3657     !!!next-token;
3658     redo B;
3659     }
3660    
3661     ## generate implied end tags
3662     if ({
3663     dd => 1, dt => 1, li => 1, p => 1,
3664     td => 1, th => 1, tr => 1,
3665 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3666 wakaba 1.1 !!!back-token; # <table>
3667     $token = {type => 'end tag', tag_name => 'table'};
3668     !!!back-token;
3669     $token = {type => 'end tag',
3670 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3671 wakaba 1.1 redo B;
3672     }
3673    
3674 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
3675     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3676 wakaba 1.1 }
3677    
3678 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3679 wakaba 1.1
3680 wakaba 1.3 $self->_reset_insertion_mode;
3681 wakaba 1.1
3682     ## reprocess
3683     redo B;
3684     } else {
3685     #
3686     }
3687     } elsif ($token->{type} eq 'end tag') {
3688     if ({
3689     tbody => 1, tfoot => 1, thead => 1,
3690     }->{$token->{tag_name}}) {
3691     ## have an element in table scope
3692     my $i;
3693 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3694     my $node = $self->{open_elements}->[$_];
3695 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
3696     $i = $_;
3697     last INSCOPE;
3698     } elsif ({
3699     table => 1, html => 1,
3700     }->{$node->[1]}) {
3701     last INSCOPE;
3702     }
3703     } # INSCOPE
3704     unless (defined $i) {
3705 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3706 wakaba 1.1 ## Ignore the token
3707     !!!next-token;
3708     redo B;
3709     }
3710    
3711     ## Clear back to table body context
3712     while (not {
3713     tbody => 1, tfoot => 1, thead => 1, html => 1,
3714 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3715     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3716     pop @{$self->{open_elements}};
3717 wakaba 1.1 }
3718    
3719 wakaba 1.3 pop @{$self->{open_elements}};
3720     $self->{insertion_mode} = 'in table';
3721 wakaba 1.1 !!!next-token;
3722     redo B;
3723     } elsif ($token->{tag_name} eq 'table') {
3724     ## have an element in table scope
3725     my $i;
3726 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3727     my $node = $self->{open_elements}->[$_];
3728 wakaba 1.1 if ({
3729     tbody => 1, thead => 1, tfoot => 1,
3730     }->{$node->[1]}) {
3731     $i = $_;
3732     last INSCOPE;
3733     } elsif ({
3734     table => 1, html => 1,
3735     }->{$node->[1]}) {
3736     last INSCOPE;
3737     }
3738     } # INSCOPE
3739     unless (defined $i) {
3740 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3741 wakaba 1.1 ## Ignore the token
3742     !!!next-token;
3743     redo B;
3744     }
3745    
3746     ## Clear back to table body context
3747     while (not {
3748     tbody => 1, tfoot => 1, thead => 1, html => 1,
3749 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3750     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3751     pop @{$self->{open_elements}};
3752 wakaba 1.1 }
3753    
3754     ## As if <{current node}>
3755     ## have an element in table scope
3756     ## true by definition
3757    
3758     ## Clear back to table body context
3759     ## nop by definition
3760    
3761 wakaba 1.3 pop @{$self->{open_elements}};
3762     $self->{insertion_mode} = 'in table';
3763 wakaba 1.1 ## reprocess
3764     redo B;
3765     } elsif ({
3766     body => 1, caption => 1, col => 1, colgroup => 1,
3767     html => 1, td => 1, th => 1, tr => 1,
3768     }->{$token->{tag_name}}) {
3769 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3770 wakaba 1.1 ## Ignore the token
3771     !!!next-token;
3772     redo B;
3773     } else {
3774     #
3775     }
3776     } else {
3777     #
3778     }
3779    
3780     ## As if in table
3781 wakaba 1.3 !!!parse-error (type => 'in table:'.$token->{tag_name});
3782 wakaba 1.1 $in_body->($insert_to_foster);
3783     redo B;
3784 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in row') {
3785 wakaba 1.1 if ($token->{type} eq 'character') {
3786     ## NOTE: This is a "character in table" code clone.
3787     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3788 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3789 wakaba 1.1
3790     unless (length $token->{data}) {
3791     !!!next-token;
3792     redo B;
3793     }
3794     }
3795    
3796 wakaba 1.3 !!!parse-error (type => 'in table:#character');
3797    
3798 wakaba 1.1 ## As if in body, but insert into foster parent element
3799     ## ISSUE: Spec says that "whenever a node would be inserted
3800     ## into the current node" while characters might not be
3801     ## result in a new Text node.
3802     $reconstruct_active_formatting_elements->($insert_to_foster);
3803    
3804     if ({
3805     table => 1, tbody => 1, tfoot => 1,
3806     thead => 1, tr => 1,
3807 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3808 wakaba 1.1 # MUST
3809     my $foster_parent_element;
3810     my $next_sibling;
3811     my $prev_sibling;
3812 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3813     if ($self->{open_elements}->[$_]->[1] eq 'table') {
3814     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
3815 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
3816     $foster_parent_element = $parent;
3817 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
3818 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
3819     } else {
3820 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
3821 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
3822     }
3823     last OE;
3824     }
3825     } # OE
3826 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
3827 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
3828     unless defined $foster_parent_element;
3829     if (defined $prev_sibling and
3830     $prev_sibling->node_type == 3) {
3831     $prev_sibling->manakai_append_text ($token->{data});
3832     } else {
3833     $foster_parent_element->insert_before
3834     ($self->{document}->create_text_node ($token->{data}),
3835     $next_sibling);
3836     }
3837     } else {
3838 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3839 wakaba 1.1 }
3840    
3841     !!!next-token;
3842     redo B;
3843     } elsif ($token->{type} eq 'comment') {
3844     ## Copied from 'in table'
3845     my $comment = $self->{document}->create_comment ($token->{data});
3846 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3847 wakaba 1.1 !!!next-token;
3848     redo B;
3849     } elsif ($token->{type} eq 'start tag') {
3850     if ($token->{tag_name} eq 'th' or
3851     $token->{tag_name} eq 'td') {
3852     ## Clear back to table row context
3853     while (not {
3854     tr => 1, html => 1,
3855 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3856     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3857     pop @{$self->{open_elements}};
3858 wakaba 1.1 }
3859    
3860     !!!insert-element ($token->{tag_name}, $token->{attributes});
3861 wakaba 1.3 $self->{insertion_mode} = 'in cell';
3862 wakaba 1.1
3863     push @$active_formatting_elements, ['#marker', ''];
3864    
3865     !!!next-token;
3866     redo B;
3867     } elsif ({
3868     caption => 1, col => 1, colgroup => 1,
3869     tbody => 1, tfoot => 1, thead => 1, tr => 1,
3870     }->{$token->{tag_name}}) {
3871     ## As if </tr>
3872     ## have an element in table scope
3873     my $i;
3874 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3875     my $node = $self->{open_elements}->[$_];
3876 wakaba 1.1 if ($node->[1] eq 'tr') {
3877     $i = $_;
3878     last INSCOPE;
3879     } elsif ({
3880     table => 1, html => 1,
3881     }->{$node->[1]}) {
3882     last INSCOPE;
3883     }
3884     } # INSCOPE
3885     unless (defined $i) {
3886 wakaba 1.3 !!!parse-error (type => 'unmacthed end tag:'.$token->{tag_name});
3887 wakaba 1.1 ## Ignore the token
3888     !!!next-token;
3889     redo B;
3890     }
3891    
3892     ## Clear back to table row context
3893     while (not {
3894     tr => 1, html => 1,
3895 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3896     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3897     pop @{$self->{open_elements}};
3898 wakaba 1.1 }
3899    
3900 wakaba 1.3 pop @{$self->{open_elements}}; # tr
3901     $self->{insertion_mode} = 'in table body';
3902 wakaba 1.1 ## reprocess
3903     redo B;
3904     } elsif ($token->{tag_name} eq 'table') {
3905     ## NOTE: This is a code clone of "table in table"
3906 wakaba 1.3 !!!parse-error (type => 'not closed:table');
3907 wakaba 1.1
3908     ## As if </table>
3909     ## have a table element in table scope
3910     my $i;
3911 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3912     my $node = $self->{open_elements}->[$_];
3913 wakaba 1.1 if ($node->[1] eq 'table') {
3914     $i = $_;
3915     last INSCOPE;
3916     } elsif ({
3917     table => 1, html => 1,
3918     }->{$node->[1]}) {
3919     last INSCOPE;
3920     }
3921     } # INSCOPE
3922     unless (defined $i) {
3923 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:table');
3924 wakaba 1.1 ## Ignore tokens </table><table>
3925     !!!next-token;
3926     redo B;
3927     }
3928    
3929     ## generate implied end tags
3930     if ({
3931     dd => 1, dt => 1, li => 1, p => 1,
3932     td => 1, th => 1, tr => 1,
3933 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3934 wakaba 1.1 !!!back-token; # <table>
3935     $token = {type => 'end tag', tag_name => 'table'};
3936     !!!back-token;
3937     $token = {type => 'end tag',
3938 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3939 wakaba 1.1 redo B;
3940     }
3941    
3942 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
3943     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3944 wakaba 1.1 }
3945    
3946 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3947 wakaba 1.1
3948 wakaba 1.3 $self->_reset_insertion_mode;
3949 wakaba 1.1
3950     ## reprocess
3951     redo B;
3952     } else {
3953     #
3954     }
3955     } elsif ($token->{type} eq 'end tag') {
3956     if ($token->{tag_name} eq 'tr') {
3957     ## have an element in table scope
3958     my $i;
3959 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3960     my $node = $self->{open_elements}->[$_];
3961 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
3962     $i = $_;
3963     last INSCOPE;
3964     } elsif ({
3965     table => 1, html => 1,
3966     }->{$node->[1]}) {
3967     last INSCOPE;
3968     }
3969     } # INSCOPE
3970     unless (defined $i) {
3971 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3972 wakaba 1.1 ## Ignore the token
3973     !!!next-token;
3974     redo B;
3975     }
3976    
3977     ## Clear back to table row context
3978     while (not {
3979     tr => 1, html => 1,
3980 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3981     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3982     pop @{$self->{open_elements}};
3983 wakaba 1.1 }
3984    
3985 wakaba 1.3 pop @{$self->{open_elements}}; # tr
3986     $self->{insertion_mode} = 'in table body';
3987 wakaba 1.1 !!!next-token;
3988     redo B;
3989     } elsif ($token->{tag_name} eq 'table') {
3990     ## As if </tr>
3991     ## have an element in table scope
3992     my $i;
3993 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3994     my $node = $self->{open_elements}->[$_];
3995 wakaba 1.1 if ($node->[1] eq 'tr') {
3996     $i = $_;
3997     last INSCOPE;
3998     } elsif ({
3999     table => 1, html => 1,
4000     }->{$node->[1]}) {
4001     last INSCOPE;
4002     }
4003     } # INSCOPE
4004     unless (defined $i) {
4005 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{type});
4006 wakaba 1.1 ## Ignore the token
4007     !!!next-token;
4008     redo B;
4009     }
4010    
4011     ## Clear back to table row context
4012     while (not {
4013     tr => 1, html => 1,
4014 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4015     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4016     pop @{$self->{open_elements}};
4017 wakaba 1.1 }
4018    
4019 wakaba 1.3 pop @{$self->{open_elements}}; # tr
4020     $self->{insertion_mode} = 'in table body';
4021 wakaba 1.1 ## reprocess
4022     redo B;
4023     } elsif ({
4024     tbody => 1, tfoot => 1, thead => 1,
4025     }->{$token->{tag_name}}) {
4026     ## have an element in table scope
4027     my $i;
4028 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4029     my $node = $self->{open_elements}->[$_];
4030 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4031     $i = $_;
4032     last INSCOPE;
4033     } elsif ({
4034     table => 1, html => 1,
4035     }->{$node->[1]}) {
4036     last INSCOPE;
4037     }
4038     } # INSCOPE
4039     unless (defined $i) {
4040 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4041 wakaba 1.1 ## Ignore the token
4042     !!!next-token;
4043     redo B;
4044     }
4045    
4046     ## As if </tr>
4047     ## have an element in table scope
4048     my $i;
4049 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4050     my $node = $self->{open_elements}->[$_];
4051 wakaba 1.1 if ($node->[1] eq 'tr') {
4052     $i = $_;
4053     last INSCOPE;
4054     } elsif ({
4055     table => 1, html => 1,
4056     }->{$node->[1]}) {
4057     last INSCOPE;
4058     }
4059     } # INSCOPE
4060     unless (defined $i) {
4061 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:tr');
4062 wakaba 1.1 ## Ignore the token
4063     !!!next-token;
4064     redo B;
4065     }
4066    
4067     ## Clear back to table row context
4068     while (not {
4069     tr => 1, html => 1,
4070 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4071     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4072     pop @{$self->{open_elements}};
4073 wakaba 1.1 }
4074    
4075 wakaba 1.3 pop @{$self->{open_elements}}; # tr
4076     $self->{insertion_mode} = 'in table body';
4077 wakaba 1.1 ## reprocess
4078     redo B;
4079     } elsif ({
4080     body => 1, caption => 1, col => 1,
4081     colgroup => 1, html => 1, td => 1, th => 1,
4082     }->{$token->{tag_name}}) {
4083 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4084 wakaba 1.1 ## Ignore the token
4085     !!!next-token;
4086     redo B;
4087     } else {
4088     #
4089     }
4090     } else {
4091     #
4092     }
4093    
4094     ## As if in table
4095 wakaba 1.3 !!!parse-error (type => 'in table:'.$token->{tag_name});
4096 wakaba 1.1 $in_body->($insert_to_foster);
4097     redo B;
4098 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in cell') {
4099 wakaba 1.1 if ($token->{type} eq 'character') {
4100     ## NOTE: This is a code clone of "character in body".
4101     $reconstruct_active_formatting_elements->($insert_to_current);
4102    
4103 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4104 wakaba 1.1
4105     !!!next-token;
4106     redo B;
4107     } elsif ($token->{type} eq 'comment') {
4108     ## NOTE: This is a code clone of "comment in body".
4109     my $comment = $self->{document}->create_comment ($token->{data});
4110 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4111 wakaba 1.1 !!!next-token;
4112     redo B;
4113     } elsif ($token->{type} eq 'start tag') {
4114     if ({
4115     caption => 1, col => 1, colgroup => 1,
4116     tbody => 1, td => 1, tfoot => 1, th => 1,
4117     thead => 1, tr => 1,
4118     }->{$token->{tag_name}}) {
4119     ## have an element in table scope
4120     my $tn;
4121 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4122     my $node = $self->{open_elements}->[$_];
4123 wakaba 1.1 if ($node->[1] eq 'td' or $node->[1] eq 'th') {
4124     $tn = $node->[1];
4125     last INSCOPE;
4126     } elsif ({
4127     table => 1, html => 1,
4128     }->{$node->[1]}) {
4129     last INSCOPE;
4130     }
4131     } # INSCOPE
4132     unless (defined $tn) {
4133 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4134 wakaba 1.1 ## Ignore the token
4135     !!!next-token;
4136     redo B;
4137     }
4138    
4139     ## Close the cell
4140     !!!back-token; # <?>
4141     $token = {type => 'end tag', tag_name => $tn};
4142     redo B;
4143     } else {
4144     #
4145     }
4146     } elsif ($token->{type} eq 'end tag') {
4147     if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
4148     ## have an element in table scope
4149     my $i;
4150 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4151     my $node = $self->{open_elements}->[$_];
4152 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4153     $i = $_;
4154     last INSCOPE;
4155     } elsif ({
4156     table => 1, html => 1,
4157     }->{$node->[1]}) {
4158     last INSCOPE;
4159     }
4160     } # INSCOPE
4161     unless (defined $i) {
4162 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4163 wakaba 1.1 ## Ignore the token
4164     !!!next-token;
4165     redo B;
4166     }
4167    
4168     ## generate implied end tags
4169     if ({
4170     dd => 1, dt => 1, li => 1, p => 1,
4171     td => ($token->{tag_name} eq 'th'),
4172     th => ($token->{tag_name} eq 'td'),
4173     tr => 1,
4174 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4175 wakaba 1.1 !!!back-token;
4176     $token = {type => 'end tag',
4177 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4178 wakaba 1.1 redo B;
4179     }
4180    
4181 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
4182     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4183 wakaba 1.1 }
4184    
4185 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4186 wakaba 1.1
4187     $clear_up_to_marker->();
4188    
4189 wakaba 1.3 $self->{insertion_mode} = 'in row';
4190 wakaba 1.1
4191     !!!next-token;
4192     redo B;
4193     } elsif ({
4194     body => 1, caption => 1, col => 1,
4195     colgroup => 1, html => 1,
4196     }->{$token->{tag_name}}) {
4197 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4198 wakaba 1.1 ## Ignore the token
4199     !!!next-token;
4200     redo B;
4201     } elsif ({
4202     table => 1, tbody => 1, tfoot => 1,
4203     thead => 1, tr => 1,
4204     }->{$token->{tag_name}}) {
4205     ## have an element in table scope
4206     my $i;
4207     my $tn;
4208 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4209     my $node = $self->{open_elements}->[$_];
4210 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4211     $i = $_;
4212     last INSCOPE;
4213     } elsif ($node->[1] eq 'td' or $node->[1] eq 'th') {
4214     $tn = $node->[1];
4215     ## NOTE: There is exactly one |td| or |th| element
4216     ## in scope in the stack of open elements by definition.
4217     } elsif ({
4218     table => 1, html => 1,
4219     }->{$node->[1]}) {
4220     last INSCOPE;
4221     }
4222     } # INSCOPE
4223     unless (defined $i) {
4224 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4225 wakaba 1.1 ## Ignore the token
4226     !!!next-token;
4227     redo B;
4228     }
4229    
4230     ## Close the cell
4231     !!!back-token; # </?>
4232     $token = {type => 'end tag', tag_name => $tn};
4233     redo B;
4234     } else {
4235     #
4236     }
4237     } else {
4238     #
4239     }
4240    
4241     $in_body->($insert_to_current);
4242     redo B;
4243 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in select') {
4244 wakaba 1.1 if ($token->{type} eq 'character') {
4245 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4246 wakaba 1.1 !!!next-token;
4247     redo B;
4248     } elsif ($token->{type} eq 'comment') {
4249     my $comment = $self->{document}->create_comment ($token->{data});
4250 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4251 wakaba 1.1 !!!next-token;
4252     redo B;
4253     } elsif ($token->{type} eq 'start tag') {
4254     if ($token->{tag_name} eq 'option') {
4255 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
4256 wakaba 1.1 ## As if </option>
4257 wakaba 1.3 pop @{$self->{open_elements}};
4258 wakaba 1.1 }
4259    
4260     !!!insert-element ($token->{tag_name}, $token->{attributes});
4261     !!!next-token;
4262     redo B;
4263     } elsif ($token->{tag_name} eq 'optgroup') {
4264 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
4265 wakaba 1.1 ## As if </option>
4266 wakaba 1.3 pop @{$self->{open_elements}};
4267 wakaba 1.1 }
4268    
4269 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
4270 wakaba 1.1 ## As if </optgroup>
4271 wakaba 1.3 pop @{$self->{open_elements}};
4272 wakaba 1.1 }
4273    
4274     !!!insert-element ($token->{tag_name}, $token->{attributes});
4275     !!!next-token;
4276     redo B;
4277     } elsif ($token->{tag_name} eq 'select') {
4278 wakaba 1.3 !!!parse-error (type => 'not closed:select');
4279 wakaba 1.1 ## As if </select> instead
4280     ## have an element in table scope
4281     my $i;
4282 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4283     my $node = $self->{open_elements}->[$_];
4284 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4285     $i = $_;
4286     last INSCOPE;
4287     } elsif ({
4288     table => 1, html => 1,
4289     }->{$node->[1]}) {
4290     last INSCOPE;
4291     }
4292     } # INSCOPE
4293     unless (defined $i) {
4294 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:select');
4295 wakaba 1.1 ## Ignore the token
4296     !!!next-token;
4297     redo B;
4298     }
4299    
4300 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4301 wakaba 1.1
4302 wakaba 1.3 $self->_reset_insertion_mode;
4303 wakaba 1.1
4304     !!!next-token;
4305     redo B;
4306     } else {
4307     #
4308     }
4309     } elsif ($token->{type} eq 'end tag') {
4310     if ($token->{tag_name} eq 'optgroup') {
4311 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option' and
4312     $self->{open_elements}->[-2]->[1] eq 'optgroup') {
4313 wakaba 1.1 ## As if </option>
4314 wakaba 1.3 splice @{$self->{open_elements}}, -2;
4315     } elsif ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
4316     pop @{$self->{open_elements}};
4317 wakaba 1.1 } else {
4318 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4319 wakaba 1.1 ## Ignore the token
4320     }
4321     !!!next-token;
4322     redo B;
4323     } elsif ($token->{tag_name} eq 'option') {
4324 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
4325     pop @{$self->{open_elements}};
4326 wakaba 1.1 } else {
4327 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4328 wakaba 1.1 ## Ignore the token
4329     }
4330     !!!next-token;
4331     redo B;
4332     } elsif ($token->{tag_name} eq 'select') {
4333     ## have an element in table scope
4334     my $i;
4335 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4336     my $node = $self->{open_elements}->[$_];
4337 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4338     $i = $_;
4339     last INSCOPE;
4340     } elsif ({
4341     table => 1, html => 1,
4342     }->{$node->[1]}) {
4343     last INSCOPE;
4344     }
4345     } # INSCOPE
4346     unless (defined $i) {
4347 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4348 wakaba 1.1 ## Ignore the token
4349     !!!next-token;
4350     redo B;
4351     }
4352    
4353 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4354 wakaba 1.1
4355 wakaba 1.3 $self->_reset_insertion_mode;
4356 wakaba 1.1
4357     !!!next-token;
4358     redo B;
4359     } elsif ({
4360     caption => 1, table => 1, tbody => 1,
4361     tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
4362     }->{$token->{tag_name}}) {
4363 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4364 wakaba 1.1
4365     ## have an element in table scope
4366     my $i;
4367 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4368     my $node = $self->{open_elements}->[$_];
4369 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4370     $i = $_;
4371     last INSCOPE;
4372     } elsif ({
4373     table => 1, html => 1,
4374     }->{$node->[1]}) {
4375     last INSCOPE;
4376     }
4377     } # INSCOPE
4378     unless (defined $i) {
4379     ## Ignore the token
4380     !!!next-token;
4381     redo B;
4382     }
4383    
4384     ## As if </select>
4385     ## have an element in table scope
4386     undef $i;
4387 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4388     my $node = $self->{open_elements}->[$_];
4389 wakaba 1.1 if ($node->[1] eq 'select') {
4390     $i = $_;
4391     last INSCOPE;
4392     } elsif ({
4393     table => 1, html => 1,
4394     }->{$node->[1]}) {
4395     last INSCOPE;
4396     }
4397     } # INSCOPE
4398     unless (defined $i) {
4399 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:select');
4400 wakaba 1.1 ## Ignore the </select> token
4401     !!!next-token; ## TODO: ok?
4402     redo B;
4403     }
4404    
4405 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4406 wakaba 1.1
4407 wakaba 1.3 $self->_reset_insertion_mode;
4408 wakaba 1.1
4409     ## reprocess
4410     redo B;
4411     } else {
4412     #
4413     }
4414     } else {
4415     #
4416     }
4417    
4418 wakaba 1.3 !!!parse-error (type => 'in select:'.$token->{tag_name});
4419 wakaba 1.1 ## Ignore the token
4420     !!!next-token;
4421     redo B;
4422 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after body') {
4423 wakaba 1.1 if ($token->{type} eq 'character') {
4424     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4425     ## As if in body
4426     $reconstruct_active_formatting_elements->($insert_to_current);
4427    
4428 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4429 wakaba 1.1
4430     unless (length $token->{data}) {
4431     !!!next-token;
4432     redo B;
4433     }
4434     }
4435    
4436     #
4437 wakaba 1.3 !!!parse-error (type => 'after body:#'.$token->{type});
4438 wakaba 1.1 } elsif ($token->{type} eq 'comment') {
4439     my $comment = $self->{document}->create_comment ($token->{data});
4440 wakaba 1.3 $self->{open_elements}->[0]->[0]->append_child ($comment);
4441 wakaba 1.1 !!!next-token;
4442     redo B;
4443 wakaba 1.3 } elsif ($token->{type} eq 'start tag') {
4444     !!!parse-error (type => 'after body:'.$token->{tag_name});
4445     #
4446 wakaba 1.1 } elsif ($token->{type} eq 'end tag') {
4447     if ($token->{tag_name} eq 'html') {
4448 wakaba 1.3 if (defined $self->{inner_html_node}) {
4449     !!!parse-error (type => 'unmatched end tag:html');
4450     ## Ignore the token
4451     !!!next-token;
4452     redo B;
4453     } else {
4454     $phase = 'trailing end';
4455     !!!next-token;
4456     redo B;
4457     }
4458 wakaba 1.1 } else {
4459 wakaba 1.3 !!!parse-error (type => 'after body:/'.$token->{tag_name});
4460 wakaba 1.1 }
4461     } else {
4462 wakaba 1.3 !!!parse-error (type => 'after body:#'.$token->{type});
4463 wakaba 1.1 }
4464    
4465 wakaba 1.3 $self->{insertion_mode} = 'in body';
4466 wakaba 1.1 ## reprocess
4467     redo B;
4468 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in frameset') {
4469 wakaba 1.1 if ($token->{type} eq 'character') {
4470     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4471 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4472 wakaba 1.1
4473     unless (length $token->{data}) {
4474     !!!next-token;
4475     redo B;
4476     }
4477     }
4478    
4479     #
4480     } elsif ($token->{type} eq 'comment') {
4481     my $comment = $self->{document}->create_comment ($token->{data});
4482 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4483 wakaba 1.1 !!!next-token;
4484     redo B;
4485     } elsif ($token->{type} eq 'start tag') {
4486     if ($token->{tag_name} eq 'frameset') {
4487     !!!insert-element ($token->{tag_name}, $token->{attributes});
4488     !!!next-token;
4489     redo B;
4490     } elsif ($token->{tag_name} eq 'frame') {
4491     !!!insert-element ($token->{tag_name}, $token->{attributes});
4492 wakaba 1.3 pop @{$self->{open_elements}};
4493 wakaba 1.1 !!!next-token;
4494     redo B;
4495     } elsif ($token->{tag_name} eq 'noframes') {
4496     $in_body->($insert_to_current);
4497     redo B;
4498     } else {
4499     #
4500     }
4501     } elsif ($token->{type} eq 'end tag') {
4502     if ($token->{tag_name} eq 'frameset') {
4503 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html' and
4504     @{$self->{open_elements}} == 1) {
4505     !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4506 wakaba 1.1 ## Ignore the token
4507     !!!next-token;
4508     } else {
4509 wakaba 1.3 pop @{$self->{open_elements}};
4510 wakaba 1.1 !!!next-token;
4511     }
4512    
4513     ## if not inner_html and
4514 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'frameset') {
4515     $self->{insertion_mode} = 'after frameset';
4516 wakaba 1.1 }
4517     redo B;
4518     } else {
4519     #
4520     }
4521     } else {
4522     #
4523     }
4524    
4525 wakaba 1.3 if (defined $token->{tag_name}) {
4526     !!!parse-error (type => 'in frameset:'.$token->{tag_name});
4527     } else {
4528     !!!parse-error (type => 'in frameset:#'.$token->{type});
4529     }
4530 wakaba 1.1 ## Ignore the token
4531     !!!next-token;
4532     redo B;
4533 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after frameset') {
4534 wakaba 1.1 if ($token->{type} eq 'character') {
4535     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4536 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4537 wakaba 1.1
4538     unless (length $token->{data}) {
4539     !!!next-token;
4540     redo B;
4541     }
4542     }
4543    
4544     #
4545     } elsif ($token->{type} eq 'comment') {
4546     my $comment = $self->{document}->create_comment ($token->{data});
4547 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4548 wakaba 1.1 !!!next-token;
4549     redo B;
4550     } elsif ($token->{type} eq 'start tag') {
4551     if ($token->{tag_name} eq 'noframes') {
4552     $in_body->($insert_to_current);
4553     redo B;
4554     } else {
4555     #
4556     }
4557     } elsif ($token->{type} eq 'end tag') {
4558     if ($token->{tag_name} eq 'html') {
4559     $phase = 'trailing end';
4560     !!!next-token;
4561     redo B;
4562     } else {
4563     #
4564     }
4565     } else {
4566     #
4567     }
4568    
4569 wakaba 1.3 if (defined $token->{tag_name}) {
4570     !!!parse-error (type => 'after frameset:'.$token->{tag_name});
4571     } else {
4572     !!!parse-error (type => 'after frameset:#'.$token->{type});
4573     }
4574 wakaba 1.1 ## Ignore the token
4575     !!!next-token;
4576     redo B;
4577    
4578     ## ISSUE: An issue in spec there
4579     } else {
4580 wakaba 1.3 die "$0: $self->{insertion_mode}: Unknown insertion mode";
4581 wakaba 1.1 }
4582     }
4583     } elsif ($phase eq 'trailing end') {
4584     ## states in the main stage is preserved yet # MUST
4585    
4586     if ($token->{type} eq 'DOCTYPE') {
4587 wakaba 1.3 !!!parse-error (type => 'after html:#DOCTYPE');
4588 wakaba 1.1 ## Ignore the token
4589     !!!next-token;
4590     redo B;
4591     } elsif ($token->{type} eq 'comment') {
4592     my $comment = $self->{document}->create_comment ($token->{data});
4593     $self->{document}->append_child ($comment);
4594     !!!next-token;
4595     redo B;
4596     } elsif ($token->{type} eq 'character') {
4597     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4598     my $data = $1;
4599     ## As if in the main phase.
4600     ## NOTE: The insertion mode in the main phase
4601     ## just before the phase has been changed to the trailing
4602     ## end phase is either "after body" or "after frameset".
4603     $reconstruct_active_formatting_elements->($insert_to_current)
4604     if $phase eq 'main';
4605    
4606 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($data);
4607 wakaba 1.1
4608     unless (length $token->{data}) {
4609     !!!next-token;
4610     redo B;
4611     }
4612     }
4613    
4614 wakaba 1.3 !!!parse-error (type => 'after html:#character');
4615 wakaba 1.1 $phase = 'main';
4616     ## reprocess
4617     redo B;
4618     } elsif ($token->{type} eq 'start tag' or
4619     $token->{type} eq 'end tag') {
4620 wakaba 1.3 !!!parse-error (type => 'after html:'.$token->{tag_name});
4621 wakaba 1.1 $phase = 'main';
4622     ## reprocess
4623     redo B;
4624     } elsif ($token->{type} eq 'end-of-file') {
4625     ## Stop parsing
4626     last B;
4627     } else {
4628     die "$0: $token->{type}: Unknown token";
4629     }
4630     }
4631     } # B
4632    
4633     ## Stop parsing # MUST
4634    
4635     ## TODO: script stuffs
4636 wakaba 1.3 } # _tree_construct_main
4637    
## set_inner_html ($class, $node, $string, $onerror)
##
## Replaces the children of |$node| by the result of parsing |$string|
## as HTML.
##
##   $node    - A Document node (node type 9) or an Element node
##              (node type 1).  For any other node type the method
##              |die|s.
##   $string  - The markup to parse.  It is read through a reference
##              to the caller's argument rather than through a copy.
##   $onerror - Optional code reference.  For a Document it is handed
##              to |parse_string| unchanged.  For an Element it is
##              called with the error's key/value pairs followed by
##              |line| and |column|; if it is false, a handler that
##              |warn|s is used instead.
sub set_inner_html ($$$) {
  my $class = shift;
  my $node = shift;
  my $input = \$_[0];
  my $onerror = $_[1];

  my $node_type = $node->node_type;

  if ($node_type == 9) {
    # MUST

    ## Step 1 # MUST
    ## TODO: If the document has an active parser, ...
    ## ISSUE: There is an issue in the spec.

    ## Step 2 # MUST
    ## The child list is copied before any child is removed.
    my @old_children = @{$node->child_nodes};
    $node->remove_child ($_) for @old_children;

    ## Step 3, 4, 5 # MUST
    return $class->parse_string ($$input => $node, $onerror);
  }

  unless ($node_type == 1) {
    die "$0: |set_inner_html| is not defined for node of type $node_type";
  }

  ## TODO: If non-html element

  ## NOTE: Most of the code below is copied from |parse_string|

  ## Step 1 # MUST
  my $this_doc = $node->owner_document;
  my $doc = $this_doc->implementation->create_document;
  ## TODO: Mark as HTML document
  my $p = $class->new;
  $p->{document} = $doc;

  ## Step 9 # MUST
  ## Input position; |$line| and |$column| are also reported with
  ## every parse error.
  my $pos = 0;
  my $line = 1;
  my $column = 0;
  $p->{set_next_input_character} = sub {
    my $self = shift;

    ## Keep the history of previous input characters at a fixed length.
    pop @{$self->{prev_input_character}};
    unshift @{$self->{prev_input_character}}, $self->{next_input_character};

    ## -1 stands for the end of the input.
    if ($pos >= length $$input) {
      $self->{next_input_character} = -1;
      return;
    }

    my $c = $self->{next_input_character} = ord substr $$input, $pos++, 1;
    $column++;

    if ($c == 0x000A) { # LF
      $line++;
      $column = 0;
    } elsif ($c == 0x000D) { # CR
      ## CR and CR LF are both reported as a single LF.
      $pos++ if substr ($$input, $pos, 1) eq "\x0A";
      $self->{next_input_character} = 0x000A; # LF # MUST
      $line++;
      $column = 0;
    } elsif ($c > 0x10FFFF) {
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    } elsif ($c == 0x0000) { # NULL
      !!!parse-error (type => 'NULL');
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    }
  };
  $p->{prev_input_character} = [-1, -1, -1];
  $p->{next_input_character} = -1;

  ## Error reporting: fall back to a |warn|ing handler and always add
  ## the current line and column.
  my $ponerror = $onerror || sub {
    my (%opt) = @_;
    warn "Parse error ($opt{type}) at line $opt{line} column $opt{column}\n";
  };
  $p->{parse_error} = sub {
    $ponerror->(@_, line => $line, column => $column);
  };

  $p->_initialize_tokenizer;
  $p->_initialize_tree_constructor;

  ## Step 2
  ## The initial content model flag is chosen by the local name of
  ## the context element; anything not listed is PCDATA.
  my %content_model_of = (
    title     => 'RCDATA',
    textarea  => 'RCDATA',
    style     => 'CDATA',
    script    => 'CDATA',
    xmp       => 'CDATA',
    iframe    => 'CDATA',
    noembed   => 'CDATA',
    noframes  => 'CDATA',
    noscript  => 'CDATA',
    plaintext => 'PLAINTEXT',
  );
  my $node_ln = $node->local_name;
  $p->{content_model_flag} = $content_model_of{$node_ln} || 'PCDATA';
  ## ISSUE: What is "the name of the element"? local name?

  $p->{inner_html_node} = [$node, $node_ln];

  ## Step 4
  my $root = $doc->create_element_ns
      ('http://www.w3.org/1999/xhtml', [undef, 'html']);

  ## Step 5 # MUST
  $doc->append_child ($root);

  ## Step 6 # MUST
  push @{$p->{open_elements}}, [$root, 'html'];

  undef $p->{head_element};

  ## Step 7 # MUST
  $p->_reset_insertion_mode;

  ## Step 8 # MUST
  ## The nearest XHTML-namespace |form| among the context element and
  ## its ancestors becomes the form element pointer.
  ANCESTOR: for (my $anc = $node; defined $anc; $anc = $anc->parent_node) {
    next ANCESTOR unless $anc->node_type == 1;
    my $nsuri = $anc->namespace_uri;
    next ANCESTOR
        unless defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml';
    next ANCESTOR unless $anc->local_name eq 'form'; ## TODO: case?
    $p->{form_element} = $anc;
    last ANCESTOR;
  } # ANCESTOR

  ## Step 3 # MUST
  ## Step 10 # MUST
  {
    ## The macro below refers to a variable named |$self|.
    my $self = $p;
    !!!next-token;
  }
  $p->_tree_construction_main;

  ## Step 11 # MUST
  my @old_children = @{$node->child_nodes};
  $node->remove_child ($_) for @old_children;
  ## ISSUE: mutation events? read-only?

  ## Step 12 # MUST
  ## Move what was parsed under the temporary root into |$node|.
  my @new_children = @{$root->child_nodes};
  for my $new_child (@new_children) {
    $this_doc->adopt_node ($new_child);
    $node->append_child ($new_child);
  }
  ## ISSUE: mutation events?

  $p->_terminate_tree_constructor;
} # set_inner_html
4791    
4792     } # tree construction stage
4793 wakaba 1.1
## get_inner_html ($class, $node, $on_error)
##
## Serializes the children of |$node| as HTML markup.
##
##   $class    - Ignored.
##   $node     - The node whose children are serialized.  |$node| and
##               its ancestors are also inspected to decide whether
##               text is written without escaping (see below).
##   $on_error - Optional code reference, called with each descendant
##               whose node type is not handled here.
##
## Returns a reference to the resulting string.
##
## Text inside |style|, |script|, |xmp|, |iframe|, |noembed|,
## |noframes| and |noscript| is written as is; all other text and all
## attribute values have |&|, |<|, |>| and |"| escaped.
sub get_inner_html ($$$) {
  my (undef, $node, $on_error) = @_;

  ## Lookup tables, built once per call.
  my %is_cdata_element = map { $_ => 1 } qw(
    style script xmp iframe noembed noframes noscript
  );
  my %is_void_element = map { $_ => 1 } qw(
    area base basefont bgsound br col embed frame hr
    img input link meta param spacer wbr
  );

  my $escape = sub {
    my $value = shift;
    $value =~ s/&/&amp;/g;
    $value =~ s/</&lt;/g;
    $value =~ s/>/&gt;/g;
    $value =~ s/"/&quot;/g;
    return $value;
  };

  ## Step 1
  my $s = '';

  ## Text is unescaped from the start if |$node| itself or one of its
  ## ancestors is an XHTML-namespace CDATA element.
  my $in_cdata;
  my $parent = $node;
  while (defined $parent) {
    if ($parent->node_type == 1) {
      ## The namespace URI may be undefined (null namespace).
      my $nsuri = $parent->namespace_uri;
      if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml' and
          $is_cdata_element{$parent->local_name}) { ## TODO: case thingy
        $in_cdata = 1;
        last;
      }
    }
    $parent = $parent->parent_node;
  }

  ## Step 2
  ## |@node| is a work queue processed from the front.  Besides nodes
  ## it holds plain strings: end tags to be appended to the output and
  ## the marker |cdata-out|, which switches escaping back on.
  my @node = @{$node->child_nodes};
  C: while (@node) {
    my $child = shift @node;
    unless (ref $child) {
      if ($child eq 'cdata-out') {
        $in_cdata = 0;
      } else {
        $s .= $child; # end tag
      }
      next C;
    }

    my $nt = $child->node_type;
    if ($nt == 1) { # Element
      my $tag_name = lc $child->tag_name; ## ISSUE: Definition of "lowercase"
      $s .= '<' . $tag_name;

      ## ISSUE: Non-html elements

      my @attrs = @{$child->attributes}; # sort order MUST be stable
      for my $attr (@attrs) { # order is implementation dependent
        my $attr_name = lc $attr->name; ## ISSUE: Definition of "lowercase"
        $s .= ' ' . $attr_name . '="' . $escape->($attr->value) . '"';
      }
      $s .= '>';

      ## Void elements have neither content nor an end tag.
      next C if $is_void_element{$tag_name};

      if (not $in_cdata and $is_cdata_element{$tag_name}) {
        unshift @node, 'cdata-out';
        $in_cdata = 1;
      }

      ## Queue order: children, end tag, then (if any) |cdata-out|.
      unshift @node, @{$child->child_nodes}, '</' . $tag_name . '>';
    } elsif ($nt == 3 or $nt == 4) { # Text, CDATASection
      $s .= $in_cdata ? $child->data : $escape->($child->data);
    } elsif ($nt == 8) { # Comment
      $s .= '<!--' . $child->data . '-->';
    } elsif ($nt == 10) { # DocumentType
      $s .= '<!DOCTYPE ' . $child->name . '>';
    } elsif ($nt == 5) { # entrefs
      ## Put the children at the front of the queue so that they are
      ## serialized at the position of the entity reference, before
      ## the following siblings and the parent's end tag.
      unshift @node, @{$child->child_nodes};
    } else {
      $on_error->($child) if defined $on_error;
    }
    ## ISSUE: This code does not support PIs.
  } # C

  ## Step 3
  return \$s;
} # get_inner_html
4890    
4891     1;
4892 wakaba 1.17 # $Date: 2007/06/23 07:42:11 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24