/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.52 - (hide annotations) (download)
Sat Jul 21 10:59:39 2007 UTC (17 years, 3 months ago) by wakaba
Branch: MAIN
Changes since 1.51: +134 -106 lines
++ whatpm/Whatpm/ChangeLog	21 Jul 2007 10:59:21 -0000
	* HTML.pm.src: The "before head" insertion mode is
	merged with the "in head" insertion mode.

2007-07-21  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.2 package Whatpm::HTML;
2 wakaba 1.1 use strict;
3 wakaba 1.52 our $VERSION=do{my @r=(q$Revision: 1.49 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.1
5 wakaba 1.18 ## ISSUE:
6     ## var doc = implementation.createDocument (null, null, null);
7     ## doc.write ('');
8     ## alert (doc.compatMode);
9 wakaba 1.1
10 wakaba 1.31 ## ISSUE: HTML5 revision 967 says that the encoding layer MUST NOT
11     ## strip BOM and the HTML layer MUST ignore it. Whether we can do it
12     ## is not yet clear.
13     ## "{U+FEFF}..." in UTF-16BE/UTF-16LE is three or four characters?
14     ## "{U+FEFF}..." in GB18030?
15    
## Start tags whose names are listed here may be written with a trailing
## "/" before ">" (e.g. |<br/>|) without a 'nestc' parse error being
## reported by the tokenizer.
my $permitted_slash_tag_name = {
  map { $_ => 1 } qw(
    base link meta hr br img embed param area col input
  )
};
29    
## Replacement code points for the code points 0x80..0x9F.  Positions
## that have no specific replacement map to U+FFFD REPLACEMENT CHARACTER.
## NOTE(review): presumably consulted when a numeric character reference
## names one of these code points -- the use site is not visible here.
my $c1_entity_char = {
  0x80 => 0x20AC, 0x81 => 0xFFFD, 0x82 => 0x201A, 0x83 => 0x0192,
  0x84 => 0x201E, 0x85 => 0x2026, 0x86 => 0x2020, 0x87 => 0x2021,
  0x88 => 0x02C6, 0x89 => 0x2030, 0x8A => 0x0160, 0x8B => 0x2039,
  0x8C => 0x0152, 0x8D => 0xFFFD, 0x8E => 0x017D, 0x8F => 0xFFFD,
  0x90 => 0xFFFD, 0x91 => 0x2018, 0x92 => 0x2019, 0x93 => 0x201C,
  0x94 => 0x201D, 0x95 => 0x2022, 0x96 => 0x2013, 0x97 => 0x2014,
  0x98 => 0x02DC, 0x99 => 0x2122, 0x9A => 0x0161, 0x9B => 0x203A,
  0x9C => 0x0153, 0x9D => 0xFFFD, 0x9E => 0x017E, 0x9F => 0x0178,
}; # $c1_entity_char
64 wakaba 1.1
## Element names that belong to the "special" category.
my $special_category = {
  map { $_ => 1 } qw(
    address area base basefont bgsound blockquote body br center col
    colgroup dd dir div dl dt embed fieldset form frame frameset
    h1 h2 h3 h4 h5 h6 head hr iframe image img input isindex li link
    listing menu meta noembed noframes noscript ol optgroup option p
    param plaintext pre script select spacer style tbody textarea
    tfoot thead title tr ul wbr
  )
};
## Element names that belong to the "scoping" category.
my $scoping_category = {
  map { $_ => 1 } qw(
    button caption html marquee object table td th
  )
};
## Element names that belong to the "formatting" category.
## The key |strike| was previously misspelled |strile|, so |<strike>|
## elements were never recognized as formatting elements.
my $formatting_category = {
  a => 1, b => 1, big => 1, em => 1, font => 1, i => 1, nobr => 1,
  s => 1, small => 1, strike => 1, strong => 1, tt => 1, u => 1,
};
85     # $phrasing_category: all other elements
86    
## Parse a string as HTML into a document and return that document.
##
## Arguments (after the invocant, on which |new| is called to create a
## fresh parser):
##   string    - the input; read in place through a reference, not copied
##   document  - stored as |$self->{document}| and returned at the end
##   onerror   - optional callback; it receives the parse error's own
##               key/value pairs followed by |line| and |column|.  When
##               omitted, a default callback |warn|s a message.
sub parse_string ($$$;$) {
  my $self = shift->new;
  my $input = \$_[0];
  $self->{document} = $_[1];

  ## NOTE: |set_inner_html| copies most of this method's code

  ## Reader position, shared by the two closures below.
  my $offset = 0;    # index of the next unread character of $$input
  my $line = 1;
  my $column = 0;

  ## Three characters of lookbehind, most recent first; -1 means "none".
  $self->{prev_input_character} = [-1, -1, -1];
  $self->{next_input_character} = -1;

  $self->{set_next_input_character} = sub {
    my $self = shift;

    ## Shift the current character into the lookbehind window.
    pop @{$self->{prev_input_character}};
    unshift @{$self->{prev_input_character}}, $self->{next_input_character};

    ## End of input is reported as -1.
    if ($offset >= length $$input) {
      $self->{next_input_character} = -1;
      return;
    }

    my $code = ord substr $$input, $offset++, 1;
    $self->{next_input_character} = $code;
    $column++;

    if ($code == 0x000A) { # LF
      $line++;
      $column = 0;
    } elsif ($code == 0x000D) { # CR
      ## CR and CR LF are both delivered as a single LF.
      $offset++ if substr ($$input, $offset, 1) eq "\x0A";
      $self->{next_input_character} = 0x000A; # LF # MUST
      $line++;
      $column = 0;
    } elsif ($code > 0x10FFFF) {
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    } elsif ($code == 0x0000) { # NULL
      $self->{parse_error}-> (type => 'NULL');
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    }
  };

  my $onerror = $_[2] || sub {
    my (%opt) = @_;
    warn "Parse error ($opt{type}) at line $opt{line} column $opt{column}\n";
  };
  ## Every reported error is tagged with the reader's current position.
  $self->{parse_error} = sub {
    $onerror->(@_, line => $line, column => $column);
  };

  $self->_initialize_tokenizer;
  $self->_initialize_tree_constructor;
  $self->_construct_tree;
  $self->_terminate_tree_constructor;

  return $self->{document};
} # parse_string
140    
## Create a parser object with inert default callbacks.
##
## The default |set_next_input_character| callback reports end of input
## (-1) on the parser passed as its first argument; the default
## |parse_error| callback ignores every error.
##
## Returns the new object, blessed into the invocant class.
sub new ($) {
  my $class = shift;
  my $self = bless {}, $class;
  $self->{set_next_input_character} = sub {
    ## Operate on the parser handed in by the caller instead of closing
    ## over |$self|: a closure stored inside |$self| that also captures
    ## |$self| forms a reference cycle, so the parser was never freed.
    my $parser = shift;
    $parser->{next_input_character} = -1;
  };
  $self->{parse_error} = sub {
    #
  };
  return $self;
} # new
152    
## Bit flags describing which markup the tokenizer recognizes in data.
sub CM_ENTITY () { 1 << 0 } # & markup in data
sub CM_LIMITED_MARKUP () { 1 << 1 } # < markup in data (limited)
sub CM_FULL_MARKUP () { 1 << 2 } # < markup in data (any)

## Content models, expressed as combinations of the flags above.
sub PLAINTEXT_CONTENT_MODEL () { 0 }
sub CDATA_CONTENT_MODEL () { CM_LIMITED_MARKUP }
sub RCDATA_CONTENT_MODEL () { CM_LIMITED_MARKUP | CM_ENTITY }
sub PCDATA_CONTENT_MODEL () { CM_FULL_MARKUP | CM_ENTITY }
161    
162 wakaba 1.1 ## Implementations MUST act as if they used the state machine described in the spec
163    
## Reset the tokenizer state of the parser and read the first input
## character through the |set_next_input_character| callback.
sub _initialize_tokenizer ($) {
  my $self = shift;
  $self->{state} = 'data'; # MUST
  $self->{content_model} = PCDATA_CONTENT_MODEL; # be

  ## |current_token| is a start tag, end tag, comment, or DOCTYPE.
  undef $self->{$_} for qw(
    current_token current_attribute
    last_emitted_start_tag_name last_attribute_value_state
  );

  ## Queue of characters pushed back for reconsumption.  It has just been
  ## emptied, so the first character always comes from the reader; the
  ## former "take it from the queue" branch here could never run.
  $self->{char} = [];
  # $self->{next_input_character}
  $self->{set_next_input_character}->($self);

  ## Queue of tokens waiting to be returned.
  $self->{token} = [];
  # $self->{escape}
} # _initialize_tokenizer
184    
185     ## A token has:
186     ## ->{type} eq 'DOCTYPE', 'start tag', 'end tag', 'comment',
187     ## 'character', or 'end-of-file'
188 wakaba 1.18 ## ->{name} (DOCTYPE, start tag (tag name), end tag (tag name))
189     ## ->{public_identifier} (DOCTYPE)
190     ## ->{system_identifier} (DOCTYPE)
191     ## ->{correct} == 1 or 0 (DOCTYPE)
192 wakaba 1.1 ## ->{attributes} isa HASH (start tag, end tag)
193     ## ->{data} (comment, character)
194    
195     ## Emitted token MUST immediately be handled by the tree construction state.
196    
197     ## Before each step, UA MAY check to see if either one of the scripts in
198     ## "list of scripts that will execute as soon as possible" or the first
199     ## script in the "list of scripts that will execute asynchronously",
200     ## has completed loading. If one has, then it MUST be executed
201     ## and removed from the list.
202    
203     sub _get_next_token ($) {
204     my $self = shift;
205     if (@{$self->{token}}) {
206     return shift @{$self->{token}};
207     }
208    
209     A: {
210     if ($self->{state} eq 'data') {
211     if ($self->{next_input_character} == 0x0026) { # &
212 wakaba 1.41 if ($self->{content_model} & CM_ENTITY) { # PCDATA | RCDATA
213 wakaba 1.1 $self->{state} = 'entity data';
214    
215     if (@{$self->{char}}) {
216     $self->{next_input_character} = shift @{$self->{char}};
217     } else {
218     $self->{set_next_input_character}->($self);
219     }
220    
221     redo A;
222     } else {
223     #
224     }
225 wakaba 1.13 } elsif ($self->{next_input_character} == 0x002D) { # -
226 wakaba 1.41 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
227 wakaba 1.13 unless ($self->{escape}) {
228     if ($self->{prev_input_character}->[0] == 0x002D and # -
229     $self->{prev_input_character}->[1] == 0x0021 and # !
230     $self->{prev_input_character}->[2] == 0x003C) { # <
231     $self->{escape} = 1;
232     }
233     }
234     }
235    
236     #
237 wakaba 1.1 } elsif ($self->{next_input_character} == 0x003C) { # <
238 wakaba 1.41 if ($self->{content_model} & CM_FULL_MARKUP or # PCDATA
239     (($self->{content_model} & CM_LIMITED_MARKUP) and # CDATA | RCDATA
240 wakaba 1.13 not $self->{escape})) {
241 wakaba 1.1 $self->{state} = 'tag open';
242    
243     if (@{$self->{char}}) {
244     $self->{next_input_character} = shift @{$self->{char}};
245     } else {
246     $self->{set_next_input_character}->($self);
247     }
248    
249     redo A;
250     } else {
251     #
252     }
253 wakaba 1.13 } elsif ($self->{next_input_character} == 0x003E) { # >
254     if ($self->{escape} and
255 wakaba 1.41 ($self->{content_model} & CM_LIMITED_MARKUP)) { # RCDATA | CDATA
256 wakaba 1.13 if ($self->{prev_input_character}->[0] == 0x002D and # -
257     $self->{prev_input_character}->[1] == 0x002D) { # -
258     delete $self->{escape};
259     }
260     }
261    
262     #
263 wakaba 1.1 } elsif ($self->{next_input_character} == -1) {
264     return ({type => 'end-of-file'});
265     last A; ## TODO: ok?
266     }
267     # Anything else
268     my $token = {type => 'character',
269     data => chr $self->{next_input_character}};
270     ## Stay in the data state
271    
272     if (@{$self->{char}}) {
273     $self->{next_input_character} = shift @{$self->{char}};
274     } else {
275     $self->{set_next_input_character}->($self);
276     }
277    
278    
279     return ($token);
280    
281     redo A;
282     } elsif ($self->{state} eq 'entity data') {
283     ## (cannot happen in CDATA state)
284    
285 wakaba 1.26 my $token = $self->_tokenize_attempt_to_consume_an_entity (0);
286 wakaba 1.1
287     $self->{state} = 'data';
288     # next-input-character is already done
289    
290     unless (defined $token) {
291     return ({type => 'character', data => '&'});
292     } else {
293     return ($token);
294     }
295    
296     redo A;
297     } elsif ($self->{state} eq 'tag open') {
298 wakaba 1.41 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
299 wakaba 1.1 if ($self->{next_input_character} == 0x002F) { # /
300    
301     if (@{$self->{char}}) {
302     $self->{next_input_character} = shift @{$self->{char}};
303     } else {
304     $self->{set_next_input_character}->($self);
305     }
306    
307     $self->{state} = 'close tag open';
308     redo A;
309     } else {
310     ## reconsume
311     $self->{state} = 'data';
312    
313     return ({type => 'character', data => '<'});
314    
315     redo A;
316     }
317 wakaba 1.41 } elsif ($self->{content_model} & CM_FULL_MARKUP) { # PCDATA
318 wakaba 1.1 if ($self->{next_input_character} == 0x0021) { # !
319     $self->{state} = 'markup declaration open';
320    
321     if (@{$self->{char}}) {
322     $self->{next_input_character} = shift @{$self->{char}};
323     } else {
324     $self->{set_next_input_character}->($self);
325     }
326    
327     redo A;
328     } elsif ($self->{next_input_character} == 0x002F) { # /
329     $self->{state} = 'close tag open';
330    
331     if (@{$self->{char}}) {
332     $self->{next_input_character} = shift @{$self->{char}};
333     } else {
334     $self->{set_next_input_character}->($self);
335     }
336    
337     redo A;
338     } elsif (0x0041 <= $self->{next_input_character} and
339     $self->{next_input_character} <= 0x005A) { # A..Z
340     $self->{current_token}
341     = {type => 'start tag',
342     tag_name => chr ($self->{next_input_character} + 0x0020)};
343     $self->{state} = 'tag name';
344    
345     if (@{$self->{char}}) {
346     $self->{next_input_character} = shift @{$self->{char}};
347     } else {
348     $self->{set_next_input_character}->($self);
349     }
350    
351     redo A;
352     } elsif (0x0061 <= $self->{next_input_character} and
353     $self->{next_input_character} <= 0x007A) { # a..z
354     $self->{current_token} = {type => 'start tag',
355     tag_name => chr ($self->{next_input_character})};
356     $self->{state} = 'tag name';
357    
358     if (@{$self->{char}}) {
359     $self->{next_input_character} = shift @{$self->{char}};
360     } else {
361     $self->{set_next_input_character}->($self);
362     }
363    
364     redo A;
365     } elsif ($self->{next_input_character} == 0x003E) { # >
366 wakaba 1.3 $self->{parse_error}-> (type => 'empty start tag');
367 wakaba 1.1 $self->{state} = 'data';
368    
369     if (@{$self->{char}}) {
370     $self->{next_input_character} = shift @{$self->{char}};
371     } else {
372     $self->{set_next_input_character}->($self);
373     }
374    
375    
376     return ({type => 'character', data => '<>'});
377    
378     redo A;
379     } elsif ($self->{next_input_character} == 0x003F) { # ?
380 wakaba 1.3 $self->{parse_error}-> (type => 'pio');
381 wakaba 1.1 $self->{state} = 'bogus comment';
382     ## $self->{next_input_character} is intentionally left as is
383     redo A;
384     } else {
385 wakaba 1.3 $self->{parse_error}-> (type => 'bare stago');
386 wakaba 1.1 $self->{state} = 'data';
387     ## reconsume
388    
389     return ({type => 'character', data => '<'});
390    
391     redo A;
392     }
393     } else {
394 wakaba 1.41 die "$0: $self->{content_model} in tag open";
395 wakaba 1.1 }
396     } elsif ($self->{state} eq 'close tag open') {
397 wakaba 1.41 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
398 wakaba 1.23 if (defined $self->{last_emitted_start_tag_name}) {
399 wakaba 1.30 ## NOTE: <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>
400 wakaba 1.23 my @next_char;
401     TAGNAME: for (my $i = 0; $i < length $self->{last_emitted_start_tag_name}; $i++) {
402     push @next_char, $self->{next_input_character};
403     my $c = ord substr ($self->{last_emitted_start_tag_name}, $i, 1);
404     my $C = 0x0061 <= $c && $c <= 0x007A ? $c - 0x0020 : $c;
405     if ($self->{next_input_character} == $c or $self->{next_input_character} == $C) {
406    
407 wakaba 1.1 if (@{$self->{char}}) {
408     $self->{next_input_character} = shift @{$self->{char}};
409     } else {
410     $self->{set_next_input_character}->($self);
411     }
412    
413 wakaba 1.23 next TAGNAME;
414     } else {
415     $self->{next_input_character} = shift @next_char; # reconsume
416     unshift @{$self->{char}}, (@next_char);
417     $self->{state} = 'data';
418    
419     return ({type => 'character', data => '</'});
420    
421     redo A;
422     }
423     }
424     push @next_char, $self->{next_input_character};
425    
426     unless ($self->{next_input_character} == 0x0009 or # HT
427     $self->{next_input_character} == 0x000A or # LF
428     $self->{next_input_character} == 0x000B or # VT
429     $self->{next_input_character} == 0x000C or # FF
430     $self->{next_input_character} == 0x0020 or # SP
431     $self->{next_input_character} == 0x003E or # >
432     $self->{next_input_character} == 0x002F or # /
433     $self->{next_input_character} == -1) {
434 wakaba 1.1 $self->{next_input_character} = shift @next_char; # reconsume
435     unshift @{$self->{char}}, (@next_char);
436     $self->{state} = 'data';
437     return ({type => 'character', data => '</'});
438     redo A;
439 wakaba 1.23 } else {
440     $self->{next_input_character} = shift @next_char;
441     unshift @{$self->{char}}, (@next_char);
442     # and consume...
443 wakaba 1.1 }
444 wakaba 1.23 } else {
445     ## No start tag token has ever been emitted
446     # next-input-character is already done
447 wakaba 1.1 $self->{state} = 'data';
448     return ({type => 'character', data => '</'});
449     redo A;
450     }
451     }
452    
453     if (0x0041 <= $self->{next_input_character} and
454     $self->{next_input_character} <= 0x005A) { # A..Z
455     $self->{current_token} = {type => 'end tag',
456     tag_name => chr ($self->{next_input_character} + 0x0020)};
457     $self->{state} = 'tag name';
458    
459     if (@{$self->{char}}) {
460     $self->{next_input_character} = shift @{$self->{char}};
461     } else {
462     $self->{set_next_input_character}->($self);
463     }
464    
465     redo A;
466     } elsif (0x0061 <= $self->{next_input_character} and
467     $self->{next_input_character} <= 0x007A) { # a..z
468     $self->{current_token} = {type => 'end tag',
469     tag_name => chr ($self->{next_input_character})};
470     $self->{state} = 'tag name';
471    
472     if (@{$self->{char}}) {
473     $self->{next_input_character} = shift @{$self->{char}};
474     } else {
475     $self->{set_next_input_character}->($self);
476     }
477    
478     redo A;
479     } elsif ($self->{next_input_character} == 0x003E) { # >
480 wakaba 1.3 $self->{parse_error}-> (type => 'empty end tag');
481 wakaba 1.1 $self->{state} = 'data';
482    
483     if (@{$self->{char}}) {
484     $self->{next_input_character} = shift @{$self->{char}};
485     } else {
486     $self->{set_next_input_character}->($self);
487     }
488    
489     redo A;
490     } elsif ($self->{next_input_character} == -1) {
491 wakaba 1.3 $self->{parse_error}-> (type => 'bare etago');
492 wakaba 1.1 $self->{state} = 'data';
493     # reconsume
494    
495     return ({type => 'character', data => '</'});
496    
497     redo A;
498     } else {
499 wakaba 1.3 $self->{parse_error}-> (type => 'bogus end tag');
500 wakaba 1.1 $self->{state} = 'bogus comment';
501     ## $self->{next_input_character} is intentionally left as is
502     redo A;
503     }
504     } elsif ($self->{state} eq 'tag name') {
505     if ($self->{next_input_character} == 0x0009 or # HT
506     $self->{next_input_character} == 0x000A or # LF
507     $self->{next_input_character} == 0x000B or # VT
508     $self->{next_input_character} == 0x000C or # FF
509     $self->{next_input_character} == 0x0020) { # SP
510     $self->{state} = 'before attribute name';
511    
512     if (@{$self->{char}}) {
513     $self->{next_input_character} = shift @{$self->{char}};
514     } else {
515     $self->{set_next_input_character}->($self);
516     }
517    
518     redo A;
519     } elsif ($self->{next_input_character} == 0x003E) { # >
520     if ($self->{current_token}->{type} eq 'start tag') {
521 wakaba 1.28 $self->{current_token}->{first_start_tag}
522     = not defined $self->{last_emitted_start_tag_name};
523 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
524     } elsif ($self->{current_token}->{type} eq 'end tag') {
525 wakaba 1.41 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
526 wakaba 1.1 if ($self->{current_token}->{attributes}) {
527 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
528 wakaba 1.1 }
529     } else {
530     die "$0: $self->{current_token}->{type}: Unknown token type";
531     }
532     $self->{state} = 'data';
533    
534     if (@{$self->{char}}) {
535     $self->{next_input_character} = shift @{$self->{char}};
536     } else {
537     $self->{set_next_input_character}->($self);
538     }
539    
540    
541     return ($self->{current_token}); # start tag or end tag
542    
543     redo A;
544     } elsif (0x0041 <= $self->{next_input_character} and
545     $self->{next_input_character} <= 0x005A) { # A..Z
546     $self->{current_token}->{tag_name} .= chr ($self->{next_input_character} + 0x0020);
547     # start tag or end tag
548     ## Stay in this state
549    
550     if (@{$self->{char}}) {
551     $self->{next_input_character} = shift @{$self->{char}};
552     } else {
553     $self->{set_next_input_character}->($self);
554     }
555    
556     redo A;
557 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
558 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed tag');
559 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
560 wakaba 1.28 $self->{current_token}->{first_start_tag}
561     = not defined $self->{last_emitted_start_tag_name};
562 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
563     } elsif ($self->{current_token}->{type} eq 'end tag') {
564 wakaba 1.41 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
565 wakaba 1.1 if ($self->{current_token}->{attributes}) {
566 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
567 wakaba 1.1 }
568     } else {
569     die "$0: $self->{current_token}->{type}: Unknown token type";
570     }
571     $self->{state} = 'data';
572     # reconsume
573    
574     return ($self->{current_token}); # start tag or end tag
575    
576     redo A;
577     } elsif ($self->{next_input_character} == 0x002F) { # /
578    
579     if (@{$self->{char}}) {
580     $self->{next_input_character} = shift @{$self->{char}};
581     } else {
582     $self->{set_next_input_character}->($self);
583     }
584    
585     if ($self->{next_input_character} == 0x003E and # >
586     $self->{current_token}->{type} eq 'start tag' and
587     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
588     # permitted slash
589     #
590     } else {
591 wakaba 1.3 $self->{parse_error}-> (type => 'nestc');
592 wakaba 1.1 }
593     $self->{state} = 'before attribute name';
594     # next-input-character is already done
595     redo A;
596     } else {
597     $self->{current_token}->{tag_name} .= chr $self->{next_input_character};
598     # start tag or end tag
599     ## Stay in the state
600    
601     if (@{$self->{char}}) {
602     $self->{next_input_character} = shift @{$self->{char}};
603     } else {
604     $self->{set_next_input_character}->($self);
605     }
606    
607     redo A;
608     }
609     } elsif ($self->{state} eq 'before attribute name') {
610     if ($self->{next_input_character} == 0x0009 or # HT
611     $self->{next_input_character} == 0x000A or # LF
612     $self->{next_input_character} == 0x000B or # VT
613     $self->{next_input_character} == 0x000C or # FF
614     $self->{next_input_character} == 0x0020) { # SP
615     ## Stay in the state
616    
617     if (@{$self->{char}}) {
618     $self->{next_input_character} = shift @{$self->{char}};
619     } else {
620     $self->{set_next_input_character}->($self);
621     }
622    
623     redo A;
624     } elsif ($self->{next_input_character} == 0x003E) { # >
625     if ($self->{current_token}->{type} eq 'start tag') {
626 wakaba 1.28 $self->{current_token}->{first_start_tag}
627     = not defined $self->{last_emitted_start_tag_name};
628 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
629     } elsif ($self->{current_token}->{type} eq 'end tag') {
630 wakaba 1.41 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
631 wakaba 1.1 if ($self->{current_token}->{attributes}) {
632 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
633 wakaba 1.1 }
634     } else {
635     die "$0: $self->{current_token}->{type}: Unknown token type";
636     }
637     $self->{state} = 'data';
638    
639     if (@{$self->{char}}) {
640     $self->{next_input_character} = shift @{$self->{char}};
641     } else {
642     $self->{set_next_input_character}->($self);
643     }
644    
645    
646     return ($self->{current_token}); # start tag or end tag
647    
648     redo A;
649     } elsif (0x0041 <= $self->{next_input_character} and
650     $self->{next_input_character} <= 0x005A) { # A..Z
651     $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
652     value => ''};
653     $self->{state} = 'attribute name';
654    
655     if (@{$self->{char}}) {
656     $self->{next_input_character} = shift @{$self->{char}};
657     } else {
658     $self->{set_next_input_character}->($self);
659     }
660    
661     redo A;
662     } elsif ($self->{next_input_character} == 0x002F) { # /
663    
664     if (@{$self->{char}}) {
665     $self->{next_input_character} = shift @{$self->{char}};
666     } else {
667     $self->{set_next_input_character}->($self);
668     }
669    
670     if ($self->{next_input_character} == 0x003E and # >
671     $self->{current_token}->{type} eq 'start tag' and
672     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
673     # permitted slash
674     #
675     } else {
676 wakaba 1.3 $self->{parse_error}-> (type => 'nestc');
677 wakaba 1.1 }
678     ## Stay in the state
679     # next-input-character is already done
680     redo A;
681 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
682 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed tag');
683 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
684 wakaba 1.28 $self->{current_token}->{first_start_tag}
685     = not defined $self->{last_emitted_start_tag_name};
686 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
687     } elsif ($self->{current_token}->{type} eq 'end tag') {
688 wakaba 1.41 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
689 wakaba 1.1 if ($self->{current_token}->{attributes}) {
690 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
691 wakaba 1.1 }
692     } else {
693     die "$0: $self->{current_token}->{type}: Unknown token type";
694     }
695     $self->{state} = 'data';
696     # reconsume
697    
698     return ($self->{current_token}); # start tag or end tag
699    
700     redo A;
701     } else {
702     $self->{current_attribute} = {name => chr ($self->{next_input_character}),
703     value => ''};
704     $self->{state} = 'attribute name';
705    
706     if (@{$self->{char}}) {
707     $self->{next_input_character} = shift @{$self->{char}};
708     } else {
709     $self->{set_next_input_character}->($self);
710     }
711    
712     redo A;
713     }
714     } elsif ($self->{state} eq 'attribute name') {
715     my $before_leave = sub {
716     if (exists $self->{current_token}->{attributes} # start tag or end tag
717     ->{$self->{current_attribute}->{name}}) { # MUST
718 wakaba 1.40 $self->{parse_error}-> (type => 'duplicate attribute:'.$self->{current_attribute}->{name});
719 wakaba 1.1 ## Discard $self->{current_attribute} # MUST
720     } else {
721     $self->{current_token}->{attributes}->{$self->{current_attribute}->{name}}
722     = $self->{current_attribute};
723     }
724     }; # $before_leave
725    
726     if ($self->{next_input_character} == 0x0009 or # HT
727     $self->{next_input_character} == 0x000A or # LF
728     $self->{next_input_character} == 0x000B or # VT
729     $self->{next_input_character} == 0x000C or # FF
730     $self->{next_input_character} == 0x0020) { # SP
731     $before_leave->();
732     $self->{state} = 'after attribute name';
733    
734     if (@{$self->{char}}) {
735     $self->{next_input_character} = shift @{$self->{char}};
736     } else {
737     $self->{set_next_input_character}->($self);
738     }
739    
740     redo A;
741     } elsif ($self->{next_input_character} == 0x003D) { # =
742     $before_leave->();
743     $self->{state} = 'before attribute value';
744    
745     if (@{$self->{char}}) {
746     $self->{next_input_character} = shift @{$self->{char}};
747     } else {
748     $self->{set_next_input_character}->($self);
749     }
750    
751     redo A;
752     } elsif ($self->{next_input_character} == 0x003E) { # >
753     $before_leave->();
754     if ($self->{current_token}->{type} eq 'start tag') {
755 wakaba 1.28 $self->{current_token}->{first_start_tag}
756     = not defined $self->{last_emitted_start_tag_name};
757 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
758     } elsif ($self->{current_token}->{type} eq 'end tag') {
759 wakaba 1.41 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
760 wakaba 1.1 if ($self->{current_token}->{attributes}) {
761 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
762 wakaba 1.1 }
763     } else {
764     die "$0: $self->{current_token}->{type}: Unknown token type";
765     }
766     $self->{state} = 'data';
767    
768     if (@{$self->{char}}) {
769     $self->{next_input_character} = shift @{$self->{char}};
770     } else {
771     $self->{set_next_input_character}->($self);
772     }
773    
774    
775     return ($self->{current_token}); # start tag or end tag
776    
777     redo A;
778     } elsif (0x0041 <= $self->{next_input_character} and
779     $self->{next_input_character} <= 0x005A) { # A..Z
780     $self->{current_attribute}->{name} .= chr ($self->{next_input_character} + 0x0020);
781     ## Stay in the state
782    
783     if (@{$self->{char}}) {
784     $self->{next_input_character} = shift @{$self->{char}};
785     } else {
786     $self->{set_next_input_character}->($self);
787     }
788    
789     redo A;
790     } elsif ($self->{next_input_character} == 0x002F) { # /
791     $before_leave->();
792    
793     if (@{$self->{char}}) {
794     $self->{next_input_character} = shift @{$self->{char}};
795     } else {
796     $self->{set_next_input_character}->($self);
797     }
798    
799     if ($self->{next_input_character} == 0x003E and # >
800     $self->{current_token}->{type} eq 'start tag' and
801     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
802     # permitted slash
803     #
804     } else {
805 wakaba 1.3 $self->{parse_error}-> (type => 'nestc');
806 wakaba 1.1 }
807     $self->{state} = 'before attribute name';
808     # next-input-character is already done
809     redo A;
810 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
811 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed tag');
812 wakaba 1.1 $before_leave->();
813     if ($self->{current_token}->{type} eq 'start tag') {
814 wakaba 1.28 $self->{current_token}->{first_start_tag}
815     = not defined $self->{last_emitted_start_tag_name};
816 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
817     } elsif ($self->{current_token}->{type} eq 'end tag') {
818 wakaba 1.41 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
819 wakaba 1.1 if ($self->{current_token}->{attributes}) {
820 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
821 wakaba 1.1 }
822     } else {
823     die "$0: $self->{current_token}->{type}: Unknown token type";
824     }
825     $self->{state} = 'data';
826     # reconsume
827    
828     return ($self->{current_token}); # start tag or end tag
829    
830     redo A;
831     } else {
832     $self->{current_attribute}->{name} .= chr ($self->{next_input_character});
833     ## Stay in the state
834    
835     if (@{$self->{char}}) {
836     $self->{next_input_character} = shift @{$self->{char}};
837     } else {
838     $self->{set_next_input_character}->($self);
839     }
840    
841     redo A;
842     }
843     } elsif ($self->{state} eq 'after attribute name') {
844     if ($self->{next_input_character} == 0x0009 or # HT
845     $self->{next_input_character} == 0x000A or # LF
846     $self->{next_input_character} == 0x000B or # VT
847     $self->{next_input_character} == 0x000C or # FF
848     $self->{next_input_character} == 0x0020) { # SP
849     ## Stay in the state
850    
851     if (@{$self->{char}}) {
852     $self->{next_input_character} = shift @{$self->{char}};
853     } else {
854     $self->{set_next_input_character}->($self);
855     }
856    
857     redo A;
858     } elsif ($self->{next_input_character} == 0x003D) { # =
859     $self->{state} = 'before attribute value';
860    
861     if (@{$self->{char}}) {
862     $self->{next_input_character} = shift @{$self->{char}};
863     } else {
864     $self->{set_next_input_character}->($self);
865     }
866    
867     redo A;
868     } elsif ($self->{next_input_character} == 0x003E) { # >
869     if ($self->{current_token}->{type} eq 'start tag') {
870 wakaba 1.28 $self->{current_token}->{first_start_tag}
871     = not defined $self->{last_emitted_start_tag_name};
872 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
873     } elsif ($self->{current_token}->{type} eq 'end tag') {
874 wakaba 1.41 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
875 wakaba 1.1 if ($self->{current_token}->{attributes}) {
876 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
877 wakaba 1.1 }
878     } else {
879     die "$0: $self->{current_token}->{type}: Unknown token type";
880     }
881     $self->{state} = 'data';
882    
883     if (@{$self->{char}}) {
884     $self->{next_input_character} = shift @{$self->{char}};
885     } else {
886     $self->{set_next_input_character}->($self);
887     }
888    
889    
890     return ($self->{current_token}); # start tag or end tag
891    
892     redo A;
893     } elsif (0x0041 <= $self->{next_input_character} and
894     $self->{next_input_character} <= 0x005A) { # A..Z
895     $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
896     value => ''};
897     $self->{state} = 'attribute name';
898    
899     if (@{$self->{char}}) {
900     $self->{next_input_character} = shift @{$self->{char}};
901     } else {
902     $self->{set_next_input_character}->($self);
903     }
904    
905     redo A;
906     } elsif ($self->{next_input_character} == 0x002F) { # /
907    
908     if (@{$self->{char}}) {
909     $self->{next_input_character} = shift @{$self->{char}};
910     } else {
911     $self->{set_next_input_character}->($self);
912     }
913    
914     if ($self->{next_input_character} == 0x003E and # >
915     $self->{current_token}->{type} eq 'start tag' and
916     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
917     # permitted slash
918     #
919     } else {
920 wakaba 1.3 $self->{parse_error}-> (type => 'nestc');
921 wakaba 1.33 ## TODO: Different error type for <aa / bb> than <aa/>
922 wakaba 1.1 }
923     $self->{state} = 'before attribute name';
924     # next-input-character is already done
925     redo A;
926 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
927 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed tag');
928 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
929 wakaba 1.28 $self->{current_token}->{first_start_tag}
930     = not defined $self->{last_emitted_start_tag_name};
931 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
932     } elsif ($self->{current_token}->{type} eq 'end tag') {
933 wakaba 1.41 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
934 wakaba 1.1 if ($self->{current_token}->{attributes}) {
935 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
936 wakaba 1.1 }
937     } else {
938     die "$0: $self->{current_token}->{type}: Unknown token type";
939     }
940     $self->{state} = 'data';
941     # reconsume
942    
943     return ($self->{current_token}); # start tag or end tag
944    
945     redo A;
946     } else {
947     $self->{current_attribute} = {name => chr ($self->{next_input_character}),
948     value => ''};
949     $self->{state} = 'attribute name';
950    
951     if (@{$self->{char}}) {
952     $self->{next_input_character} = shift @{$self->{char}};
953     } else {
954     $self->{set_next_input_character}->($self);
955     }
956    
957     redo A;
958     }
959     } elsif ($self->{state} eq 'before attribute value') {
960     if ($self->{next_input_character} == 0x0009 or # HT
961     $self->{next_input_character} == 0x000A or # LF
962     $self->{next_input_character} == 0x000B or # VT
963     $self->{next_input_character} == 0x000C or # FF
964     $self->{next_input_character} == 0x0020) { # SP
965     ## Stay in the state
966    
967     if (@{$self->{char}}) {
968     $self->{next_input_character} = shift @{$self->{char}};
969     } else {
970     $self->{set_next_input_character}->($self);
971     }
972    
973     redo A;
974     } elsif ($self->{next_input_character} == 0x0022) { # "
975     $self->{state} = 'attribute value (double-quoted)';
976    
977     if (@{$self->{char}}) {
978     $self->{next_input_character} = shift @{$self->{char}};
979     } else {
980     $self->{set_next_input_character}->($self);
981     }
982    
983     redo A;
984     } elsif ($self->{next_input_character} == 0x0026) { # &
985     $self->{state} = 'attribute value (unquoted)';
986     ## reconsume
987     redo A;
988     } elsif ($self->{next_input_character} == 0x0027) { # '
989     $self->{state} = 'attribute value (single-quoted)';
990    
991     if (@{$self->{char}}) {
992     $self->{next_input_character} = shift @{$self->{char}};
993     } else {
994     $self->{set_next_input_character}->($self);
995     }
996    
997     redo A;
998     } elsif ($self->{next_input_character} == 0x003E) { # >
999     if ($self->{current_token}->{type} eq 'start tag') {
1000 wakaba 1.28 $self->{current_token}->{first_start_tag}
1001     = not defined $self->{last_emitted_start_tag_name};
1002 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1003     } elsif ($self->{current_token}->{type} eq 'end tag') {
1004 wakaba 1.41 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1005 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1006 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
1007 wakaba 1.1 }
1008     } else {
1009     die "$0: $self->{current_token}->{type}: Unknown token type";
1010     }
1011     $self->{state} = 'data';
1012    
1013     if (@{$self->{char}}) {
1014     $self->{next_input_character} = shift @{$self->{char}};
1015     } else {
1016     $self->{set_next_input_character}->($self);
1017     }
1018    
1019    
1020     return ($self->{current_token}); # start tag or end tag
1021    
1022     redo A;
1023 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
1024 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed tag');
1025 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1026 wakaba 1.28 $self->{current_token}->{first_start_tag}
1027     = not defined $self->{last_emitted_start_tag_name};
1028 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1029     } elsif ($self->{current_token}->{type} eq 'end tag') {
1030 wakaba 1.41 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1031 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1032 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
1033 wakaba 1.1 }
1034     } else {
1035     die "$0: $self->{current_token}->{type}: Unknown token type";
1036     }
1037     $self->{state} = 'data';
1038     ## reconsume
1039    
1040     return ($self->{current_token}); # start tag or end tag
1041    
1042     redo A;
1043     } else {
1044     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1045     $self->{state} = 'attribute value (unquoted)';
1046    
1047     if (@{$self->{char}}) {
1048     $self->{next_input_character} = shift @{$self->{char}};
1049     } else {
1050     $self->{set_next_input_character}->($self);
1051     }
1052    
1053     redo A;
1054     }
1055     } elsif ($self->{state} eq 'attribute value (double-quoted)') {
1056     if ($self->{next_input_character} == 0x0022) { # "
1057     $self->{state} = 'before attribute name';
1058    
1059     if (@{$self->{char}}) {
1060     $self->{next_input_character} = shift @{$self->{char}};
1061     } else {
1062     $self->{set_next_input_character}->($self);
1063     }
1064    
1065     redo A;
1066     } elsif ($self->{next_input_character} == 0x0026) { # &
1067     $self->{last_attribute_value_state} = 'attribute value (double-quoted)';
1068     $self->{state} = 'entity in attribute value';
1069    
1070     if (@{$self->{char}}) {
1071     $self->{next_input_character} = shift @{$self->{char}};
1072     } else {
1073     $self->{set_next_input_character}->($self);
1074     }
1075    
1076     redo A;
1077     } elsif ($self->{next_input_character} == -1) {
1078 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed attribute value');
1079 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1080 wakaba 1.28 $self->{current_token}->{first_start_tag}
1081     = not defined $self->{last_emitted_start_tag_name};
1082 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1083     } elsif ($self->{current_token}->{type} eq 'end tag') {
1084 wakaba 1.41 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1085 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1086 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
1087 wakaba 1.1 }
1088     } else {
1089     die "$0: $self->{current_token}->{type}: Unknown token type";
1090     }
1091     $self->{state} = 'data';
1092     ## reconsume
1093    
1094     return ($self->{current_token}); # start tag or end tag
1095    
1096     redo A;
1097     } else {
1098     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1099     ## Stay in the state
1100    
1101     if (@{$self->{char}}) {
1102     $self->{next_input_character} = shift @{$self->{char}};
1103     } else {
1104     $self->{set_next_input_character}->($self);
1105     }
1106    
1107     redo A;
1108     }
1109     } elsif ($self->{state} eq 'attribute value (single-quoted)') {
1110     if ($self->{next_input_character} == 0x0027) { # '
1111     $self->{state} = 'before attribute name';
1112    
1113     if (@{$self->{char}}) {
1114     $self->{next_input_character} = shift @{$self->{char}};
1115     } else {
1116     $self->{set_next_input_character}->($self);
1117     }
1118    
1119     redo A;
1120     } elsif ($self->{next_input_character} == 0x0026) { # &
1121     $self->{last_attribute_value_state} = 'attribute value (single-quoted)';
1122     $self->{state} = 'entity in attribute value';
1123    
1124     if (@{$self->{char}}) {
1125     $self->{next_input_character} = shift @{$self->{char}};
1126     } else {
1127     $self->{set_next_input_character}->($self);
1128     }
1129    
1130     redo A;
1131     } elsif ($self->{next_input_character} == -1) {
1132 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed attribute value');
1133 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1134 wakaba 1.28 $self->{current_token}->{first_start_tag}
1135     = not defined $self->{last_emitted_start_tag_name};
1136 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1137     } elsif ($self->{current_token}->{type} eq 'end tag') {
1138 wakaba 1.41 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1139 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1140 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
1141 wakaba 1.1 }
1142     } else {
1143     die "$0: $self->{current_token}->{type}: Unknown token type";
1144     }
1145     $self->{state} = 'data';
1146     ## reconsume
1147    
1148     return ($self->{current_token}); # start tag or end tag
1149    
1150     redo A;
1151     } else {
1152     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1153     ## Stay in the state
1154    
1155     if (@{$self->{char}}) {
1156     $self->{next_input_character} = shift @{$self->{char}};
1157     } else {
1158     $self->{set_next_input_character}->($self);
1159     }
1160    
1161     redo A;
1162     }
1163     } elsif ($self->{state} eq 'attribute value (unquoted)') {
1164     if ($self->{next_input_character} == 0x0009 or # HT
1165     $self->{next_input_character} == 0x000A or # LF
1166     $self->{next_input_character} == 0x000B or # VT
1167     $self->{next_input_character} == 0x000C or # FF
1168     $self->{next_input_character} == 0x0020) { # SP
1169     $self->{state} = 'before attribute name';
1170    
1171     if (@{$self->{char}}) {
1172     $self->{next_input_character} = shift @{$self->{char}};
1173     } else {
1174     $self->{set_next_input_character}->($self);
1175     }
1176    
1177     redo A;
1178     } elsif ($self->{next_input_character} == 0x0026) { # &
1179     $self->{last_attribute_value_state} = 'attribute value (unquoted)';
1180     $self->{state} = 'entity in attribute value';
1181    
1182     if (@{$self->{char}}) {
1183     $self->{next_input_character} = shift @{$self->{char}};
1184     } else {
1185     $self->{set_next_input_character}->($self);
1186     }
1187    
1188     redo A;
1189     } elsif ($self->{next_input_character} == 0x003E) { # >
1190     if ($self->{current_token}->{type} eq 'start tag') {
1191 wakaba 1.28 $self->{current_token}->{first_start_tag}
1192     = not defined $self->{last_emitted_start_tag_name};
1193 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1194     } elsif ($self->{current_token}->{type} eq 'end tag') {
1195 wakaba 1.41 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1196 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1197 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
1198 wakaba 1.1 }
1199     } else {
1200     die "$0: $self->{current_token}->{type}: Unknown token type";
1201     }
1202     $self->{state} = 'data';
1203    
1204     if (@{$self->{char}}) {
1205     $self->{next_input_character} = shift @{$self->{char}};
1206     } else {
1207     $self->{set_next_input_character}->($self);
1208     }
1209    
1210    
1211     return ($self->{current_token}); # start tag or end tag
1212    
1213     redo A;
1214 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
1215 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed tag');
1216 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1217 wakaba 1.28 $self->{current_token}->{first_start_tag}
1218     = not defined $self->{last_emitted_start_tag_name};
1219 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1220     } elsif ($self->{current_token}->{type} eq 'end tag') {
1221 wakaba 1.41 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1222 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1223 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
1224 wakaba 1.1 }
1225     } else {
1226     die "$0: $self->{current_token}->{type}: Unknown token type";
1227     }
1228     $self->{state} = 'data';
1229     ## reconsume
1230    
1231     return ($self->{current_token}); # start tag or end tag
1232    
1233     redo A;
1234     } else {
1235     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1236     ## Stay in the state
1237    
1238     if (@{$self->{char}}) {
1239     $self->{next_input_character} = shift @{$self->{char}};
1240     } else {
1241     $self->{set_next_input_character}->($self);
1242     }
1243    
1244     redo A;
1245     }
1246     } elsif ($self->{state} eq 'entity in attribute value') {
1247 wakaba 1.26 my $token = $self->_tokenize_attempt_to_consume_an_entity (1);
1248 wakaba 1.1
1249     unless (defined $token) {
1250     $self->{current_attribute}->{value} .= '&';
1251     } else {
1252     $self->{current_attribute}->{value} .= $token->{data};
1253     ## ISSUE: spec says "append the returned character token to the current attribute's value"
1254     }
1255    
1256     $self->{state} = $self->{last_attribute_value_state};
1257     # next-input-character is already done
1258     redo A;
1259     } elsif ($self->{state} eq 'bogus comment') {
1260     ## (only happen if PCDATA state)
1261    
1262     my $token = {type => 'comment', data => ''};
1263    
1264     BC: {
1265     if ($self->{next_input_character} == 0x003E) { # >
1266     $self->{state} = 'data';
1267    
1268     if (@{$self->{char}}) {
1269     $self->{next_input_character} = shift @{$self->{char}};
1270     } else {
1271     $self->{set_next_input_character}->($self);
1272     }
1273    
1274    
1275     return ($token);
1276    
1277     redo A;
1278     } elsif ($self->{next_input_character} == -1) {
1279     $self->{state} = 'data';
1280     ## reconsume
1281    
1282     return ($token);
1283    
1284     redo A;
1285     } else {
1286     $token->{data} .= chr ($self->{next_input_character});
1287    
1288     if (@{$self->{char}}) {
1289     $self->{next_input_character} = shift @{$self->{char}};
1290     } else {
1291     $self->{set_next_input_character}->($self);
1292     }
1293    
1294     redo BC;
1295     }
1296     } # BC
1297     } elsif ($self->{state} eq 'markup declaration open') {
1298     ## (only happen if PCDATA state)
1299    
1300     my @next_char;
1301     push @next_char, $self->{next_input_character};
1302    
1303     if ($self->{next_input_character} == 0x002D) { # -
1304    
1305     if (@{$self->{char}}) {
1306     $self->{next_input_character} = shift @{$self->{char}};
1307     } else {
1308     $self->{set_next_input_character}->($self);
1309     }
1310    
1311     push @next_char, $self->{next_input_character};
1312     if ($self->{next_input_character} == 0x002D) { # -
1313     $self->{current_token} = {type => 'comment', data => ''};
1314 wakaba 1.23 $self->{state} = 'comment start';
1315 wakaba 1.1
1316     if (@{$self->{char}}) {
1317     $self->{next_input_character} = shift @{$self->{char}};
1318     } else {
1319     $self->{set_next_input_character}->($self);
1320     }
1321    
1322     redo A;
1323     }
1324     } elsif ($self->{next_input_character} == 0x0044 or # D
1325     $self->{next_input_character} == 0x0064) { # d
1326    
1327     if (@{$self->{char}}) {
1328     $self->{next_input_character} = shift @{$self->{char}};
1329     } else {
1330     $self->{set_next_input_character}->($self);
1331     }
1332    
1333     push @next_char, $self->{next_input_character};
1334     if ($self->{next_input_character} == 0x004F or # O
1335     $self->{next_input_character} == 0x006F) { # o
1336    
1337     if (@{$self->{char}}) {
1338     $self->{next_input_character} = shift @{$self->{char}};
1339     } else {
1340     $self->{set_next_input_character}->($self);
1341     }
1342    
1343     push @next_char, $self->{next_input_character};
1344     if ($self->{next_input_character} == 0x0043 or # C
1345     $self->{next_input_character} == 0x0063) { # c
1346    
1347     if (@{$self->{char}}) {
1348     $self->{next_input_character} = shift @{$self->{char}};
1349     } else {
1350     $self->{set_next_input_character}->($self);
1351     }
1352    
1353     push @next_char, $self->{next_input_character};
1354     if ($self->{next_input_character} == 0x0054 or # T
1355     $self->{next_input_character} == 0x0074) { # t
1356    
1357     if (@{$self->{char}}) {
1358     $self->{next_input_character} = shift @{$self->{char}};
1359     } else {
1360     $self->{set_next_input_character}->($self);
1361     }
1362    
1363     push @next_char, $self->{next_input_character};
1364     if ($self->{next_input_character} == 0x0059 or # Y
1365     $self->{next_input_character} == 0x0079) { # y
1366    
1367     if (@{$self->{char}}) {
1368     $self->{next_input_character} = shift @{$self->{char}};
1369     } else {
1370     $self->{set_next_input_character}->($self);
1371     }
1372    
1373     push @next_char, $self->{next_input_character};
1374     if ($self->{next_input_character} == 0x0050 or # P
1375     $self->{next_input_character} == 0x0070) { # p
1376    
1377     if (@{$self->{char}}) {
1378     $self->{next_input_character} = shift @{$self->{char}};
1379     } else {
1380     $self->{set_next_input_character}->($self);
1381     }
1382    
1383     push @next_char, $self->{next_input_character};
1384     if ($self->{next_input_character} == 0x0045 or # E
1385     $self->{next_input_character} == 0x0065) { # e
1386     ## ISSUE: What a stupid code this is!
1387     $self->{state} = 'DOCTYPE';
1388    
1389     if (@{$self->{char}}) {
1390     $self->{next_input_character} = shift @{$self->{char}};
1391     } else {
1392     $self->{set_next_input_character}->($self);
1393     }
1394    
1395     redo A;
1396     }
1397     }
1398     }
1399     }
1400     }
1401     }
1402     }
1403    
1404 wakaba 1.30 $self->{parse_error}-> (type => 'bogus comment');
1405 wakaba 1.1 $self->{next_input_character} = shift @next_char;
1406     unshift @{$self->{char}}, (@next_char);
1407     $self->{state} = 'bogus comment';
1408     redo A;
1409    
1410     ## ISSUE: typos in spec: chacacters, is is a parse error
1411     ## ISSUE: spec is somewhat unclear on "is the first character that will be in the comment"; what is "that will be in the comment" is what the algorithm defines, isn't it?
1412 wakaba 1.23 } elsif ($self->{state} eq 'comment start') {
1413     if ($self->{next_input_character} == 0x002D) { # -
1414     $self->{state} = 'comment start dash';
1415    
1416     if (@{$self->{char}}) {
1417     $self->{next_input_character} = shift @{$self->{char}};
1418     } else {
1419     $self->{set_next_input_character}->($self);
1420     }
1421    
1422     redo A;
1423     } elsif ($self->{next_input_character} == 0x003E) { # >
1424     $self->{parse_error}-> (type => 'bogus comment');
1425     $self->{state} = 'data';
1426    
1427     if (@{$self->{char}}) {
1428     $self->{next_input_character} = shift @{$self->{char}};
1429     } else {
1430     $self->{set_next_input_character}->($self);
1431     }
1432    
1433    
1434     return ($self->{current_token}); # comment
1435    
1436     redo A;
1437     } elsif ($self->{next_input_character} == -1) {
1438     $self->{parse_error}-> (type => 'unclosed comment');
1439     $self->{state} = 'data';
1440     ## reconsume
1441    
1442     return ($self->{current_token}); # comment
1443    
1444     redo A;
1445     } else {
1446     $self->{current_token}->{data} # comment
1447     .= chr ($self->{next_input_character});
1448     $self->{state} = 'comment';
1449    
1450     if (@{$self->{char}}) {
1451     $self->{next_input_character} = shift @{$self->{char}};
1452     } else {
1453     $self->{set_next_input_character}->($self);
1454     }
1455    
1456     redo A;
1457     }
1458     } elsif ($self->{state} eq 'comment start dash') {
1459     if ($self->{next_input_character} == 0x002D) { # -
1460     $self->{state} = 'comment end';
1461    
1462     if (@{$self->{char}}) {
1463     $self->{next_input_character} = shift @{$self->{char}};
1464     } else {
1465     $self->{set_next_input_character}->($self);
1466     }
1467    
1468     redo A;
1469     } elsif ($self->{next_input_character} == 0x003E) { # >
1470     $self->{parse_error}-> (type => 'bogus comment');
1471     $self->{state} = 'data';
1472    
1473     if (@{$self->{char}}) {
1474     $self->{next_input_character} = shift @{$self->{char}};
1475     } else {
1476     $self->{set_next_input_character}->($self);
1477     }
1478    
1479    
1480     return ($self->{current_token}); # comment
1481    
1482     redo A;
1483     } elsif ($self->{next_input_character} == -1) {
1484     $self->{parse_error}-> (type => 'unclosed comment');
1485     $self->{state} = 'data';
1486     ## reconsume
1487    
1488     return ($self->{current_token}); # comment
1489    
1490     redo A;
1491     } else {
1492     $self->{current_token}->{data} # comment
1493 wakaba 1.33 .= '-' . chr ($self->{next_input_character});
1494 wakaba 1.23 $self->{state} = 'comment';
1495    
1496     if (@{$self->{char}}) {
1497     $self->{next_input_character} = shift @{$self->{char}};
1498     } else {
1499     $self->{set_next_input_character}->($self);
1500     }
1501    
1502     redo A;
1503     }
1504 wakaba 1.1 } elsif ($self->{state} eq 'comment') {
1505     if ($self->{next_input_character} == 0x002D) { # -
1506 wakaba 1.23 $self->{state} = 'comment end dash';
1507 wakaba 1.1
1508     if (@{$self->{char}}) {
1509     $self->{next_input_character} = shift @{$self->{char}};
1510     } else {
1511     $self->{set_next_input_character}->($self);
1512     }
1513    
1514     redo A;
1515     } elsif ($self->{next_input_character} == -1) {
1516 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed comment');
1517 wakaba 1.1 $self->{state} = 'data';
1518     ## reconsume
1519    
1520     return ($self->{current_token}); # comment
1521    
1522     redo A;
1523     } else {
1524     $self->{current_token}->{data} .= chr ($self->{next_input_character}); # comment
1525     ## Stay in the state
1526    
1527     if (@{$self->{char}}) {
1528     $self->{next_input_character} = shift @{$self->{char}};
1529     } else {
1530     $self->{set_next_input_character}->($self);
1531     }
1532    
1533     redo A;
1534     }
1535 wakaba 1.23 } elsif ($self->{state} eq 'comment end dash') {
1536 wakaba 1.1 if ($self->{next_input_character} == 0x002D) { # -
1537     $self->{state} = 'comment end';
1538    
1539     if (@{$self->{char}}) {
1540     $self->{next_input_character} = shift @{$self->{char}};
1541     } else {
1542     $self->{set_next_input_character}->($self);
1543     }
1544    
1545     redo A;
1546     } elsif ($self->{next_input_character} == -1) {
1547 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed comment');
1548 wakaba 1.1 $self->{state} = 'data';
1549     ## reconsume
1550    
1551     return ($self->{current_token}); # comment
1552    
1553     redo A;
1554     } else {
1555     $self->{current_token}->{data} .= '-' . chr ($self->{next_input_character}); # comment
1556     $self->{state} = 'comment';
1557    
1558     if (@{$self->{char}}) {
1559     $self->{next_input_character} = shift @{$self->{char}};
1560     } else {
1561     $self->{set_next_input_character}->($self);
1562     }
1563    
1564     redo A;
1565     }
1566     } elsif ($self->{state} eq 'comment end') {
1567     if ($self->{next_input_character} == 0x003E) { # >
1568     $self->{state} = 'data';
1569    
1570     if (@{$self->{char}}) {
1571     $self->{next_input_character} = shift @{$self->{char}};
1572     } else {
1573     $self->{set_next_input_character}->($self);
1574     }
1575    
1576    
1577     return ($self->{current_token}); # comment
1578    
1579     redo A;
1580     } elsif ($self->{next_input_character} == 0x002D) { # -
1581 wakaba 1.3 $self->{parse_error}-> (type => 'dash in comment');
1582 wakaba 1.1 $self->{current_token}->{data} .= '-'; # comment
1583     ## Stay in the state
1584    
1585     if (@{$self->{char}}) {
1586     $self->{next_input_character} = shift @{$self->{char}};
1587     } else {
1588     $self->{set_next_input_character}->($self);
1589     }
1590    
1591     redo A;
1592     } elsif ($self->{next_input_character} == -1) {
1593 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed comment');
1594 wakaba 1.1 $self->{state} = 'data';
1595     ## reconsume
1596    
1597     return ($self->{current_token}); # comment
1598    
1599     redo A;
1600     } else {
1601 wakaba 1.3 $self->{parse_error}-> (type => 'dash in comment');
1602 wakaba 1.1 $self->{current_token}->{data} .= '--' . chr ($self->{next_input_character}); # comment
1603     $self->{state} = 'comment';
1604    
1605     if (@{$self->{char}}) {
1606     $self->{next_input_character} = shift @{$self->{char}};
1607     } else {
1608     $self->{set_next_input_character}->($self);
1609     }
1610    
1611     redo A;
1612     }
1613     } elsif ($self->{state} eq 'DOCTYPE') {
1614     if ($self->{next_input_character} == 0x0009 or # HT
1615     $self->{next_input_character} == 0x000A or # LF
1616     $self->{next_input_character} == 0x000B or # VT
1617     $self->{next_input_character} == 0x000C or # FF
1618     $self->{next_input_character} == 0x0020) { # SP
1619     $self->{state} = 'before DOCTYPE name';
1620    
1621     if (@{$self->{char}}) {
1622     $self->{next_input_character} = shift @{$self->{char}};
1623     } else {
1624     $self->{set_next_input_character}->($self);
1625     }
1626    
1627     redo A;
1628     } else {
1629 wakaba 1.3 $self->{parse_error}-> (type => 'no space before DOCTYPE name');
1630 wakaba 1.1 $self->{state} = 'before DOCTYPE name';
1631     ## reconsume
1632     redo A;
1633     }
1634     } elsif ($self->{state} eq 'before DOCTYPE name') {
1635     if ($self->{next_input_character} == 0x0009 or # HT
1636     $self->{next_input_character} == 0x000A or # LF
1637     $self->{next_input_character} == 0x000B or # VT
1638     $self->{next_input_character} == 0x000C or # FF
1639     $self->{next_input_character} == 0x0020) { # SP
1640     ## Stay in the state
1641    
1642     if (@{$self->{char}}) {
1643     $self->{next_input_character} = shift @{$self->{char}};
1644     } else {
1645     $self->{set_next_input_character}->($self);
1646     }
1647    
1648     redo A;
1649 wakaba 1.18 } elsif ($self->{next_input_character} == 0x003E) { # >
1650     $self->{parse_error}-> (type => 'no DOCTYPE name');
1651     $self->{state} = 'data';
1652    
1653     if (@{$self->{char}}) {
1654     $self->{next_input_character} = shift @{$self->{char}};
1655     } else {
1656     $self->{set_next_input_character}->($self);
1657     }
1658    
1659    
1660     return ({type => 'DOCTYPE'}); # incorrect
1661    
1662     redo A;
1663     } elsif ($self->{next_input_character} == -1) {
1664     $self->{parse_error}-> (type => 'no DOCTYPE name');
1665     $self->{state} = 'data';
1666     ## reconsume
1667    
1668     return ({type => 'DOCTYPE'}); # incorrect
1669    
1670     redo A;
1671     } else {
1672     $self->{current_token}
1673     = {type => 'DOCTYPE',
1674     name => chr ($self->{next_input_character}),
1675     correct => 1};
1676 wakaba 1.4 ## ISSUE: "Set the token's name name to the" in the spec
1677 wakaba 1.1 $self->{state} = 'DOCTYPE name';
1678    
1679     if (@{$self->{char}}) {
1680     $self->{next_input_character} = shift @{$self->{char}};
1681     } else {
1682     $self->{set_next_input_character}->($self);
1683     }
1684    
1685     redo A;
1686 wakaba 1.18 }
1687     } elsif ($self->{state} eq 'DOCTYPE name') {
1688     ## ISSUE: Redundant "First," in the spec.
1689     if ($self->{next_input_character} == 0x0009 or # HT
1690     $self->{next_input_character} == 0x000A or # LF
1691     $self->{next_input_character} == 0x000B or # VT
1692     $self->{next_input_character} == 0x000C or # FF
1693     $self->{next_input_character} == 0x0020) { # SP
1694     $self->{state} = 'after DOCTYPE name';
1695    
1696     if (@{$self->{char}}) {
1697     $self->{next_input_character} = shift @{$self->{char}};
1698     } else {
1699     $self->{set_next_input_character}->($self);
1700     }
1701    
1702     redo A;
1703 wakaba 1.1 } elsif ($self->{next_input_character} == 0x003E) { # >
1704     $self->{state} = 'data';
1705    
1706     if (@{$self->{char}}) {
1707     $self->{next_input_character} = shift @{$self->{char}};
1708     } else {
1709     $self->{set_next_input_character}->($self);
1710     }
1711    
1712    
1713 wakaba 1.18 return ($self->{current_token}); # DOCTYPE
1714 wakaba 1.1
1715     redo A;
1716 wakaba 1.18 } elsif ($self->{next_input_character} == -1) {
1717     $self->{parse_error}-> (type => 'unclosed DOCTYPE');
1718 wakaba 1.1 $self->{state} = 'data';
1719     ## reconsume
1720    
1721 wakaba 1.18 delete $self->{current_token}->{correct};
1722     return ($self->{current_token}); # DOCTYPE
1723 wakaba 1.1
1724     redo A;
1725     } else {
1726 wakaba 1.18 $self->{current_token}->{name}
1727     .= chr ($self->{next_input_character}); # DOCTYPE
1728     ## Stay in the state
1729 wakaba 1.1
1730     if (@{$self->{char}}) {
1731     $self->{next_input_character} = shift @{$self->{char}};
1732     } else {
1733     $self->{set_next_input_character}->($self);
1734     }
1735    
1736     redo A;
1737     }
1738 wakaba 1.18 } elsif ($self->{state} eq 'after DOCTYPE name') {
1739 wakaba 1.1 if ($self->{next_input_character} == 0x0009 or # HT
1740     $self->{next_input_character} == 0x000A or # LF
1741     $self->{next_input_character} == 0x000B or # VT
1742     $self->{next_input_character} == 0x000C or # FF
1743     $self->{next_input_character} == 0x0020) { # SP
1744 wakaba 1.18 ## Stay in the state
1745 wakaba 1.1
1746     if (@{$self->{char}}) {
1747     $self->{next_input_character} = shift @{$self->{char}};
1748     } else {
1749     $self->{set_next_input_character}->($self);
1750     }
1751    
1752     redo A;
1753     } elsif ($self->{next_input_character} == 0x003E) { # >
1754     $self->{state} = 'data';
1755    
1756     if (@{$self->{char}}) {
1757     $self->{next_input_character} = shift @{$self->{char}};
1758     } else {
1759     $self->{set_next_input_character}->($self);
1760     }
1761    
1762    
1763     return ($self->{current_token}); # DOCTYPE
1764    
1765     redo A;
1766 wakaba 1.18 } elsif ($self->{next_input_character} == -1) {
1767     $self->{parse_error}-> (type => 'unclosed DOCTYPE');
1768     $self->{state} = 'data';
1769     ## reconsume
1770    
1771     delete $self->{current_token}->{correct};
1772     return ($self->{current_token}); # DOCTYPE
1773    
1774     redo A;
1775     } elsif ($self->{next_input_character} == 0x0050 or # P
1776     $self->{next_input_character} == 0x0070) { # p
1777    
1778     if (@{$self->{char}}) {
1779     $self->{next_input_character} = shift @{$self->{char}};
1780     } else {
1781     $self->{set_next_input_character}->($self);
1782     }
1783    
1784     if ($self->{next_input_character} == 0x0055 or # U
1785     $self->{next_input_character} == 0x0075) { # u
1786    
1787     if (@{$self->{char}}) {
1788     $self->{next_input_character} = shift @{$self->{char}};
1789     } else {
1790     $self->{set_next_input_character}->($self);
1791     }
1792    
1793     if ($self->{next_input_character} == 0x0042 or # B
1794     $self->{next_input_character} == 0x0062) { # b
1795    
1796     if (@{$self->{char}}) {
1797     $self->{next_input_character} = shift @{$self->{char}};
1798     } else {
1799     $self->{set_next_input_character}->($self);
1800     }
1801    
1802     if ($self->{next_input_character} == 0x004C or # L
1803     $self->{next_input_character} == 0x006C) { # l
1804    
1805     if (@{$self->{char}}) {
1806     $self->{next_input_character} = shift @{$self->{char}};
1807     } else {
1808     $self->{set_next_input_character}->($self);
1809     }
1810    
1811     if ($self->{next_input_character} == 0x0049 or # I
1812     $self->{next_input_character} == 0x0069) { # i
1813    
1814     if (@{$self->{char}}) {
1815     $self->{next_input_character} = shift @{$self->{char}};
1816     } else {
1817     $self->{set_next_input_character}->($self);
1818     }
1819    
1820     if ($self->{next_input_character} == 0x0043 or # C
1821     $self->{next_input_character} == 0x0063) { # c
1822     $self->{state} = 'before DOCTYPE public identifier';
1823    
1824     if (@{$self->{char}}) {
1825     $self->{next_input_character} = shift @{$self->{char}};
1826     } else {
1827     $self->{set_next_input_character}->($self);
1828     }
1829    
1830     redo A;
1831     }
1832     }
1833     }
1834     }
1835     }
1836    
1837     #
1838     } elsif ($self->{next_input_character} == 0x0053 or # S
1839     $self->{next_input_character} == 0x0073) { # s
1840    
1841     if (@{$self->{char}}) {
1842     $self->{next_input_character} = shift @{$self->{char}};
1843     } else {
1844     $self->{set_next_input_character}->($self);
1845     }
1846    
1847     if ($self->{next_input_character} == 0x0059 or # Y
1848     $self->{next_input_character} == 0x0079) { # y
1849    
1850     if (@{$self->{char}}) {
1851     $self->{next_input_character} = shift @{$self->{char}};
1852     } else {
1853     $self->{set_next_input_character}->($self);
1854     }
1855    
1856     if ($self->{next_input_character} == 0x0053 or # S
1857     $self->{next_input_character} == 0x0073) { # s
1858    
1859     if (@{$self->{char}}) {
1860     $self->{next_input_character} = shift @{$self->{char}};
1861     } else {
1862     $self->{set_next_input_character}->($self);
1863     }
1864    
1865     if ($self->{next_input_character} == 0x0054 or # T
1866     $self->{next_input_character} == 0x0074) { # t
1867    
1868     if (@{$self->{char}}) {
1869     $self->{next_input_character} = shift @{$self->{char}};
1870     } else {
1871     $self->{set_next_input_character}->($self);
1872     }
1873    
1874     if ($self->{next_input_character} == 0x0045 or # E
1875     $self->{next_input_character} == 0x0065) { # e
1876    
1877     if (@{$self->{char}}) {
1878     $self->{next_input_character} = shift @{$self->{char}};
1879     } else {
1880     $self->{set_next_input_character}->($self);
1881     }
1882    
1883     if ($self->{next_input_character} == 0x004D or # M
1884     $self->{next_input_character} == 0x006D) { # m
1885     $self->{state} = 'before DOCTYPE system identifier';
1886    
1887     if (@{$self->{char}}) {
1888     $self->{next_input_character} = shift @{$self->{char}};
1889     } else {
1890     $self->{set_next_input_character}->($self);
1891     }
1892    
1893     redo A;
1894     }
1895     }
1896     }
1897     }
1898     }
1899    
1900     #
1901     } else {
1902    
1903     if (@{$self->{char}}) {
1904     $self->{next_input_character} = shift @{$self->{char}};
1905     } else {
1906     $self->{set_next_input_character}->($self);
1907     }
1908    
1909     #
1910     }
1911    
1912     $self->{parse_error}-> (type => 'string after DOCTYPE name');
1913     $self->{state} = 'bogus DOCTYPE';
1914     # next-input-character is already done
1915     redo A;
1916     } elsif ($self->{state} eq 'before DOCTYPE public identifier') {
1917     if ({
1918     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
1919     #0x000D => 1, # HT, LF, VT, FF, SP, CR
1920     }->{$self->{next_input_character}}) {
1921 wakaba 1.1 ## Stay in the state
1922    
1923     if (@{$self->{char}}) {
1924     $self->{next_input_character} = shift @{$self->{char}};
1925     } else {
1926     $self->{set_next_input_character}->($self);
1927     }
1928    
1929     redo A;
1930 wakaba 1.18 } elsif ($self->{next_input_character} eq 0x0022) { # "
1931     $self->{current_token}->{public_identifier} = ''; # DOCTYPE
1932     $self->{state} = 'DOCTYPE public identifier (double-quoted)';
1933    
1934     if (@{$self->{char}}) {
1935     $self->{next_input_character} = shift @{$self->{char}};
1936     } else {
1937     $self->{set_next_input_character}->($self);
1938     }
1939    
1940     redo A;
1941     } elsif ($self->{next_input_character} eq 0x0027) { # '
1942     $self->{current_token}->{public_identifier} = ''; # DOCTYPE
1943     $self->{state} = 'DOCTYPE public identifier (single-quoted)';
1944    
1945     if (@{$self->{char}}) {
1946     $self->{next_input_character} = shift @{$self->{char}};
1947     } else {
1948     $self->{set_next_input_character}->($self);
1949     }
1950    
1951     redo A;
1952     } elsif ($self->{next_input_character} eq 0x003E) { # >
1953     $self->{parse_error}-> (type => 'no PUBLIC literal');
1954    
1955     $self->{state} = 'data';
1956    
1957     if (@{$self->{char}}) {
1958     $self->{next_input_character} = shift @{$self->{char}};
1959     } else {
1960     $self->{set_next_input_character}->($self);
1961     }
1962    
1963    
1964     delete $self->{current_token}->{correct};
1965     return ($self->{current_token}); # DOCTYPE
1966    
1967     redo A;
1968 wakaba 1.1 } elsif ($self->{next_input_character} == -1) {
1969 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed DOCTYPE');
1970 wakaba 1.18
1971 wakaba 1.1 $self->{state} = 'data';
1972     ## reconsume
1973    
1974 wakaba 1.18 delete $self->{current_token}->{correct};
1975     return ($self->{current_token}); # DOCTYPE
1976 wakaba 1.1
1977     redo A;
1978     } else {
1979 wakaba 1.18 $self->{parse_error}-> (type => 'string after PUBLIC');
1980     $self->{state} = 'bogus DOCTYPE';
1981    
1982     if (@{$self->{char}}) {
1983     $self->{next_input_character} = shift @{$self->{char}};
1984     } else {
1985     $self->{set_next_input_character}->($self);
1986     }
1987    
1988     redo A;
1989     }
1990     } elsif ($self->{state} eq 'DOCTYPE public identifier (double-quoted)') {
1991     if ($self->{next_input_character} == 0x0022) { # "
1992     $self->{state} = 'after DOCTYPE public identifier';
1993    
1994     if (@{$self->{char}}) {
1995     $self->{next_input_character} = shift @{$self->{char}};
1996     } else {
1997     $self->{set_next_input_character}->($self);
1998     }
1999    
2000     redo A;
2001     } elsif ($self->{next_input_character} == -1) {
2002     $self->{parse_error}-> (type => 'unclosed PUBLIC literal');
2003    
2004     $self->{state} = 'data';
2005     ## reconsume
2006    
2007     delete $self->{current_token}->{correct};
2008     return ($self->{current_token}); # DOCTYPE
2009    
2010     redo A;
2011     } else {
2012     $self->{current_token}->{public_identifier} # DOCTYPE
2013     .= chr $self->{next_input_character};
2014     ## Stay in the state
2015    
2016     if (@{$self->{char}}) {
2017     $self->{next_input_character} = shift @{$self->{char}};
2018     } else {
2019     $self->{set_next_input_character}->($self);
2020     }
2021    
2022     redo A;
2023     }
2024     } elsif ($self->{state} eq 'DOCTYPE public identifier (single-quoted)') {
2025     if ($self->{next_input_character} == 0x0027) { # '
2026     $self->{state} = 'after DOCTYPE public identifier';
2027    
2028     if (@{$self->{char}}) {
2029     $self->{next_input_character} = shift @{$self->{char}};
2030     } else {
2031     $self->{set_next_input_character}->($self);
2032     }
2033    
2034     redo A;
2035     } elsif ($self->{next_input_character} == -1) {
2036     $self->{parse_error}-> (type => 'unclosed PUBLIC literal');
2037    
2038     $self->{state} = 'data';
2039     ## reconsume
2040    
2041     delete $self->{current_token}->{correct};
2042     return ($self->{current_token}); # DOCTYPE
2043    
2044     redo A;
2045     } else {
2046     $self->{current_token}->{public_identifier} # DOCTYPE
2047     .= chr $self->{next_input_character};
2048     ## Stay in the state
2049    
2050     if (@{$self->{char}}) {
2051     $self->{next_input_character} = shift @{$self->{char}};
2052     } else {
2053     $self->{set_next_input_character}->($self);
2054     }
2055    
2056     redo A;
2057     }
2058     } elsif ($self->{state} eq 'after DOCTYPE public identifier') {
2059     if ({
2060     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2061     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2062     }->{$self->{next_input_character}}) {
2063 wakaba 1.1 ## Stay in the state
2064    
2065     if (@{$self->{char}}) {
2066     $self->{next_input_character} = shift @{$self->{char}};
2067     } else {
2068     $self->{set_next_input_character}->($self);
2069     }
2070    
2071     redo A;
2072 wakaba 1.18 } elsif ($self->{next_input_character} == 0x0022) { # "
2073     $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2074     $self->{state} = 'DOCTYPE system identifier (double-quoted)';
2075    
2076     if (@{$self->{char}}) {
2077     $self->{next_input_character} = shift @{$self->{char}};
2078     } else {
2079     $self->{set_next_input_character}->($self);
2080     }
2081    
2082     redo A;
2083     } elsif ($self->{next_input_character} == 0x0027) { # '
2084     $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2085     $self->{state} = 'DOCTYPE system identifier (single-quoted)';
2086    
2087     if (@{$self->{char}}) {
2088     $self->{next_input_character} = shift @{$self->{char}};
2089     } else {
2090     $self->{set_next_input_character}->($self);
2091     }
2092    
2093     redo A;
2094     } elsif ($self->{next_input_character} == 0x003E) { # >
2095     $self->{state} = 'data';
2096    
2097     if (@{$self->{char}}) {
2098     $self->{next_input_character} = shift @{$self->{char}};
2099     } else {
2100     $self->{set_next_input_character}->($self);
2101     }
2102    
2103    
2104     return ($self->{current_token}); # DOCTYPE
2105    
2106     redo A;
2107     } elsif ($self->{next_input_character} == -1) {
2108     $self->{parse_error}-> (type => 'unclosed DOCTYPE');
2109    
2110     $self->{state} = 'data';
2111 wakaba 1.26 ## reconsume
2112 wakaba 1.18
2113     delete $self->{current_token}->{correct};
2114     return ($self->{current_token}); # DOCTYPE
2115    
2116     redo A;
2117     } else {
2118     $self->{parse_error}-> (type => 'string after PUBLIC literal');
2119     $self->{state} = 'bogus DOCTYPE';
2120    
2121     if (@{$self->{char}}) {
2122     $self->{next_input_character} = shift @{$self->{char}};
2123     } else {
2124     $self->{set_next_input_character}->($self);
2125     }
2126    
2127     redo A;
2128 wakaba 1.1 }
2129 wakaba 1.18 } elsif ($self->{state} eq 'before DOCTYPE system identifier') {
2130     if ({
2131     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2132     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2133     }->{$self->{next_input_character}}) {
2134 wakaba 1.1 ## Stay in the state
2135    
2136     if (@{$self->{char}}) {
2137     $self->{next_input_character} = shift @{$self->{char}};
2138     } else {
2139     $self->{set_next_input_character}->($self);
2140     }
2141    
2142     redo A;
2143 wakaba 1.18 } elsif ($self->{next_input_character} == 0x0022) { # "
2144     $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2145     $self->{state} = 'DOCTYPE system identifier (double-quoted)';
2146    
2147     if (@{$self->{char}}) {
2148     $self->{next_input_character} = shift @{$self->{char}};
2149     } else {
2150     $self->{set_next_input_character}->($self);
2151     }
2152    
2153     redo A;
2154     } elsif ($self->{next_input_character} == 0x0027) { # '
2155     $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2156     $self->{state} = 'DOCTYPE system identifier (single-quoted)';
2157    
2158     if (@{$self->{char}}) {
2159     $self->{next_input_character} = shift @{$self->{char}};
2160     } else {
2161     $self->{set_next_input_character}->($self);
2162     }
2163    
2164     redo A;
2165 wakaba 1.1 } elsif ($self->{next_input_character} == 0x003E) { # >
2166 wakaba 1.18 $self->{parse_error}-> (type => 'no SYSTEM literal');
2167 wakaba 1.1 $self->{state} = 'data';
2168    
2169     if (@{$self->{char}}) {
2170     $self->{next_input_character} = shift @{$self->{char}};
2171     } else {
2172     $self->{set_next_input_character}->($self);
2173     }
2174    
2175    
2176 wakaba 1.18 delete $self->{current_token}->{correct};
2177 wakaba 1.1 return ($self->{current_token}); # DOCTYPE
2178    
2179     redo A;
2180     } elsif ($self->{next_input_character} == -1) {
2181 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed DOCTYPE');
2182 wakaba 1.18
2183     $self->{state} = 'data';
2184 wakaba 1.26 ## reconsume
2185 wakaba 1.18
2186     delete $self->{current_token}->{correct};
2187     return ($self->{current_token}); # DOCTYPE
2188    
2189     redo A;
2190     } else {
2191 wakaba 1.30 $self->{parse_error}-> (type => 'string after SYSTEM');
2192 wakaba 1.18 $self->{state} = 'bogus DOCTYPE';
2193    
2194     if (@{$self->{char}}) {
2195     $self->{next_input_character} = shift @{$self->{char}};
2196     } else {
2197     $self->{set_next_input_character}->($self);
2198     }
2199    
2200     redo A;
2201     }
2202     } elsif ($self->{state} eq 'DOCTYPE system identifier (double-quoted)') {
2203     if ($self->{next_input_character} == 0x0022) { # "
2204     $self->{state} = 'after DOCTYPE system identifier';
2205    
2206     if (@{$self->{char}}) {
2207     $self->{next_input_character} = shift @{$self->{char}};
2208     } else {
2209     $self->{set_next_input_character}->($self);
2210     }
2211    
2212     redo A;
2213     } elsif ($self->{next_input_character} == -1) {
2214     $self->{parse_error}-> (type => 'unclosed SYSTEM literal');
2215    
2216 wakaba 1.1 $self->{state} = 'data';
2217     ## reconsume
2218    
2219 wakaba 1.18 delete $self->{current_token}->{correct};
2220 wakaba 1.1 return ($self->{current_token}); # DOCTYPE
2221    
2222     redo A;
2223     } else {
2224 wakaba 1.18 $self->{current_token}->{system_identifier} # DOCTYPE
2225     .= chr $self->{next_input_character};
2226     ## Stay in the state
2227    
2228     if (@{$self->{char}}) {
2229     $self->{next_input_character} = shift @{$self->{char}};
2230     } else {
2231     $self->{set_next_input_character}->($self);
2232     }
2233    
2234     redo A;
2235     }
2236     } elsif ($self->{state} eq 'DOCTYPE system identifier (single-quoted)') {
2237     if ($self->{next_input_character} == 0x0027) { # '
2238     $self->{state} = 'after DOCTYPE system identifier';
2239    
2240     if (@{$self->{char}}) {
2241     $self->{next_input_character} = shift @{$self->{char}};
2242     } else {
2243     $self->{set_next_input_character}->($self);
2244     }
2245    
2246     redo A;
2247     } elsif ($self->{next_input_character} == -1) {
2248     $self->{parse_error}-> (type => 'unclosed SYSTEM literal');
2249    
2250     $self->{state} = 'data';
2251     ## reconsume
2252    
2253     delete $self->{current_token}->{correct};
2254     return ($self->{current_token}); # DOCTYPE
2255    
2256     redo A;
2257     } else {
2258     $self->{current_token}->{system_identifier} # DOCTYPE
2259     .= chr $self->{next_input_character};
2260     ## Stay in the state
2261    
2262     if (@{$self->{char}}) {
2263     $self->{next_input_character} = shift @{$self->{char}};
2264     } else {
2265     $self->{set_next_input_character}->($self);
2266     }
2267    
2268     redo A;
2269     }
2270     } elsif ($self->{state} eq 'after DOCTYPE system identifier') {
2271     if ({
2272     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2273     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2274     }->{$self->{next_input_character}}) {
2275     ## Stay in the state
2276    
2277     if (@{$self->{char}}) {
2278     $self->{next_input_character} = shift @{$self->{char}};
2279     } else {
2280     $self->{set_next_input_character}->($self);
2281     }
2282    
2283     redo A;
2284     } elsif ($self->{next_input_character} == 0x003E) { # >
2285     $self->{state} = 'data';
2286    
2287     if (@{$self->{char}}) {
2288     $self->{next_input_character} = shift @{$self->{char}};
2289     } else {
2290     $self->{set_next_input_character}->($self);
2291     }
2292    
2293    
2294     return ($self->{current_token}); # DOCTYPE
2295    
2296     redo A;
2297     } elsif ($self->{next_input_character} == -1) {
2298     $self->{parse_error}-> (type => 'unclosed DOCTYPE');
2299    
2300     $self->{state} = 'data';
2301 wakaba 1.26 ## reconsume
2302 wakaba 1.18
2303     delete $self->{current_token}->{correct};
2304     return ($self->{current_token}); # DOCTYPE
2305    
2306     redo A;
2307     } else {
2308     $self->{parse_error}-> (type => 'string after SYSTEM literal');
2309 wakaba 1.1 $self->{state} = 'bogus DOCTYPE';
2310    
2311     if (@{$self->{char}}) {
2312     $self->{next_input_character} = shift @{$self->{char}};
2313     } else {
2314     $self->{set_next_input_character}->($self);
2315     }
2316    
2317     redo A;
2318     }
2319     } elsif ($self->{state} eq 'bogus DOCTYPE') {
2320     if ($self->{next_input_character} == 0x003E) { # >
2321     $self->{state} = 'data';
2322    
2323     if (@{$self->{char}}) {
2324     $self->{next_input_character} = shift @{$self->{char}};
2325     } else {
2326     $self->{set_next_input_character}->($self);
2327     }
2328    
2329    
2330 wakaba 1.18 delete $self->{current_token}->{correct};
2331 wakaba 1.1 return ($self->{current_token}); # DOCTYPE
2332    
2333     redo A;
2334     } elsif ($self->{next_input_character} == -1) {
2335 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed DOCTYPE');
2336 wakaba 1.1 $self->{state} = 'data';
2337     ## reconsume
2338    
2339 wakaba 1.18 delete $self->{current_token}->{correct};
2340 wakaba 1.1 return ($self->{current_token}); # DOCTYPE
2341    
2342     redo A;
2343     } else {
2344     ## Stay in the state
2345    
2346     if (@{$self->{char}}) {
2347     $self->{next_input_character} = shift @{$self->{char}};
2348     } else {
2349     $self->{set_next_input_character}->($self);
2350     }
2351    
2352     redo A;
2353     }
2354     } else {
2355     die "$0: $self->{state}: Unknown state";
2356     }
2357     } # A
2358    
2359     die "$0: _get_next_token: unexpected case";
2360     } # _get_next_token
2361    
## Tries to consume a character reference.  On entry the "&" has
## already been consumed and |$self->{next_input_character}| holds the
## character that follows it.
##
## Arguments:
##   $self     The parser object.  Input is read through
##             |$self->{char}| (pushback buffer) and
##             |$self->{set_next_input_character}|; parse errors are
##             reported through |$self->{parse_error}|.
##   $in_attr  True if the reference is part of an attribute value.
##             It only matters for a named reference that matched
##             without ";" and is followed by more name characters:
##             such a reference is then returned literally.
##
## Returns a |character| token (hash reference with |type| and
## |data|), or |undef| if there is nothing to be treated as a
## reference.  Whenever |undef| is returned, the current input
## character is (again) the one that directly follows the "&".
sub _tokenize_attempt_to_consume_an_entity ($$) {
  my ($self, $in_attr) = @_;

  ## Steps to the next input character: pushed-back characters are
  ## taken first, otherwise the input callback supplies one.
  my $advance = sub {
    my $pending = $self->{char};
    if (@$pending) {
      $self->{next_input_character} = shift @$pending;
    } else {
      $self->{set_next_input_character}->($self);
    }
  };

  ## Value of a hexadecimal digit, or |undef| for any other character.
  my $hex_digit = sub {
    my $c = shift;
    return $c - 0x0030 if 0x0030 <= $c and $c <= 0x0039; # 0..9
    return $c - 0x0060 + 9 if 0x0061 <= $c and $c <= 0x0066; # a..f
    return $c - 0x0040 + 9 if 0x0041 <= $c and $c <= 0x0046; # A..F
    return undef;
  };

  ## Turns the code point of a numeric reference into a |character|
  ## token, reporting and replacing code points that are not allowed.
  my $numeric_reference = sub {
    my $code = shift;
    my $is_surrogate = (0xD800 <= $code and $code <= 0xDFFF);
    if ($code == 0 or $is_surrogate) {
      $self->{parse_error}-> (type => sprintf 'invalid character reference:U+%04X', $code);
      $code = 0xFFFD;
    } elsif ($code > 0x10FFFF) {
      $self->{parse_error}-> (type => sprintf 'invalid character reference:U-%08X', $code);
      $code = 0xFFFD;
    } elsif ($code == 0x000D) {
      $self->{parse_error}-> (type => 'CR character reference');
      $code = 0x000A;
    } elsif (0x80 <= $code and $code <= 0x9F) {
      $self->{parse_error}-> (type => sprintf 'C1 character reference:U+%04X', $code);
      $code = $c1_entity_char->{$code};
    }
    return {type => 'character', data => chr $code};
  };

  ## HT, LF, VT, FF, SP, <, & and end of input (CR is not included)
  my %not_a_reference = map { $_ => 1 }
      (0x0009, 0x000A, 0x000B, 0x000C, 0x0020, 0x003C, 0x0026, -1);
  if ($not_a_reference{$self->{next_input_character}}) {
    ## Nothing is consumed and no error is reported.
    return undef;
  }

  if ($self->{next_input_character} == 0x0023) { # #
    $advance->();
    my $marker = $self->{next_input_character};

    if ($marker == 0x0078 or $marker == 0x0058) { # x or X
      ## Hexadecimal reference.  The "x" is remembered so that it can
      ## be pushed back if no digit follows.
      my $x_char = $marker;
      $advance->();

      my $code;
      while (defined (my $digit = $hex_digit->($self->{next_input_character}))) {
        $code = 0x10 * ($code || 0) + $digit;
        $advance->();
      }

      unless (defined $code) { # no hexadecimal digit
        $self->{parse_error}-> (type => 'bare hcro');
        unshift @{$self->{char}}, ($x_char, $self->{next_input_character});
        $self->{next_input_character} = 0x0023; # #
        return undef;
      }

      if ($self->{next_input_character} == 0x003B) { # ;
        $advance->();
      } else {
        $self->{parse_error}-> (type => 'no refc');
      }

      return $numeric_reference->($code);
    }

    if (0x0030 <= $marker and $marker <= 0x0039) { # 0..9
      ## Decimal reference; at least one digit is present here.
      my $code = 0;
      do {
        $code = $code * 10 + ($self->{next_input_character} - 0x0030);
        $advance->();
      } while (0x0030 <= $self->{next_input_character} and
               $self->{next_input_character} <= 0x0039);

      if ($self->{next_input_character} == 0x003B) { # ;
        $advance->();
      } else {
        $self->{parse_error}-> (type => 'no refc');
      }

      return $numeric_reference->($code);
    }

    ## "&#" that is followed by neither "x" nor a digit: push the
    ## character back and make "#" the current character again.
    $self->{parse_error}-> (type => 'bare nero');
    unshift @{$self->{char}}, ($self->{next_input_character});
    $self->{next_input_character} = 0x0023; # #
    return undef;
  }

  my $is_ascii_letter = sub {
    my $c = shift;
    return ((0x0041 <= $c and $c <= 0x005A) or # A..Z
            (0x0061 <= $c and $c <= 0x007A)); # a..z
  };

  unless ($is_ascii_letter->($self->{next_input_character})) {
    ## no characters are consumed
    $self->{parse_error}-> (type => 'bare ero');
    return undef;
  }

  ## Named reference.
  my $entity_name = chr $self->{next_input_character};
  $advance->();

  ## $value is what is going to be returned: the replacement text of
  ## the most recently matched name plus anything read after it, or
  ## simply the characters read so far as long as nothing has matched.
  ## $match is 0 while nothing has matched, 1 for a match that ends
  ## with ";", -1 for a match without ";", and smaller than -1 once
  ## further name characters have followed such a match.
  my $value = $entity_name;
  my $match = 0;
  require Whatpm::_NamedEntityList;
  our $EntityChar;

  ## NOTE: 10 is some number greater than the maximum length of
  ## entity name.
  NAME: while (length ($entity_name) < 10) {
    my $c = $self->{next_input_character};
    last NAME unless $is_ascii_letter->($c) or
        (0x0030 <= $c and $c <= 0x0039) or # 0..9
        $c == 0x003B; # ;

    $entity_name .= chr $c;
    if (defined $EntityChar->{$entity_name}) {
      $value = $EntityChar->{$entity_name};
      if ($c == 0x003B) { # ;
        $match = 1;
        $advance->();
        last NAME;
      }
      $match = -1;
    } else {
      $value .= chr $c;
      $match *= 2;
    }
    $advance->();
  } # NAME

  if ($match > 0) {
    return {type => 'character', data => $value};
  }

  if ($match == 0) {
    $self->{parse_error}-> (type => 'bare ero');
    ## NOTE: No characters are consumed in the spec.
    return {type => 'character', data => '&'.$value};
  }

  ## A name has matched, but without the terminating ";".
  $self->{parse_error}-> (type => 'no refc');
  return {type => 'character', data => '&'.$entity_name}
      if $in_attr and $match < -1;
  return {type => 'character', data => $value};
} # _tokenize_attempt_to_consume_an_entity
2585    
## Prepares |$self->{document}| for the tree construction stage:
## strict error checking is switched off and the document is flagged
## as an HTML document.  The value returned is whatever the
## |manakai_is_html| call returns.
sub _initialize_tree_constructor ($) {
  my ($self) = @_;
  ## NOTE: $self->{document} MUST be specified before this method is called
  my $document = $self->{document};
  $document->strict_error_checking (0);
  ## TODO: Turn mutation events off # MUST
  ## TODO: Turn loose Document option (manakai extension) on
  $document->manakai_is_html (1); # MUST
} # _initialize_tree_constructor
2594    
## Counterpart of |_initialize_tree_constructor|: switches strict
## error checking of |$self->{document}| back on and returns the
## result of that call.
sub _terminate_tree_constructor ($) {
  my ($self) = @_;
  my $document = $self->{document};
  ## TODO: Turn mutation events on
  return $document->strict_error_checking (1);
} # _terminate_tree_constructor
2600    
2601     ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
2602    
2603 wakaba 1.3 { # tree construction stage
2604     my $token;
2605    
## Entry point of the tree construction stage.  It fetches the first
## token into the shared |$token| variable, resets the tree
## construction state kept in |$self| and then runs the three phases
## in order.  The value returned is that of |_tree_construction_main|.
sub _construct_tree ($) {
  my $self = shift;

  ## NOTE: It is not defined when an interactive UA makes
  ## $self->{document} available to the user, or when it begins
  ## accepting user input.

  ## NOTE: Appending a character means: collect it together with all
  ## consecutive characters that follow and insert a single Text node
  ## whose data is the concatenation of those characters. # MUST

  $token = $self->_get_next_token;

  ## Reset the state of the tree constructor.
  $self->{insertion_mode} = 'before head';
  $self->{form_element} = undef;
  $self->{head_element} = undef;
  $self->{open_elements} = [];
  $self->{inner_html_node} = undef;

  $self->_tree_construction_initial; # MUST
  $self->_tree_construction_root_element;
  return $self->_tree_construction_main;
} # _construct_tree
2629    
## Tree construction, initial phase.
##
## Works on the shared |$token|.  Whitespace-only character tokens are
## dropped and comments are appended to the document; both stay in
## this phase.  A DOCTYPE token produces a DocumentType node, selects
## the document's compatibility mode and is consumed.  Any other token
## (start tag, end tag, end-of-file, non-whitespace characters) is
## reported as 'no DOCTYPE', puts the document into quirks mode and is
## left in |$token| to be reprocessed by the root element phase.
## Dies on a token type it does not know.
sub _tree_construction_initial ($) {
  my $self = shift;

  INITIAL: {
    if ($token->{type} eq 'DOCTYPE') {
      ## NOTE: Conformance checkers MAY, instead of reporting "not HTML5"
      ## error, switch to a conformance checking mode for another
      ## language.
      my $doctype_name = $token->{name};
      $doctype_name = '' unless defined $doctype_name;
      $doctype_name =~ tr/a-z/A-Z/;
      if (not defined $token->{name} or # <!DOCTYPE>
          defined $token->{public_identifier} or
          defined $token->{system_identifier}) {
        $self->{parse_error}-> (type => 'not HTML5');
      } elsif ($doctype_name ne 'HTML') {
        ## ISSUE: ASCII case-insensitive? (in fact it does not matter)
        $self->{parse_error}-> (type => 'not HTML5');
      }

      my $doctype = $self->{document}->create_document_type_definition
          ($token->{name}); ## ISSUE: If name is missing (e.g. <!DOCTYPE>)?
      $doctype->public_id ($token->{public_identifier})
          if defined $token->{public_identifier};
      $doctype->system_id ($token->{system_identifier})
          if defined $token->{system_identifier};
      ## NOTE: Other DocumentType attributes are null or empty lists.
      ## ISSUE: internalSubset = null??
      $self->{document}->append_child ($doctype);

      if (not $token->{correct} or $doctype_name ne 'HTML') {
        $self->{document}->manakai_compat_mode ('quirks');
      } elsif (defined $token->{public_identifier}) {
        ## The public identifier is compared ASCII case-insensitively:
        ## it is upper-cased here, so every literal it is compared
        ## with below MUST be written in upper case.
        my $pubid = $token->{public_identifier};
        $pubid =~ tr/a-z/A-Z/;
        ## NOTE(review): "EXPERIMETNAL" in the table below looks like a
        ## typo for "EXPERIMENTAL"; confirm against the specification's
        ## list before changing it.
        if ({
          "+//SILMARIL//DTD HTML PRO V0R11 19970101//EN" => 1,
          "-//ADVASOFT LTD//DTD HTML 3.0 ASWEDIT + EXTENSIONS//EN" => 1,
          "-//AS//DTD HTML 3.0 ASWEDIT + EXTENSIONS//EN" => 1,
          "-//IETF//DTD HTML 2.0 LEVEL 1//EN" => 1,
          "-//IETF//DTD HTML 2.0 LEVEL 2//EN" => 1,
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 1//EN" => 1,
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 2//EN" => 1,
          "-//IETF//DTD HTML 2.0 STRICT//EN" => 1,
          "-//IETF//DTD HTML 2.0//EN" => 1,
          "-//IETF//DTD HTML 2.1E//EN" => 1,
          "-//IETF//DTD HTML 3.0//EN" => 1,
          "-//IETF//DTD HTML 3.0//EN//" => 1,
          "-//IETF//DTD HTML 3.2 FINAL//EN" => 1,
          "-//IETF//DTD HTML 3.2//EN" => 1,
          "-//IETF//DTD HTML 3//EN" => 1,
          "-//IETF//DTD HTML LEVEL 0//EN" => 1,
          "-//IETF//DTD HTML LEVEL 0//EN//2.0" => 1,
          "-//IETF//DTD HTML LEVEL 1//EN" => 1,
          "-//IETF//DTD HTML LEVEL 1//EN//2.0" => 1,
          "-//IETF//DTD HTML LEVEL 2//EN" => 1,
          "-//IETF//DTD HTML LEVEL 2//EN//2.0" => 1,
          "-//IETF//DTD HTML LEVEL 3//EN" => 1,
          "-//IETF//DTD HTML LEVEL 3//EN//3.0" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 0//EN" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 0//EN//2.0" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 1//EN" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 1//EN//2.0" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 2//EN" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 2//EN//2.0" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 3//EN" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 3//EN//3.0" => 1,
          "-//IETF//DTD HTML STRICT//EN" => 1,
          "-//IETF//DTD HTML STRICT//EN//2.0" => 1,
          "-//IETF//DTD HTML STRICT//EN//3.0" => 1,
          "-//IETF//DTD HTML//EN" => 1,
          "-//IETF//DTD HTML//EN//2.0" => 1,
          "-//IETF//DTD HTML//EN//3.0" => 1,
          "-//METRIUS//DTD METRIUS PRESENTATIONAL//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML STRICT//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 TABLES//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML STRICT//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 TABLES//EN" => 1,
          "-//NETSCAPE COMM. CORP.//DTD HTML//EN" => 1,
          "-//NETSCAPE COMM. CORP.//DTD STRICT HTML//EN" => 1,
          "-//O'REILLY AND ASSOCIATES//DTD HTML 2.0//EN" => 1,
          "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED 1.0//EN" => 1,
          "-//SPYGLASS//DTD HTML 2.0 EXTENDED//EN" => 1,
          "-//SQ//DTD HTML 2.0 HOTMETAL + EXTENSIONS//EN" => 1,
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA HTML//EN" => 1,
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA STRICT HTML//EN" => 1,
          "-//W3C//DTD HTML 3 1995-03-24//EN" => 1,
          "-//W3C//DTD HTML 3.2 DRAFT//EN" => 1,
          "-//W3C//DTD HTML 3.2 FINAL//EN" => 1,
          "-//W3C//DTD HTML 3.2//EN" => 1,
          "-//W3C//DTD HTML 3.2S DRAFT//EN" => 1,
          "-//W3C//DTD HTML 4.0 FRAMESET//EN" => 1,
          "-//W3C//DTD HTML 4.0 TRANSITIONAL//EN" => 1,
          "-//W3C//DTD HTML EXPERIMETNAL 19960712//EN" => 1,
          "-//W3C//DTD HTML EXPERIMENTAL 970421//EN" => 1,
          "-//W3C//DTD W3 HTML//EN" => 1,
          "-//W3O//DTD W3 HTML 3.0//EN" => 1,
          "-//W3O//DTD W3 HTML 3.0//EN//" => 1,
          "-//W3O//DTD W3 HTML STRICT 3.0//EN//" => 1,
          "-//WEBTECHS//DTD MOZILLA HTML 2.0//EN" => 1,
          "-//WEBTECHS//DTD MOZILLA HTML//EN" => 1,
          "-/W3C/DTD HTML 4.0 TRANSITIONAL/EN" => 1,
          "HTML" => 1,
        }->{$pubid}) {
          $self->{document}->manakai_compat_mode ('quirks');
        } elsif ($pubid eq "-//W3C//DTD HTML 4.01 FRAMESET//EN" or
                 $pubid eq "-//W3C//DTD HTML 4.01 TRANSITIONAL//EN") {
          ## HTML 4.01 Frameset/Transitional: quirks mode when the
          ## system identifier is missing, limited quirks mode when it
          ## is present.
          if (defined $token->{system_identifier}) {
            $self->{document}->manakai_compat_mode ('limited quirks');
          } else {
            $self->{document}->manakai_compat_mode ('quirks');
          }
        } elsif ($pubid eq "-//W3C//DTD XHTML 1.0 FRAMESET//EN" or
                 $pubid eq "-//W3C//DTD XHTML 1.0 TRANSITIONAL//EN") {
          $self->{document}->manakai_compat_mode ('limited quirks');
        }
      }
      if (defined $token->{system_identifier}) {
        ## The system identifier is lower-cased before the comparison.
        my $sysid = $token->{system_identifier};
        $sysid =~ tr/A-Z/a-z/;
        if ($sysid eq "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
          $self->{document}->manakai_compat_mode ('quirks');
        }
      }

      ## Go to the root element phase.
      $token = $self->_get_next_token;
      return;
    } elsif ({
      'start tag' => 1,
      'end tag' => 1,
      'end-of-file' => 1,
    }->{$token->{type}}) {
      $self->{parse_error}-> (type => 'no DOCTYPE');
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the root element phase
      ## reprocess
      return;
    } elsif ($token->{type} eq 'character') {
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D
        ## Ignore the leading white space.

        unless (length $token->{data}) {
          ## Nothing is left: stay in the phase.
          $token = $self->_get_next_token;
          redo INITIAL;
        }
      }

      $self->{parse_error}-> (type => 'no DOCTYPE');
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the root element phase
      ## reprocess
      return;
    } elsif ($token->{type} eq 'comment') {
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);

      ## Stay in the phase.
      $token = $self->_get_next_token;
      redo INITIAL;
    } else {
      die "$0: $token->{type}: Unknown token";
    }
  } # INITIAL
} # _tree_construction_initial
2797    
## Tree construction, root element phase.
##
## Skips DOCTYPE tokens (with a parse error), appends comments to the
## document and drops leading white space.  As soon as any other token
## is seen, an |html| element is created, appended to the document and
## pushed onto the stack of open elements; that token stays in
## |$token| to be reprocessed by the main phase.  Dies on a token type
## it does not know.
sub _tree_construction_root_element ($) {
  my ($self) = @_;

  TOKEN: while (1) {
    my $type = $token->{type};

    if ($type eq 'DOCTYPE') {
      $self->{parse_error}-> (type => 'in html:#DOCTYPE');
      ## Ignore the token and stay in the phase.
      $token = $self->_get_next_token;
      next TOKEN;
    }

    if ($type eq 'comment') {
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);
      ## Stay in the phase.
      $token = $self->_get_next_token;
      next TOKEN;
    }

    if ($type eq 'character') {
      ## Leading white space is ignored.  # \x0D
      my $stripped = ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//);
      if ($stripped and not length $token->{data}) {
        ## The token was white space only: stay in the phase.
        $token = $self->_get_next_token;
        next TOKEN;
      }
      last TOKEN;
    }

    ## ISSUE: There is an issue in the spec
    last TOKEN if $type eq 'start tag'
               or $type eq 'end tag'
               or $type eq 'end-of-file';

    die "$0: $token->{type}: Unknown token";
  } # TOKEN

  my $root_element = $self->{document}->create_element_ns
      (q<http://www.w3.org/1999/xhtml>, [undef, 'html']);
  $self->{document}->append_child ($root_element);
  push @{$self->{open_elements}}, [$root_element, 'html'];

  ## The current token is reprocessed in the main phase.
  return;
} # _tree_construction_root_element
2846    
## Resets |$self->{insertion_mode}| appropriately.
##
## Walks the stack of open elements from the current node towards the
## first node and selects the insertion mode from the first element
## name that has an entry in the table below.  When the first node of
## the stack is reached and |$self->{inner_html_node}| is defined and
## is neither |td| nor |th|, that node is examined instead.
sub _reset_insertion_mode ($) {
  my ($self) = @_;

  ## Steps 4..13: element name -> insertion mode
  my %mode_for = (
    select => 'in select',
    td => 'in cell',
    th => 'in cell',
    tr => 'in row',
    tbody => 'in table body',
    thead => 'in table body',
    tfoot => 'in table body',
    caption => 'in caption',
    colgroup => 'in column group',
    table => 'in table',
    head => 'in body', # not in head!
    body => 'in body',
    frameset => 'in frameset',
  );

  ## Step 1
  my $is_last;

  ## Step 2
  my $index = -1;
  my $node = $self->{open_elements}->[$index];

  while (1) {
    ## Step 3
    ## ISSUE: Oops! "If node is the first node in the stack of open
    ## elements, then set last to true. If the context element of the
    ## HTML fragment parsing algorithm is neither a td element nor a
    ## th element, then set node to the context element. (fragment case)":
    ## The second "if" is in the scope of the first "if"!?
    if ($self->{open_elements}->[0]->[0] eq $node->[0]) {
      $is_last = 1;
      if (defined $self->{inner_html_node}) {
        my $context_name = $self->{inner_html_node}->[1];
        unless ($context_name eq 'td' or $context_name eq 'th') {
          $node = $self->{inner_html_node};
        }
      }
    }

    ## Steps 4..13
    my $mode = $mode_for{$node->[1]};
    if (defined $mode) {
      $self->{insertion_mode} = $mode;
      return;
    }

    ## Step 14
    if ($node->[1] eq 'html') {
      $self->{insertion_mode}
          = defined $self->{head_element} ? 'after head' : 'before head';
      return;
    }

    ## Step 15
    if ($is_last) {
      $self->{insertion_mode} = 'in body';
      return;
    }

    ## Step 16
    $index--;
    $node = $self->{open_elements}->[$index];

    ## Step 17: back to step 3
  }
} # _reset_insertion_mode
2915    
2916     sub _tree_construction_main ($) {
2917     my $self = shift;
2918    
2919 wakaba 1.35 my $previous_insertion_mode;
2920 wakaba 1.1
2921     my $active_formatting_elements = [];
2922    
## Reconstructs the active formatting elements.
##
## |$insert| is a code reference that receives each newly cloned
## element node and inserts it into the tree.  Entries of
## |$active_formatting_elements| after the last marker, and after the
## last entry that is still in the stack of open elements, are
## replaced by fresh clones, which are also pushed onto the stack of
## open elements.
my $reconstruct_active_formatting_elements = sub { # MUST
  my ($insert) = @_;

  ## True if the given node is in the stack of open elements.
  my $is_open = sub {
    my $candidate = shift;
    for my $open (@{$self->{open_elements}}) {
      return 1 if $candidate eq $open->[0];
    }
    return 0;
  };

  ## Step 1
  return unless @$active_formatting_elements;

  ## Step 3
  my $pos = -1;
  my $entry = $active_formatting_elements->[$pos];

  ## Step 2
  return if $entry->[0] eq '#marker';
  return if $is_open->($entry->[0]);

  ## Step 4: stop rewinding at the first entry of the list
  until ($active_formatting_elements->[0]->[0] eq $entry->[0]) {
    ## Step 5
    $pos--;
    $entry = $active_formatting_elements->[$pos];

    ## Step 6: keep rewinding unless the entry is a marker or is
    ## still in the stack of open elements
    if ($entry->[0] eq '#marker' or $is_open->($entry->[0])) {
      ## Step 7
      $pos++;
      $entry = $active_formatting_elements->[$pos];
      last;
    }
  }

  while (1) {
    ## Step 8
    my $clone = [$entry->[0]->clone_node (0), $entry->[1]];

    ## Step 9
    $insert->($clone->[0]);
    push @{$self->{open_elements}}, $clone;

    ## Step 10
    $active_formatting_elements->[$pos] = $self->{open_elements}->[-1];

    ## Step 11
    last if $clone->[0] eq $active_formatting_elements->[-1]->[0];

    ## Step 7'
    $pos++;
    $entry = $active_formatting_elements->[$pos];
  }
}; # $reconstruct_active_formatting_elements
2993    
## Removes the last '#marker' entry of |$active_formatting_elements|
## together with every entry after it.  Does nothing if the list
## contains no marker.
my $clear_up_to_marker = sub {
  my $index = $#$active_formatting_elements;
  while ($index >= 0) {
    if ($active_formatting_elements->[$index]->[0] eq '#marker') {
      splice @$active_formatting_elements, $index;
      return;
    }
    $index--;
  }
}; # $clear_up_to_marker
3002    
## Generic CDATA/RCDATA element parsing for the start tag in |$token|.
##
## Arguments: the content model flag (CDATA_CONTENT_MODEL or
## RCDATA_CONTENT_MODEL) and a code reference that inserts the new
## element into the tree.  The element's character data is collected
## into a single Text node.  A parse error is reported when the token
## that follows the text is not the matching end tag; that token is
## consumed in either case.
my $parse_rcdata = sub ($$) {
  my $content_model_flag = shift;
  my $insert = shift;

  ## Step 1
  my $element_name = $token->{tag_name};
  my $element = $self->{document}->create_element_ns
      (q<http://www.w3.org/1999/xhtml>, [undef, $element_name]);
  for my $name (keys %{ $token->{attributes}}) {
    $element->set_attribute_ns
        (undef, [undef, $name], $token->{attributes}->{$name}->{value});
  }

  ## Step 2
  $insert->($element); # /context node/->append_child ($element)

  ## Step 3
  $self->{content_model} = $content_model_flag; # CDATA or RCDATA
  delete $self->{escape}; # MUST

  ## Step 4
  my @chunks;
  for ($token = $self->_get_next_token;
       $token->{type} eq 'character'; # or until stop tokenizing
       $token = $self->_get_next_token) {
    push @chunks, $token->{data};
  }
  my $data = join '', @chunks;

  ## Step 5
  if (length $data) {
    my $text_node = $self->{document}->create_text_node ($data);
    $element->append_child ($text_node);
  }

  ## Step 6
  $self->{content_model} = PCDATA_CONTENT_MODEL;

  ## Step 7
  my $is_matching_end_tag
      = ($token->{type} eq 'end tag' and
         $token->{tag_name} eq $element_name);
  unless ($is_matching_end_tag) {
    if ($content_model_flag == CDATA_CONTENT_MODEL) {
      $self->{parse_error}-> (type => 'in CDATA:#'.$token->{type});
    } elsif ($content_model_flag == RCDATA_CONTENT_MODEL) {
      $self->{parse_error}-> (type => 'in RCDATA:#'.$token->{type});
    } else {
      die "$0: $content_model_flag in parse_rcdata";
    }
  }
  ## The end tag is ignored; any other token is consumed as well.
  $token = $self->_get_next_token;
}; # $parse_rcdata
3055 wakaba 1.1
## Handles a |script| start tag held in |$token|.
##
## The single argument is a code reference that inserts the finished
## element into the tree.  The script's text is read in the CDATA
## content model and appended to the element; the element is inserted
## only when |$self->{inner_html_node}| is not defined.  The token
## that follows the text is consumed, with a parse error if it is not
## a |script| end tag.
my $script_start_tag = sub ($) {
  my ($insert) = @_;

  my $script_el = $self->{document}->create_element_ns
      (q<http://www.w3.org/1999/xhtml>, [undef, 'script']);
  for my $name (keys %{ $token->{attributes}}) {
    $script_el->set_attribute_ns
        (undef, [undef, $name], $token->{attributes}->{$name}->{value});
  }

  ## TODO: mark as "parser-inserted"

  $self->{content_model} = CDATA_CONTENT_MODEL;
  delete $self->{escape}; # MUST

  ## Collect character tokens; stop at the first non-character token
  ## or when the tokenizer stops tokenising.
  my @chunks;
  for ($token = $self->_get_next_token;
       $token->{type} eq 'character';
       $token = $self->_get_next_token) {
    push @chunks, $token->{data};
  }
  my $script_text = join '', @chunks;
  $script_el->manakai_append_text ($script_text) if length $script_text;

  $self->{content_model} = PCDATA_CONTENT_MODEL;

  unless ($token->{type} eq 'end tag' and
          $token->{tag_name} eq 'script') {
    $self->{parse_error}-> (type => 'in CDATA:#'.$token->{type});
    ## ISSUE: And ignore?
    ## TODO: mark as "already executed"
  }
  ## Otherwise the end tag is simply ignored.

  unless (defined $self->{inner_html_node}) {
    ## TODO: $old_insertion_point = current insertion point
    ## TODO: insertion point = just before the next input character

    $insert->($script_el);

    ## TODO: insertion point = $old_insertion_point (might be "undefined")

    ## TODO: if there is a script that will execute as soon as the parser resume, then...
  }
  ## (With an inner_html_node -- TODO: mark as "already executed")

  $token = $self->_get_next_token;
}; # $script_start_tag
3109    
## Handles an end tag of a formatting element.
##
## The single argument is the tag name of the end tag.  The closure
## works on |$active_formatting_elements|, |$self->{open_elements}|
## and the DOM tree, and advances |$token| whenever it returns.  The
## numbered "Step" comments follow the algorithm's own numbering; the
## whole procedure is repeated (Step 14) until one of the early
## returns in Step 1 or Step 3 is taken.
my $formatting_end_tag = sub {
  my $tag_name = shift;

  FET: {
    ## Step 1
    ## Find the last entry with that tag name after the last marker.
    my $formatting_element;
    my $formatting_element_i_in_active;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[1] eq $tag_name) {
        $formatting_element = $active_formatting_elements->[$_];
        $formatting_element_i_in_active = $_;
        last AFE;
      } elsif ($active_formatting_elements->[$_]->[0] eq '#marker') {
        last AFE;
      }
    } # AFE
    unless (defined $formatting_element) {
      $self->{parse_error}-> (type => 'unmatched end tag:'.$tag_name);
      ## Ignore the token
      $token = $self->_get_next_token;
      return;
    }
    ## has an element in scope
    my $in_scope = 1;
    my $formatting_element_i_in_open;
    INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if ($node->[0] eq $formatting_element->[0]) {
        if ($in_scope) {
          $formatting_element_i_in_open = $_;
          last INSCOPE;
        } else { # in open elements but not in scope
          $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
          ## Ignore the token
          $token = $self->_get_next_token;
          return;
        }
      } elsif ({
        table => 1, caption => 1, td => 1, th => 1,
        button => 1, marquee => 1, object => 1, html => 1,
      }->{$node->[1]}) {
        $in_scope = 0;
      }
    } # INSCOPE
    unless (defined $formatting_element_i_in_open) {
      ## The formatting element is in the list of active formatting
      ## elements but not in the stack of open elements.
      $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
      ## Remove the formatting element itself from the list.  It is
      ## not necessarily the last entry, so |pop| is not appropriate.
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      $token = $self->_get_next_token; ## TODO: ok?
      return;
    }
    if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
      $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
    }

    ## Step 2
    ## Scanning from the current node towards the formatting element,
    ## the last matching node seen (i.e. the one nearest to the
    ## formatting element) is kept as the furthest block.
    my $furthest_block;
    my $furthest_block_i_in_open;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if (not $formatting_category->{$node->[1]} and
          #not $phrasing_category->{$node->[1]} and
          ($special_category->{$node->[1]} or
           $scoping_category->{$node->[1]})) {
        $furthest_block = $node;
        $furthest_block_i_in_open = $_;
      } elsif ($node->[0] eq $formatting_element->[0]) {
        last OE;
      }
    } # OE

    ## Step 3
    unless (defined $furthest_block) { # MUST
      ## Pop the formatting element and everything after it.
      splice @{$self->{open_elements}}, $formatting_element_i_in_open;
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      $token = $self->_get_next_token;
      return;
    }

    ## Step 4
    my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1];

    ## Step 5
    my $furthest_block_parent = $furthest_block->[0]->parent_node;
    if (defined $furthest_block_parent) {
      $furthest_block_parent->remove_child ($furthest_block->[0]);
    }

    ## Step 6
    my $bookmark_prev_el
        = $active_formatting_elements->[$formatting_element_i_in_active - 1]
            ->[0];

    ## Step 7
    my $node = $furthest_block;
    my $node_i_in_open = $furthest_block_i_in_open;
    my $last_node = $furthest_block;
    S7: {
      ## Step 1
      $node_i_in_open--;
      $node = $self->{open_elements}->[$node_i_in_open];

      ## Step 2
      ## A node that is not in the list of active formatting elements
      ## is removed from the stack of open elements and skipped.
      my $node_i_in_active;
      S7S2: {
        for (reverse 0..$#$active_formatting_elements) {
          if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
            $node_i_in_active = $_;
            last S7S2;
          }
        }
        splice @{$self->{open_elements}}, $node_i_in_open, 1;
        redo S7;
      } # S7S2

      ## Step 3
      last S7 if $node->[0] eq $formatting_element->[0];

      ## Step 4
      if ($last_node->[0] eq $furthest_block->[0]) {
        $bookmark_prev_el = $node->[0];
      }

      ## Step 5
      if ($node->[0]->has_child_nodes ()) {
        my $clone = [$node->[0]->clone_node (0), $node->[1]];
        $active_formatting_elements->[$node_i_in_active] = $clone;
        $self->{open_elements}->[$node_i_in_open] = $clone;
        $node = $clone;
      }

      ## Step 6
      $node->[0]->append_child ($last_node->[0]);

      ## Step 7
      $last_node = $node;

      ## Step 8
      redo S7;
    } # S7

    ## Step 8
    $common_ancestor_node->[0]->append_child ($last_node->[0]);

    ## Step 9
    my $clone = [$formatting_element->[0]->clone_node (0),
                 $formatting_element->[1]];

    ## Step 10
    ## The child list is copied first because it is modified while the
    ## children are moved.
    my @cn = @{$furthest_block->[0]->child_nodes};
    $clone->[0]->append_child ($_) for @cn;

    ## Step 11
    $furthest_block->[0]->append_child ($clone->[0]);

    ## Step 12
    ## |$i| is the index of the bookmark entry; it is decremented when
    ## the formatting element is removed from a position before it.
    my $i;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
        splice @$active_formatting_elements, $_, 1;
        $i-- and last AFE if defined $i;
      } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
        $i = $_;
      }
    } # AFE
    splice @$active_formatting_elements, $i + 1, 0, $clone;

    ## Step 13
    undef $i;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
        splice @{$self->{open_elements}}, $_, 1;
        $i-- and last OE if defined $i;
      } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
        $i = $_;
      }
    } # OE
    ## NOTE(review): Step 12 inserts with a replacement length of 0,
    ## whereas this splice uses a length of 1 and therefore overwrites
    ## the entry that follows the furthest block, if there is one.
    ## Confirm whether that is intended.
    splice @{$self->{open_elements}}, $i + 1, 1, $clone;

    ## Step 14
    redo FET;
  } # FET
}; # $formatting_end_tag
3292    
## Appends the given node to the current node, i.e. the last entry of
## the stack of open elements.
my $insert_to_current = sub {
  my ($child) = @_;
  my $current_node = $self->{open_elements}->[-1]->[0];
  $current_node->append_child ($child);
}; # $insert_to_current
3296    
## Inserts the given node, foster parenting it when the current node
## is a |table|, |tbody|, |tfoot|, |thead| or |tr| element.
##
## The foster parent is the parent of the last |table| in the stack of
## open elements when that parent is an element node (node type 1);
## the node is then inserted just before the table.  Otherwise it is
## appended to the entry preceding the table in the stack, or to the
## first entry of the stack when no |table| is open.
my $insert_to_foster = sub {
  my ($child) = @_;

  my %needs_foster_parent
      = map { $_ => 1 } qw(table tbody tfoot thead tr);

  unless ($needs_foster_parent{$self->{open_elements}->[-1]->[1]}) {
    return $self->{open_elements}->[-1]->[0]->append_child ($child);
  }

  # MUST
  my $foster_parent_element;
  my $next_sibling;
  for my $index (reverse 0..$#{$self->{open_elements}}) {
    my $entry = $self->{open_elements}->[$index];
    next unless $entry->[1] eq 'table';

    my $parent = $entry->[0]->parent_node;
    if (defined $parent and $parent->node_type == 1) {
      $foster_parent_element = $parent;
      $next_sibling = $entry->[0];
    } else {
      $foster_parent_element = $self->{open_elements}->[$index - 1]->[0];
    }
    last;
  }
  $foster_parent_element = $self->{open_elements}->[0]->[0]
      unless defined $foster_parent_element;

  return $foster_parent_element->insert_before ($child, $next_sibling);
}; # $insert_to_foster
3327    
3328     my $in_body = sub {
3329     my $insert = shift;
3330     if ($token->{type} eq 'start tag') {
3331     if ($token->{tag_name} eq 'script') {
3332 wakaba 1.25 ## NOTE: This is an "as if in head" code clone
3333     $script_start_tag->($insert);
3334 wakaba 1.1 return;
3335     } elsif ($token->{tag_name} eq 'style') {
3336 wakaba 1.25 ## NOTE: This is an "as if in head" code clone
3337 wakaba 1.41 $parse_rcdata->(CDATA_CONTENT_MODEL, $insert);
3338 wakaba 1.1 return;
3339     } elsif ({
3340 wakaba 1.35 base => 1, link => 1,
3341 wakaba 1.1 }->{$token->{tag_name}}) {
3342 wakaba 1.25 ## NOTE: This is an "as if in head" code clone, only "-t" differs
3343 wakaba 1.1
3344 wakaba 1.25 {
3345     my $el;
3346    
3347 wakaba 1.1 $el = $self->{document}->create_element_ns
3348     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3349    
3350 wakaba 1.25 for my $attr_name (keys %{ $token->{attributes}}) {
3351 wakaba 1.1 $el->set_attribute_ns (undef, [undef, $attr_name],
3352 wakaba 1.25 $token->{attributes} ->{$attr_name}->{value});
3353 wakaba 1.1 }
3354    
3355 wakaba 1.25 $insert->($el);
3356     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3357     }
3358    
3359     pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
3360 wakaba 1.1 $token = $self->_get_next_token;
3361     return;
3362 wakaba 1.34 } elsif ($token->{tag_name} eq 'meta') {
3363     ## NOTE: This is an "as if in head" code clone, only "-t" differs
3364    
3365     {
3366     my $el;
3367    
3368     $el = $self->{document}->create_element_ns
3369     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3370    
3371     for my $attr_name (keys %{ $token->{attributes}}) {
3372     $el->set_attribute_ns (undef, [undef, $attr_name],
3373     $token->{attributes} ->{$attr_name}->{value});
3374     }
3375    
3376     $insert->($el);
3377     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3378     }
3379    
3380     pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
3381    
3382     unless ($self->{confident}) {
3383     my $charset;
3384     if ($token->{attributes}->{charset}) { ## TODO: And if supported
3385     $charset = $token->{attributes}->{charset}->{value};
3386     }
3387     if ($token->{attributes}->{'http-equiv'}) {
3388 wakaba 1.35 ## ISSUE: Algorithm name in the spec was incorrect so that not linked to the definition.
3389 wakaba 1.34 if ($token->{attributes}->{'http-equiv'}->{value}
3390     =~ /\A[^;]*;[\x09-\x0D\x20]*charset[\x09-\x0D\x20]*=
3391     [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
3392     ([^"'\x09-\x0D\x20][^\x09-\x0D\x20]*))/x) {
3393     $charset = defined $1 ? $1 : defined $2 ? $2 : $3;
3394     } ## TODO: And if supported
3395     }
3396     ## TODO: Change the encoding
3397     }
3398    
3399     $token = $self->_get_next_token;
3400     return;
3401 wakaba 1.1 } elsif ($token->{tag_name} eq 'title') {
3402 wakaba 1.3 $self->{parse_error}-> (type => 'in body:title');
3403 wakaba 1.25 ## NOTE: This is an "as if in head" code clone
3404 wakaba 1.41 $parse_rcdata->(RCDATA_CONTENT_MODEL, sub {
3405 wakaba 1.31 if (defined $self->{head_element}) {
3406     $self->{head_element}->append_child ($_[0]);
3407     } else {
3408     $insert->($_[0]);
3409     }
3410     });
3411 wakaba 1.1 return;
3412     } elsif ($token->{tag_name} eq 'body') {
3413 wakaba 1.3 $self->{parse_error}-> (type => 'in body:body');
3414 wakaba 1.1
3415 wakaba 1.3 if (@{$self->{open_elements}} == 1 or
3416     $self->{open_elements}->[1]->[1] ne 'body') {
3417 wakaba 1.1 ## Ignore the token
3418     } else {
3419 wakaba 1.3 my $body_el = $self->{open_elements}->[1]->[0];
3420 wakaba 1.1 for my $attr_name (keys %{$token->{attributes}}) {
3421     unless ($body_el->has_attribute_ns (undef, $attr_name)) {
3422     $body_el->set_attribute_ns
3423     (undef, [undef, $attr_name],
3424     $token->{attributes}->{$attr_name}->{value});
3425     }
3426     }
3427     }
3428     $token = $self->_get_next_token;
3429     return;
3430     } elsif ({
3431     address => 1, blockquote => 1, center => 1, dir => 1,
3432     div => 1, dl => 1, fieldset => 1, listing => 1,
3433     menu => 1, ol => 1, p => 1, ul => 1,
3434     pre => 1,
3435     }->{$token->{tag_name}}) {
3436     ## has a p element in scope
3437 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
3438 wakaba 1.1 if ($_->[1] eq 'p') {
3439     unshift @{$self->{token}}, $token;
3440     $token = {type => 'end tag', tag_name => 'p'};
3441     return;
3442     } elsif ({
3443     table => 1, caption => 1, td => 1, th => 1,
3444     button => 1, marquee => 1, object => 1, html => 1,
3445     }->{$_->[1]}) {
3446     last INSCOPE;
3447     }
3448     } # INSCOPE
3449    
3450    
3451     {
3452     my $el;
3453    
3454     $el = $self->{document}->create_element_ns
3455     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3456    
3457     for my $attr_name (keys %{ $token->{attributes}}) {
3458     $el->set_attribute_ns (undef, [undef, $attr_name],
3459     $token->{attributes} ->{$attr_name}->{value});
3460     }
3461    
3462     $insert->($el);
3463 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3464 wakaba 1.1 }
3465    
3466     if ($token->{tag_name} eq 'pre') {
3467     $token = $self->_get_next_token;
3468     if ($token->{type} eq 'character') {
3469     $token->{data} =~ s/^\x0A//;
3470     unless (length $token->{data}) {
3471     $token = $self->_get_next_token;
3472     }
3473     }
3474     } else {
3475     $token = $self->_get_next_token;
3476     }
3477     return;
3478     } elsif ($token->{tag_name} eq 'form') {
3479 wakaba 1.3 if (defined $self->{form_element}) {
3480     $self->{parse_error}-> (type => 'in form:form');
3481 wakaba 1.1 ## Ignore the token
3482 wakaba 1.7 $token = $self->_get_next_token;
3483     return;
3484 wakaba 1.1 } else {
3485     ## has a p element in scope
3486 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
3487 wakaba 1.1 if ($_->[1] eq 'p') {
3488     unshift @{$self->{token}}, $token;
3489     $token = {type => 'end tag', tag_name => 'p'};
3490     return;
3491     } elsif ({
3492     table => 1, caption => 1, td => 1, th => 1,
3493     button => 1, marquee => 1, object => 1, html => 1,
3494     }->{$_->[1]}) {
3495     last INSCOPE;
3496     }
3497     } # INSCOPE
3498    
3499    
3500     {
3501     my $el;
3502    
3503     $el = $self->{document}->create_element_ns
3504     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3505    
3506     for my $attr_name (keys %{ $token->{attributes}}) {
3507     $el->set_attribute_ns (undef, [undef, $attr_name],
3508     $token->{attributes} ->{$attr_name}->{value});
3509     }
3510    
3511     $insert->($el);
3512 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3513 wakaba 1.1 }
3514    
3515 wakaba 1.3 $self->{form_element} = $self->{open_elements}->[-1]->[0];
3516 wakaba 1.1 $token = $self->_get_next_token;
3517     return;
3518     }
3519     } elsif ($token->{tag_name} eq 'li') {
3520     ## has a p element in scope
3521 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
3522 wakaba 1.1 if ($_->[1] eq 'p') {
3523     unshift @{$self->{token}}, $token;
3524     $token = {type => 'end tag', tag_name => 'p'};
3525     return;
3526     } elsif ({
3527     table => 1, caption => 1, td => 1, th => 1,
3528     button => 1, marquee => 1, object => 1, html => 1,
3529     }->{$_->[1]}) {
3530     last INSCOPE;
3531     }
3532     } # INSCOPE
3533    
3534     ## Step 1
3535     my $i = -1;
3536 wakaba 1.3 my $node = $self->{open_elements}->[$i];
3537 wakaba 1.1 LI: {
3538     ## Step 2
3539     if ($node->[1] eq 'li') {
3540 wakaba 1.8 if ($i != -1) {
3541     $self->{parse_error}-> (type => 'end tag missing:'.
3542     $self->{open_elements}->[-1]->[1]);
3543     }
3544 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3545 wakaba 1.1 last LI;
3546     }
3547    
3548     ## Step 3
3549     if (not $formatting_category->{$node->[1]} and
3550     #not $phrasing_category->{$node->[1]} and
3551     ($special_category->{$node->[1]} or
3552     $scoping_category->{$node->[1]}) and
3553     $node->[1] ne 'address' and $node->[1] ne 'div') {
3554     last LI;
3555     }
3556    
3557     ## Step 4
3558     $i--;
3559 wakaba 1.3 $node = $self->{open_elements}->[$i];
3560 wakaba 1.1 redo LI;
3561     } # LI
3562    
3563    
3564     {
3565     my $el;
3566    
3567     $el = $self->{document}->create_element_ns
3568     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3569    
3570     for my $attr_name (keys %{ $token->{attributes}}) {
3571     $el->set_attribute_ns (undef, [undef, $attr_name],
3572     $token->{attributes} ->{$attr_name}->{value});
3573     }
3574    
3575     $insert->($el);
3576 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3577 wakaba 1.1 }
3578    
3579     $token = $self->_get_next_token;
3580     return;
3581     } elsif ($token->{tag_name} eq 'dd' or $token->{tag_name} eq 'dt') {
3582     ## has a p element in scope
3583 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
3584 wakaba 1.1 if ($_->[1] eq 'p') {
3585     unshift @{$self->{token}}, $token;
3586     $token = {type => 'end tag', tag_name => 'p'};
3587     return;
3588     } elsif ({
3589     table => 1, caption => 1, td => 1, th => 1,
3590     button => 1, marquee => 1, object => 1, html => 1,
3591     }->{$_->[1]}) {
3592     last INSCOPE;
3593     }
3594     } # INSCOPE
3595    
3596     ## Step 1
3597     my $i = -1;
3598 wakaba 1.3 my $node = $self->{open_elements}->[$i];
3599 wakaba 1.1 LI: {
3600     ## Step 2
3601     if ($node->[1] eq 'dt' or $node->[1] eq 'dd') {
3602 wakaba 1.8 if ($i != -1) {
3603     $self->{parse_error}-> (type => 'end tag missing:'.
3604     $self->{open_elements}->[-1]->[1]);
3605     }
3606 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3607 wakaba 1.1 last LI;
3608     }
3609    
3610     ## Step 3
3611     if (not $formatting_category->{$node->[1]} and
3612     #not $phrasing_category->{$node->[1]} and
3613     ($special_category->{$node->[1]} or
3614     $scoping_category->{$node->[1]}) and
3615     $node->[1] ne 'address' and $node->[1] ne 'div') {
3616     last LI;
3617     }
3618    
3619     ## Step 4
3620     $i--;
3621 wakaba 1.3 $node = $self->{open_elements}->[$i];
3622 wakaba 1.1 redo LI;
3623     } # LI
3624    
3625    
3626     {
3627     my $el;
3628    
3629     $el = $self->{document}->create_element_ns
3630     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3631    
3632     for my $attr_name (keys %{ $token->{attributes}}) {
3633     $el->set_attribute_ns (undef, [undef, $attr_name],
3634     $token->{attributes} ->{$attr_name}->{value});
3635     }
3636    
3637     $insert->($el);
3638 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3639 wakaba 1.1 }
3640    
3641     $token = $self->_get_next_token;
3642     return;
3643     } elsif ($token->{tag_name} eq 'plaintext') {
3644     ## has a p element in scope
3645 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
3646 wakaba 1.1 if ($_->[1] eq 'p') {
3647     unshift @{$self->{token}}, $token;
3648     $token = {type => 'end tag', tag_name => 'p'};
3649     return;
3650     } elsif ({
3651     table => 1, caption => 1, td => 1, th => 1,
3652     button => 1, marquee => 1, object => 1, html => 1,
3653     }->{$_->[1]}) {
3654     last INSCOPE;
3655     }
3656     } # INSCOPE
3657    
3658    
3659     {
3660     my $el;
3661    
3662     $el = $self->{document}->create_element_ns
3663     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3664    
3665     for my $attr_name (keys %{ $token->{attributes}}) {
3666     $el->set_attribute_ns (undef, [undef, $attr_name],
3667     $token->{attributes} ->{$attr_name}->{value});
3668     }
3669    
3670     $insert->($el);
3671 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3672 wakaba 1.1 }
3673    
3674    
3675 wakaba 1.41 $self->{content_model} = PLAINTEXT_CONTENT_MODEL;
3676 wakaba 1.1
3677     $token = $self->_get_next_token;
3678     return;
3679     } elsif ({
3680     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
3681     }->{$token->{tag_name}}) {
3682     ## has a p element in scope
3683 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3684     my $node = $self->{open_elements}->[$_];
3685 wakaba 1.1 if ($node->[1] eq 'p') {
3686     unshift @{$self->{token}}, $token;
3687     $token = {type => 'end tag', tag_name => 'p'};
3688     return;
3689     } elsif ({
3690     table => 1, caption => 1, td => 1, th => 1,
3691     button => 1, marquee => 1, object => 1, html => 1,
3692     }->{$node->[1]}) {
3693     last INSCOPE;
3694     }
3695     } # INSCOPE
3696    
3697 wakaba 1.23 ## NOTE: See <http://html5.org/tools/web-apps-tracker?from=925&to=926>
3698 wakaba 1.1 ## has an element in scope
3699 wakaba 1.23 #my $i;
3700     #INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3701     # my $node = $self->{open_elements}->[$_];
3702     # if ({
3703     # h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
3704     # }->{$node->[1]}) {
3705     # $i = $_;
3706     # last INSCOPE;
3707     # } elsif ({
3708     # table => 1, caption => 1, td => 1, th => 1,
3709     # button => 1, marquee => 1, object => 1, html => 1,
3710     # }->{$node->[1]}) {
3711     # last INSCOPE;
3712     # }
3713     #} # INSCOPE
3714     #
3715     #if (defined $i) {
3716     # !!! parse-error (type => 'in hn:hn');
3717     # splice @{$self->{open_elements}}, $i;
3718     #}
3719 wakaba 1.1
3720    
3721     {
3722     my $el;
3723    
3724     $el = $self->{document}->create_element_ns
3725     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3726    
3727     for my $attr_name (keys %{ $token->{attributes}}) {
3728     $el->set_attribute_ns (undef, [undef, $attr_name],
3729     $token->{attributes} ->{$attr_name}->{value});
3730     }
3731    
3732     $insert->($el);
3733 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3734 wakaba 1.1 }
3735    
3736    
3737     $token = $self->_get_next_token;
3738     return;
3739     } elsif ($token->{tag_name} eq 'a') {
3740     AFE: for my $i (reverse 0..$#$active_formatting_elements) {
3741     my $node = $active_formatting_elements->[$i];
3742     if ($node->[1] eq 'a') {
3743 wakaba 1.3 $self->{parse_error}-> (type => 'in a:a');
3744 wakaba 1.1
3745     unshift @{$self->{token}}, $token;
3746     $token = {type => 'end tag', tag_name => 'a'};
3747     $formatting_end_tag->($token->{tag_name});
3748    
3749     AFE2: for (reverse 0..$#$active_formatting_elements) {
3750     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
3751     splice @$active_formatting_elements, $_, 1;
3752     last AFE2;
3753     }
3754     } # AFE2
3755 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3756     if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
3757     splice @{$self->{open_elements}}, $_, 1;
3758 wakaba 1.1 last OE;
3759     }
3760     } # OE
3761     last AFE;
3762     } elsif ($node->[0] eq '#marker') {
3763     last AFE;
3764     }
3765     } # AFE
3766    
3767     $reconstruct_active_formatting_elements->($insert_to_current);
3768    
3769    
3770     {
3771     my $el;
3772    
3773     $el = $self->{document}->create_element_ns
3774     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3775    
3776     for my $attr_name (keys %{ $token->{attributes}}) {
3777     $el->set_attribute_ns (undef, [undef, $attr_name],
3778     $token->{attributes} ->{$attr_name}->{value});
3779     }
3780    
3781     $insert->($el);
3782 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3783 wakaba 1.1 }
3784    
3785 wakaba 1.3 push @$active_formatting_elements, $self->{open_elements}->[-1];
3786 wakaba 1.1
3787     $token = $self->_get_next_token;
3788     return;
} elsif ({
  ## Formatting elements handled by this branch.
  ## NOTE: |strike| was formerly misspelled "strile" here and in the
  ## matching end tag table below; the two tables must stay in sync.
  b => 1, big => 1, em => 1, font => 1, i => 1,
  s => 1, small => 1, strike => 1,
  strong => 1, tt => 1, u => 1,
}->{$token->{tag_name}}) {
  ## Insert the element and append it to the list of active
  ## formatting elements.
  $reconstruct_active_formatting_elements->($insert_to_current);

  {
    my $el;

    $el = $self->{document}->create_element_ns
        (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);

    for my $attr_name (keys %{ $token->{attributes}}) {
      $el->set_attribute_ns (undef, [undef, $attr_name],
                             $token->{attributes} ->{$attr_name}->{value});
    }

    $insert->($el);
    push @{$self->{open_elements}}, [$el, $token->{tag_name}];
  }

  push @$active_formatting_elements, $self->{open_elements}->[-1];

  $token = $self->_get_next_token;
  return;
} elsif ($token->{tag_name} eq 'nobr') {
  $reconstruct_active_formatting_elements->($insert_to_current);

  ## has a |nobr| element in scope
  ## If so, the start tag is put back and an end tag |nobr| is
  ## processed first.
  INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
    my $node = $self->{open_elements}->[$_];
    if ($node->[1] eq 'nobr') {
      $self->{parse_error}-> (type => 'not closed:nobr');
      unshift @{$self->{token}}, $token;
      $token = {type => 'end tag', tag_name => 'nobr'};
      return;
    } elsif ({
      table => 1, caption => 1, td => 1, th => 1,
      button => 1, marquee => 1, object => 1, html => 1,
    }->{$node->[1]}) {
      last INSCOPE;
    }
  } # INSCOPE

  {
    my $el;

    $el = $self->{document}->create_element_ns
        (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);

    for my $attr_name (keys %{ $token->{attributes}}) {
      $el->set_attribute_ns (undef, [undef, $attr_name],
                             $token->{attributes} ->{$attr_name}->{value});
    }

    $insert->($el);
    push @{$self->{open_elements}}, [$el, $token->{tag_name}];
  }

  push @$active_formatting_elements, $self->{open_elements}->[-1];

  $token = $self->_get_next_token;
  return;
} elsif ($token->{tag_name} eq 'button') {
  ## has a button element in scope
  ## If so, the start tag is put back and an end tag |button| is
  ## processed first.
  INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
    my $node = $self->{open_elements}->[$_];
    if ($node->[1] eq 'button') {
      $self->{parse_error}-> (type => 'in button:button');
      unshift @{$self->{token}}, $token;
      $token = {type => 'end tag', tag_name => 'button'};
      return;
    } elsif ({
      table => 1, caption => 1, td => 1, th => 1,
      button => 1, marquee => 1, object => 1, html => 1,
    }->{$node->[1]}) {
      last INSCOPE;
    }
  } # INSCOPE

  $reconstruct_active_formatting_elements->($insert_to_current);

  {
    my $el;

    $el = $self->{document}->create_element_ns
        (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);

    for my $attr_name (keys %{ $token->{attributes}}) {
      $el->set_attribute_ns (undef, [undef, $attr_name],
                             $token->{attributes} ->{$attr_name}->{value});
    }

    $insert->($el);
    push @{$self->{open_elements}}, [$el, $token->{tag_name}];
  }

  ## A marker entry is appended to the list of active formatting
  ## elements.
  push @$active_formatting_elements, ['#marker', ''];

  $token = $self->_get_next_token;
  return;
} elsif ($token->{tag_name} eq 'marquee' or
         $token->{tag_name} eq 'object') {
  $reconstruct_active_formatting_elements->($insert_to_current);

  {
    my $el;

    $el = $self->{document}->create_element_ns
        (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);

    for my $attr_name (keys %{ $token->{attributes}}) {
      $el->set_attribute_ns (undef, [undef, $attr_name],
                             $token->{attributes} ->{$attr_name}->{value});
    }

    $insert->($el);
    push @{$self->{open_elements}}, [$el, $token->{tag_name}];
  }

  push @$active_formatting_elements, ['#marker', ''];

  $token = $self->_get_next_token;
  return;
} elsif ($token->{tag_name} eq 'xmp') {
  $reconstruct_active_formatting_elements->($insert_to_current);
  $parse_rcdata->(CDATA_CONTENT_MODEL, $insert);
  return;
} elsif ($token->{tag_name} eq 'table') {
  ## has a p element in scope
  INSCOPE: for (reverse @{$self->{open_elements}}) {
    if ($_->[1] eq 'p') {
      unshift @{$self->{token}}, $token;
      $token = {type => 'end tag', tag_name => 'p'};
      return;
    } elsif ({
      table => 1, caption => 1, td => 1, th => 1,
      button => 1, marquee => 1, object => 1, html => 1,
    }->{$_->[1]}) {
      last INSCOPE;
    }
  } # INSCOPE

  {
    my $el;

    $el = $self->{document}->create_element_ns
        (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);

    for my $attr_name (keys %{ $token->{attributes}}) {
      $el->set_attribute_ns (undef, [undef, $attr_name],
                             $token->{attributes} ->{$attr_name}->{value});
    }

    $insert->($el);
    push @{$self->{open_elements}}, [$el, $token->{tag_name}];
  }

  $self->{insertion_mode} = 'in table';

  $token = $self->_get_next_token;
  return;
} elsif ({
  area => 1, basefont => 1, bgsound => 1, br => 1,
  embed => 1, img => 1, param => 1, spacer => 1, wbr => 1,
  image => 1,
}->{$token->{tag_name}}) {
  if ($token->{tag_name} eq 'image') {
    ## |image| is reported as an error and then treated as |img|.
    $self->{parse_error}-> (type => 'image');
    $token->{tag_name} = 'img';
  }

  ## NOTE: There is an "as if <br>" code clone.
  $reconstruct_active_formatting_elements->($insert_to_current);

  {
    my $el;

    $el = $self->{document}->create_element_ns
        (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);

    for my $attr_name (keys %{ $token->{attributes}}) {
      $el->set_attribute_ns (undef, [undef, $attr_name],
                             $token->{attributes} ->{$attr_name}->{value});
    }

    $insert->($el);
    push @{$self->{open_elements}}, [$el, $token->{tag_name}];
  }

  ## The element is popped immediately after being inserted.
  pop @{$self->{open_elements}};

  $token = $self->_get_next_token;
  return;
} elsif ($token->{tag_name} eq 'hr') {
  ## has a p element in scope
  INSCOPE: for (reverse @{$self->{open_elements}}) {
    if ($_->[1] eq 'p') {
      unshift @{$self->{token}}, $token;
      $token = {type => 'end tag', tag_name => 'p'};
      return;
    } elsif ({
      table => 1, caption => 1, td => 1, th => 1,
      button => 1, marquee => 1, object => 1, html => 1,
    }->{$_->[1]}) {
      last INSCOPE;
    }
  } # INSCOPE

  {
    my $el;

    $el = $self->{document}->create_element_ns
        (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);

    for my $attr_name (keys %{ $token->{attributes}}) {
      $el->set_attribute_ns (undef, [undef, $attr_name],
                             $token->{attributes} ->{$attr_name}->{value});
    }

    $insert->($el);
    push @{$self->{open_elements}}, [$el, $token->{tag_name}];
  }

  pop @{$self->{open_elements}};

  $token = $self->_get_next_token;
  return;
} elsif ($token->{tag_name} eq 'input') {
  $reconstruct_active_formatting_elements->($insert_to_current);

  {
    my $el;

    $el = $self->{document}->create_element_ns
        (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);

    for my $attr_name (keys %{ $token->{attributes}}) {
      $el->set_attribute_ns (undef, [undef, $attr_name],
                             $token->{attributes} ->{$attr_name}->{value});
    }

    $insert->($el);
    push @{$self->{open_elements}}, [$el, $token->{tag_name}];
  }

  ## TODO: associate with $self->{form_element} if defined
  pop @{$self->{open_elements}};

  $token = $self->_get_next_token;
  return;
} elsif ($token->{tag_name} eq 'isindex') {
  $self->{parse_error}-> (type => 'isindex');

  if (defined $self->{form_element}) {
    ## Ignore the token
    $token = $self->_get_next_token;
    return;
  } else {
    ## Replace the |isindex| token by an equivalent sequence of
    ## form/hr/p/label/input tokens; the first one becomes the current
    ## token and the rest are queued for reprocessing.
    my $at = $token->{attributes};
    my $form_attrs;
    $form_attrs->{action} = $at->{action} if $at->{action};
    my $prompt_attr = $at->{prompt};
    $at->{name} = {name => 'name', value => 'isindex'};
    delete $at->{action};
    delete $at->{prompt};
    my @tokens = (
      {type => 'start tag', tag_name => 'form',
       attributes => $form_attrs},
      {type => 'start tag', tag_name => 'hr'},
      {type => 'start tag', tag_name => 'p'},
      {type => 'start tag', tag_name => 'label'},
    );
    if ($prompt_attr) {
      push @tokens, {type => 'character', data => $prompt_attr->{value}};
    } else {
      push @tokens, {type => 'character',
                     data => 'This is a searchable index. Insert your search keywords here: '}; # SHOULD
      ## TODO: make this configurable
    }
    push @tokens,
      {type => 'start tag', tag_name => 'input', attributes => $at},
      #{type => 'character', data => ''}, # SHOULD
      {type => 'end tag', tag_name => 'label'},
      {type => 'end tag', tag_name => 'p'},
      {type => 'start tag', tag_name => 'hr'},
      {type => 'end tag', tag_name => 'form'};
    $token = shift @tokens;
    unshift @{$self->{token}}, (@tokens);
    return;
  }
} elsif ($token->{tag_name} eq 'textarea') {
  my $tag_name = $token->{tag_name};
  my $el;

  $el = $self->{document}->create_element_ns
      (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);

  for my $attr_name (keys %{ $token->{attributes}}) {
    $el->set_attribute_ns (undef, [undef, $attr_name],
                           $token->{attributes} ->{$attr_name}->{value});
  }

  ## TODO: $self->{form_element} if defined
  $self->{content_model} = RCDATA_CONTENT_MODEL;
  delete $self->{escape}; # MUST

  $insert->($el);

  ## Collect the following character tokens as the element's text,
  ## dropping a single leading U+000A.
  my $text = '';
  $token = $self->_get_next_token;
  if ($token->{type} eq 'character') {
    $token->{data} =~ s/^\x0A//;
    unless (length $token->{data}) {
      $token = $self->_get_next_token;
    }
  }
  while ($token->{type} eq 'character') {
    $text .= $token->{data};
    $token = $self->_get_next_token;
  }
  if (length $text) {
    $el->manakai_append_text ($text);
  }

  $self->{content_model} = PCDATA_CONTENT_MODEL;

  if ($token->{type} eq 'end tag' and
      $token->{tag_name} eq $tag_name) {
    ## Ignore the token
  } else {
    $self->{parse_error}-> (type => 'in RCDATA:#'.$token->{type});
  }
  $token = $self->_get_next_token;
  return;
} elsif ({
  iframe => 1,
  noembed => 1,
  noframes => 1,
  noscript => 0, ## TODO: 1 if scripting is enabled
}->{$token->{tag_name}}) {
  ## NOTE: There are two "as if in body" code clones.
  $parse_rcdata->(CDATA_CONTENT_MODEL, $insert);
  return;
} elsif ($token->{tag_name} eq 'select') {
  $reconstruct_active_formatting_elements->($insert_to_current);

  {
    my $el;

    $el = $self->{document}->create_element_ns
        (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);

    for my $attr_name (keys %{ $token->{attributes}}) {
      $el->set_attribute_ns (undef, [undef, $attr_name],
                             $token->{attributes} ->{$attr_name}->{value});
    }

    $insert->($el);
    push @{$self->{open_elements}}, [$el, $token->{tag_name}];
  }

  $self->{insertion_mode} = 'in select';
  $token = $self->_get_next_token;
  return;
} elsif ({
  caption => 1, col => 1, colgroup => 1, frame => 1,
  frameset => 1, head => 1, option => 1, optgroup => 1,
  tbody => 1, td => 1, tfoot => 1, th => 1,
  thead => 1, tr => 1,
}->{$token->{tag_name}}) {
  $self->{parse_error}-> (type => 'in body:'.$token->{tag_name});
  ## Ignore the token
  $token = $self->_get_next_token;
  return;

  ## ISSUE: An issue on HTML5 new elements in the spec.
} else {
  ## Any other start tag: insert it as an ordinary element.
  $reconstruct_active_formatting_elements->($insert_to_current);

  {
    my $el;

    $el = $self->{document}->create_element_ns
        (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);

    for my $attr_name (keys %{ $token->{attributes}}) {
      $el->set_attribute_ns (undef, [undef, $attr_name],
                             $token->{attributes} ->{$attr_name}->{value});
    }

    $insert->($el);
    push @{$self->{open_elements}}, [$el, $token->{tag_name}];
  }

  $token = $self->_get_next_token;
  return;
}
} elsif ($token->{type} eq 'end tag') {
  ## End tag handling starts here (this |elsif| belongs to the outer
  ## token type dispatch, one level above the start tag branches).
  if ($token->{tag_name} eq 'body') {
    if (@{$self->{open_elements}} > 1 and
        $self->{open_elements}->[1]->[1] eq 'body') {
      ## Report every open element other than those listed below.
      for (@{$self->{open_elements}}) {
        unless ({
          dd => 1, dt => 1, li => 1, p => 1, td => 1,
          th => 1, tr => 1, body => 1, html => 1,
          tbody => 1, tfoot => 1, thead => 1,
        }->{$_->[1]}) {
          $self->{parse_error}-> (type => 'not closed:'.$_->[1]);
        }
      }

      $self->{insertion_mode} = 'after body';
      $token = $self->_get_next_token;
      return;
    } else {
      $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
      ## Ignore the token
      $token = $self->_get_next_token;
      return;
    }
  } elsif ($token->{tag_name} eq 'html') {
    if (@{$self->{open_elements}} > 1 and $self->{open_elements}->[1]->[1] eq 'body') {
      ## ISSUE: There is an issue in the spec.
      if ($self->{open_elements}->[-1]->[1] ne 'body') {
        ## Report the current node, i.e. the element that is actually
        ## left open.  (Entry [1] is always |body| here, as guaranteed
        ## by the enclosing condition, so naming it would be useless.)
        $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
      }
      $self->{insertion_mode} = 'after body';
      ## reprocess
      return;
    } else {
      $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
      ## Ignore the token
      $token = $self->_get_next_token;
      return;
    }
  } elsif ({
    address => 1, blockquote => 1, center => 1, dir => 1,
    div => 1, dl => 1, fieldset => 1, listing => 1,
    menu => 1, ol => 1, pre => 1, ul => 1,
    p => 1,
    dd => 1, dt => 1, li => 1,
    button => 1, marquee => 1, object => 1,
  }->{$token->{tag_name}}) {
    ## has an element in scope
    ## $i receives the stack index of the matching element, if any.
    my $i;
    INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if ($node->[1] eq $token->{tag_name}) {
        ## generate implied end tags
        ## (an element of the same type as the token is not implied)
        if ({
          dd => ($token->{tag_name} ne 'dd'),
          dt => ($token->{tag_name} ne 'dt'),
          li => ($token->{tag_name} ne 'li'),
          p => ($token->{tag_name} ne 'p'),
          td => 1, th => 1, tr => 1,
          tbody => 1, tfoot=> 1, thead => 1,
        }->{$self->{open_elements}->[-1]->[1]}) {
          unshift @{$self->{token}}, $token;
          $token = {type => 'end tag',
                    tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
          return;
        }
        $i = $_;
        last INSCOPE unless $token->{tag_name} eq 'p';
      } elsif ({
        table => 1, caption => 1, td => 1, th => 1,
        button => 1, marquee => 1, object => 1, html => 1,
      }->{$node->[1]}) {
        last INSCOPE;
      }
    } # INSCOPE

    if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
      if (defined $i) {
        $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
      } else {
        $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
      }
    }

    if (defined $i) {
      splice @{$self->{open_elements}}, $i;
    } elsif ($token->{tag_name} eq 'p') {
      ## As if <p>, then reprocess the current token
      my $el;

      $el = $self->{document}->create_element_ns
          (q<http://www.w3.org/1999/xhtml>, [undef, 'p']);

      $insert->($el);
    }
    $clear_up_to_marker->()
        if {
          button => 1, marquee => 1, object => 1,
        }->{$token->{tag_name}};
    $token = $self->_get_next_token;
    return;
  } elsif ($token->{tag_name} eq 'form') {
    ## has an element in scope
    INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if ($node->[1] eq $token->{tag_name}) {
        ## generate implied end tags
        if ({
          dd => 1, dt => 1, li => 1, p => 1,
          td => 1, th => 1, tr => 1,
          tbody => 1, tfoot=> 1, thead => 1,
        }->{$self->{open_elements}->[-1]->[1]}) {
          unshift @{$self->{token}}, $token;
          $token = {type => 'end tag',
                    tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
          return;
        }
        last INSCOPE;
      } elsif ({
        table => 1, caption => 1, td => 1, th => 1,
        button => 1, marquee => 1, object => 1, html => 1,
      }->{$node->[1]}) {
        last INSCOPE;
      }
    } # INSCOPE

    if ($self->{open_elements}->[-1]->[1] eq $token->{tag_name}) {
      pop @{$self->{open_elements}};
    } else {
      $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
    }

    ## The form element pointer is reset in either case.
    undef $self->{form_element};
    $token = $self->_get_next_token;
    return;
  } elsif ({
    h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
  }->{$token->{tag_name}}) {
    ## has an element in scope
    ## (any of h1..h6 matches, not only the one named by the token)
    my $i;
    INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if ({
        h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
      }->{$node->[1]}) {
        ## generate implied end tags
        if ({
          dd => 1, dt => 1, li => 1, p => 1,
          td => 1, th => 1, tr => 1,
          tbody => 1, tfoot=> 1, thead => 1,
        }->{$self->{open_elements}->[-1]->[1]}) {
          unshift @{$self->{token}}, $token;
          $token = {type => 'end tag',
                    tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
          return;
        }
        $i = $_;
        last INSCOPE;
      } elsif ({
        table => 1, caption => 1, td => 1, th => 1,
        button => 1, marquee => 1, object => 1, html => 1,
      }->{$node->[1]}) {
        last INSCOPE;
      }
    } # INSCOPE

    if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
      $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
    }

    splice @{$self->{open_elements}}, $i if defined $i;
    $token = $self->_get_next_token;
    return;
  } elsif ({
    ## End tags of formatting elements; must list the same element
    ## types as the start tag branches that push onto the list of
    ## active formatting elements (|strike| was misspelled "strile").
    a => 1,
    b => 1, big => 1, em => 1, font => 1, i => 1,
    nobr => 1, s => 1, small => 1, strike => 1,
    strong => 1, tt => 1, u => 1,
  }->{$token->{tag_name}}) {
    $formatting_end_tag->($token->{tag_name});
    return;
4382     } elsif ($token->{tag_name} eq 'br') {
4383     $self->{parse_error}-> (type => 'unmatched end tag:br');
4384    
4385     ## As if <br>
4386     $reconstruct_active_formatting_elements->($insert_to_current);
4387    
4388     my $el;
4389    
4390     $el = $self->{document}->create_element_ns
4391     (q<http://www.w3.org/1999/xhtml>, [undef, 'br']);
4392    
4393     $insert->($el);
4394    
4395     ## Ignore the token.
4396     $token = $self->_get_next_token;
4397 wakaba 1.1 return;
4398     } elsif ({
4399     caption => 1, col => 1, colgroup => 1, frame => 1,
4400     frameset => 1, head => 1, option => 1, optgroup => 1,
4401     tbody => 1, td => 1, tfoot => 1, th => 1,
4402     thead => 1, tr => 1,
4403 wakaba 1.31 area => 1, basefont => 1, bgsound => 1,
4404 wakaba 1.1 embed => 1, hr => 1, iframe => 1, image => 1,
4405 wakaba 1.5 img => 1, input => 1, isindex => 1, noembed => 1,
4406 wakaba 1.1 noframes => 1, param => 1, select => 1, spacer => 1,
4407     table => 1, textarea => 1, wbr => 1,
4408     noscript => 0, ## TODO: if scripting is enabled
4409     }->{$token->{tag_name}}) {
4410 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4411 wakaba 1.1 ## Ignore the token
4412     $token = $self->_get_next_token;
4413     return;
4414    
4415     ## ISSUE: Issue on HTML5 new elements in spec
4416    
4417     } else {
4418     ## Step 1
4419     my $node_i = -1;
4420 wakaba 1.3 my $node = $self->{open_elements}->[$node_i];
4421 wakaba 1.1
4422     ## Step 2
4423     S2: {
4424     if ($node->[1] eq $token->{tag_name}) {
4425     ## Step 1
4426     ## generate implied end tags
4427     if ({
4428     dd => 1, dt => 1, li => 1, p => 1,
4429     td => 1, th => 1, tr => 1,
4430 wakaba 1.31 tbody => 1, tfoot=> 1, thead => 1,
4431 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4432 wakaba 1.1 unshift @{$self->{token}}, $token;
4433     $token = {type => 'end tag',
4434 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4435 wakaba 1.1 return;
4436     }
4437    
4438     ## Step 2
4439 wakaba 1.3 if ($token->{tag_name} ne $self->{open_elements}->[-1]->[1]) {
4440     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4441 wakaba 1.1 }
4442    
4443     ## Step 3
4444 wakaba 1.3 splice @{$self->{open_elements}}, $node_i;
4445    
4446     $token = $self->_get_next_token;
4447 wakaba 1.1 last S2;
4448     } else {
4449     ## Step 3
4450     if (not $formatting_category->{$node->[1]} and
4451     #not $phrasing_category->{$node->[1]} and
4452     ($special_category->{$node->[1]} or
4453     $scoping_category->{$node->[1]})) {
4454 wakaba 1.25 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4455 wakaba 1.1 ## Ignore the token
4456     $token = $self->_get_next_token;
4457     last S2;
4458     }
4459     }
4460    
4461     ## Step 4
4462     $node_i--;
4463 wakaba 1.3 $node = $self->{open_elements}->[$node_i];
4464 wakaba 1.1
4465     ## Step 5;
4466     redo S2;
4467     } # S2
4468 wakaba 1.3 return;
4469 wakaba 1.1 }
4470     }
4471     }; # $in_body
4472    
4473     B: {
4474 wakaba 1.36 if ($token->{type} eq 'DOCTYPE') {
4475     $self->{parse_error}-> (type => 'DOCTYPE in the middle');
4476     ## Ignore the token
4477     ## Stay in the phase
4478     $token = $self->_get_next_token;
4479     redo B;
4480     } elsif ($token->{type} eq 'end-of-file') {
4481     if ($token->{insertion_mode} ne 'trailing end') {
4482 wakaba 1.1 ## Generate implied end tags
4483     if ({
4484     dd => 1, dt => 1, li => 1, p => 1, td => 1, th => 1, tr => 1,
4485 wakaba 1.31 tbody => 1, tfoot=> 1, thead => 1,
4486 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4487 wakaba 1.1 unshift @{$self->{token}}, $token;
4488 wakaba 1.3 $token = {type => 'end tag', tag_name => $self->{open_elements}->[-1]->[1]};
4489 wakaba 1.1 redo B;
4490     }
4491    
4492 wakaba 1.3 if (@{$self->{open_elements}} > 2 or
4493     (@{$self->{open_elements}} == 2 and $self->{open_elements}->[1]->[1] ne 'body')) {
4494     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4495     } elsif (defined $self->{inner_html_node} and
4496     @{$self->{open_elements}} > 1 and
4497     $self->{open_elements}->[1]->[1] ne 'body') {
4498     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4499 wakaba 1.1 }
4500    
4501 wakaba 1.36 ## ISSUE: There is an issue in the spec.
4502     }
4503 wakaba 1.1
4504 wakaba 1.36 ## Stop parsing
4505     last B;
4506     } elsif ($token->{type} eq 'start tag' and
4507     $token->{tag_name} eq 'html') {
4508     if ($self->{insertion_mode} eq 'trailing end') {
4509     ## Turn into the main phase
4510     $self->{parse_error}-> (type => 'after html:html');
4511     $self->{insertion_mode} = $previous_insertion_mode;
4512     }
4513    
4514     ## ISSUE: "aa<html>" is not a parse error.
4515     ## ISSUE: "<html>" in fragment is not a parse error.
4516     unless ($token->{first_start_tag}) {
4517     $self->{parse_error}-> (type => 'not first start tag');
4518     }
4519     my $top_el = $self->{open_elements}->[0]->[0];
4520     for my $attr_name (keys %{$token->{attributes}}) {
4521     unless ($top_el->has_attribute_ns (undef, $attr_name)) {
4522     $top_el->set_attribute_ns
4523     (undef, [undef, $attr_name],
4524     $token->{attributes}->{$attr_name}->{value});
4525     }
4526     }
4527     $token = $self->_get_next_token;
4528     redo B;
4529     } elsif ($token->{type} eq 'comment') {
4530     my $comment = $self->{document}->create_comment ($token->{data});
4531     if ($self->{insertion_mode} eq 'trailing end') {
4532     $self->{document}->append_child ($comment);
4533     } elsif ($self->{insertion_mode} eq 'after body') {
4534     $self->{open_elements}->[0]->[0]->append_child ($comment);
4535 wakaba 1.1 } else {
4536 wakaba 1.36 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4537     }
4538     $token = $self->_get_next_token;
4539     redo B;
4540 wakaba 1.52 } elsif ($self->{insertion_mode} eq 'in head' or
4541     $self->{insertion_mode} eq 'in head noscript' or
4542     $self->{insertion_mode} eq 'after head' or
4543     $self->{insertion_mode} eq 'before head') {
4544     if ($token->{type} eq 'character') {
4545     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4546     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4547     unless (length $token->{data}) {
4548     $token = $self->_get_next_token;
4549 wakaba 1.1 redo B;
4550 wakaba 1.52 }
4551 wakaba 1.1 }
4552 wakaba 1.52
4553     if ($self->{insertion_mode} eq 'before head') {
4554     ## As if <head>
4555    
4556 wakaba 1.3 $self->{head_element} = $self->{document}->create_element_ns
4557 wakaba 1.1 (q<http://www.w3.org/1999/xhtml>, [undef, 'head']);
4558    
4559 wakaba 1.52 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4560     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
4561    
4562     ## Reprocess in the "in head" insertion mode...
4563     pop @{$self->{open_elements}};
4564    
4565     ## Reprocess in the "after head" insertion mode...
4566     } elsif ($self->{insertion_mode} eq 'in head noscript') {
4567     ## As if </noscript>
4568     pop @{$self->{open_elements}};
4569     $self->{parse_error}-> (type => 'in noscript:#character');
4570    
4571     ## Reprocess in the "in head" insertion mode...
4572     ## As if </head>
4573     pop @{$self->{open_elements}};
4574 wakaba 1.51
4575 wakaba 1.52 ## Reprocess in the "after head" insertion mode...
4576     } elsif ($self->{insertion_mode} eq 'in head') {
4577     pop @{$self->{open_elements}};
4578 wakaba 1.51
4579 wakaba 1.52 ## Reprocess in the "after head" insertion mode...
4580     }
4581 wakaba 1.51
4582 wakaba 1.52 ## "after head" insertion mode
4583 wakaba 1.51 ## As if <body>
4584    
4585     {
4586     my $el;
4587    
4588     $el = $self->{document}->create_element_ns
4589     (q<http://www.w3.org/1999/xhtml>, [undef, 'body']);
4590    
4591     $self->{open_elements}->[-1]->[0]->append_child ($el);
4592     push @{$self->{open_elements}}, [$el, 'body'];
4593     }
4594    
4595     $self->{insertion_mode} = 'in body';
4596     ## reprocess
4597     redo B;
4598 wakaba 1.1 } elsif ($token->{type} eq 'start tag') {
4599 wakaba 1.52 if ($token->{tag_name} eq 'head') {
4600     if ($self->{insertion_mode} eq 'before head') {
4601    
4602     $self->{head_element} = $self->{document}->create_element_ns
4603     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
4604    
4605     for my $attr_name (keys %{ $token->{attributes}}) {
4606     $self->{head_element}->set_attribute_ns (undef, [undef, $attr_name],
4607     $token->{attributes} ->{$attr_name}->{value});
4608     }
4609    
4610     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4611     push @{$self->{open_elements}}, [$self->{head_element}, $token->{tag_name}];
4612     $self->{insertion_mode} = 'in head';
4613     $token = $self->_get_next_token;
4614     redo B;
4615     } elsif ($self->{insertion_mode} ne 'after head') {
4616     $self->{parse_error}-> (type => 'in head:head'); # or in head noscript
4617     ## Ignore the token
4618     $token = $self->_get_next_token;
4619     redo B;
4620     } else {
4621     #
4622     }
4623     } elsif ($self->{insertion_mode} eq 'before head') {
4624     ## As if <head>
4625    
4626     $self->{head_element} = $self->{document}->create_element_ns
4627     (q<http://www.w3.org/1999/xhtml>, [undef, 'head']);
4628    
4629     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4630     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
4631    
4632     $self->{insertion_mode} = 'in head';
4633     ## Reprocess in the "in head" insertion mode...
4634     }
4635    
4636 wakaba 1.51 if ($token->{tag_name} eq 'base') {
4637     if ($self->{insertion_mode} eq 'in head noscript') {
4638     ## As if </noscript>
4639     pop @{$self->{open_elements}};
4640     $self->{parse_error}-> (type => 'in noscript:base');
4641    
4642     $self->{insertion_mode} = 'in head';
4643     ## Reprocess in the "in head" insertion mode...
4644     }
4645    
4646     ## NOTE: There is an "as if in head" code clone.
4647     if ($self->{insertion_mode} eq 'after head') {
4648     $self->{parse_error}-> (type => 'after head:'.$token->{tag_name});
4649     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
4650     }
4651    
4652     {
4653     my $el;
4654    
4655     $el = $self->{document}->create_element_ns
4656     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
4657    
4658     for my $attr_name (keys %{ $token->{attributes}}) {
4659     $el->set_attribute_ns (undef, [undef, $attr_name],
4660     $token->{attributes} ->{$attr_name}->{value});
4661     }
4662    
4663     $self->{open_elements}->[-1]->[0]->append_child ($el);
4664     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
4665     }
4666    
4667     pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
4668     pop @{$self->{open_elements}}
4669     if $self->{insertion_mode} eq 'after head';
4670     $token = $self->_get_next_token;
4671     redo B;
4672     } elsif ($token->{tag_name} eq 'link') {
4673 wakaba 1.25 ## NOTE: There is an "as if in head" code clone.
4674     if ($self->{insertion_mode} eq 'after head') {
4675     $self->{parse_error}-> (type => 'after head:'.$token->{tag_name});
4676     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
4677     }
4678 wakaba 1.1
4679 wakaba 1.25 {
4680     my $el;
4681    
4682     $el = $self->{document}->create_element_ns
4683     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
4684 wakaba 1.1
4685 wakaba 1.25 for my $attr_name (keys %{ $token->{attributes}}) {
4686     $el->set_attribute_ns (undef, [undef, $attr_name],
4687     $token->{attributes} ->{$attr_name}->{value});
4688 wakaba 1.1 }
4689    
4690 wakaba 1.25 $self->{open_elements}->[-1]->[0]->append_child ($el);
4691     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
4692     }
4693    
4694     pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
4695     pop @{$self->{open_elements}}
4696     if $self->{insertion_mode} eq 'after head';
4697 wakaba 1.1 $token = $self->_get_next_token;
4698 wakaba 1.25 redo B;
4699 wakaba 1.34 } elsif ($token->{tag_name} eq 'meta') {
4700     ## NOTE: There is an "as if in head" code clone.
4701     if ($self->{insertion_mode} eq 'after head') {
4702     $self->{parse_error}-> (type => 'after head:'.$token->{tag_name});
4703     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
4704     }
4705    
4706     {
4707     my $el;
4708    
4709     $el = $self->{document}->create_element_ns
4710     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
4711    
4712     for my $attr_name (keys %{ $token->{attributes}}) {
4713     $el->set_attribute_ns (undef, [undef, $attr_name],
4714     $token->{attributes} ->{$attr_name}->{value});
4715     }
4716    
4717     $self->{open_elements}->[-1]->[0]->append_child ($el);
4718     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
4719     }
4720    
4721     pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
4722    
4723     unless ($self->{confident}) {
4724     my $charset;
4725     if ($token->{attributes}->{charset}) { ## TODO: And if supported
4726     $charset = $token->{attributes}->{charset}->{value};
4727     }
4728     if ($token->{attributes}->{'http-equiv'}) {
4729 wakaba 1.35 ## ISSUE: Algorithm name in the spec was incorrect so that not linked to the definition.
4730 wakaba 1.34 if ($token->{attributes}->{'http-equiv'}->{value}
4731     =~ /\A[^;]*;[\x09-\x0D\x20]*charset[\x09-\x0D\x20]*=
4732     [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
4733     ([^"'\x09-\x0D\x20][^\x09-\x0D\x20]*))/x) {
4734     $charset = defined $1 ? $1 : defined $2 ? $2 : $3;
4735     } ## TODO: And if supported
4736     }
4737     ## TODO: Change the encoding
4738     }
4739    
4740     ## TODO: Extracting |charset| from |meta|.
4741     pop @{$self->{open_elements}}
4742     if $self->{insertion_mode} eq 'after head';
4743     $token = $self->_get_next_token;
4744     redo B;
4745 wakaba 1.51 } elsif ($token->{tag_name} eq 'title') {
4746     if ($self->{insertion_mode} eq 'in head noscript') {
4747     ## As if </noscript>
4748     pop @{$self->{open_elements}};
4749     $self->{parse_error}-> (type => 'in noscript:title');
4750    
4751     $self->{insertion_mode} = 'in head';
4752     ## Reprocess in the "in head" insertion mode...
4753     } elsif ($self->{insertion_mode} eq 'after head') {
4754 wakaba 1.25 $self->{parse_error}-> (type => 'after head:'.$token->{tag_name});
4755     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
4756     }
4757 wakaba 1.51
4758     ## NOTE: There is an "as if in head" code clone.
4759 wakaba 1.31 my $parent = defined $self->{head_element} ? $self->{head_element}
4760     : $self->{open_elements}->[-1]->[0];
4761 wakaba 1.41 $parse_rcdata->(RCDATA_CONTENT_MODEL,
4762     sub { $parent->append_child ($_[0]) });
4763 wakaba 1.25 pop @{$self->{open_elements}}
4764     if $self->{insertion_mode} eq 'after head';
4765 wakaba 1.1 redo B;
4766     } elsif ($token->{tag_name} eq 'style') {
4767 wakaba 1.25 ## NOTE: Or (scripting is enabled and tag_name eq 'noscript' and
4768     ## insertion mode 'in head')
4769     ## NOTE: There is an "as if in head" code clone.
4770     if ($self->{insertion_mode} eq 'after head') {
4771     $self->{parse_error}-> (type => 'after head:'.$token->{tag_name});
4772     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
4773     }
4774 wakaba 1.41 $parse_rcdata->(CDATA_CONTENT_MODEL, $insert_to_current);
4775 wakaba 1.25 pop @{$self->{open_elements}}
4776     if $self->{insertion_mode} eq 'after head';
4777     redo B;
4778     } elsif ($token->{tag_name} eq 'noscript') {
4779     if ($self->{insertion_mode} eq 'in head') {
4780     ## NOTE: and scripting is disabled
4781    
4782     {
4783     my $el;
4784    
4785 wakaba 1.1 $el = $self->{document}->create_element_ns
4786     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
4787    
4788 wakaba 1.25 for my $attr_name (keys %{ $token->{attributes}}) {
4789 wakaba 1.1 $el->set_attribute_ns (undef, [undef, $attr_name],
4790 wakaba 1.25 $token->{attributes} ->{$attr_name}->{value});
4791 wakaba 1.1 }
4792    
4793 wakaba 1.25 $self->{open_elements}->[-1]->[0]->append_child ($el);
4794     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
4795     }
4796    
4797     $self->{insertion_mode} = 'in head noscript';
4798     $token = $self->_get_next_token;
4799     redo B;
4800     } elsif ($self->{insertion_mode} eq 'in head noscript') {
4801 wakaba 1.30 $self->{parse_error}-> (type => 'in noscript:noscript');
4802 wakaba 1.25 ## Ignore the token
4803 wakaba 1.42 $token = $self->_get_next_token;
4804 wakaba 1.25 redo B;
4805 wakaba 1.24 } else {
4806 wakaba 1.25 #
4807 wakaba 1.24 }
4808 wakaba 1.51 } elsif ($token->{tag_name} eq 'script') {
4809     if ($self->{insertion_mode} eq 'in head noscript') {
4810     ## As if </noscript>
4811     pop @{$self->{open_elements}};
4812     $self->{parse_error}-> (type => 'in noscript:script');
4813    
4814     $self->{insertion_mode} = 'in head';
4815     ## Reprocess in the "in head" insertion mode...
4816     } elsif ($self->{insertion_mode} eq 'after head') {
4817 wakaba 1.25 $self->{parse_error}-> (type => 'after head:'.$token->{tag_name});
4818     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
4819     }
4820 wakaba 1.51
4821 wakaba 1.25 ## NOTE: There is an "as if in head" code clone.
4822     $script_start_tag->($insert_to_current);
4823     pop @{$self->{open_elements}}
4824     if $self->{insertion_mode} eq 'after head';
4825 wakaba 1.1 redo B;
4826 wakaba 1.51 } elsif ($token->{tag_name} eq 'body' or
4827     $token->{tag_name} eq 'frameset') {
4828     if ($self->{insertion_mode} eq 'in head noscript') {
4829     ## As if </noscript>
4830     pop @{$self->{open_elements}};
4831     $self->{parse_error}-> (type => 'in noscript:'.$token->{tag_name});
4832    
4833     ## Reprocess in the "in head" insertion mode...
4834     ## As if </head>
4835     pop @{$self->{open_elements}};
4836    
4837     ## Reprocess in the "after head" insertion mode...
4838     } elsif ($self->{insertion_mode} eq 'in head') {
4839     pop @{$self->{open_elements}};
4840    
4841     ## Reprocess in the "after head" insertion mode...
4842     }
4843    
4844     ## "after head" insertion mode
4845 wakaba 1.1
4846     {
4847     my $el;
4848    
4849     $el = $self->{document}->create_element_ns
4850 wakaba 1.51 (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
4851 wakaba 1.1
4852     for my $attr_name (keys %{ $token->{attributes}}) {
4853     $el->set_attribute_ns (undef, [undef, $attr_name],
4854     $token->{attributes} ->{$attr_name}->{value});
4855     }
4856    
4857 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
4858 wakaba 1.51 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
4859 wakaba 1.1 }
4860    
4861 wakaba 1.51 $self->{insertion_mode} = 'in '.$token->{tag_name};
4862 wakaba 1.1 $token = $self->_get_next_token;
4863     redo B;
4864 wakaba 1.51 } else {
4865     #
4866     }
4867    
4868     if ($self->{insertion_mode} eq 'in head noscript') {
4869     ## As if </noscript>
4870     pop @{$self->{open_elements}};
4871     $self->{parse_error}-> (type => 'in noscript:/'.$token->{tag_name});
4872 wakaba 1.1
4873 wakaba 1.51 ## Reprocess in the "in head" insertion mode...
4874     ## As if </head>
4875     pop @{$self->{open_elements}};
4876    
4877     ## Reprocess in the "after head" insertion mode...
4878     } elsif ($self->{insertion_mode} eq 'in head') {
4879     ## As if </head>
4880     pop @{$self->{open_elements}};
4881    
4882     ## Reprocess in the "after head" insertion mode...
4883     }
4884    
4885     ## "after head" insertion mode
4886     ## As if <body>
4887    
4888 wakaba 1.1 {
4889     my $el;
4890    
4891     $el = $self->{document}->create_element_ns
4892 wakaba 1.51 (q<http://www.w3.org/1999/xhtml>, [undef, 'body']);
4893 wakaba 1.1
4894 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
4895 wakaba 1.51 push @{$self->{open_elements}}, [$el, 'body'];
4896 wakaba 1.1 }
4897    
4898 wakaba 1.51 $self->{insertion_mode} = 'in body';
4899     ## reprocess
4900     redo B;
4901 wakaba 1.25 } elsif ($token->{type} eq 'end tag') {
4902 wakaba 1.51 if ($token->{tag_name} eq 'head') {
4903 wakaba 1.52 if ($self->{insertion_mode} eq 'before head') {
4904     ## As if <head>
4905    
4906     $self->{head_element} = $self->{document}->create_element_ns
4907     (q<http://www.w3.org/1999/xhtml>, [undef, 'head']);
4908    
4909     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4910     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
4911    
4912     ## Reprocess in the "in head" insertion mode...
4913     pop @{$self->{open_elements}};
4914     $self->{insertion_mode} = 'after head';
4915     $token = $self->_get_next_token;
4916     redo B;
4917     } elsif ($self->{insertion_mode} eq 'in head noscript') {
4918 wakaba 1.51 ## As if </noscript>
4919     pop @{$self->{open_elements}};
4920     $self->{parse_error}-> (type => 'in noscript:script');
4921    
4922     ## Reprocess in the "in head" insertion mode...
4923 wakaba 1.52 pop @{$self->{open_elements}};
4924     $self->{insertion_mode} = 'after head';
4925     $token = $self->_get_next_token;
4926     redo B;
4927     } elsif ($self->{insertion_mode} eq 'in head') {
4928 wakaba 1.51 pop @{$self->{open_elements}};
4929     $self->{insertion_mode} = 'after head';
4930     $token = $self->_get_next_token;
4931     redo B;
4932     } else {
4933     #
4934     }
4935     } elsif ($token->{tag_name} eq 'noscript') {
4936     if ($self->{insertion_mode} eq 'in head noscript') {
4937     pop @{$self->{open_elements}};
4938     $self->{insertion_mode} = 'in head';
4939     $token = $self->_get_next_token;
4940     redo B;
4941 wakaba 1.52 } elsif ($self->{insertion_mode} eq 'before head') {
4942     $self->{parse_error}-> (type => 'unmatched end tag:noscript');
4943     ## Ignore the token ## ISSUE: An issue in the spec.
4944     $token = $self->_get_next_token;
4945     redo B;
4946 wakaba 1.51 } else {
4947     #
4948     }
4949     } elsif ({
4950 wakaba 1.31 body => 1, html => 1,
4951     }->{$token->{tag_name}}) {
4952 wakaba 1.52 if ($self->{insertion_mode} eq 'before head') {
4953     ## As if <head>
4954    
4955     $self->{head_element} = $self->{document}->create_element_ns
4956     (q<http://www.w3.org/1999/xhtml>, [undef, 'head']);
4957    
4958     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4959     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
4960    
4961     $self->{insertion_mode} = 'in head';
4962     ## Reprocess in the "in head" insertion mode...
4963     } elsif ($self->{insertion_mode} eq 'in head noscript') {
4964 wakaba 1.51 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4965     ## Ignore the token
4966     $token = $self->_get_next_token;
4967     redo B;
4968     }
4969 wakaba 1.52
4970     #
4971 wakaba 1.51 } elsif ({
4972 wakaba 1.31 p => 1, br => 1,
4973     }->{$token->{tag_name}}) {
4974 wakaba 1.52 if ($self->{insertion_mode} eq 'before head') {
4975     ## As if <head>
4976    
4977     $self->{head_element} = $self->{document}->create_element_ns
4978     (q<http://www.w3.org/1999/xhtml>, [undef, 'head']);
4979    
4980     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4981     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
4982    
4983     $self->{insertion_mode} = 'in head';
4984     ## Reprocess in the "in head" insertion mode...
4985     }
4986    
4987 wakaba 1.25 #
4988 wakaba 1.1 } else {
4989 wakaba 1.51 if ($self->{insertion_mode} ne 'after head') {
4990     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4991     ## Ignore the token
4992     $token = $self->_get_next_token;
4993     redo B;
4994     } else {
4995     #
4996     }
4997     }
4998    
4999     if ($self->{insertion_mode} eq 'in head noscript') {
5000     ## As if </noscript>
5001     pop @{$self->{open_elements}};
5002     $self->{parse_error}-> (type => 'in noscript:/'.$token->{tag_name});
5003    
5004     ## Reprocess in the "in head" insertion mode...
5005     ## As if </head>
5006     pop @{$self->{open_elements}};
5007    
5008     ## Reprocess in the "after head" insertion mode...
5009     } elsif ($self->{insertion_mode} eq 'in head') {
5010     ## As if </head>
5011     pop @{$self->{open_elements}};
5012    
5013     ## Reprocess in the "after head" insertion mode...
5014 wakaba 1.52 } elsif ($self->{insertion_mode} eq 'before head') {
5015     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5016     ## Ignore the token ## ISSUE: An issue in the spec.
5017     $token = $self->_get_next_token;
5018     redo B;
5019 wakaba 1.25 }
5020    
5021 wakaba 1.51 ## "after head" insertion mode
5022     ## As if <body>
5023 wakaba 1.25
5024 wakaba 1.1 {
5025     my $el;
5026    
5027     $el = $self->{document}->create_element_ns
5028     (q<http://www.w3.org/1999/xhtml>, [undef, 'body']);
5029    
5030 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
5031     push @{$self->{open_elements}}, [$el, 'body'];
5032 wakaba 1.1 }
5033    
5034 wakaba 1.25 $self->{insertion_mode} = 'in body';
5035 wakaba 1.51 ## reprocess
5036     redo B;
5037     } else {
5038     die "$0: $token->{type}: Unknown token type";
5039 wakaba 1.25 }
5040    
5041     ## ISSUE: An issue in the spec.
5042 wakaba 1.52 } elsif ($self->{insertion_mode} eq 'in body' or
5043     $self->{insertion_mode} eq 'in cell' or
5044     $self->{insertion_mode} eq 'in caption') {
5045 wakaba 1.1 if ($token->{type} eq 'character') {
5046     ## NOTE: There is a code clone of "character in body".
5047     $reconstruct_active_formatting_elements->($insert_to_current);
5048    
5049 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
5050 wakaba 1.1
5051     $token = $self->_get_next_token;
5052     redo B;
5053 wakaba 1.43 } elsif ($token->{type} eq 'start tag') {
5054     if ({
5055     caption => 1, col => 1, colgroup => 1, tbody => 1,
5056     td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
5057 wakaba 1.44 }->{$token->{tag_name}}) {
5058     if ($self->{insertion_mode} eq 'in cell') {
5059     ## have an element in table scope
5060     my $tn;
5061     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5062     my $node = $self->{open_elements}->[$_];
5063     if ($node->[1] eq 'td' or $node->[1] eq 'th') {
5064     $tn = $node->[1];
5065     last INSCOPE;
5066     } elsif ({
5067     table => 1, html => 1,
5068     }->{$node->[1]}) {
5069     last INSCOPE;
5070     }
5071     } # INSCOPE
5072     unless (defined $tn) {
5073     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5074     ## Ignore the token
5075     $token = $self->_get_next_token;
5076     redo B;
5077     }
5078    
5079     ## Close the cell
5080     unshift @{$self->{token}}, $token; # <?>
5081     $token = {type => 'end tag', tag_name => $tn};
5082     redo B;
5083     } elsif ($self->{insertion_mode} eq 'in caption') {
5084     $self->{parse_error}-> (type => 'not closed:caption');
5085    
5086     ## As if </caption>
5087     ## have a table element in table scope
5088     my $i;
5089     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5090     my $node = $self->{open_elements}->[$_];
5091     if ($node->[1] eq 'caption') {
5092     $i = $_;
5093     last INSCOPE;
5094     } elsif ({
5095     table => 1, html => 1,
5096     }->{$node->[1]}) {
5097     last INSCOPE;
5098     }
5099     } # INSCOPE
5100     unless (defined $i) {
5101     $self->{parse_error}-> (type => 'unmatched end tag:caption');
5102     ## Ignore the token
5103     $token = $self->_get_next_token;
5104     redo B;
5105     }
5106    
5107     ## generate implied end tags
5108     if ({
5109     dd => 1, dt => 1, li => 1, p => 1,
5110     td => 1, th => 1, tr => 1,
5111     tbody => 1, tfoot=> 1, thead => 1,
5112     }->{$self->{open_elements}->[-1]->[1]}) {
5113     unshift @{$self->{token}}, $token; # <?>
5114     $token = {type => 'end tag', tag_name => 'caption'};
5115     unshift @{$self->{token}}, $token;
5116     $token = {type => 'end tag',
5117     tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
5118     redo B;
5119     }
5120 wakaba 1.43
5121 wakaba 1.44 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
5122     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5123     }
5124    
5125     splice @{$self->{open_elements}}, $i;
5126    
5127     $clear_up_to_marker->();
5128    
5129     $self->{insertion_mode} = 'in table';
5130    
5131     ## reprocess
5132     redo B;
5133     } else {
5134     #
5135     }
5136     } else {
5137     #
5138     }
5139     } elsif ($token->{type} eq 'end tag') {
5140     if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
5141     if ($self->{insertion_mode} eq 'in cell') {
5142     ## have an element in table scope
5143     my $i;
5144     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5145     my $node = $self->{open_elements}->[$_];
5146     if ($node->[1] eq $token->{tag_name}) {
5147     $i = $_;
5148     last INSCOPE;
5149     } elsif ({
5150     table => 1, html => 1,
5151     }->{$node->[1]}) {
5152     last INSCOPE;
5153     }
5154     } # INSCOPE
5155     unless (defined $i) {
5156     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5157     ## Ignore the token
5158     $token = $self->_get_next_token;
5159     redo B;
5160     }
5161    
5162     ## generate implied end tags
5163     if ({
5164     dd => 1, dt => 1, li => 1, p => 1,
5165     td => ($token->{tag_name} eq 'th'),
5166     th => ($token->{tag_name} eq 'td'),
5167     tr => 1,
5168     tbody => 1, tfoot=> 1, thead => 1,
5169     }->{$self->{open_elements}->[-1]->[1]}) {
5170     unshift @{$self->{token}}, $token;
5171     $token = {type => 'end tag',
5172     tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
5173     redo B;
5174     }
5175    
5176     if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
5177     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5178 wakaba 1.43 }
5179 wakaba 1.44
5180     splice @{$self->{open_elements}}, $i;
5181    
5182     $clear_up_to_marker->();
5183    
5184     $self->{insertion_mode} = 'in row';
5185    
5186     $token = $self->_get_next_token;
5187     redo B;
5188     } elsif ($self->{insertion_mode} eq 'in caption') {
5189     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5190 wakaba 1.43 ## Ignore the token
5191     $token = $self->_get_next_token;
5192     redo B;
5193 wakaba 1.44 } else {
5194     #
5195 wakaba 1.43 }
5196 wakaba 1.44 } elsif ($token->{tag_name} eq 'caption') {
5197     if ($self->{insertion_mode} eq 'in caption') {
5198     ## have a table element in table scope
5199     my $i;
5200     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5201     my $node = $self->{open_elements}->[$_];
5202     if ($node->[1] eq $token->{tag_name}) {
5203     $i = $_;
5204     last INSCOPE;
5205     } elsif ({
5206     table => 1, html => 1,
5207     }->{$node->[1]}) {
5208     last INSCOPE;
5209     }
5210     } # INSCOPE
5211     unless (defined $i) {
5212     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5213     ## Ignore the token
5214     $token = $self->_get_next_token;
5215     redo B;
5216     }
5217    
5218     ## generate implied end tags
5219     if ({
5220     dd => 1, dt => 1, li => 1, p => 1,
5221     td => 1, th => 1, tr => 1,
5222     tbody => 1, tfoot=> 1, thead => 1,
5223     }->{$self->{open_elements}->[-1]->[1]}) {
5224     unshift @{$self->{token}}, $token;
5225     $token = {type => 'end tag',
5226     tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
5227     redo B;
5228     }
5229    
5230     if ($self->{open_elements}->[-1]->[1] ne 'caption') {
5231     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5232     }
5233    
5234     splice @{$self->{open_elements}}, $i;
5235    
5236     $clear_up_to_marker->();
5237    
5238     $self->{insertion_mode} = 'in table';
5239    
5240     $token = $self->_get_next_token;
5241     redo B;
5242     } elsif ($self->{insertion_mode} eq 'in cell') {
5243     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5244     ## Ignore the token
5245     $token = $self->_get_next_token;
5246 wakaba 1.43 redo B;
5247 wakaba 1.44 } else {
5248     #
5249 wakaba 1.43 }
5250 wakaba 1.44 } elsif ({
5251     table => 1, tbody => 1, tfoot => 1,
5252     thead => 1, tr => 1,
5253     }->{$token->{tag_name}} and
5254     $self->{insertion_mode} eq 'in cell') {
5255     ## have an element in table scope
5256 wakaba 1.43 my $i;
5257 wakaba 1.44 my $tn;
5258 wakaba 1.43 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5259     my $node = $self->{open_elements}->[$_];
5260     if ($node->[1] eq $token->{tag_name}) {
5261     $i = $_;
5262     last INSCOPE;
5263 wakaba 1.44 } elsif ($node->[1] eq 'td' or $node->[1] eq 'th') {
5264     $tn = $node->[1];
5265     ## NOTE: There is exactly one |td| or |th| element
5266     ## in scope in the stack of open elements by definition.
5267 wakaba 1.43 } elsif ({
5268     table => 1, html => 1,
5269     }->{$node->[1]}) {
5270     last INSCOPE;
5271     }
5272     } # INSCOPE
5273     unless (defined $i) {
5274     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5275     ## Ignore the token
5276     $token = $self->_get_next_token;
5277     redo B;
5278     }
5279    
5280 wakaba 1.44 ## Close the cell
5281     unshift @{$self->{token}}, $token; # </?>
5282     $token = {type => 'end tag', tag_name => $tn};
5283 wakaba 1.43 redo B;
5284     } elsif ($token->{tag_name} eq 'table' and
5285     $self->{insertion_mode} eq 'in caption') {
5286     $self->{parse_error}-> (type => 'not closed:caption');
5287    
5288     ## As if </caption>
5289     ## have a table element in table scope
5290     my $i;
5291     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5292     my $node = $self->{open_elements}->[$_];
5293     if ($node->[1] eq 'caption') {
5294     $i = $_;
5295     last INSCOPE;
5296     } elsif ({
5297     table => 1, html => 1,
5298     }->{$node->[1]}) {
5299     last INSCOPE;
5300     }
5301     } # INSCOPE
5302     unless (defined $i) {
5303     $self->{parse_error}-> (type => 'unmatched end tag:caption');
5304     ## Ignore the token
5305     $token = $self->_get_next_token;
5306     redo B;
5307     }
5308    
5309     ## generate implied end tags
5310     if ({
5311     dd => 1, dt => 1, li => 1, p => 1,
5312     td => 1, th => 1, tr => 1,
5313     tbody => 1, tfoot=> 1, thead => 1,
5314     }->{$self->{open_elements}->[-1]->[1]}) {
5315     unshift @{$self->{token}}, $token; # </table>
5316     $token = {type => 'end tag', tag_name => 'caption'};
5317     unshift @{$self->{token}}, $token;
5318     $token = {type => 'end tag',
5319     tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
5320     redo B;
5321     }
5322    
5323     if ($self->{open_elements}->[-1]->[1] ne 'caption') {
5324     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5325     }
5326    
5327     splice @{$self->{open_elements}}, $i;
5328    
5329     $clear_up_to_marker->();
5330    
5331     $self->{insertion_mode} = 'in table';
5332    
5333     ## reprocess
5334     redo B;
5335     } elsif ({
5336 wakaba 1.44 body => 1, col => 1, colgroup => 1, html => 1,
5337     }->{$token->{tag_name}}) {
5338     if ($self->{insertion_mode} eq 'in cell' or
5339     $self->{insertion_mode} eq 'in caption') {
5340     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5341     ## Ignore the token
5342     $token = $self->_get_next_token;
5343     redo B;
5344     } else {
5345     #
5346     }
5347     } elsif ({
5348     tbody => 1, tfoot => 1,
5349     thead => 1, tr => 1,
5350 wakaba 1.43 }->{$token->{tag_name}} and
5351     $self->{insertion_mode} eq 'in caption') {
5352     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5353     ## Ignore the token
5354     $token = $self->_get_next_token;
5355     redo B;
5356     } else {
5357     #
5358     }
5359 wakaba 1.1 } else {
5360 wakaba 1.43 #
5361 wakaba 1.1 }
5362 wakaba 1.44
5363 wakaba 1.43 $in_body->($insert_to_current);
5364     redo B;
5365 wakaba 1.49 } elsif ($self->{insertion_mode} eq 'in row' or
5366     $self->{insertion_mode} eq 'in table body' or
5367 wakaba 1.48 $self->{insertion_mode} eq 'in table') {
5368 wakaba 1.1 if ($token->{type} eq 'character') {
5369     ## NOTE: There are "character in table" code clones.
5370     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5371 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5372 wakaba 1.1
5373     unless (length $token->{data}) {
5374     $token = $self->_get_next_token;
5375     redo B;
5376     }
5377     }
5378    
5379 wakaba 1.3 $self->{parse_error}-> (type => 'in table:#character');
5380    
5381 wakaba 1.1 ## As if in body, but insert into foster parent element
5382     ## ISSUE: Spec says that "whenever a node would be inserted
5383     ## into the current node" while characters might not be
5384     ## result in a new Text node.
5385     $reconstruct_active_formatting_elements->($insert_to_foster);
5386    
5387     if ({
5388     table => 1, tbody => 1, tfoot => 1,
5389     thead => 1, tr => 1,
5390 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5391 wakaba 1.1 # MUST
5392     my $foster_parent_element;
5393     my $next_sibling;
5394     my $prev_sibling;
5395 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
5396     if ($self->{open_elements}->[$_]->[1] eq 'table') {
5397     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
5398 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
5399     $foster_parent_element = $parent;
5400 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
5401 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
5402     } else {
5403 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
5404 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
5405     }
5406     last OE;
5407     }
5408     } # OE
5409 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
5410 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
5411     unless defined $foster_parent_element;
5412     if (defined $prev_sibling and
5413     $prev_sibling->node_type == 3) {
5414     $prev_sibling->manakai_append_text ($token->{data});
5415     } else {
5416     $foster_parent_element->insert_before
5417     ($self->{document}->create_text_node ($token->{data}),
5418     $next_sibling);
5419     }
5420     } else {
5421 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
5422 wakaba 1.1 }
5423    
5424     $token = $self->_get_next_token;
5425     redo B;
5426     } elsif ($token->{type} eq 'start tag') {
5427     if ({
5428 wakaba 1.49 tr => ($self->{insertion_mode} ne 'in row'),
5429 wakaba 1.48 th => 1, td => 1,
5430 wakaba 1.1 }->{$token->{tag_name}}) {
5431 wakaba 1.48 if ($self->{insertion_mode} eq 'in table') {
5432     ## Clear back to table context
5433     while ($self->{open_elements}->[-1]->[1] ne 'table' and
5434     $self->{open_elements}->[-1]->[1] ne 'html') {
5435     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5436     pop @{$self->{open_elements}};
5437     }
5438    
5439    
5440     {
5441     my $el;
5442    
5443     $el = $self->{document}->create_element_ns
5444     (q<http://www.w3.org/1999/xhtml>, [undef, 'tbody']);
5445    
5446     $self->{open_elements}->[-1]->[0]->append_child ($el);
5447     push @{$self->{open_elements}}, [$el, 'tbody'];
5448     }
5449    
5450     $self->{insertion_mode} = 'in table body';
5451     ## reprocess in the "in table body" insertion mode...
5452     }
5453    
5454 wakaba 1.49 if ($self->{insertion_mode} eq 'in table body') {
5455     unless ($token->{tag_name} eq 'tr') {
5456     $self->{parse_error}-> (type => 'missing start tag:tr');
5457     }
5458    
5459     ## Clear back to table body context
5460     while (not {
5461     tbody => 1, tfoot => 1, thead => 1, html => 1,
5462     }->{$self->{open_elements}->[-1]->[1]}) {
5463     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5464     pop @{$self->{open_elements}};
5465     }
5466 wakaba 1.48
5467 wakaba 1.49 $self->{insertion_mode} = 'in row';
5468     if ($token->{tag_name} eq 'tr') {
5469    
5470 wakaba 1.1 {
5471     my $el;
5472    
5473     $el = $self->{document}->create_element_ns
5474     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
5475    
5476     for my $attr_name (keys %{ $token->{attributes}}) {
5477     $el->set_attribute_ns (undef, [undef, $attr_name],
5478     $token->{attributes} ->{$attr_name}->{value});
5479     }
5480    
5481 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
5482     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
5483 wakaba 1.1 }
5484    
5485 wakaba 1.49 $token = $self->_get_next_token;
5486     redo B;
5487     } else {
5488    
5489 wakaba 1.48 {
5490     my $el;
5491    
5492     $el = $self->{document}->create_element_ns
5493     (q<http://www.w3.org/1999/xhtml>, [undef, 'tr']);
5494    
5495     $self->{open_elements}->[-1]->[0]->append_child ($el);
5496     push @{$self->{open_elements}}, [$el, 'tr'];
5497     }
5498    
5499 wakaba 1.49 ## reprocess in the "in row" insertion mode
5500     }
5501     }
5502    
5503     ## Clear back to table row context
5504     while (not {
5505     tr => 1, html => 1,
5506     }->{$self->{open_elements}->[-1]->[1]}) {
5507     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5508     pop @{$self->{open_elements}};
5509     }
5510    
5511    
5512     {
5513     my $el;
5514    
5515     $el = $self->{document}->create_element_ns
5516     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
5517    
5518     for my $attr_name (keys %{ $token->{attributes}}) {
5519     $el->set_attribute_ns (undef, [undef, $attr_name],
5520     $token->{attributes} ->{$attr_name}->{value});
5521     }
5522    
5523     $self->{open_elements}->[-1]->[0]->append_child ($el);
5524     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
5525     }
5526    
5527     $self->{insertion_mode} = 'in cell';
5528    
5529     push @$active_formatting_elements, ['#marker', ''];
5530    
5531     $token = $self->_get_next_token;
5532     redo B;
5533     } elsif ({
5534     caption => 1, col => 1, colgroup => 1,
5535 wakaba 1.50 tbody => 1, tfoot => 1, thead => 1,
5536     tr => 1, # $self->{insertion_mode} eq 'in row'
5537     }->{$token->{tag_name}}) {
5538     if ($self->{insertion_mode} eq 'in row') {
5539     ## As if </tr>
5540     ## have an element in table scope
5541     my $i;
5542     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5543     my $node = $self->{open_elements}->[$_];
5544     if ($node->[1] eq 'tr') {
5545     $i = $_;
5546     last INSCOPE;
5547     } elsif ({
5548     table => 1, html => 1,
5549     }->{$node->[1]}) {
5550     last INSCOPE;
5551     }
5552     } # INSCOPE
5553     unless (defined $i) {
5554     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5555     ## No |tr| element in table scope: report it (same error type spelling as the other clones) and ignore the token
5556     $token = $self->_get_next_token;
5557     redo B;
5558     }
5559    
5560     ## Clear back to table row context
5561     while (not {
5562     tr => 1, html => 1,
5563     }->{$self->{open_elements}->[-1]->[1]}) {
5564     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5565     pop @{$self->{open_elements}};
5566     }
5567    
5568     pop @{$self->{open_elements}}; # tr
5569     $self->{insertion_mode} = 'in table body';
5570     if ($token->{tag_name} eq 'tr') {
5571     ## reprocess
5572     redo B;
5573     } else {
5574     ## reprocess in the "in table body" insertion mode...
5575 wakaba 1.49 }
5576     }
5577    
5578 wakaba 1.50 if ($self->{insertion_mode} eq 'in table body') {
5579     ## have an element in table scope
5580     my $i;
5581     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5582     my $node = $self->{open_elements}->[$_];
5583     if ({
5584     tbody => 1, thead => 1, tfoot => 1,
5585     }->{$node->[1]}) {
5586     $i = $_;
5587     last INSCOPE;
5588     } elsif ({
5589     table => 1, html => 1,
5590     }->{$node->[1]}) {
5591     last INSCOPE;
5592     }
5593     } # INSCOPE
5594     unless (defined $i) {
5595     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5596     ## Ignore the token
5597     $token = $self->_get_next_token;
5598     redo B;
5599     }
5600 wakaba 1.49
5601 wakaba 1.50 ## Clear back to table body context
5602     while (not {
5603     tbody => 1, tfoot => 1, thead => 1, html => 1,
5604     }->{$self->{open_elements}->[-1]->[1]}) {
5605     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5606     pop @{$self->{open_elements}};
5607 wakaba 1.48 }
5608 wakaba 1.50
5609     ## As if <{current node}>
5610     ## have an element in table scope
5611     ## true by definition
5612    
5613     ## Clear back to table body context
5614     ## nop by definition
5615    
5616 wakaba 1.48 pop @{$self->{open_elements}};
5617 wakaba 1.50 $self->{insertion_mode} = 'in table';
5618     ## reprocess in "in table" insertion mode...
5619 wakaba 1.48 }
5620    
5621 wakaba 1.50 if ($token->{tag_name} eq 'col') {
5622     ## Clear back to table context
5623     while ($self->{open_elements}->[-1]->[1] ne 'table' and
5624     $self->{open_elements}->[-1]->[1] ne 'html') {
5625     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5626     pop @{$self->{open_elements}};
5627     }
5628    
5629    
5630 wakaba 1.1 {
5631     my $el;
5632    
5633     $el = $self->{document}->create_element_ns
5634 wakaba 1.48 (q<http://www.w3.org/1999/xhtml>, [undef, 'colgroup']);
5635 wakaba 1.1
5636 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
5637 wakaba 1.48 push @{$self->{open_elements}}, [$el, 'colgroup'];
5638 wakaba 1.1 }
5639    
5640 wakaba 1.50 $self->{insertion_mode} = 'in column group';
5641     ## reprocess
5642     redo B;
5643     } elsif ({
5644     caption => 1,
5645     colgroup => 1,
5646     tbody => 1, tfoot => 1, thead => 1,
5647     }->{$token->{tag_name}}) {
5648     ## Clear back to table context
5649     while ($self->{open_elements}->[-1]->[1] ne 'table' and
5650     $self->{open_elements}->[-1]->[1] ne 'html') {
5651     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5652     pop @{$self->{open_elements}};
5653     }
5654    
5655     push @$active_formatting_elements, ['#marker', '']
5656     if $token->{tag_name} eq 'caption';
5657    
5658    
5659 wakaba 1.48 {
5660     my $el;
5661    
5662     $el = $self->{document}->create_element_ns
5663     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
5664    
5665     for my $attr_name (keys %{ $token->{attributes}}) {
5666     $el->set_attribute_ns (undef, [undef, $attr_name],
5667     $token->{attributes} ->{$attr_name}->{value});
5668     }
5669    
5670     $self->{open_elements}->[-1]->[0]->append_child ($el);
5671     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
5672     }
5673    
5674 wakaba 1.50 $self->{insertion_mode} = {
5675     caption => 'in caption',
5676     colgroup => 'in column group',
5677     tbody => 'in table body',
5678     tfoot => 'in table body',
5679     thead => 'in table body',
5680     }->{$token->{tag_name}};
5681     $token = $self->_get_next_token;
5682     redo B;
5683     } else {
5684     die "$0: in table: <>: $token->{tag_name}";
5685     }
5686 wakaba 1.1 } elsif ($token->{tag_name} eq 'table') {
5687     ## NOTE: There are code clones for this "table in table"
5688 wakaba 1.3 $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5689 wakaba 1.1
5690     ## As if </table>
5691     ## have a table element in table scope
5692     my $i;
5693 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5694     my $node = $self->{open_elements}->[$_];
5695 wakaba 1.1 if ($node->[1] eq 'table') {
5696     $i = $_;
5697     last INSCOPE;
5698     } elsif ({
5699     table => 1, html => 1,
5700     }->{$node->[1]}) {
5701     last INSCOPE;
5702     }
5703     } # INSCOPE
5704     unless (defined $i) {
5705 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:table');
5706 wakaba 1.1 ## Ignore tokens </table><table>
5707     $token = $self->_get_next_token;
5708     redo B;
5709     }
5710    
5711     ## generate implied end tags
5712     if ({
5713     dd => 1, dt => 1, li => 1, p => 1,
5714     td => 1, th => 1, tr => 1,
5715 wakaba 1.31 tbody => 1, tfoot=> 1, thead => 1,
5716 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5717 wakaba 1.1 unshift @{$self->{token}}, $token; # <table>
5718     $token = {type => 'end tag', tag_name => 'table'};
5719     unshift @{$self->{token}}, $token;
5720     $token = {type => 'end tag',
5721 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
5722 wakaba 1.1 redo B;
5723     }
5724    
5725 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
5726     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5727 wakaba 1.1 }
5728    
5729 wakaba 1.3 splice @{$self->{open_elements}}, $i;
5730 wakaba 1.1
5731 wakaba 1.3 $self->_reset_insertion_mode;
5732 wakaba 1.1
5733     ## reprocess
5734     redo B;
5735     } else {
5736     #
5737     }
5738     } elsif ($token->{type} eq 'end tag') {
5739 wakaba 1.49 if ($token->{tag_name} eq 'tr' and
5740     $self->{insertion_mode} eq 'in row') {
5741     ## have an element in table scope
5742     my $i;
5743     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5744     my $node = $self->{open_elements}->[$_];
5745     if ($node->[1] eq $token->{tag_name}) {
5746     $i = $_;
5747     last INSCOPE;
5748     } elsif ({
5749     table => 1, html => 1,
5750     }->{$node->[1]}) {
5751     last INSCOPE;
5752     }
5753     } # INSCOPE
5754     unless (defined $i) {
5755     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5756     ## Ignore the token
5757     $token = $self->_get_next_token;
5758     redo B;
5759     }
5760    
5761     ## Clear back to table row context
5762     while (not {
5763     tr => 1, html => 1,
5764     }->{$self->{open_elements}->[-1]->[1]}) {
5765     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5766     pop @{$self->{open_elements}};
5767     }
5768    
5769     pop @{$self->{open_elements}}; # tr
5770     $self->{insertion_mode} = 'in table body';
5771     $token = $self->_get_next_token;
5772     redo B;
5773     } elsif ($token->{tag_name} eq 'table') {
5774     if ($self->{insertion_mode} eq 'in row') {
5775     ## As if </tr>
5776     ## have an element in table scope
5777     my $i;
5778     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5779     my $node = $self->{open_elements}->[$_];
5780     if ($node->[1] eq 'tr') {
5781     $i = $_;
5782     last INSCOPE;
5783     } elsif ({
5784     table => 1, html => 1,
5785     }->{$node->[1]}) {
5786     last INSCOPE;
5787     }
5788     } # INSCOPE
5789 wakaba 1.50 unless (defined $i) {
5790     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5791     ## No |tr| element in table scope: report the tag name (not the token type) and ignore the token
5792     $token = $self->_get_next_token;
5793     redo B;
5794     }
5795 wakaba 1.49
5796     ## Clear back to table row context
5797     while (not {
5798     tr => 1, html => 1,
5799     }->{$self->{open_elements}->[-1]->[1]}) {
5800     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5801     pop @{$self->{open_elements}};
5802     }
5803    
5804     pop @{$self->{open_elements}}; # tr
5805     $self->{insertion_mode} = 'in table body';
5806     ## reprocess in the "in table body" insertion mode...
5807     }
5808    
5809 wakaba 1.48 if ($self->{insertion_mode} eq 'in table body') {
5810     ## have an element in table scope
5811     my $i;
5812     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5813     my $node = $self->{open_elements}->[$_];
5814     if ({
5815     tbody => 1, thead => 1, tfoot => 1,
5816     }->{$node->[1]}) {
5817     $i = $_;
5818     last INSCOPE;
5819     } elsif ({
5820     table => 1, html => 1,
5821     }->{$node->[1]}) {
5822     last INSCOPE;
5823     }
5824     } # INSCOPE
5825     unless (defined $i) {
5826     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5827     ## Ignore the token
5828     $token = $self->_get_next_token;
5829     redo B;
5830     }
5831    
5832     ## Clear back to table body context
5833     while (not {
5834     tbody => 1, tfoot => 1, thead => 1, html => 1,
5835     }->{$self->{open_elements}->[-1]->[1]}) {
5836     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5837     pop @{$self->{open_elements}};
5838     }
5839    
5840     ## As if <{current node}>
5841     ## have an element in table scope
5842     ## true by definition
5843    
5844     ## Clear back to table body context
5845     ## nop by definition
5846    
5847     pop @{$self->{open_elements}};
5848     $self->{insertion_mode} = 'in table';
5849     ## reprocess in the "in table" insertion mode...
5850     }
5851    
5852 wakaba 1.1 ## have a table element in table scope
5853     my $i;
5854 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5855     my $node = $self->{open_elements}->[$_];
5856 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
5857     $i = $_;
5858     last INSCOPE;
5859     } elsif ({
5860     table => 1, html => 1,
5861     }->{$node->[1]}) {
5862     last INSCOPE;
5863     }
5864     } # INSCOPE
5865     unless (defined $i) {
5866 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5867 wakaba 1.1 ## Ignore the token
5868     $token = $self->_get_next_token;
5869     redo B;
5870     }
5871 wakaba 1.48
5872 wakaba 1.1 ## generate implied end tags
5873     if ({
5874     dd => 1, dt => 1, li => 1, p => 1,
5875     td => 1, th => 1, tr => 1,
5876 wakaba 1.31 tbody => 1, tfoot=> 1, thead => 1,
5877 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5878 wakaba 1.1 unshift @{$self->{token}}, $token;
5879     $token = {type => 'end tag',
5880 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
5881 wakaba 1.1 redo B;
5882     }
5883 wakaba 1.48
5884 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
5885     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5886 wakaba 1.1 }
5887 wakaba 1.48
5888 wakaba 1.3 splice @{$self->{open_elements}}, $i;
5889 wakaba 1.48
5890     $self->_reset_insertion_mode;
5891    
5892     $token = $self->_get_next_token;
5893     redo B;
5894     } elsif ({
5895     tbody => 1, tfoot => 1, thead => 1,
5896     }->{$token->{tag_name}} and
5897 wakaba 1.49 ($self->{insertion_mode} eq 'in row' or
5898     $self->{insertion_mode} eq 'in table body')) {
5899     if ($self->{insertion_mode} eq 'in row') {
5900     ## have an element in table scope
5901     my $i;
5902     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5903     my $node = $self->{open_elements}->[$_];
5904     if ($node->[1] eq $token->{tag_name}) {
5905     $i = $_;
5906     last INSCOPE;
5907     } elsif ({
5908     table => 1, html => 1,
5909     }->{$node->[1]}) {
5910     last INSCOPE;
5911     }
5912     } # INSCOPE
5913     unless (defined $i) {
5914     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5915     ## Ignore the token
5916     $token = $self->_get_next_token;
5917     redo B;
5918     }
5919    
5920     ## As if </tr>
5921     ## have an element in table scope
5922     $i = undef; ## reset the |$i| declared earlier in this block rather than re-declaring it (avoids masking)
5923     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5924     my $node = $self->{open_elements}->[$_];
5925     if ($node->[1] eq 'tr') {
5926     $i = $_;
5927     last INSCOPE;
5928     } elsif ({
5929     table => 1, html => 1,
5930     }->{$node->[1]}) {
5931     last INSCOPE;
5932     }
5933     } # INSCOPE
5934     unless (defined $i) {
5935     $self->{parse_error}-> (type => 'unmatched end tag:tr');
5936     ## Ignore the token
5937     $token = $self->_get_next_token;
5938     redo B;
5939     }
5940    
5941     ## Clear back to table row context
5942     while (not {
5943     tr => 1, html => 1,
5944     }->{$self->{open_elements}->[-1]->[1]}) {
5945     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5946     pop @{$self->{open_elements}};
5947     }
5948    
5949     pop @{$self->{open_elements}}; # tr
5950     $self->{insertion_mode} = 'in table body';
5951     ## reprocess in the "in table body" insertion mode...
5952     }
5953    
5954 wakaba 1.48 ## have an element in table scope
5955     my $i;
5956     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5957     my $node = $self->{open_elements}->[$_];
5958     if ($node->[1] eq $token->{tag_name}) {
5959     $i = $_;
5960     last INSCOPE;
5961     } elsif ({
5962     table => 1, html => 1,
5963     }->{$node->[1]}) {
5964     last INSCOPE;
5965     }
5966     } # INSCOPE
5967     unless (defined $i) {
5968     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5969     ## Ignore the token
5970     $token = $self->_get_next_token;
5971     redo B;
5972     }
5973 wakaba 1.1
5974 wakaba 1.48 ## Clear back to table body context
5975     while (not {
5976     tbody => 1, tfoot => 1, thead => 1, html => 1,
5977     }->{$self->{open_elements}->[-1]->[1]}) {
5978     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5979     pop @{$self->{open_elements}};
5980     }
5981 wakaba 1.1
5982 wakaba 1.48 pop @{$self->{open_elements}};
5983     $self->{insertion_mode} = 'in table';
5984 wakaba 1.1 $token = $self->_get_next_token;
5985     redo B;
5986     } elsif ({
5987     body => 1, caption => 1, col => 1, colgroup => 1,
5988 wakaba 1.49 html => 1, td => 1, th => 1,
5989     tr => 1, # $self->{insertion_mode} eq 'in row'
5990 wakaba 1.48 tbody => 1, tfoot => 1, thead => 1, # $self->{insertion_mode} eq 'in table'
5991 wakaba 1.1 }->{$token->{tag_name}}) {
5992 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5993 wakaba 1.1 ## Ignore the token
5994     $token = $self->_get_next_token;
5995     redo B;
5996     } else {
5997     #
5998     }
5999     } else {
6000 wakaba 1.48 die "$0: $token->{type}: Unknown token type";
6001 wakaba 1.1 }
6002    
6003 wakaba 1.3 $self->{parse_error}-> (type => 'in table:'.$token->{tag_name});
6004 wakaba 1.1 $in_body->($insert_to_foster);
6005     redo B;
6006 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in column group') {
6007 wakaba 1.1 if ($token->{type} eq 'character') {
6008     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
6009 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
6010 wakaba 1.1 unless (length $token->{data}) {
6011     $token = $self->_get_next_token;
6012     redo B;
6013     }
6014     }
6015    
6016     #
6017     } elsif ($token->{type} eq 'start tag') {
6018     if ($token->{tag_name} eq 'col') {
6019    
6020     {
6021     my $el;
6022    
6023     $el = $self->{document}->create_element_ns
6024     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
6025    
6026     for my $attr_name (keys %{ $token->{attributes}}) {
6027     $el->set_attribute_ns (undef, [undef, $attr_name],
6028     $token->{attributes} ->{$attr_name}->{value});
6029     }
6030    
6031 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
6032     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
6033 wakaba 1.1 }
6034    
6035 wakaba 1.3 pop @{$self->{open_elements}};
6036 wakaba 1.1 $token = $self->_get_next_token;
6037     redo B;
6038     } else {
6039     #
6040     }
6041     } elsif ($token->{type} eq 'end tag') {
6042     if ($token->{tag_name} eq 'colgroup') {
6043 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html') {
6044     $self->{parse_error}-> (type => 'unmatched end tag:colgroup');
6045 wakaba 1.1 ## Ignore the token
6046     $token = $self->_get_next_token;
6047     redo B;
6048     } else {
6049 wakaba 1.3 pop @{$self->{open_elements}}; # colgroup
6050     $self->{insertion_mode} = 'in table';
6051 wakaba 1.1 $token = $self->_get_next_token;
6052     redo B;
6053     }
6054     } elsif ($token->{tag_name} eq 'col') {
6055 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:col');
6056 wakaba 1.1 ## Ignore the token
6057     $token = $self->_get_next_token;
6058     redo B;
6059     } else {
6060     #
6061     }
6062     } else {
6063     #
6064     }
6065    
6066     ## As if </colgroup>
6067 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html') {
6068     $self->{parse_error}-> (type => 'unmatched end tag:colgroup');
6069 wakaba 1.1 ## Ignore the token
6070     $token = $self->_get_next_token;
6071     redo B;
6072     } else {
6073 wakaba 1.3 pop @{$self->{open_elements}}; # colgroup
6074     $self->{insertion_mode} = 'in table';
6075 wakaba 1.1 ## reprocess
6076     redo B;
6077     }
6078 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in select') {
6079 wakaba 1.1 if ($token->{type} eq 'character') {
6080 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
6081 wakaba 1.1 $token = $self->_get_next_token;
6082     redo B;
6083     } elsif ($token->{type} eq 'start tag') {
6084     if ($token->{tag_name} eq 'option') {
6085 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
6086 wakaba 1.1 ## As if </option>
6087 wakaba 1.3 pop @{$self->{open_elements}};
6088 wakaba 1.1 }
6089    
6090    
6091     {
6092     my $el;
6093    
6094     $el = $self->{document}->create_element_ns
6095     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
6096    
6097     for my $attr_name (keys %{ $token->{attributes}}) {
6098     $el->set_attribute_ns (undef, [undef, $attr_name],
6099     $token->{attributes} ->{$attr_name}->{value});
6100     }
6101    
6102 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
6103     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
6104 wakaba 1.1 }
6105    
6106     $token = $self->_get_next_token;
6107     redo B;
6108     } elsif ($token->{tag_name} eq 'optgroup') {
6109 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
6110 wakaba 1.1 ## As if </option>
6111 wakaba 1.3 pop @{$self->{open_elements}};
6112 wakaba 1.1 }
6113    
6114 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
6115 wakaba 1.1 ## As if </optgroup>
6116 wakaba 1.3 pop @{$self->{open_elements}};
6117 wakaba 1.1 }
6118    
6119    
6120     {
6121     my $el;
6122    
6123     $el = $self->{document}->create_element_ns
6124     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
6125    
6126     for my $attr_name (keys %{ $token->{attributes}}) {
6127     $el->set_attribute_ns (undef, [undef, $attr_name],
6128     $token->{attributes} ->{$attr_name}->{value});
6129     }
6130    
6131 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
6132     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
6133 wakaba 1.1 }
6134    
6135     $token = $self->_get_next_token;
6136     redo B;
6137     } elsif ($token->{tag_name} eq 'select') {
6138 wakaba 1.3 $self->{parse_error}-> (type => 'not closed:select');
6139 wakaba 1.1 ## As if </select> instead
6140     ## have an element in table scope
6141     my $i;
6142 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6143     my $node = $self->{open_elements}->[$_];
6144 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
6145     $i = $_;
6146     last INSCOPE;
6147     } elsif ({
6148     table => 1, html => 1,
6149     }->{$node->[1]}) {
6150     last INSCOPE;
6151     }
6152     } # INSCOPE
6153     unless (defined $i) {
6154 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:select');
6155 wakaba 1.1 ## Ignore the token
6156     $token = $self->_get_next_token;
6157     redo B;
6158     }
6159    
6160 wakaba 1.3 splice @{$self->{open_elements}}, $i;
6161 wakaba 1.1
6162 wakaba 1.3 $self->_reset_insertion_mode;
6163 wakaba 1.1
6164     $token = $self->_get_next_token;
6165     redo B;
6166     } else {
6167     #
6168     }
6169     } elsif ($token->{type} eq 'end tag') {
6170     if ($token->{tag_name} eq 'optgroup') {
6171 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option' and
6172     $self->{open_elements}->[-2]->[1] eq 'optgroup') {
6173 wakaba 1.1 ## As if </option>
6174 wakaba 1.3 splice @{$self->{open_elements}}, -2;
6175     } elsif ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
6176     pop @{$self->{open_elements}};
6177 wakaba 1.1 } else {
6178 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
6179 wakaba 1.1 ## Ignore the token
6180     }
6181     $token = $self->_get_next_token;
6182     redo B;
6183     } elsif ($token->{tag_name} eq 'option') {
6184 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
6185     pop @{$self->{open_elements}};
6186 wakaba 1.1 } else {
6187 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
6188 wakaba 1.1 ## Ignore the token
6189     }
6190     $token = $self->_get_next_token;
6191     redo B;
6192     } elsif ($token->{tag_name} eq 'select') {
6193     ## have an element in table scope
6194     my $i;
6195 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6196     my $node = $self->{open_elements}->[$_];
6197 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
6198     $i = $_;
6199     last INSCOPE;
6200     } elsif ({
6201     table => 1, html => 1,
6202     }->{$node->[1]}) {
6203     last INSCOPE;
6204     }
6205     } # INSCOPE
6206     unless (defined $i) {
6207 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
6208 wakaba 1.1 ## Ignore the token
6209     $token = $self->_get_next_token;
6210     redo B;
6211     }
6212    
6213 wakaba 1.3 splice @{$self->{open_elements}}, $i;
6214 wakaba 1.1
6215 wakaba 1.3 $self->_reset_insertion_mode;
6216 wakaba 1.1
6217     $token = $self->_get_next_token;
6218     redo B;
6219     } elsif ({
6220     caption => 1, table => 1, tbody => 1,
6221     tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
6222     }->{$token->{tag_name}}) {
6223 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
6224 wakaba 1.1
6225     ## have an element in table scope
6226     my $i;
6227 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6228     my $node = $self->{open_elements}->[$_];
6229 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
6230     $i = $_;
6231     last INSCOPE;
6232     } elsif ({
6233     table => 1, html => 1,
6234     }->{$node->[1]}) {
6235     last INSCOPE;
6236     }
6237     } # INSCOPE
6238     unless (defined $i) {
6239     ## Ignore the token
6240     $token = $self->_get_next_token;
6241     redo B;
6242     }
6243    
6244     ## As if </select>
6245     ## have an element in table scope
6246     undef $i;
6247 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6248     my $node = $self->{open_elements}->[$_];
6249 wakaba 1.1 if ($node->[1] eq 'select') {
6250     $i = $_;
6251     last INSCOPE;
6252     } elsif ({
6253     table => 1, html => 1,
6254     }->{$node->[1]}) {
6255     last INSCOPE;
6256     }
6257     } # INSCOPE
6258     unless (defined $i) {
6259 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:select');
6260 wakaba 1.1 ## Ignore the </select> token
6261     $token = $self->_get_next_token; ## TODO: ok?
6262     redo B;
6263     }
6264    
6265 wakaba 1.3 splice @{$self->{open_elements}}, $i;
6266 wakaba 1.1
6267 wakaba 1.3 $self->_reset_insertion_mode;
6268 wakaba 1.1
6269     ## reprocess
6270     redo B;
6271     } else {
6272     #
6273     }
6274     } else {
6275     #
6276     }
6277    
6278 wakaba 1.3 $self->{parse_error}-> (type => 'in select:'.$token->{tag_name});
6279 wakaba 1.1 ## Ignore the token
6280     $token = $self->_get_next_token;
6281     redo B;
6282 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after body') {
6283 wakaba 1.1 if ($token->{type} eq 'character') {
6284     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
6285 wakaba 1.35 my $data = $1;
6286 wakaba 1.1 ## As if in body
6287     $reconstruct_active_formatting_elements->($insert_to_current);
6288    
6289 wakaba 1.35 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
6290 wakaba 1.1
6291     unless (length $token->{data}) {
6292     $token = $self->_get_next_token;
6293     redo B;
6294     }
6295     }
6296    
6297     #
6298 wakaba 1.36 $self->{parse_error}-> (type => 'after body:#character');
6299 wakaba 1.3 } elsif ($token->{type} eq 'start tag') {
6300     $self->{parse_error}-> (type => 'after body:'.$token->{tag_name});
6301     #
6302 wakaba 1.1 } elsif ($token->{type} eq 'end tag') {
6303     if ($token->{tag_name} eq 'html') {
6304 wakaba 1.3 if (defined $self->{inner_html_node}) {
6305     $self->{parse_error}-> (type => 'unmatched end tag:html');
6306     ## Ignore the token
6307     $token = $self->_get_next_token;
6308     redo B;
6309     } else {
6310 wakaba 1.35 $previous_insertion_mode = $self->{insertion_mode};
6311     $self->{insertion_mode} = 'trailing end';
6312 wakaba 1.3 $token = $self->_get_next_token;
6313     redo B;
6314     }
6315 wakaba 1.1 } else {
6316 wakaba 1.3 $self->{parse_error}-> (type => 'after body:/'.$token->{tag_name});
6317 wakaba 1.1 }
6318     } else {
6319 wakaba 1.36 die "$0: $token->{type}: Unknown token type";
6320 wakaba 1.1 }
6321    
6322 wakaba 1.3 $self->{insertion_mode} = 'in body';
6323 wakaba 1.1 ## reprocess
6324     redo B;
6325 wakaba 1.36 } elsif ($self->{insertion_mode} eq 'in frameset') {
6326     if ($token->{type} eq 'character') {
6327     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
6328     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
6329 wakaba 1.1
6330 wakaba 1.36 unless (length $token->{data}) {
6331 wakaba 1.1 $token = $self->_get_next_token;
6332     redo B;
6333 wakaba 1.36 }
6334     }
6335    
6336     $self->{parse_error}-> (type => 'in frameset:#character');
6337     ## Ignore the token
6338     $token = $self->_get_next_token;
6339     redo B;
6340     } elsif ($token->{type} eq 'start tag') {
6341     if ($token->{tag_name} eq 'frameset') {
6342    
6343 wakaba 1.1 {
6344     my $el;
6345    
6346     $el = $self->{document}->create_element_ns
6347     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
6348    
6349     for my $attr_name (keys %{ $token->{attributes}}) {
6350     $el->set_attribute_ns (undef, [undef, $attr_name],
6351     $token->{attributes} ->{$attr_name}->{value});
6352     }
6353    
6354 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
6355     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
6356 wakaba 1.1 }
6357    
6358 wakaba 1.36 $token = $self->_get_next_token;
6359     redo B;
6360     } elsif ($token->{tag_name} eq 'frame') {
6361    
6362 wakaba 1.1 {
6363     my $el;
6364    
6365     $el = $self->{document}->create_element_ns
6366     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
6367    
6368     for my $attr_name (keys %{ $token->{attributes}}) {
6369     $el->set_attribute_ns (undef, [undef, $attr_name],
6370     $token->{attributes} ->{$attr_name}->{value});
6371     }
6372    
6373 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
6374     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
6375 wakaba 1.1 }
6376    
6377 wakaba 1.36 pop @{$self->{open_elements}};
6378     $token = $self->_get_next_token;
6379     redo B;
6380     } elsif ($token->{tag_name} eq 'noframes') {
6381 wakaba 1.45 ## NOTE: As if in body.
6382     $parse_rcdata->(CDATA_CONTENT_MODEL, $insert_to_current);
6383 wakaba 1.36 redo B;
6384     } else {
6385     $self->{parse_error}-> (type => 'in frameset:'.$token->{tag_name});
6386     ## Ignore the token
6387     $token = $self->_get_next_token;
6388     redo B;
6389     }
6390     } elsif ($token->{type} eq 'end tag') {
6391     if ($token->{tag_name} eq 'frameset') {
6392     if ($self->{open_elements}->[-1]->[1] eq 'html' and
6393     @{$self->{open_elements}} == 1) {
6394     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
6395     ## Ignore the token
6396     $token = $self->_get_next_token;
6397 wakaba 1.1 } else {
6398 wakaba 1.36 pop @{$self->{open_elements}};
6399     $token = $self->_get_next_token;
6400 wakaba 1.1 }
6401 wakaba 1.36
6402     if (not defined $self->{inner_html_node} and
6403     $self->{open_elements}->[-1]->[1] ne 'frameset') {
6404     $self->{insertion_mode} = 'after frameset';
6405 wakaba 1.3 }
6406 wakaba 1.36 redo B;
6407     } else {
6408     $self->{parse_error}-> (type => 'in frameset:/'.$token->{tag_name});
6409 wakaba 1.1 ## Ignore the token
6410     $token = $self->_get_next_token;
6411     redo B;
6412 wakaba 1.36 }
6413     } else {
6414     die "$0: $token->{type}: Unknown token type";
6415     }
6416     } elsif ($self->{insertion_mode} eq 'after frameset') {
6417     if ($token->{type} eq 'character') {
6418 wakaba 1.1 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
6419 wakaba 1.35 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
6420 wakaba 1.1
6421     unless (length $token->{data}) {
6422     $token = $self->_get_next_token;
6423     redo B;
6424     }
6425     }
6426    
6427 wakaba 1.35 if ($token->{data} =~ s/^[^\x09\x0A\x0B\x0C\x20]+//) {
6428     $self->{parse_error}-> (type => 'after frameset:#character');
6429    
6430     ## Ignore the token.
6431     if (length $token->{data}) {
6432     ## reprocess the rest of characters
6433     } else {
6434     $token = $self->_get_next_token;
6435     }
6436     redo B;
6437     }
6438 wakaba 1.36
6439     die qq[$0: Character "$token->{data}"];
6440     } elsif ($token->{type} eq 'start tag') {
6441     if ($token->{tag_name} eq 'noframes') {
6442 wakaba 1.45 ## NOTE: As if in body.
6443     $parse_rcdata->(CDATA_CONTENT_MODEL, $insert_to_current);
6444 wakaba 1.36 redo B;
6445     } else {
6446     $self->{parse_error}-> (type => 'after frameset:'.$token->{tag_name});
6447 wakaba 1.1 ## Ignore the token
6448     $token = $self->_get_next_token;
6449     redo B;
6450 wakaba 1.36 }
6451     } elsif ($token->{type} eq 'end tag') {
6452     if ($token->{tag_name} eq 'html') {
6453     $previous_insertion_mode = $self->{insertion_mode};
6454     $self->{insertion_mode} = 'trailing end';
6455     $token = $self->_get_next_token;
6456     redo B;
6457 wakaba 1.1 } else {
6458 wakaba 1.36 $self->{parse_error}-> (type => 'after frameset:/'.$token->{tag_name});
6459     ## Ignore the token
6460     $token = $self->_get_next_token;
6461     redo B;
6462 wakaba 1.1 }
6463 wakaba 1.36 } else {
6464     die "$0: $token->{type}: Unknown token type";
6465 wakaba 1.1 }
6466 wakaba 1.36
6467     ## ISSUE: An issue in spec here
6468 wakaba 1.35 } elsif ($self->{insertion_mode} eq 'trailing end') {
6469 wakaba 1.1 ## states in the main stage is preserved yet # MUST
6470    
6471 wakaba 1.36 if ($token->{type} eq 'character') {
6472 wakaba 1.1 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
6473     my $data = $1;
6474     ## As if in the main phase.
6475     ## NOTE: The insertion mode in the main phase
6476     ## just before the phase has been changed to the trailing
6477     ## end phase is either "after body" or "after frameset".
6478 wakaba 1.35 $reconstruct_active_formatting_elements->($insert_to_current);
6479 wakaba 1.1
6480 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($data);
6481 wakaba 1.1
6482     unless (length $token->{data}) {
6483     $token = $self->_get_next_token;
6484     redo B;
6485     }
6486     }
6487    
6488 wakaba 1.3 $self->{parse_error}-> (type => 'after html:#character');
6489 wakaba 1.35 $self->{insertion_mode} = $previous_insertion_mode;
6490 wakaba 1.1 ## reprocess
6491     redo B;
6492 wakaba 1.36 } elsif ($token->{type} eq 'start tag') {
6493     $self->{parse_error}-> (type => 'after html:'.$token->{tag_name});
6494     $self->{insertion_mode} = $previous_insertion_mode;
6495     ## reprocess
6496     redo B;
6497     } elsif ($token->{type} eq 'end tag') {
6498     $self->{parse_error}-> (type => 'after html:/'.$token->{tag_name});
6499 wakaba 1.35 $self->{insertion_mode} = $previous_insertion_mode;
6500 wakaba 1.1 ## reprocess
6501     redo B;
6502     } else {
6503     die "$0: $token->{type}: Unknown token";
6504     }
6505 wakaba 1.36 } else {
6506     die "$0: $self->{insertion_mode}: Unknown insertion mode";
6507 wakaba 1.1 }
6508     } # B
6509    
6510     ## Stop parsing # MUST
6511    
6512     ## TODO: script stuffs
6513 wakaba 1.3 } # _tree_construct_main
6514    
## Replace the content of a node with the result of parsing an HTML
## string (the setter half of HTML5 |innerHTML|).
##
## Invoked as a class method:
##
##   $class   - The parser class; |new| and |parse_string| are invoked on it.
##   $node    - The target node: a Document (node type 9) or an Element
##              (node type 1).
##   $_[0]    - The source string.  It is accessed through a reference to
##              the caller's argument, so it is not copied.
##   $onerror - Optional code reference invoked for each parse error with
##              |type|, |line| and |column| named arguments.  When it is
##              false, errors are reported via |warn|.
##
## Dies if |$node| is neither a Document nor an Element.
sub set_inner_html ($$$) {
  my ($class, $node) = (shift, shift);
  my $source_ref = \$_[0];
  my $error_handler = $_[1];

  my $nt = $node->node_type;

  if ($nt == 9) {
    ## Document node # MUST

    ## Step 1 # MUST
    ## TODO: If the document has an active parser, ...
    ## ISSUE: There is an issue in the spec.

    ## Step 2 # MUST
    ## Work on a snapshot of the child list, since the node's own list
    ## might change while children are being removed.
    my @old_children = @{$node->child_nodes};
    for my $old_child (@old_children) {
      $node->remove_child($old_child);
    }

    ## Step 3, 4, 5 # MUST
    return $class->parse_string($$source_ref => $node, $error_handler);
  }

  die "$0: |set_inner_html| is not defined for node of type $nt"
      unless $nt == 1;

  ## Element node
  ## TODO: If non-html element

  ## NOTE: Most of this code is copied from |parse_string|

  ## Step 1 # MUST
  ## The fragment is parsed into a scratch document; the resulting nodes
  ## are moved into |$node| afterwards.
  my $this_doc = $node->owner_document;
  my $doc = $this_doc->implementation->create_document;
  $doc->manakai_is_html(1);
  my $parser = $class->new;
  $parser->{document} = $doc;

  ## Step 9 # MUST
  ## Input stream state shared by the character reader and the error
  ## reporter below.
  my $pos = 0;
  my $line = 1;
  my $column = 0;
  $parser->{set_next_input_character} = sub {
    my $self = shift;

    ## Shift the new "previous" character into the history window.
    pop @{$self->{prev_input_character}};
    unshift @{$self->{prev_input_character}}, $self->{next_input_character};

    ## End of input is represented by -1.
    if ($pos >= length $$source_ref) {
      $self->{next_input_character} = -1;
      return;
    }

    $self->{next_input_character} = ord substr $$source_ref, $pos++, 1;
    $column++;

    if ($self->{next_input_character} == 0x000A) { # LF
      $line++;
      $column = 0;
    } elsif ($self->{next_input_character} == 0x000D) { # CR
      ## CR LF and a lone CR are both reported as a single LF.
      $pos++ if substr($$source_ref, $pos, 1) eq "\x0A";
      $self->{next_input_character} = 0x000A; # LF # MUST
      $line++;
      $column = 0;
    } elsif ($self->{next_input_character} > 0x10FFFF) {
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    } elsif ($self->{next_input_character} == 0x0000) { # NULL
      $self->{parse_error}->(type => 'NULL');
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    }
  };
  $parser->{prev_input_character} = [-1, -1, -1];
  $parser->{next_input_character} = -1;

  ## Fall back to |warn| when the caller supplied no error handler.
  my $ponerror = $error_handler || sub {
    my (%opt) = @_;
    warn "Parse error ($opt{type}) at line $opt{line} column $opt{column}\n";
  };
  ## Every error is decorated with the current input position.
  $parser->{parse_error} = sub {
    $ponerror->(@_, line => $line, column => $column);
  };

  $parser->_initialize_tokenizer;
  $parser->_initialize_tree_constructor;

  ## Step 2
  ## The initial content model depends on the context element.
  my $node_ln = $node->local_name;
  my %content_model_of = (
    title     => RCDATA_CONTENT_MODEL,
    textarea  => RCDATA_CONTENT_MODEL,
    style     => CDATA_CONTENT_MODEL,
    script    => CDATA_CONTENT_MODEL,
    xmp       => CDATA_CONTENT_MODEL,
    iframe    => CDATA_CONTENT_MODEL,
    noembed   => CDATA_CONTENT_MODEL,
    noframes  => CDATA_CONTENT_MODEL,
    noscript  => CDATA_CONTENT_MODEL,
    plaintext => PLAINTEXT_CONTENT_MODEL,
  );
  my $content_model = $content_model_of{$node_ln};
  $content_model = PCDATA_CONTENT_MODEL unless defined $content_model;
  $parser->{content_model} = $content_model;
  ## ISSUE: What is "the name of the element"? local name?

  $parser->{inner_html_node} = [$node, $node_ln];

  ## Step 4
  my $root = $doc->create_element_ns
      ('http://www.w3.org/1999/xhtml', [undef, 'html']);

  ## Step 5 # MUST
  $doc->append_child($root);

  ## Step 6 # MUST
  push @{$parser->{open_elements}}, [$root, 'html'];

  $parser->{head_element} = undef;

  ## Step 7 # MUST
  $parser->_reset_insertion_mode;

  ## Step 8 # MUST
  ## The nearest XHTML |form| among the context node and its ancestors
  ## becomes the form element pointer.
  AN: for (my $anode = $node; defined $anode; $anode = $anode->parent_node) {
    next AN unless $anode->node_type == 1;
    my $nsuri = $anode->namespace_uri;
    if (defined $nsuri and
        $nsuri eq 'http://www.w3.org/1999/xhtml' and
        $anode->local_name eq 'form') { ## TODO: case?
      $parser->{form_element} = $anode;
      last AN;
    }
  } # AN

  ## Step 3 # MUST
  ## Step 10 # MUST
  ## |$token| is the tree construction stage's shared lexical.
  $token = $parser->_get_next_token;
  $parser->_tree_construction_main;

  ## Step 11 # MUST
  my @old_children = @{$node->child_nodes};
  for my $old_child (@old_children) {
    $node->remove_child($old_child);
  }
  ## ISSUE: mutation events? read-only?

  ## Step 12 # MUST
  ## Move the parsed nodes from the scratch document into |$node|.
  my @new_children = @{$root->child_nodes};
  for my $new_child (@new_children) {
    $this_doc->adopt_node($new_child);
    $node->append_child($new_child);
  }
  ## ISSUE: mutation events?

  $parser->_terminate_tree_constructor;
} # set_inner_html
6670    
6671     } # tree construction stage
6672 wakaba 1.1
## Serialize the children of a node as an HTML string (the getter half
## of HTML5 |innerHTML|).
##
## Invoked as a method; the invocant itself is not used:
##
##   $node     - The node whose children are serialized.
##   $on_error - Optional code reference invoked with each child node
##               whose node type is not supported by the serializer.
##
## Returns a reference to the serialized string.
sub get_inner_html ($$$) {
  my (undef, $node, $on_error) = @_;

  ## Step 1
  my $s = '';

  ## Text is written unescaped if |$node| or one of its ancestors is an
  ## XHTML element whose content is CDATA.
  my $in_cdata;
  my $parent = $node;
  while (defined $parent) {
    if ($parent->node_type == 1) {
      ## The namespace URI can be undefined (null namespace), so it is
      ## tested before the string comparison.
      my $nsuri = $parent->namespace_uri;
      if (defined $nsuri and
          $nsuri eq 'http://www.w3.org/1999/xhtml' and
          {
            style => 1, script => 1, xmp => 1, iframe => 1,
            noembed => 1, noframes => 1, noscript => 1,
          }->{$parent->local_name}) { ## TODO: case thingy
        $in_cdata = 1;
      }
    }
    $parent = $parent->parent_node;
  }

  ## Step 2
  ## |@node| is a work list processed from the front.  It holds node
  ## objects and plain strings: an end tag to output, or the marker
  ## |cdata-out| that ends CDATA mode.
  my @node = @{$node->child_nodes};
  C: while (@node) {
    my $child = shift @node;
    unless (ref $child) {
      if ($child eq 'cdata-out') {
        $in_cdata = 0;
      } else {
        $s .= $child; # end tag
      }
      next C;
    }

    my $nt = $child->node_type;
    if ($nt == 1) { # Element
      my $tag_name = $child->tag_name; ## TODO: manakai_tag_name
      $s .= '<' . $tag_name;
      ## NOTE: Non-HTML case:
      ## <http://permalink.gmane.org/gmane.org.w3c.whatwg.discuss/11191>

      my @attrs = @{$child->attributes}; # sort order MUST be stable
      for my $attr (@attrs) { # order is implementation dependent
        my $attr_name = $attr->name; ## TODO: manakai_name
        $s .= ' ' . $attr_name . '="';
        my $attr_value = $attr->value;
        ## escape
        $attr_value =~ s/&/&amp;/g;
        $attr_value =~ s/</&lt;/g;
        $attr_value =~ s/>/&gt;/g;
        $attr_value =~ s/"/&quot;/g;
        $s .= $attr_value . '"';
      }
      $s .= '>';

      ## These elements get neither content nor an end tag.
      next C if {
        area => 1, base => 1, basefont => 1, bgsound => 1,
        br => 1, col => 1, embed => 1, frame => 1, hr => 1,
        img => 1, input => 1, link => 1, meta => 1, param => 1,
        spacer => 1, wbr => 1,
      }->{$tag_name};

      $s .= "\x0A" if $tag_name eq 'pre' or $tag_name eq 'textarea';

      ## Enter CDATA mode for the content of this element; the marker
      ## queued here leaves it again after the end tag.
      if (not $in_cdata and {
        style => 1, script => 1, xmp => 1, iframe => 1,
        noembed => 1, noframes => 1, noscript => 1,
        plaintext => 1,
      }->{$tag_name}) {
        unshift @node, 'cdata-out';
        $in_cdata = 1;
      }

      ## Children and the end tag are processed before anything that is
      ## already queued.
      unshift @node, @{$child->child_nodes}, '</' . $tag_name . '>';
    } elsif ($nt == 3 or $nt == 4) { # Text or CDATASection
      if ($in_cdata) {
        $s .= $child->data;
      } else {
        my $value = $child->data;
        $value =~ s/&/&amp;/g;
        $value =~ s/</&lt;/g;
        $value =~ s/>/&gt;/g;
        $value =~ s/"/&quot;/g;
        $s .= $value;
      }
    } elsif ($nt == 8) { # Comment
      $s .= '<!--' . $child->data . '-->';
    } elsif ($nt == 10) { # DocumentType
      $s .= '<!DOCTYPE ' . $child->name . '>';
    } elsif ($nt == 5) { # entrefs
      ## The children of an entity reference replace it in place, so
      ## they go to the FRONT of the work list.  Appending them to the
      ## end would output them after all following siblings and after
      ## the end tags of the enclosing elements.
      unshift @node, @{$child->child_nodes};
    } else {
      $on_error->($child) if defined $on_error;
    }
    ## ISSUE: This code does not support PIs.
  } # C

  ## Step 3
  return \$s;
} # get_inner_html
6772    
6773     1;
6774 wakaba 1.52 # $Date: 2007/07/21 10:39:45 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24