/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.43 - (hide annotations) (download)
Sat Jul 21 06:59:16 2007 UTC (17 years, 3 months ago) by wakaba
Branch: MAIN
Changes since 1.42: +177 -185 lines
++ whatpm/t/ChangeLog	21 Jul 2007 06:59:10 -0000
	* tree-test-1.dat: More tests for start tags "in caption"
	insertion mode and for tags "in cell" insertion
	mode are added.

2007-07-21  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	21 Jul 2007 06:57:52 -0000
	* HTML.pm.src: Codes for "in body" and "in caption"
	insertion modes are merged.

2007-07-21  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.2 package Whatpm::HTML;
2 wakaba 1.1 use strict;
3 wakaba 1.43 our $VERSION=do{my @r=(q$Revision: 1.41 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.1
5 wakaba 1.18 ## ISSUE:
6     ## var doc = implementation.createDocument (null, null, null);
7     ## doc.write ('');
8     ## alert (doc.compatMode);
9 wakaba 1.1
10 wakaba 1.31 ## ISSUE: HTML5 revision 967 says that the encoding layer MUST NOT
11     ## strip BOM and the HTML layer MUST ignore it. Whether we can do it
12     ## is not yet clear.
13     ## "{U+FEFF}..." in UTF-16BE/UTF-16LE is three or four characters?
14     ## "{U+FEFF}..." in GB18030?
15    
## Names of start tags that may carry a trailing "/" (as in |<br/>|).
## The tokenizer consults this set before reporting a |nestc| parse error.
my $permitted_slash_tag_name = {
  map { $_ => 1 } qw(
    base link meta hr br img embed param area col input
  )
};
29    
## Maps each code point in the range 0x80..0x9F to a replacement code
## point; entries with no dedicated replacement map to U+FFFD.
## NOTE(review): the values appear to follow Windows-1252, and the name
## suggests use for numeric character references -- the consumer of this
## table is outside this part of the file, so confirm there.
my $c1_entity_char = do {
  ## Replacement code points, listed in order starting at 0x80.
  my @replacement = (
    0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, # 0x80..0x87
    0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD, # 0x88..0x8F
    0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, # 0x90..0x97
    0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178, # 0x98..0x9F
  );
  +{ map { (0x80 + $_, $replacement[$_]) } 0 .. $#replacement };
}; # $c1_entity_char
64 wakaba 1.1
## Element names belonging to the "special" category.  Lookup is by
## lowercase element name; every listed name maps to 1.
my $special_category = {
  map { $_ => 1 } qw(
    address area base basefont bgsound
    blockquote body br center col colgroup
    dd dir div dl dt embed fieldset
    form frame frameset h1 h2 h3
    h4 h5 h6 head hr iframe image
    img input isindex li link listing
    menu meta noembed noframes noscript
    ol optgroup option p param plaintext
    pre script select spacer style tbody
    textarea tfoot thead title tr ul wbr
  )
};
## Element names belonging to the "scoping" category; every listed name
## maps to 1.
my $scoping_category = {
  map { $_ => 1 } qw(button caption html marquee object table td th)
};
## Element names belonging to the "formatting" category; every listed
## name maps to 1.
## The list previously contained the misspelled key |strile|, so the
## |strike| element was never recognized as a formatting element.
my $formatting_category = {
  a => 1, b => 1, big => 1, em => 1, font => 1, i => 1, nobr => 1,
  s => 1, small => 1, strike => 1, strong => 1, tt => 1, u => 1,
};
# $phrasing_category: all other elements
86    
## Parses a string as an HTML document.
##
## A fresh parser object is created through |new| on the invocant, so no
## state of the invocant itself is used.  Arguments after the invocant:
##
##   1. the string to parse (accessed through a reference, not copied);
##   2. the document object, stored in |$self->{document}|;
##   3. (optional) an error handler.  It is called with the key/value
##      pairs given by the reporter plus |line| and |column|.  When
##      omitted, errors are reported with |warn|.
##
## Returns |$self->{document}|.
sub parse_string ($$$;$) {
  my $self = shift->new;
  my $source = \$_[0];
  $self->{document} = $_[1];

  ## NOTE: |set_inner_html| copies most of this method's code

  ## Read position in the input and the position used in error reports.
  my ($offset, $line, $column) = (0, 1, 0);

  $self->{set_next_input_character} = sub {
    my $self = shift;

    ## Slide the look-behind window: drop the oldest entry and put the
    ## character that was current until now at the front.
    pop @{$self->{prev_input_character}};
    unshift @{$self->{prev_input_character}}, $self->{next_input_character};

    if ($offset >= length $$source) {
      $self->{next_input_character} = -1; # end of input
      return;
    }

    my $code = $self->{next_input_character}
        = ord substr $$source, $offset++, 1;
    $column++;

    if ($code == 0x000A) { # LF
      $line++;
      $column = 0;
    } elsif ($code == 0x000D) { # CR
      ## A CR, or a CR immediately followed by LF, is reported as one LF.
      $offset++ if substr ($$source, $offset, 1) eq "\x0A";
      $self->{next_input_character} = 0x000A; # LF # MUST
      $line++;
      $column = 0;
    } elsif ($code > 0x10FFFF) {
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    } elsif ($code == 0x0000) { # NULL
      $self->{parse_error}-> (type => 'NULL');
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    }
  };
  $self->{prev_input_character} = [-1, -1, -1];
  $self->{next_input_character} = -1;

  ## Caller-supplied handler, or a default one that only warns.
  my $onerror = $_[2] || sub {
    my (%opt) = @_;
    warn "Parse error ($opt{type}) at line $opt{line} column $opt{column}\n";
  };
  ## Every reported error is tagged with the current input position.
  $self->{parse_error} = sub {
    $onerror->(@_, line => $line, column => $column);
  };

  $self->_initialize_tokenizer;
  $self->_initialize_tree_constructor;
  $self->_construct_tree;
  $self->_terminate_tree_constructor;

  return $self->{document};
} # parse_string
140    
## Creates a parser object with inert default callbacks.
##
##   |set_next_input_character| -- sets the next input character of the
##       parser passed as its argument to -1 (no input available);
##   |parse_error| -- ignores the error.
##
## Returns the new object, blessed into the invocant class.
sub new ($) {
  my $class = shift;
  my $self = bless {}, $class;
  ## The callback receives the parser as its argument (the call sites in
  ## this file invoke it as |->($self)|) instead of closing over |$self|.
  ## A closure stored inside the object that also captured the object
  ## would form a reference cycle, and the parser would never be freed.
  $self->{set_next_input_character} = sub {
    my $self = shift;
    $self->{next_input_character} = -1;
  };
  $self->{parse_error} = sub {
    #
  };
  return $self;
} # new
152    
## Bit flags describing which kinds of markup are recognized in data.
sub CM_ENTITY () { 1 << 0 } # & markup in data
sub CM_LIMITED_MARKUP () { 1 << 1 } # < markup in data (limited)
sub CM_FULL_MARKUP () { 1 << 2 } # < markup in data (any)

## Content models, expressed as combinations of the flags above.
sub PLAINTEXT_CONTENT_MODEL () { 0 }
sub CDATA_CONTENT_MODEL () { CM_LIMITED_MARKUP }
sub RCDATA_CONTENT_MODEL () { CM_LIMITED_MARKUP | CM_ENTITY }
sub PCDATA_CONTENT_MODEL () { CM_FULL_MARKUP | CM_ENTITY }
161    
162 wakaba 1.1 ## Implementations MUST act as if they used the state machine described in the spec
163    
## Resets the tokenizer state of the parser and reads the first input
## character through the |set_next_input_character| callback.
sub _initialize_tokenizer ($) {
  my $self = shift;
  $self->{state} = 'data'; # MUST start in the data state
  $self->{content_model} = PCDATA_CONTENT_MODEL; # ... with the PCDATA content model
  undef $self->{current_token}; # start tag, end tag, comment, or DOCTYPE
  undef $self->{current_attribute};
  undef $self->{last_emitted_start_tag_name};
  undef $self->{last_attribute_value_state};
  $self->{char} = []; # characters pushed back for reconsumption
  # $self->{next_input_character}

  ## |$self->{char}| has just been emptied, so there is never a pushed-back
  ## character to take here; the first character always comes from the
  ## input callback.
  $self->{set_next_input_character}->($self);

  $self->{token} = []; # tokens queued for |_get_next_token|
  # $self->{escape}
} # _initialize_tokenizer
184    
185     ## A token has:
186     ## ->{type} eq 'DOCTYPE', 'start tag', 'end tag', 'comment',
187     ## 'character', or 'end-of-file'
188 wakaba 1.18 ## ->{name} (DOCTYPE), ->{tag_name} (start tag, end tag)
189     ## ->{public_identifier} (DOCTYPE)
190     ## ->{system_identifier} (DOCTYPE)
191     ## ->{correct} == 1 or 0 (DOCTYPE)
192 wakaba 1.1 ## ->{attributes} isa HASH (start tag, end tag)
193     ## ->{data} (comment, character)
194    
195     ## Emitted token MUST immediately be handled by the tree construction state.
196    
197     ## Before each step, UA MAY check to see if either one of the scripts in
198     ## "list of scripts that will execute as soon as possible" or the first
199     ## script in the "list of scripts that will execute asynchronously",
200     ## has completed loading. If one has, then it MUST be executed
201     ## and removed from the list.
202    
203     sub _get_next_token ($) {
204     my $self = shift;
205     if (@{$self->{token}}) {
206     return shift @{$self->{token}};
207     }
208    
209     A: {
210     if ($self->{state} eq 'data') {
211     if ($self->{next_input_character} == 0x0026) { # &
212 wakaba 1.41 if ($self->{content_model} & CM_ENTITY) { # PCDATA | RCDATA
213 wakaba 1.1 $self->{state} = 'entity data';
214    
215     if (@{$self->{char}}) {
216     $self->{next_input_character} = shift @{$self->{char}};
217     } else {
218     $self->{set_next_input_character}->($self);
219     }
220    
221     redo A;
222     } else {
223     #
224     }
225 wakaba 1.13 } elsif ($self->{next_input_character} == 0x002D) { # -
226 wakaba 1.41 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
227 wakaba 1.13 unless ($self->{escape}) {
228     if ($self->{prev_input_character}->[0] == 0x002D and # -
229     $self->{prev_input_character}->[1] == 0x0021 and # !
230     $self->{prev_input_character}->[2] == 0x003C) { # <
231     $self->{escape} = 1;
232     }
233     }
234     }
235    
236     #
237 wakaba 1.1 } elsif ($self->{next_input_character} == 0x003C) { # <
238 wakaba 1.41 if ($self->{content_model} & CM_FULL_MARKUP or # PCDATA
239     (($self->{content_model} & CM_LIMITED_MARKUP) and # CDATA | RCDATA
240 wakaba 1.13 not $self->{escape})) {
241 wakaba 1.1 $self->{state} = 'tag open';
242    
243     if (@{$self->{char}}) {
244     $self->{next_input_character} = shift @{$self->{char}};
245     } else {
246     $self->{set_next_input_character}->($self);
247     }
248    
249     redo A;
250     } else {
251     #
252     }
253 wakaba 1.13 } elsif ($self->{next_input_character} == 0x003E) { # >
254     if ($self->{escape} and
255 wakaba 1.41 ($self->{content_model} & CM_LIMITED_MARKUP)) { # RCDATA | CDATA
256 wakaba 1.13 if ($self->{prev_input_character}->[0] == 0x002D and # -
257     $self->{prev_input_character}->[1] == 0x002D) { # -
258     delete $self->{escape};
259     }
260     }
261    
262     #
263 wakaba 1.1 } elsif ($self->{next_input_character} == -1) {
264     return ({type => 'end-of-file'});
265     last A; ## TODO: ok?
266     }
267     # Anything else
268     my $token = {type => 'character',
269     data => chr $self->{next_input_character}};
270     ## Stay in the data state
271    
272     if (@{$self->{char}}) {
273     $self->{next_input_character} = shift @{$self->{char}};
274     } else {
275     $self->{set_next_input_character}->($self);
276     }
277    
278    
279     return ($token);
280    
281     redo A;
282     } elsif ($self->{state} eq 'entity data') {
283     ## (cannot happen in CDATA state)
284    
285 wakaba 1.26 my $token = $self->_tokenize_attempt_to_consume_an_entity (0);
286 wakaba 1.1
287     $self->{state} = 'data';
288     # next-input-character is already done
289    
290     unless (defined $token) {
291     return ({type => 'character', data => '&'});
292     } else {
293     return ($token);
294     }
295    
296     redo A;
297     } elsif ($self->{state} eq 'tag open') {
298 wakaba 1.41 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
299 wakaba 1.1 if ($self->{next_input_character} == 0x002F) { # /
300    
301     if (@{$self->{char}}) {
302     $self->{next_input_character} = shift @{$self->{char}};
303     } else {
304     $self->{set_next_input_character}->($self);
305     }
306    
307     $self->{state} = 'close tag open';
308     redo A;
309     } else {
310     ## reconsume
311     $self->{state} = 'data';
312    
313     return ({type => 'character', data => '<'});
314    
315     redo A;
316     }
317 wakaba 1.41 } elsif ($self->{content_model} & CM_FULL_MARKUP) { # PCDATA
318 wakaba 1.1 if ($self->{next_input_character} == 0x0021) { # !
319     $self->{state} = 'markup declaration open';
320    
321     if (@{$self->{char}}) {
322     $self->{next_input_character} = shift @{$self->{char}};
323     } else {
324     $self->{set_next_input_character}->($self);
325     }
326    
327     redo A;
328     } elsif ($self->{next_input_character} == 0x002F) { # /
329     $self->{state} = 'close tag open';
330    
331     if (@{$self->{char}}) {
332     $self->{next_input_character} = shift @{$self->{char}};
333     } else {
334     $self->{set_next_input_character}->($self);
335     }
336    
337     redo A;
338     } elsif (0x0041 <= $self->{next_input_character} and
339     $self->{next_input_character} <= 0x005A) { # A..Z
340     $self->{current_token}
341     = {type => 'start tag',
342     tag_name => chr ($self->{next_input_character} + 0x0020)};
343     $self->{state} = 'tag name';
344    
345     if (@{$self->{char}}) {
346     $self->{next_input_character} = shift @{$self->{char}};
347     } else {
348     $self->{set_next_input_character}->($self);
349     }
350    
351     redo A;
352     } elsif (0x0061 <= $self->{next_input_character} and
353     $self->{next_input_character} <= 0x007A) { # a..z
354     $self->{current_token} = {type => 'start tag',
355     tag_name => chr ($self->{next_input_character})};
356     $self->{state} = 'tag name';
357    
358     if (@{$self->{char}}) {
359     $self->{next_input_character} = shift @{$self->{char}};
360     } else {
361     $self->{set_next_input_character}->($self);
362     }
363    
364     redo A;
365     } elsif ($self->{next_input_character} == 0x003E) { # >
366 wakaba 1.3 $self->{parse_error}-> (type => 'empty start tag');
367 wakaba 1.1 $self->{state} = 'data';
368    
369     if (@{$self->{char}}) {
370     $self->{next_input_character} = shift @{$self->{char}};
371     } else {
372     $self->{set_next_input_character}->($self);
373     }
374    
375    
376     return ({type => 'character', data => '<>'});
377    
378     redo A;
379     } elsif ($self->{next_input_character} == 0x003F) { # ?
380 wakaba 1.3 $self->{parse_error}-> (type => 'pio');
381 wakaba 1.1 $self->{state} = 'bogus comment';
382     ## $self->{next_input_character} is intentionally left as is
383     redo A;
384     } else {
385 wakaba 1.3 $self->{parse_error}-> (type => 'bare stago');
386 wakaba 1.1 $self->{state} = 'data';
387     ## reconsume
388    
389     return ({type => 'character', data => '<'});
390    
391     redo A;
392     }
393     } else {
394 wakaba 1.41 die "$0: $self->{content_model} in tag open";
395 wakaba 1.1 }
396     } elsif ($self->{state} eq 'close tag open') {
397 wakaba 1.41 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
398 wakaba 1.23 if (defined $self->{last_emitted_start_tag_name}) {
399 wakaba 1.30 ## NOTE: <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>
400 wakaba 1.23 my @next_char;
401     TAGNAME: for (my $i = 0; $i < length $self->{last_emitted_start_tag_name}; $i++) {
402     push @next_char, $self->{next_input_character};
403     my $c = ord substr ($self->{last_emitted_start_tag_name}, $i, 1);
404     my $C = 0x0061 <= $c && $c <= 0x007A ? $c - 0x0020 : $c;
405     if ($self->{next_input_character} == $c or $self->{next_input_character} == $C) {
406    
407 wakaba 1.1 if (@{$self->{char}}) {
408     $self->{next_input_character} = shift @{$self->{char}};
409     } else {
410     $self->{set_next_input_character}->($self);
411     }
412    
413 wakaba 1.23 next TAGNAME;
414     } else {
415     $self->{next_input_character} = shift @next_char; # reconsume
416     unshift @{$self->{char}}, (@next_char);
417     $self->{state} = 'data';
418    
419     return ({type => 'character', data => '</'});
420    
421     redo A;
422     }
423     }
424     push @next_char, $self->{next_input_character};
425    
426     unless ($self->{next_input_character} == 0x0009 or # HT
427     $self->{next_input_character} == 0x000A or # LF
428     $self->{next_input_character} == 0x000B or # VT
429     $self->{next_input_character} == 0x000C or # FF
430     $self->{next_input_character} == 0x0020 or # SP
431     $self->{next_input_character} == 0x003E or # >
432     $self->{next_input_character} == 0x002F or # /
433     $self->{next_input_character} == -1) {
434 wakaba 1.1 $self->{next_input_character} = shift @next_char; # reconsume
435     unshift @{$self->{char}}, (@next_char);
436     $self->{state} = 'data';
437     return ({type => 'character', data => '</'});
438     redo A;
439 wakaba 1.23 } else {
440     $self->{next_input_character} = shift @next_char;
441     unshift @{$self->{char}}, (@next_char);
442     # and consume...
443 wakaba 1.1 }
444 wakaba 1.23 } else {
445     ## No start tag token has ever been emitted
446     # next-input-character is already done
447 wakaba 1.1 $self->{state} = 'data';
448     return ({type => 'character', data => '</'});
449     redo A;
450     }
451     }
452    
453     if (0x0041 <= $self->{next_input_character} and
454     $self->{next_input_character} <= 0x005A) { # A..Z
455     $self->{current_token} = {type => 'end tag',
456     tag_name => chr ($self->{next_input_character} + 0x0020)};
457     $self->{state} = 'tag name';
458    
459     if (@{$self->{char}}) {
460     $self->{next_input_character} = shift @{$self->{char}};
461     } else {
462     $self->{set_next_input_character}->($self);
463     }
464    
465     redo A;
466     } elsif (0x0061 <= $self->{next_input_character} and
467     $self->{next_input_character} <= 0x007A) { # a..z
468     $self->{current_token} = {type => 'end tag',
469     tag_name => chr ($self->{next_input_character})};
470     $self->{state} = 'tag name';
471    
472     if (@{$self->{char}}) {
473     $self->{next_input_character} = shift @{$self->{char}};
474     } else {
475     $self->{set_next_input_character}->($self);
476     }
477    
478     redo A;
479     } elsif ($self->{next_input_character} == 0x003E) { # >
480 wakaba 1.3 $self->{parse_error}-> (type => 'empty end tag');
481 wakaba 1.1 $self->{state} = 'data';
482    
483     if (@{$self->{char}}) {
484     $self->{next_input_character} = shift @{$self->{char}};
485     } else {
486     $self->{set_next_input_character}->($self);
487     }
488    
489     redo A;
490     } elsif ($self->{next_input_character} == -1) {
491 wakaba 1.3 $self->{parse_error}-> (type => 'bare etago');
492 wakaba 1.1 $self->{state} = 'data';
493     # reconsume
494    
495     return ({type => 'character', data => '</'});
496    
497     redo A;
498     } else {
499 wakaba 1.3 $self->{parse_error}-> (type => 'bogus end tag');
500 wakaba 1.1 $self->{state} = 'bogus comment';
501     ## $self->{next_input_character} is intentionally left as is
502     redo A;
503     }
504     } elsif ($self->{state} eq 'tag name') {
505     if ($self->{next_input_character} == 0x0009 or # HT
506     $self->{next_input_character} == 0x000A or # LF
507     $self->{next_input_character} == 0x000B or # VT
508     $self->{next_input_character} == 0x000C or # FF
509     $self->{next_input_character} == 0x0020) { # SP
510     $self->{state} = 'before attribute name';
511    
512     if (@{$self->{char}}) {
513     $self->{next_input_character} = shift @{$self->{char}};
514     } else {
515     $self->{set_next_input_character}->($self);
516     }
517    
518     redo A;
519     } elsif ($self->{next_input_character} == 0x003E) { # >
520     if ($self->{current_token}->{type} eq 'start tag') {
521 wakaba 1.28 $self->{current_token}->{first_start_tag}
522     = not defined $self->{last_emitted_start_tag_name};
523 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
524     } elsif ($self->{current_token}->{type} eq 'end tag') {
525 wakaba 1.41 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
526 wakaba 1.1 if ($self->{current_token}->{attributes}) {
527 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
528 wakaba 1.1 }
529     } else {
530     die "$0: $self->{current_token}->{type}: Unknown token type";
531     }
532     $self->{state} = 'data';
533    
534     if (@{$self->{char}}) {
535     $self->{next_input_character} = shift @{$self->{char}};
536     } else {
537     $self->{set_next_input_character}->($self);
538     }
539    
540    
541     return ($self->{current_token}); # start tag or end tag
542    
543     redo A;
544     } elsif (0x0041 <= $self->{next_input_character} and
545     $self->{next_input_character} <= 0x005A) { # A..Z
546     $self->{current_token}->{tag_name} .= chr ($self->{next_input_character} + 0x0020);
547     # start tag or end tag
548     ## Stay in this state
549    
550     if (@{$self->{char}}) {
551     $self->{next_input_character} = shift @{$self->{char}};
552     } else {
553     $self->{set_next_input_character}->($self);
554     }
555    
556     redo A;
557 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
558 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed tag');
559 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
560 wakaba 1.28 $self->{current_token}->{first_start_tag}
561     = not defined $self->{last_emitted_start_tag_name};
562 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
563     } elsif ($self->{current_token}->{type} eq 'end tag') {
564 wakaba 1.41 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
565 wakaba 1.1 if ($self->{current_token}->{attributes}) {
566 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
567 wakaba 1.1 }
568     } else {
569     die "$0: $self->{current_token}->{type}: Unknown token type";
570     }
571     $self->{state} = 'data';
572     # reconsume
573    
574     return ($self->{current_token}); # start tag or end tag
575    
576     redo A;
577     } elsif ($self->{next_input_character} == 0x002F) { # /
578    
579     if (@{$self->{char}}) {
580     $self->{next_input_character} = shift @{$self->{char}};
581     } else {
582     $self->{set_next_input_character}->($self);
583     }
584    
585     if ($self->{next_input_character} == 0x003E and # >
586     $self->{current_token}->{type} eq 'start tag' and
587     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
588     # permitted slash
589     #
590     } else {
591 wakaba 1.3 $self->{parse_error}-> (type => 'nestc');
592 wakaba 1.1 }
593     $self->{state} = 'before attribute name';
594     # next-input-character is already done
595     redo A;
596     } else {
597     $self->{current_token}->{tag_name} .= chr $self->{next_input_character};
598     # start tag or end tag
599     ## Stay in the state
600    
601     if (@{$self->{char}}) {
602     $self->{next_input_character} = shift @{$self->{char}};
603     } else {
604     $self->{set_next_input_character}->($self);
605     }
606    
607     redo A;
608     }
609     } elsif ($self->{state} eq 'before attribute name') {
610     if ($self->{next_input_character} == 0x0009 or # HT
611     $self->{next_input_character} == 0x000A or # LF
612     $self->{next_input_character} == 0x000B or # VT
613     $self->{next_input_character} == 0x000C or # FF
614     $self->{next_input_character} == 0x0020) { # SP
615     ## Stay in the state
616    
617     if (@{$self->{char}}) {
618     $self->{next_input_character} = shift @{$self->{char}};
619     } else {
620     $self->{set_next_input_character}->($self);
621     }
622    
623     redo A;
624     } elsif ($self->{next_input_character} == 0x003E) { # >
625     if ($self->{current_token}->{type} eq 'start tag') {
626 wakaba 1.28 $self->{current_token}->{first_start_tag}
627     = not defined $self->{last_emitted_start_tag_name};
628 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
629     } elsif ($self->{current_token}->{type} eq 'end tag') {
630 wakaba 1.41 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
631 wakaba 1.1 if ($self->{current_token}->{attributes}) {
632 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
633 wakaba 1.1 }
634     } else {
635     die "$0: $self->{current_token}->{type}: Unknown token type";
636     }
637     $self->{state} = 'data';
638    
639     if (@{$self->{char}}) {
640     $self->{next_input_character} = shift @{$self->{char}};
641     } else {
642     $self->{set_next_input_character}->($self);
643     }
644    
645    
646     return ($self->{current_token}); # start tag or end tag
647    
648     redo A;
649     } elsif (0x0041 <= $self->{next_input_character} and
650     $self->{next_input_character} <= 0x005A) { # A..Z
651     $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
652     value => ''};
653     $self->{state} = 'attribute name';
654    
655     if (@{$self->{char}}) {
656     $self->{next_input_character} = shift @{$self->{char}};
657     } else {
658     $self->{set_next_input_character}->($self);
659     }
660    
661     redo A;
662     } elsif ($self->{next_input_character} == 0x002F) { # /
663    
664     if (@{$self->{char}}) {
665     $self->{next_input_character} = shift @{$self->{char}};
666     } else {
667     $self->{set_next_input_character}->($self);
668     }
669    
670     if ($self->{next_input_character} == 0x003E and # >
671     $self->{current_token}->{type} eq 'start tag' and
672     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
673     # permitted slash
674     #
675     } else {
676 wakaba 1.3 $self->{parse_error}-> (type => 'nestc');
677 wakaba 1.1 }
678     ## Stay in the state
679     # next-input-character is already done
680     redo A;
681 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
682 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed tag');
683 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
684 wakaba 1.28 $self->{current_token}->{first_start_tag}
685     = not defined $self->{last_emitted_start_tag_name};
686 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
687     } elsif ($self->{current_token}->{type} eq 'end tag') {
688 wakaba 1.41 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
689 wakaba 1.1 if ($self->{current_token}->{attributes}) {
690 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
691 wakaba 1.1 }
692     } else {
693     die "$0: $self->{current_token}->{type}: Unknown token type";
694     }
695     $self->{state} = 'data';
696     # reconsume
697    
698     return ($self->{current_token}); # start tag or end tag
699    
700     redo A;
701     } else {
702     $self->{current_attribute} = {name => chr ($self->{next_input_character}),
703     value => ''};
704     $self->{state} = 'attribute name';
705    
706     if (@{$self->{char}}) {
707     $self->{next_input_character} = shift @{$self->{char}};
708     } else {
709     $self->{set_next_input_character}->($self);
710     }
711    
712     redo A;
713     }
714     } elsif ($self->{state} eq 'attribute name') {
715     my $before_leave = sub {
716     if (exists $self->{current_token}->{attributes} # start tag or end tag
717     ->{$self->{current_attribute}->{name}}) { # MUST
718 wakaba 1.40 $self->{parse_error}-> (type => 'duplicate attribute:'.$self->{current_attribute}->{name});
719 wakaba 1.1 ## Discard $self->{current_attribute} # MUST
720     } else {
721     $self->{current_token}->{attributes}->{$self->{current_attribute}->{name}}
722     = $self->{current_attribute};
723     }
724     }; # $before_leave
725    
726     if ($self->{next_input_character} == 0x0009 or # HT
727     $self->{next_input_character} == 0x000A or # LF
728     $self->{next_input_character} == 0x000B or # VT
729     $self->{next_input_character} == 0x000C or # FF
730     $self->{next_input_character} == 0x0020) { # SP
731     $before_leave->();
732     $self->{state} = 'after attribute name';
733    
734     if (@{$self->{char}}) {
735     $self->{next_input_character} = shift @{$self->{char}};
736     } else {
737     $self->{set_next_input_character}->($self);
738     }
739    
740     redo A;
741     } elsif ($self->{next_input_character} == 0x003D) { # =
742     $before_leave->();
743     $self->{state} = 'before attribute value';
744    
745     if (@{$self->{char}}) {
746     $self->{next_input_character} = shift @{$self->{char}};
747     } else {
748     $self->{set_next_input_character}->($self);
749     }
750    
751     redo A;
752     } elsif ($self->{next_input_character} == 0x003E) { # >
753     $before_leave->();
754     if ($self->{current_token}->{type} eq 'start tag') {
755 wakaba 1.28 $self->{current_token}->{first_start_tag}
756     = not defined $self->{last_emitted_start_tag_name};
757 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
758     } elsif ($self->{current_token}->{type} eq 'end tag') {
759 wakaba 1.41 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
760 wakaba 1.1 if ($self->{current_token}->{attributes}) {
761 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
762 wakaba 1.1 }
763     } else {
764     die "$0: $self->{current_token}->{type}: Unknown token type";
765     }
766     $self->{state} = 'data';
767    
768     if (@{$self->{char}}) {
769     $self->{next_input_character} = shift @{$self->{char}};
770     } else {
771     $self->{set_next_input_character}->($self);
772     }
773    
774    
775     return ($self->{current_token}); # start tag or end tag
776    
777     redo A;
778     } elsif (0x0041 <= $self->{next_input_character} and
779     $self->{next_input_character} <= 0x005A) { # A..Z
780     $self->{current_attribute}->{name} .= chr ($self->{next_input_character} + 0x0020);
781     ## Stay in the state
782    
783     if (@{$self->{char}}) {
784     $self->{next_input_character} = shift @{$self->{char}};
785     } else {
786     $self->{set_next_input_character}->($self);
787     }
788    
789     redo A;
790     } elsif ($self->{next_input_character} == 0x002F) { # /
791     $before_leave->();
792    
793     if (@{$self->{char}}) {
794     $self->{next_input_character} = shift @{$self->{char}};
795     } else {
796     $self->{set_next_input_character}->($self);
797     }
798    
799     if ($self->{next_input_character} == 0x003E and # >
800     $self->{current_token}->{type} eq 'start tag' and
801     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
802     # permitted slash
803     #
804     } else {
805 wakaba 1.3 $self->{parse_error}-> (type => 'nestc');
806 wakaba 1.1 }
807     $self->{state} = 'before attribute name';
808     # next-input-character is already done
809     redo A;
810 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
811 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed tag');
812 wakaba 1.1 $before_leave->();
813     if ($self->{current_token}->{type} eq 'start tag') {
814 wakaba 1.28 $self->{current_token}->{first_start_tag}
815     = not defined $self->{last_emitted_start_tag_name};
816 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
817     } elsif ($self->{current_token}->{type} eq 'end tag') {
818 wakaba 1.41 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
819 wakaba 1.1 if ($self->{current_token}->{attributes}) {
820 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
821 wakaba 1.1 }
822     } else {
823     die "$0: $self->{current_token}->{type}: Unknown token type";
824     }
825     $self->{state} = 'data';
826     # reconsume
827    
828     return ($self->{current_token}); # start tag or end tag
829    
830     redo A;
831     } else {
832     $self->{current_attribute}->{name} .= chr ($self->{next_input_character});
833     ## Stay in the state
834    
835     if (@{$self->{char}}) {
836     $self->{next_input_character} = shift @{$self->{char}};
837     } else {
838     $self->{set_next_input_character}->($self);
839     }
840    
841     redo A;
842     }
843     } elsif ($self->{state} eq 'after attribute name') {
844     if ($self->{next_input_character} == 0x0009 or # HT
845     $self->{next_input_character} == 0x000A or # LF
846     $self->{next_input_character} == 0x000B or # VT
847     $self->{next_input_character} == 0x000C or # FF
848     $self->{next_input_character} == 0x0020) { # SP
849     ## Stay in the state
850    
851     if (@{$self->{char}}) {
852     $self->{next_input_character} = shift @{$self->{char}};
853     } else {
854     $self->{set_next_input_character}->($self);
855     }
856    
857     redo A;
858     } elsif ($self->{next_input_character} == 0x003D) { # =
859     $self->{state} = 'before attribute value';
860    
861     if (@{$self->{char}}) {
862     $self->{next_input_character} = shift @{$self->{char}};
863     } else {
864     $self->{set_next_input_character}->($self);
865     }
866    
867     redo A;
868     } elsif ($self->{next_input_character} == 0x003E) { # >
869     if ($self->{current_token}->{type} eq 'start tag') {
870 wakaba 1.28 $self->{current_token}->{first_start_tag}
871     = not defined $self->{last_emitted_start_tag_name};
872 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
873     } elsif ($self->{current_token}->{type} eq 'end tag') {
874 wakaba 1.41 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
875 wakaba 1.1 if ($self->{current_token}->{attributes}) {
876 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
877 wakaba 1.1 }
878     } else {
879     die "$0: $self->{current_token}->{type}: Unknown token type";
880     }
881     $self->{state} = 'data';
882    
883     if (@{$self->{char}}) {
884     $self->{next_input_character} = shift @{$self->{char}};
885     } else {
886     $self->{set_next_input_character}->($self);
887     }
888    
889    
890     return ($self->{current_token}); # start tag or end tag
891    
892     redo A;
893     } elsif (0x0041 <= $self->{next_input_character} and
894     $self->{next_input_character} <= 0x005A) { # A..Z
895     $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
896     value => ''};
897     $self->{state} = 'attribute name';
898    
899     if (@{$self->{char}}) {
900     $self->{next_input_character} = shift @{$self->{char}};
901     } else {
902     $self->{set_next_input_character}->($self);
903     }
904    
905     redo A;
906     } elsif ($self->{next_input_character} == 0x002F) { # /
907    
908     if (@{$self->{char}}) {
909     $self->{next_input_character} = shift @{$self->{char}};
910     } else {
911     $self->{set_next_input_character}->($self);
912     }
913    
914     if ($self->{next_input_character} == 0x003E and # >
915     $self->{current_token}->{type} eq 'start tag' and
916     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
917     # permitted slash
918     #
919     } else {
920 wakaba 1.3 $self->{parse_error}-> (type => 'nestc');
921 wakaba 1.33 ## TODO: Different error type for <aa / bb> than <aa/>
922 wakaba 1.1 }
923     $self->{state} = 'before attribute name';
924     # next-input-character is already done
925     redo A;
926 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
927 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed tag');
928 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
929 wakaba 1.28 $self->{current_token}->{first_start_tag}
930     = not defined $self->{last_emitted_start_tag_name};
931 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
932     } elsif ($self->{current_token}->{type} eq 'end tag') {
933 wakaba 1.41 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
934 wakaba 1.1 if ($self->{current_token}->{attributes}) {
935 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
936 wakaba 1.1 }
937     } else {
938     die "$0: $self->{current_token}->{type}: Unknown token type";
939     }
940     $self->{state} = 'data';
941     # reconsume
942    
943     return ($self->{current_token}); # start tag or end tag
944    
945     redo A;
946     } else {
947     $self->{current_attribute} = {name => chr ($self->{next_input_character}),
948     value => ''};
949     $self->{state} = 'attribute name';
950    
951     if (@{$self->{char}}) {
952     $self->{next_input_character} = shift @{$self->{char}};
953     } else {
954     $self->{set_next_input_character}->($self);
955     }
956    
957     redo A;
958     }
959     } elsif ($self->{state} eq 'before attribute value') {
960     if ($self->{next_input_character} == 0x0009 or # HT
961     $self->{next_input_character} == 0x000A or # LF
962     $self->{next_input_character} == 0x000B or # VT
963     $self->{next_input_character} == 0x000C or # FF
964     $self->{next_input_character} == 0x0020) { # SP
965     ## Stay in the state
966    
967     if (@{$self->{char}}) {
968     $self->{next_input_character} = shift @{$self->{char}};
969     } else {
970     $self->{set_next_input_character}->($self);
971     }
972    
973     redo A;
974     } elsif ($self->{next_input_character} == 0x0022) { # "
975     $self->{state} = 'attribute value (double-quoted)';
976    
977     if (@{$self->{char}}) {
978     $self->{next_input_character} = shift @{$self->{char}};
979     } else {
980     $self->{set_next_input_character}->($self);
981     }
982    
983     redo A;
984     } elsif ($self->{next_input_character} == 0x0026) { # &
985     $self->{state} = 'attribute value (unquoted)';
986     ## reconsume
987     redo A;
988     } elsif ($self->{next_input_character} == 0x0027) { # '
989     $self->{state} = 'attribute value (single-quoted)';
990    
991     if (@{$self->{char}}) {
992     $self->{next_input_character} = shift @{$self->{char}};
993     } else {
994     $self->{set_next_input_character}->($self);
995     }
996    
997     redo A;
998     } elsif ($self->{next_input_character} == 0x003E) { # >
999     if ($self->{current_token}->{type} eq 'start tag') {
1000 wakaba 1.28 $self->{current_token}->{first_start_tag}
1001     = not defined $self->{last_emitted_start_tag_name};
1002 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1003     } elsif ($self->{current_token}->{type} eq 'end tag') {
1004 wakaba 1.41 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1005 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1006 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
1007 wakaba 1.1 }
1008     } else {
1009     die "$0: $self->{current_token}->{type}: Unknown token type";
1010     }
1011     $self->{state} = 'data';
1012    
1013     if (@{$self->{char}}) {
1014     $self->{next_input_character} = shift @{$self->{char}};
1015     } else {
1016     $self->{set_next_input_character}->($self);
1017     }
1018    
1019    
1020     return ($self->{current_token}); # start tag or end tag
1021    
1022     redo A;
1023 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
1024 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed tag');
1025 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1026 wakaba 1.28 $self->{current_token}->{first_start_tag}
1027     = not defined $self->{last_emitted_start_tag_name};
1028 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1029     } elsif ($self->{current_token}->{type} eq 'end tag') {
1030 wakaba 1.41 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1031 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1032 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
1033 wakaba 1.1 }
1034     } else {
1035     die "$0: $self->{current_token}->{type}: Unknown token type";
1036     }
1037     $self->{state} = 'data';
1038     ## reconsume
1039    
1040     return ($self->{current_token}); # start tag or end tag
1041    
1042     redo A;
1043     } else {
1044     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1045     $self->{state} = 'attribute value (unquoted)';
1046    
1047     if (@{$self->{char}}) {
1048     $self->{next_input_character} = shift @{$self->{char}};
1049     } else {
1050     $self->{set_next_input_character}->($self);
1051     }
1052    
1053     redo A;
1054     }
1055     } elsif ($self->{state} eq 'attribute value (double-quoted)') {
1056     if ($self->{next_input_character} == 0x0022) { # "
1057     $self->{state} = 'before attribute name';
1058    
1059     if (@{$self->{char}}) {
1060     $self->{next_input_character} = shift @{$self->{char}};
1061     } else {
1062     $self->{set_next_input_character}->($self);
1063     }
1064    
1065     redo A;
1066     } elsif ($self->{next_input_character} == 0x0026) { # &
1067     $self->{last_attribute_value_state} = 'attribute value (double-quoted)';
1068     $self->{state} = 'entity in attribute value';
1069    
1070     if (@{$self->{char}}) {
1071     $self->{next_input_character} = shift @{$self->{char}};
1072     } else {
1073     $self->{set_next_input_character}->($self);
1074     }
1075    
1076     redo A;
1077     } elsif ($self->{next_input_character} == -1) {
1078 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed attribute value');
1079 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1080 wakaba 1.28 $self->{current_token}->{first_start_tag}
1081     = not defined $self->{last_emitted_start_tag_name};
1082 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1083     } elsif ($self->{current_token}->{type} eq 'end tag') {
1084 wakaba 1.41 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1085 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1086 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
1087 wakaba 1.1 }
1088     } else {
1089     die "$0: $self->{current_token}->{type}: Unknown token type";
1090     }
1091     $self->{state} = 'data';
1092     ## reconsume
1093    
1094     return ($self->{current_token}); # start tag or end tag
1095    
1096     redo A;
1097     } else {
1098     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1099     ## Stay in the state
1100    
1101     if (@{$self->{char}}) {
1102     $self->{next_input_character} = shift @{$self->{char}};
1103     } else {
1104     $self->{set_next_input_character}->($self);
1105     }
1106    
1107     redo A;
1108     }
1109     } elsif ($self->{state} eq 'attribute value (single-quoted)') {
1110     if ($self->{next_input_character} == 0x0027) { # '
1111     $self->{state} = 'before attribute name';
1112    
1113     if (@{$self->{char}}) {
1114     $self->{next_input_character} = shift @{$self->{char}};
1115     } else {
1116     $self->{set_next_input_character}->($self);
1117     }
1118    
1119     redo A;
1120     } elsif ($self->{next_input_character} == 0x0026) { # &
1121     $self->{last_attribute_value_state} = 'attribute value (single-quoted)';
1122     $self->{state} = 'entity in attribute value';
1123    
1124     if (@{$self->{char}}) {
1125     $self->{next_input_character} = shift @{$self->{char}};
1126     } else {
1127     $self->{set_next_input_character}->($self);
1128     }
1129    
1130     redo A;
1131     } elsif ($self->{next_input_character} == -1) {
1132 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed attribute value');
1133 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1134 wakaba 1.28 $self->{current_token}->{first_start_tag}
1135     = not defined $self->{last_emitted_start_tag_name};
1136 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1137     } elsif ($self->{current_token}->{type} eq 'end tag') {
1138 wakaba 1.41 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1139 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1140 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
1141 wakaba 1.1 }
1142     } else {
1143     die "$0: $self->{current_token}->{type}: Unknown token type";
1144     }
1145     $self->{state} = 'data';
1146     ## reconsume
1147    
1148     return ($self->{current_token}); # start tag or end tag
1149    
1150     redo A;
1151     } else {
1152     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1153     ## Stay in the state
1154    
1155     if (@{$self->{char}}) {
1156     $self->{next_input_character} = shift @{$self->{char}};
1157     } else {
1158     $self->{set_next_input_character}->($self);
1159     }
1160    
1161     redo A;
1162     }
1163     } elsif ($self->{state} eq 'attribute value (unquoted)') {
1164     if ($self->{next_input_character} == 0x0009 or # HT
1165     $self->{next_input_character} == 0x000A or # LF
1166     $self->{next_input_character} == 0x000B or # VT
1167     $self->{next_input_character} == 0x000C or # FF
1168     $self->{next_input_character} == 0x0020) { # SP
1169     $self->{state} = 'before attribute name';
1170    
1171     if (@{$self->{char}}) {
1172     $self->{next_input_character} = shift @{$self->{char}};
1173     } else {
1174     $self->{set_next_input_character}->($self);
1175     }
1176    
1177     redo A;
1178     } elsif ($self->{next_input_character} == 0x0026) { # &
1179     $self->{last_attribute_value_state} = 'attribute value (unquoted)';
1180     $self->{state} = 'entity in attribute value';
1181    
1182     if (@{$self->{char}}) {
1183     $self->{next_input_character} = shift @{$self->{char}};
1184     } else {
1185     $self->{set_next_input_character}->($self);
1186     }
1187    
1188     redo A;
1189     } elsif ($self->{next_input_character} == 0x003E) { # >
1190     if ($self->{current_token}->{type} eq 'start tag') {
1191 wakaba 1.28 $self->{current_token}->{first_start_tag}
1192     = not defined $self->{last_emitted_start_tag_name};
1193 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1194     } elsif ($self->{current_token}->{type} eq 'end tag') {
1195 wakaba 1.41 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1196 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1197 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
1198 wakaba 1.1 }
1199     } else {
1200     die "$0: $self->{current_token}->{type}: Unknown token type";
1201     }
1202     $self->{state} = 'data';
1203    
1204     if (@{$self->{char}}) {
1205     $self->{next_input_character} = shift @{$self->{char}};
1206     } else {
1207     $self->{set_next_input_character}->($self);
1208     }
1209    
1210    
1211     return ($self->{current_token}); # start tag or end tag
1212    
1213     redo A;
1214 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
1215 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed tag');
1216 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1217 wakaba 1.28 $self->{current_token}->{first_start_tag}
1218     = not defined $self->{last_emitted_start_tag_name};
1219 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1220     } elsif ($self->{current_token}->{type} eq 'end tag') {
1221 wakaba 1.41 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1222 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1223 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
1224 wakaba 1.1 }
1225     } else {
1226     die "$0: $self->{current_token}->{type}: Unknown token type";
1227     }
1228     $self->{state} = 'data';
1229     ## reconsume
1230    
1231     return ($self->{current_token}); # start tag or end tag
1232    
1233     redo A;
1234     } else {
1235     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1236     ## Stay in the state
1237    
1238     if (@{$self->{char}}) {
1239     $self->{next_input_character} = shift @{$self->{char}};
1240     } else {
1241     $self->{set_next_input_character}->($self);
1242     }
1243    
1244     redo A;
1245     }
1246     } elsif ($self->{state} eq 'entity in attribute value') {
1247 wakaba 1.26 my $token = $self->_tokenize_attempt_to_consume_an_entity (1);
1248 wakaba 1.1
1249     unless (defined $token) {
1250     $self->{current_attribute}->{value} .= '&';
1251     } else {
1252     $self->{current_attribute}->{value} .= $token->{data};
1253     ## ISSUE: spec says "append the returned character token to the current attribute's value"
1254     }
1255    
1256     $self->{state} = $self->{last_attribute_value_state};
1257     # next-input-character is already done
1258     redo A;
1259     } elsif ($self->{state} eq 'bogus comment') {
1260     ## (only happens in the PCDATA state)
1261    
1262     my $token = {type => 'comment', data => ''};
1263    
1264     BC: {
1265     if ($self->{next_input_character} == 0x003E) { # >
1266     $self->{state} = 'data';
1267    
1268     if (@{$self->{char}}) {
1269     $self->{next_input_character} = shift @{$self->{char}};
1270     } else {
1271     $self->{set_next_input_character}->($self);
1272     }
1273    
1274    
1275     return ($token);
1276    
1277     redo A;
1278     } elsif ($self->{next_input_character} == -1) {
1279     $self->{state} = 'data';
1280     ## reconsume
1281    
1282     return ($token);
1283    
1284     redo A;
1285     } else {
1286     $token->{data} .= chr ($self->{next_input_character});
1287    
1288     if (@{$self->{char}}) {
1289     $self->{next_input_character} = shift @{$self->{char}};
1290     } else {
1291     $self->{set_next_input_character}->($self);
1292     }
1293    
1294     redo BC;
1295     }
1296     } # BC
1297     } elsif ($self->{state} eq 'markup declaration open') {
1298     ## (only happens in the PCDATA state)
1299    
1300     my @next_char;
1301     push @next_char, $self->{next_input_character};
1302    
1303     if ($self->{next_input_character} == 0x002D) { # -
1304    
1305     if (@{$self->{char}}) {
1306     $self->{next_input_character} = shift @{$self->{char}};
1307     } else {
1308     $self->{set_next_input_character}->($self);
1309     }
1310    
1311     push @next_char, $self->{next_input_character};
1312     if ($self->{next_input_character} == 0x002D) { # -
1313     $self->{current_token} = {type => 'comment', data => ''};
1314 wakaba 1.23 $self->{state} = 'comment start';
1315 wakaba 1.1
1316     if (@{$self->{char}}) {
1317     $self->{next_input_character} = shift @{$self->{char}};
1318     } else {
1319     $self->{set_next_input_character}->($self);
1320     }
1321    
1322     redo A;
1323     }
1324     } elsif ($self->{next_input_character} == 0x0044 or # D
1325     $self->{next_input_character} == 0x0064) { # d
1326    
1327     if (@{$self->{char}}) {
1328     $self->{next_input_character} = shift @{$self->{char}};
1329     } else {
1330     $self->{set_next_input_character}->($self);
1331     }
1332    
1333     push @next_char, $self->{next_input_character};
1334     if ($self->{next_input_character} == 0x004F or # O
1335     $self->{next_input_character} == 0x006F) { # o
1336    
1337     if (@{$self->{char}}) {
1338     $self->{next_input_character} = shift @{$self->{char}};
1339     } else {
1340     $self->{set_next_input_character}->($self);
1341     }
1342    
1343     push @next_char, $self->{next_input_character};
1344     if ($self->{next_input_character} == 0x0043 or # C
1345     $self->{next_input_character} == 0x0063) { # c
1346    
1347     if (@{$self->{char}}) {
1348     $self->{next_input_character} = shift @{$self->{char}};
1349     } else {
1350     $self->{set_next_input_character}->($self);
1351     }
1352    
1353     push @next_char, $self->{next_input_character};
1354     if ($self->{next_input_character} == 0x0054 or # T
1355     $self->{next_input_character} == 0x0074) { # t
1356    
1357     if (@{$self->{char}}) {
1358     $self->{next_input_character} = shift @{$self->{char}};
1359     } else {
1360     $self->{set_next_input_character}->($self);
1361     }
1362    
1363     push @next_char, $self->{next_input_character};
1364     if ($self->{next_input_character} == 0x0059 or # Y
1365     $self->{next_input_character} == 0x0079) { # y
1366    
1367     if (@{$self->{char}}) {
1368     $self->{next_input_character} = shift @{$self->{char}};
1369     } else {
1370     $self->{set_next_input_character}->($self);
1371     }
1372    
1373     push @next_char, $self->{next_input_character};
1374     if ($self->{next_input_character} == 0x0050 or # P
1375     $self->{next_input_character} == 0x0070) { # p
1376    
1377     if (@{$self->{char}}) {
1378     $self->{next_input_character} = shift @{$self->{char}};
1379     } else {
1380     $self->{set_next_input_character}->($self);
1381     }
1382    
1383     push @next_char, $self->{next_input_character};
1384     if ($self->{next_input_character} == 0x0045 or # E
1385     $self->{next_input_character} == 0x0065) { # e
1386     ## ISSUE: What a stupid code this is!
1387     $self->{state} = 'DOCTYPE';
1388    
1389     if (@{$self->{char}}) {
1390     $self->{next_input_character} = shift @{$self->{char}};
1391     } else {
1392     $self->{set_next_input_character}->($self);
1393     }
1394    
1395     redo A;
1396     }
1397     }
1398     }
1399     }
1400     }
1401     }
1402     }
1403    
1404 wakaba 1.30 $self->{parse_error}-> (type => 'bogus comment');
1405 wakaba 1.1 $self->{next_input_character} = shift @next_char;
1406     unshift @{$self->{char}}, (@next_char);
1407     $self->{state} = 'bogus comment';
1408     redo A;
1409    
1410     ## ISSUE: typos in spec: chacacters, is is a parse error
1411     ## ISSUE: spec is somewhat unclear on "is the first character that will be in the comment"; what is "that will be in the comment" is what the algorithm defines, isn't it?
1412 wakaba 1.23 } elsif ($self->{state} eq 'comment start') {
1413     if ($self->{next_input_character} == 0x002D) { # -
1414     $self->{state} = 'comment start dash';
1415    
1416     if (@{$self->{char}}) {
1417     $self->{next_input_character} = shift @{$self->{char}};
1418     } else {
1419     $self->{set_next_input_character}->($self);
1420     }
1421    
1422     redo A;
1423     } elsif ($self->{next_input_character} == 0x003E) { # >
1424     $self->{parse_error}-> (type => 'bogus comment');
1425     $self->{state} = 'data';
1426    
1427     if (@{$self->{char}}) {
1428     $self->{next_input_character} = shift @{$self->{char}};
1429     } else {
1430     $self->{set_next_input_character}->($self);
1431     }
1432    
1433    
1434     return ($self->{current_token}); # comment
1435    
1436     redo A;
1437     } elsif ($self->{next_input_character} == -1) {
1438     $self->{parse_error}-> (type => 'unclosed comment');
1439     $self->{state} = 'data';
1440     ## reconsume
1441    
1442     return ($self->{current_token}); # comment
1443    
1444     redo A;
1445     } else {
1446     $self->{current_token}->{data} # comment
1447     .= chr ($self->{next_input_character});
1448     $self->{state} = 'comment';
1449    
1450     if (@{$self->{char}}) {
1451     $self->{next_input_character} = shift @{$self->{char}};
1452     } else {
1453     $self->{set_next_input_character}->($self);
1454     }
1455    
1456     redo A;
1457     }
1458     } elsif ($self->{state} eq 'comment start dash') {
1459     if ($self->{next_input_character} == 0x002D) { # -
1460     $self->{state} = 'comment end';
1461    
1462     if (@{$self->{char}}) {
1463     $self->{next_input_character} = shift @{$self->{char}};
1464     } else {
1465     $self->{set_next_input_character}->($self);
1466     }
1467    
1468     redo A;
1469     } elsif ($self->{next_input_character} == 0x003E) { # >
1470     $self->{parse_error}-> (type => 'bogus comment');
1471     $self->{state} = 'data';
1472    
1473     if (@{$self->{char}}) {
1474     $self->{next_input_character} = shift @{$self->{char}};
1475     } else {
1476     $self->{set_next_input_character}->($self);
1477     }
1478    
1479    
1480     return ($self->{current_token}); # comment
1481    
1482     redo A;
1483     } elsif ($self->{next_input_character} == -1) {
1484     $self->{parse_error}-> (type => 'unclosed comment');
1485     $self->{state} = 'data';
1486     ## reconsume
1487    
1488     return ($self->{current_token}); # comment
1489    
1490     redo A;
1491     } else {
1492     $self->{current_token}->{data} # comment
1493 wakaba 1.33 .= '-' . chr ($self->{next_input_character});
1494 wakaba 1.23 $self->{state} = 'comment';
1495    
1496     if (@{$self->{char}}) {
1497     $self->{next_input_character} = shift @{$self->{char}};
1498     } else {
1499     $self->{set_next_input_character}->($self);
1500     }
1501    
1502     redo A;
1503     }
1504 wakaba 1.1 } elsif ($self->{state} eq 'comment') {
1505     if ($self->{next_input_character} == 0x002D) { # -
1506 wakaba 1.23 $self->{state} = 'comment end dash';
1507 wakaba 1.1
1508     if (@{$self->{char}}) {
1509     $self->{next_input_character} = shift @{$self->{char}};
1510     } else {
1511     $self->{set_next_input_character}->($self);
1512     }
1513    
1514     redo A;
1515     } elsif ($self->{next_input_character} == -1) {
1516 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed comment');
1517 wakaba 1.1 $self->{state} = 'data';
1518     ## reconsume
1519    
1520     return ($self->{current_token}); # comment
1521    
1522     redo A;
1523     } else {
1524     $self->{current_token}->{data} .= chr ($self->{next_input_character}); # comment
1525     ## Stay in the state
1526    
1527     if (@{$self->{char}}) {
1528     $self->{next_input_character} = shift @{$self->{char}};
1529     } else {
1530     $self->{set_next_input_character}->($self);
1531     }
1532    
1533     redo A;
1534     }
1535 wakaba 1.23 } elsif ($self->{state} eq 'comment end dash') {
1536 wakaba 1.1 if ($self->{next_input_character} == 0x002D) { # -
1537     $self->{state} = 'comment end';
1538    
1539     if (@{$self->{char}}) {
1540     $self->{next_input_character} = shift @{$self->{char}};
1541     } else {
1542     $self->{set_next_input_character}->($self);
1543     }
1544    
1545     redo A;
1546     } elsif ($self->{next_input_character} == -1) {
1547 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed comment');
1548 wakaba 1.1 $self->{state} = 'data';
1549     ## reconsume
1550    
1551     return ($self->{current_token}); # comment
1552    
1553     redo A;
1554     } else {
1555     $self->{current_token}->{data} .= '-' . chr ($self->{next_input_character}); # comment
1556     $self->{state} = 'comment';
1557    
1558     if (@{$self->{char}}) {
1559     $self->{next_input_character} = shift @{$self->{char}};
1560     } else {
1561     $self->{set_next_input_character}->($self);
1562     }
1563    
1564     redo A;
1565     }
1566     } elsif ($self->{state} eq 'comment end') {
1567     if ($self->{next_input_character} == 0x003E) { # >
1568     $self->{state} = 'data';
1569    
1570     if (@{$self->{char}}) {
1571     $self->{next_input_character} = shift @{$self->{char}};
1572     } else {
1573     $self->{set_next_input_character}->($self);
1574     }
1575    
1576    
1577     return ($self->{current_token}); # comment
1578    
1579     redo A;
1580     } elsif ($self->{next_input_character} == 0x002D) { # -
1581 wakaba 1.3 $self->{parse_error}-> (type => 'dash in comment');
1582 wakaba 1.1 $self->{current_token}->{data} .= '-'; # comment
1583     ## Stay in the state
1584    
1585     if (@{$self->{char}}) {
1586     $self->{next_input_character} = shift @{$self->{char}};
1587     } else {
1588     $self->{set_next_input_character}->($self);
1589     }
1590    
1591     redo A;
1592     } elsif ($self->{next_input_character} == -1) {
1593 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed comment');
1594 wakaba 1.1 $self->{state} = 'data';
1595     ## reconsume
1596    
1597     return ($self->{current_token}); # comment
1598    
1599     redo A;
1600     } else {
1601 wakaba 1.3 $self->{parse_error}-> (type => 'dash in comment');
1602 wakaba 1.1 $self->{current_token}->{data} .= '--' . chr ($self->{next_input_character}); # comment
1603     $self->{state} = 'comment';
1604    
1605     if (@{$self->{char}}) {
1606     $self->{next_input_character} = shift @{$self->{char}};
1607     } else {
1608     $self->{set_next_input_character}->($self);
1609     }
1610    
1611     redo A;
1612     }
1613     } elsif ($self->{state} eq 'DOCTYPE') {
1614     if ($self->{next_input_character} == 0x0009 or # HT
1615     $self->{next_input_character} == 0x000A or # LF
1616     $self->{next_input_character} == 0x000B or # VT
1617     $self->{next_input_character} == 0x000C or # FF
1618     $self->{next_input_character} == 0x0020) { # SP
1619     $self->{state} = 'before DOCTYPE name';
1620    
1621     if (@{$self->{char}}) {
1622     $self->{next_input_character} = shift @{$self->{char}};
1623     } else {
1624     $self->{set_next_input_character}->($self);
1625     }
1626    
1627     redo A;
1628     } else {
1629 wakaba 1.3 $self->{parse_error}-> (type => 'no space before DOCTYPE name');
1630 wakaba 1.1 $self->{state} = 'before DOCTYPE name';
1631     ## reconsume
1632     redo A;
1633     }
1634     } elsif ($self->{state} eq 'before DOCTYPE name') {
1635     if ($self->{next_input_character} == 0x0009 or # HT
1636     $self->{next_input_character} == 0x000A or # LF
1637     $self->{next_input_character} == 0x000B or # VT
1638     $self->{next_input_character} == 0x000C or # FF
1639     $self->{next_input_character} == 0x0020) { # SP
1640     ## Stay in the state
1641    
1642     if (@{$self->{char}}) {
1643     $self->{next_input_character} = shift @{$self->{char}};
1644     } else {
1645     $self->{set_next_input_character}->($self);
1646     }
1647    
1648     redo A;
1649 wakaba 1.18 } elsif ($self->{next_input_character} == 0x003E) { # >
1650     $self->{parse_error}-> (type => 'no DOCTYPE name');
1651     $self->{state} = 'data';
1652    
1653     if (@{$self->{char}}) {
1654     $self->{next_input_character} = shift @{$self->{char}};
1655     } else {
1656     $self->{set_next_input_character}->($self);
1657     }
1658    
1659    
1660     return ({type => 'DOCTYPE'}); # incorrect
1661    
1662     redo A;
1663     } elsif ($self->{next_input_character} == -1) {
1664     $self->{parse_error}-> (type => 'no DOCTYPE name');
1665     $self->{state} = 'data';
1666     ## reconsume
1667    
1668     return ({type => 'DOCTYPE'}); # incorrect
1669    
1670     redo A;
1671     } else {
1672     $self->{current_token}
1673     = {type => 'DOCTYPE',
1674     name => chr ($self->{next_input_character}),
1675     correct => 1};
1676 wakaba 1.4 ## ISSUE: "Set the token's name name to the" in the spec
1677 wakaba 1.1 $self->{state} = 'DOCTYPE name';
1678    
1679     if (@{$self->{char}}) {
1680     $self->{next_input_character} = shift @{$self->{char}};
1681     } else {
1682     $self->{set_next_input_character}->($self);
1683     }
1684    
1685     redo A;
1686 wakaba 1.18 }
1687     } elsif ($self->{state} eq 'DOCTYPE name') {
1688     ## ISSUE: Redundant "First," in the spec.
1689     if ($self->{next_input_character} == 0x0009 or # HT
1690     $self->{next_input_character} == 0x000A or # LF
1691     $self->{next_input_character} == 0x000B or # VT
1692     $self->{next_input_character} == 0x000C or # FF
1693     $self->{next_input_character} == 0x0020) { # SP
1694     $self->{state} = 'after DOCTYPE name';
1695    
1696     if (@{$self->{char}}) {
1697     $self->{next_input_character} = shift @{$self->{char}};
1698     } else {
1699     $self->{set_next_input_character}->($self);
1700     }
1701    
1702     redo A;
1703 wakaba 1.1 } elsif ($self->{next_input_character} == 0x003E) { # >
1704     $self->{state} = 'data';
1705    
1706     if (@{$self->{char}}) {
1707     $self->{next_input_character} = shift @{$self->{char}};
1708     } else {
1709     $self->{set_next_input_character}->($self);
1710     }
1711    
1712    
1713 wakaba 1.18 return ($self->{current_token}); # DOCTYPE
1714 wakaba 1.1
1715     redo A;
1716 wakaba 1.18 } elsif ($self->{next_input_character} == -1) {
1717     $self->{parse_error}-> (type => 'unclosed DOCTYPE');
1718 wakaba 1.1 $self->{state} = 'data';
1719     ## reconsume
1720    
1721 wakaba 1.18 delete $self->{current_token}->{correct};
1722     return ($self->{current_token}); # DOCTYPE
1723 wakaba 1.1
1724     redo A;
1725     } else {
1726 wakaba 1.18 $self->{current_token}->{name}
1727     .= chr ($self->{next_input_character}); # DOCTYPE
1728     ## Stay in the state
1729 wakaba 1.1
1730     if (@{$self->{char}}) {
1731     $self->{next_input_character} = shift @{$self->{char}};
1732     } else {
1733     $self->{set_next_input_character}->($self);
1734     }
1735    
1736     redo A;
1737     }
1738 wakaba 1.18 } elsif ($self->{state} eq 'after DOCTYPE name') {
1739 wakaba 1.1 if ($self->{next_input_character} == 0x0009 or # HT
1740     $self->{next_input_character} == 0x000A or # LF
1741     $self->{next_input_character} == 0x000B or # VT
1742     $self->{next_input_character} == 0x000C or # FF
1743     $self->{next_input_character} == 0x0020) { # SP
1744 wakaba 1.18 ## Stay in the state
1745 wakaba 1.1
1746     if (@{$self->{char}}) {
1747     $self->{next_input_character} = shift @{$self->{char}};
1748     } else {
1749     $self->{set_next_input_character}->($self);
1750     }
1751    
1752     redo A;
1753     } elsif ($self->{next_input_character} == 0x003E) { # >
1754     $self->{state} = 'data';
1755    
1756     if (@{$self->{char}}) {
1757     $self->{next_input_character} = shift @{$self->{char}};
1758     } else {
1759     $self->{set_next_input_character}->($self);
1760     }
1761    
1762    
1763     return ($self->{current_token}); # DOCTYPE
1764    
1765     redo A;
1766 wakaba 1.18 } elsif ($self->{next_input_character} == -1) {
1767     $self->{parse_error}-> (type => 'unclosed DOCTYPE');
1768     $self->{state} = 'data';
1769     ## reconsume
1770    
1771     delete $self->{current_token}->{correct};
1772     return ($self->{current_token}); # DOCTYPE
1773    
1774     redo A;
1775     } elsif ($self->{next_input_character} == 0x0050 or # P
1776     $self->{next_input_character} == 0x0070) { # p
1777    
1778     if (@{$self->{char}}) {
1779     $self->{next_input_character} = shift @{$self->{char}};
1780     } else {
1781     $self->{set_next_input_character}->($self);
1782     }
1783    
1784     if ($self->{next_input_character} == 0x0055 or # U
1785     $self->{next_input_character} == 0x0075) { # u
1786    
1787     if (@{$self->{char}}) {
1788     $self->{next_input_character} = shift @{$self->{char}};
1789     } else {
1790     $self->{set_next_input_character}->($self);
1791     }
1792    
1793     if ($self->{next_input_character} == 0x0042 or # B
1794     $self->{next_input_character} == 0x0062) { # b
1795    
1796     if (@{$self->{char}}) {
1797     $self->{next_input_character} = shift @{$self->{char}};
1798     } else {
1799     $self->{set_next_input_character}->($self);
1800     }
1801    
1802     if ($self->{next_input_character} == 0x004C or # L
1803     $self->{next_input_character} == 0x006C) { # l
1804    
1805     if (@{$self->{char}}) {
1806     $self->{next_input_character} = shift @{$self->{char}};
1807     } else {
1808     $self->{set_next_input_character}->($self);
1809     }
1810    
1811     if ($self->{next_input_character} == 0x0049 or # I
1812     $self->{next_input_character} == 0x0069) { # i
1813    
1814     if (@{$self->{char}}) {
1815     $self->{next_input_character} = shift @{$self->{char}};
1816     } else {
1817     $self->{set_next_input_character}->($self);
1818     }
1819    
1820     if ($self->{next_input_character} == 0x0043 or # C
1821     $self->{next_input_character} == 0x0063) { # c
1822     $self->{state} = 'before DOCTYPE public identifier';
1823    
1824     if (@{$self->{char}}) {
1825     $self->{next_input_character} = shift @{$self->{char}};
1826     } else {
1827     $self->{set_next_input_character}->($self);
1828     }
1829    
1830     redo A;
1831     }
1832     }
1833     }
1834     }
1835     }
1836    
1837     #
1838     } elsif ($self->{next_input_character} == 0x0053 or # S
1839     $self->{next_input_character} == 0x0073) { # s
1840    
1841     if (@{$self->{char}}) {
1842     $self->{next_input_character} = shift @{$self->{char}};
1843     } else {
1844     $self->{set_next_input_character}->($self);
1845     }
1846    
1847     if ($self->{next_input_character} == 0x0059 or # Y
1848     $self->{next_input_character} == 0x0079) { # y
1849    
1850     if (@{$self->{char}}) {
1851     $self->{next_input_character} = shift @{$self->{char}};
1852     } else {
1853     $self->{set_next_input_character}->($self);
1854     }
1855    
1856     if ($self->{next_input_character} == 0x0053 or # S
1857     $self->{next_input_character} == 0x0073) { # s
1858    
1859     if (@{$self->{char}}) {
1860     $self->{next_input_character} = shift @{$self->{char}};
1861     } else {
1862     $self->{set_next_input_character}->($self);
1863     }
1864    
1865     if ($self->{next_input_character} == 0x0054 or # T
1866     $self->{next_input_character} == 0x0074) { # t
1867    
1868     if (@{$self->{char}}) {
1869     $self->{next_input_character} = shift @{$self->{char}};
1870     } else {
1871     $self->{set_next_input_character}->($self);
1872     }
1873    
1874     if ($self->{next_input_character} == 0x0045 or # E
1875     $self->{next_input_character} == 0x0065) { # e
1876    
1877     if (@{$self->{char}}) {
1878     $self->{next_input_character} = shift @{$self->{char}};
1879     } else {
1880     $self->{set_next_input_character}->($self);
1881     }
1882    
1883     if ($self->{next_input_character} == 0x004D or # M
1884     $self->{next_input_character} == 0x006D) { # m
1885     $self->{state} = 'before DOCTYPE system identifier';
1886    
1887     if (@{$self->{char}}) {
1888     $self->{next_input_character} = shift @{$self->{char}};
1889     } else {
1890     $self->{set_next_input_character}->($self);
1891     }
1892    
1893     redo A;
1894     }
1895     }
1896     }
1897     }
1898     }
1899    
1900     #
1901     } else {
1902    
1903     if (@{$self->{char}}) {
1904     $self->{next_input_character} = shift @{$self->{char}};
1905     } else {
1906     $self->{set_next_input_character}->($self);
1907     }
1908    
1909     #
1910     }
1911    
1912     $self->{parse_error}-> (type => 'string after DOCTYPE name');
1913     $self->{state} = 'bogus DOCTYPE';
1914     # next-input-character is already done
1915     redo A;
1916     } elsif ($self->{state} eq 'before DOCTYPE public identifier') {
1917     if ({
1918     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
1919     #0x000D => 1, # HT, LF, VT, FF, SP, CR
1920     }->{$self->{next_input_character}}) {
1921 wakaba 1.1 ## Stay in the state
1922    
1923     if (@{$self->{char}}) {
1924     $self->{next_input_character} = shift @{$self->{char}};
1925     } else {
1926     $self->{set_next_input_character}->($self);
1927     }
1928    
1929     redo A;
1930 wakaba 1.18 } elsif ($self->{next_input_character} eq 0x0022) { # "
1931     $self->{current_token}->{public_identifier} = ''; # DOCTYPE
1932     $self->{state} = 'DOCTYPE public identifier (double-quoted)';
1933    
1934     if (@{$self->{char}}) {
1935     $self->{next_input_character} = shift @{$self->{char}};
1936     } else {
1937     $self->{set_next_input_character}->($self);
1938     }
1939    
1940     redo A;
1941     } elsif ($self->{next_input_character} eq 0x0027) { # '
1942     $self->{current_token}->{public_identifier} = ''; # DOCTYPE
1943     $self->{state} = 'DOCTYPE public identifier (single-quoted)';
1944    
1945     if (@{$self->{char}}) {
1946     $self->{next_input_character} = shift @{$self->{char}};
1947     } else {
1948     $self->{set_next_input_character}->($self);
1949     }
1950    
1951     redo A;
1952     } elsif ($self->{next_input_character} eq 0x003E) { # >
1953     $self->{parse_error}-> (type => 'no PUBLIC literal');
1954    
1955     $self->{state} = 'data';
1956    
1957     if (@{$self->{char}}) {
1958     $self->{next_input_character} = shift @{$self->{char}};
1959     } else {
1960     $self->{set_next_input_character}->($self);
1961     }
1962    
1963    
1964     delete $self->{current_token}->{correct};
1965     return ($self->{current_token}); # DOCTYPE
1966    
1967     redo A;
1968 wakaba 1.1 } elsif ($self->{next_input_character} == -1) {
1969 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed DOCTYPE');
1970 wakaba 1.18
1971 wakaba 1.1 $self->{state} = 'data';
1972     ## reconsume
1973    
1974 wakaba 1.18 delete $self->{current_token}->{correct};
1975     return ($self->{current_token}); # DOCTYPE
1976 wakaba 1.1
1977     redo A;
1978     } else {
1979 wakaba 1.18 $self->{parse_error}-> (type => 'string after PUBLIC');
1980     $self->{state} = 'bogus DOCTYPE';
1981    
1982     if (@{$self->{char}}) {
1983     $self->{next_input_character} = shift @{$self->{char}};
1984     } else {
1985     $self->{set_next_input_character}->($self);
1986     }
1987    
1988     redo A;
1989     }
1990     } elsif ($self->{state} eq 'DOCTYPE public identifier (double-quoted)') {
1991     if ($self->{next_input_character} == 0x0022) { # "
1992     $self->{state} = 'after DOCTYPE public identifier';
1993    
1994     if (@{$self->{char}}) {
1995     $self->{next_input_character} = shift @{$self->{char}};
1996     } else {
1997     $self->{set_next_input_character}->($self);
1998     }
1999    
2000     redo A;
2001     } elsif ($self->{next_input_character} == -1) {
2002     $self->{parse_error}-> (type => 'unclosed PUBLIC literal');
2003    
2004     $self->{state} = 'data';
2005     ## reconsume
2006    
2007     delete $self->{current_token}->{correct};
2008     return ($self->{current_token}); # DOCTYPE
2009    
2010     redo A;
2011     } else {
2012     $self->{current_token}->{public_identifier} # DOCTYPE
2013     .= chr $self->{next_input_character};
2014     ## Stay in the state
2015    
2016     if (@{$self->{char}}) {
2017     $self->{next_input_character} = shift @{$self->{char}};
2018     } else {
2019     $self->{set_next_input_character}->($self);
2020     }
2021    
2022     redo A;
2023     }
2024     } elsif ($self->{state} eq 'DOCTYPE public identifier (single-quoted)') {
2025     if ($self->{next_input_character} == 0x0027) { # '
2026     $self->{state} = 'after DOCTYPE public identifier';
2027    
2028     if (@{$self->{char}}) {
2029     $self->{next_input_character} = shift @{$self->{char}};
2030     } else {
2031     $self->{set_next_input_character}->($self);
2032     }
2033    
2034     redo A;
2035     } elsif ($self->{next_input_character} == -1) {
2036     $self->{parse_error}-> (type => 'unclosed PUBLIC literal');
2037    
2038     $self->{state} = 'data';
2039     ## reconsume
2040    
2041     delete $self->{current_token}->{correct};
2042     return ($self->{current_token}); # DOCTYPE
2043    
2044     redo A;
2045     } else {
2046     $self->{current_token}->{public_identifier} # DOCTYPE
2047     .= chr $self->{next_input_character};
2048     ## Stay in the state
2049    
2050     if (@{$self->{char}}) {
2051     $self->{next_input_character} = shift @{$self->{char}};
2052     } else {
2053     $self->{set_next_input_character}->($self);
2054     }
2055    
2056     redo A;
2057     }
2058     } elsif ($self->{state} eq 'after DOCTYPE public identifier') {
2059     if ({
2060     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2061     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2062     }->{$self->{next_input_character}}) {
2063 wakaba 1.1 ## Stay in the state
2064    
2065     if (@{$self->{char}}) {
2066     $self->{next_input_character} = shift @{$self->{char}};
2067     } else {
2068     $self->{set_next_input_character}->($self);
2069     }
2070    
2071     redo A;
2072 wakaba 1.18 } elsif ($self->{next_input_character} == 0x0022) { # "
2073     $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2074     $self->{state} = 'DOCTYPE system identifier (double-quoted)';
2075    
2076     if (@{$self->{char}}) {
2077     $self->{next_input_character} = shift @{$self->{char}};
2078     } else {
2079     $self->{set_next_input_character}->($self);
2080     }
2081    
2082     redo A;
2083     } elsif ($self->{next_input_character} == 0x0027) { # '
2084     $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2085     $self->{state} = 'DOCTYPE system identifier (single-quoted)';
2086    
2087     if (@{$self->{char}}) {
2088     $self->{next_input_character} = shift @{$self->{char}};
2089     } else {
2090     $self->{set_next_input_character}->($self);
2091     }
2092    
2093     redo A;
2094     } elsif ($self->{next_input_character} == 0x003E) { # >
2095     $self->{state} = 'data';
2096    
2097     if (@{$self->{char}}) {
2098     $self->{next_input_character} = shift @{$self->{char}};
2099     } else {
2100     $self->{set_next_input_character}->($self);
2101     }
2102    
2103    
2104     return ($self->{current_token}); # DOCTYPE
2105    
2106     redo A;
2107     } elsif ($self->{next_input_character} == -1) {
2108     $self->{parse_error}-> (type => 'unclosed DOCTYPE');
2109    
2110     $self->{state} = 'data';
2111 wakaba 1.26 ## reconsume
2112 wakaba 1.18
2113     delete $self->{current_token}->{correct};
2114     return ($self->{current_token}); # DOCTYPE
2115    
2116     redo A;
2117     } else {
2118     $self->{parse_error}-> (type => 'string after PUBLIC literal');
2119     $self->{state} = 'bogus DOCTYPE';
2120    
2121     if (@{$self->{char}}) {
2122     $self->{next_input_character} = shift @{$self->{char}};
2123     } else {
2124     $self->{set_next_input_character}->($self);
2125     }
2126    
2127     redo A;
2128 wakaba 1.1 }
2129 wakaba 1.18 } elsif ($self->{state} eq 'before DOCTYPE system identifier') {
2130     if ({
2131     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2132     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2133     }->{$self->{next_input_character}}) {
2134 wakaba 1.1 ## Stay in the state
2135    
2136     if (@{$self->{char}}) {
2137     $self->{next_input_character} = shift @{$self->{char}};
2138     } else {
2139     $self->{set_next_input_character}->($self);
2140     }
2141    
2142     redo A;
2143 wakaba 1.18 } elsif ($self->{next_input_character} == 0x0022) { # "
2144     $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2145     $self->{state} = 'DOCTYPE system identifier (double-quoted)';
2146    
2147     if (@{$self->{char}}) {
2148     $self->{next_input_character} = shift @{$self->{char}};
2149     } else {
2150     $self->{set_next_input_character}->($self);
2151     }
2152    
2153     redo A;
2154     } elsif ($self->{next_input_character} == 0x0027) { # '
2155     $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2156     $self->{state} = 'DOCTYPE system identifier (single-quoted)';
2157    
2158     if (@{$self->{char}}) {
2159     $self->{next_input_character} = shift @{$self->{char}};
2160     } else {
2161     $self->{set_next_input_character}->($self);
2162     }
2163    
2164     redo A;
2165 wakaba 1.1 } elsif ($self->{next_input_character} == 0x003E) { # >
2166 wakaba 1.18 $self->{parse_error}-> (type => 'no SYSTEM literal');
2167 wakaba 1.1 $self->{state} = 'data';
2168    
2169     if (@{$self->{char}}) {
2170     $self->{next_input_character} = shift @{$self->{char}};
2171     } else {
2172     $self->{set_next_input_character}->($self);
2173     }
2174    
2175    
2176 wakaba 1.18 delete $self->{current_token}->{correct};
2177 wakaba 1.1 return ($self->{current_token}); # DOCTYPE
2178    
2179     redo A;
2180     } elsif ($self->{next_input_character} == -1) {
2181 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed DOCTYPE');
2182 wakaba 1.18
2183     $self->{state} = 'data';
2184 wakaba 1.26 ## reconsume
2185 wakaba 1.18
2186     delete $self->{current_token}->{correct};
2187     return ($self->{current_token}); # DOCTYPE
2188    
2189     redo A;
2190     } else {
2191 wakaba 1.30 $self->{parse_error}-> (type => 'string after SYSTEM');
2192 wakaba 1.18 $self->{state} = 'bogus DOCTYPE';
2193    
2194     if (@{$self->{char}}) {
2195     $self->{next_input_character} = shift @{$self->{char}};
2196     } else {
2197     $self->{set_next_input_character}->($self);
2198     }
2199    
2200     redo A;
2201     }
2202     } elsif ($self->{state} eq 'DOCTYPE system identifier (double-quoted)') {
2203     if ($self->{next_input_character} == 0x0022) { # "
2204     $self->{state} = 'after DOCTYPE system identifier';
2205    
2206     if (@{$self->{char}}) {
2207     $self->{next_input_character} = shift @{$self->{char}};
2208     } else {
2209     $self->{set_next_input_character}->($self);
2210     }
2211    
2212     redo A;
2213     } elsif ($self->{next_input_character} == -1) {
2214     $self->{parse_error}-> (type => 'unclosed SYSTEM literal');
2215    
2216 wakaba 1.1 $self->{state} = 'data';
2217     ## reconsume
2218    
2219 wakaba 1.18 delete $self->{current_token}->{correct};
2220 wakaba 1.1 return ($self->{current_token}); # DOCTYPE
2221    
2222     redo A;
2223     } else {
2224 wakaba 1.18 $self->{current_token}->{system_identifier} # DOCTYPE
2225     .= chr $self->{next_input_character};
2226     ## Stay in the state
2227    
2228     if (@{$self->{char}}) {
2229     $self->{next_input_character} = shift @{$self->{char}};
2230     } else {
2231     $self->{set_next_input_character}->($self);
2232     }
2233    
2234     redo A;
2235     }
2236     } elsif ($self->{state} eq 'DOCTYPE system identifier (single-quoted)') {
2237     if ($self->{next_input_character} == 0x0027) { # '
2238     $self->{state} = 'after DOCTYPE system identifier';
2239    
2240     if (@{$self->{char}}) {
2241     $self->{next_input_character} = shift @{$self->{char}};
2242     } else {
2243     $self->{set_next_input_character}->($self);
2244     }
2245    
2246     redo A;
2247     } elsif ($self->{next_input_character} == -1) {
2248     $self->{parse_error}-> (type => 'unclosed SYSTEM literal');
2249    
2250     $self->{state} = 'data';
2251     ## reconsume
2252    
2253     delete $self->{current_token}->{correct};
2254     return ($self->{current_token}); # DOCTYPE
2255    
2256     redo A;
2257     } else {
2258     $self->{current_token}->{system_identifier} # DOCTYPE
2259     .= chr $self->{next_input_character};
2260     ## Stay in the state
2261    
2262     if (@{$self->{char}}) {
2263     $self->{next_input_character} = shift @{$self->{char}};
2264     } else {
2265     $self->{set_next_input_character}->($self);
2266     }
2267    
2268     redo A;
2269     }
2270     } elsif ($self->{state} eq 'after DOCTYPE system identifier') {
2271     if ({
2272     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2273     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2274     }->{$self->{next_input_character}}) {
2275     ## Stay in the state
2276    
2277     if (@{$self->{char}}) {
2278     $self->{next_input_character} = shift @{$self->{char}};
2279     } else {
2280     $self->{set_next_input_character}->($self);
2281     }
2282    
2283     redo A;
2284     } elsif ($self->{next_input_character} == 0x003E) { # >
2285     $self->{state} = 'data';
2286    
2287     if (@{$self->{char}}) {
2288     $self->{next_input_character} = shift @{$self->{char}};
2289     } else {
2290     $self->{set_next_input_character}->($self);
2291     }
2292    
2293    
2294     return ($self->{current_token}); # DOCTYPE
2295    
2296     redo A;
2297     } elsif ($self->{next_input_character} == -1) {
2298     $self->{parse_error}-> (type => 'unclosed DOCTYPE');
2299    
2300     $self->{state} = 'data';
2301 wakaba 1.26 ## reconsume
2302 wakaba 1.18
2303     delete $self->{current_token}->{correct};
2304     return ($self->{current_token}); # DOCTYPE
2305    
2306     redo A;
2307     } else {
2308     $self->{parse_error}-> (type => 'string after SYSTEM literal');
2309 wakaba 1.1 $self->{state} = 'bogus DOCTYPE';
2310    
2311     if (@{$self->{char}}) {
2312     $self->{next_input_character} = shift @{$self->{char}};
2313     } else {
2314     $self->{set_next_input_character}->($self);
2315     }
2316    
2317     redo A;
2318     }
2319     } elsif ($self->{state} eq 'bogus DOCTYPE') {
2320     if ($self->{next_input_character} == 0x003E) { # >
2321     $self->{state} = 'data';
2322    
2323     if (@{$self->{char}}) {
2324     $self->{next_input_character} = shift @{$self->{char}};
2325     } else {
2326     $self->{set_next_input_character}->($self);
2327     }
2328    
2329    
2330 wakaba 1.18 delete $self->{current_token}->{correct};
2331 wakaba 1.1 return ($self->{current_token}); # DOCTYPE
2332    
2333     redo A;
2334     } elsif ($self->{next_input_character} == -1) {
2335 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed DOCTYPE');
2336 wakaba 1.1 $self->{state} = 'data';
2337     ## reconsume
2338    
2339 wakaba 1.18 delete $self->{current_token}->{correct};
2340 wakaba 1.1 return ($self->{current_token}); # DOCTYPE
2341    
2342     redo A;
2343     } else {
2344     ## Stay in the state
2345    
2346     if (@{$self->{char}}) {
2347     $self->{next_input_character} = shift @{$self->{char}};
2348     } else {
2349     $self->{set_next_input_character}->($self);
2350     }
2351    
2352     redo A;
2353     }
2354     } else {
2355     die "$0: $self->{state}: Unknown state";
2356     }
2357     } # A
2358    
2359     die "$0: _get_next_token: unexpected case";
2360     } # _get_next_token
2361    
## Build the character token for a numeric character reference whose
## number is |$code|.
##
## Code points that must not be produced are replaced, with a parse
## error reported through the |parse_error| callback:
##   - U+0000 and surrogates (U+D800..U+DFFF) -> U+FFFD
##   - values greater than U+10FFFF           -> U+FFFD
##   - U+000D (CR)                            -> U+000A (LF)
##   - U+0080..U+009F                         -> looked up in |$c1_entity_char|
##
## Returns a hash reference {type => 'character', data => $string}.
sub _tokenize_code_point_to_character_token {
  my ($self, $code) = @_;

  if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) {
    $self->{parse_error}-> (type => sprintf 'invalid character reference:U+%04X', $code);
    $code = 0xFFFD;
  } elsif ($code > 0x10FFFF) {
    $self->{parse_error}-> (type => sprintf 'invalid character reference:U-%08X', $code);
    $code = 0xFFFD;
  } elsif ($code == 0x000D) {
    $self->{parse_error}-> (type => 'CR character reference');
    $code = 0x000A;
  } elsif (0x80 <= $code and $code <= 0x9F) {
    $self->{parse_error}-> (type => sprintf 'C1 character reference:U+%04X', $code);
    $code = $c1_entity_char->{$code};
  }

  return {type => 'character', data => chr $code};
} # _tokenize_code_point_to_character_token

## Attempt to consume a character reference.  On entry
## |$self->{next_input_character}| is the character that follows the
## "&" in the input.
##
## Arguments:
##   $self    - the tokenizer; this method reads and updates
##              |next_input_character| and |char| and invokes the
##              |set_next_input_character| and |parse_error| callbacks.
##   $in_attr - true if the reference occurs in an attribute value; it
##              only affects a named reference that lacks the
##              terminating ";" and is followed by more name characters.
##
## Returns a character token ({type => 'character', data => ...}) if
## something has been consumed, or |undef| if nothing is consumed (any
## characters read ahead are pushed back in that case).
sub _tokenize_attempt_to_consume_an_entity ($$) {
  my ($self, $in_attr) = @_;

  ## Replace the next input character by its successor: taken from the
  ## |char| queue if it is not empty, otherwise supplied by the
  ## |set_next_input_character| callback.
  my $advance = sub {
    if (@{$self->{char}}) {
      $self->{next_input_character} = shift @{$self->{char}};
    } else {
      $self->{set_next_input_character}->($self);
    }
  };

  if ({
    0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, # HT, LF, VT, FF,
    0x0020 => 1, 0x003C => 1, 0x0026 => 1, -1 => 1, # SP, <, & # 0x000D # CR
  }->{$self->{next_input_character}}) {
    ## Don't consume
    ## No error
    return undef;
  } elsif ($self->{next_input_character} == 0x0023) { # #
    $advance->();

    if ($self->{next_input_character} == 0x0078 or # x
        $self->{next_input_character} == 0x0058) { # X
      ## Hexadecimal character reference.  The "x" is remembered so
      ## that it can be pushed back if no digit follows.
      my $x_char = $self->{next_input_character};
      $advance->();

      my $code; # stays undefined until at least one digit is seen
      HEX: while (1) {
        my $c = $self->{next_input_character};
        my $digit;
        if (0x0030 <= $c and $c <= 0x0039) { # 0..9
          $digit = $c - 0x0030;
        } elsif (0x0061 <= $c and $c <= 0x0066) { # a..f
          $digit = $c - 0x0061 + 10;
        } elsif (0x0041 <= $c and $c <= 0x0046) { # A..F
          $digit = $c - 0x0041 + 10;
        } else {
          last HEX;
        }
        $code = ($code || 0) * 0x10 + $digit;
        $advance->();
      } # HEX

      unless (defined $code) { # no hexadecimal digit
        $self->{parse_error}-> (type => 'bare hcro');
        ## Unconsume "x" and the character after it; "#" becomes the
        ## next input character again.
        unshift @{$self->{char}}, ($x_char, $self->{next_input_character});
        $self->{next_input_character} = 0x0023; # #
        return undef;
      }

      if ($self->{next_input_character} == 0x003B) { # ;
        $advance->();
      } else {
        $self->{parse_error}-> (type => 'no refc');
      }

      return _tokenize_code_point_to_character_token ($self, $code);
    } elsif (0x0030 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x0039) { # 0..9
      ## Decimal character reference.
      my $code = 0;
      while (0x0030 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x0039) { # 0..9
        $code = $code * 10 + $self->{next_input_character} - 0x0030;
        $advance->();
      }

      if ($self->{next_input_character} == 0x003B) { # ;
        $advance->();
      } else {
        $self->{parse_error}-> (type => 'no refc');
      }

      return _tokenize_code_point_to_character_token ($self, $code);
    } else {
      $self->{parse_error}-> (type => 'bare nero');
      ## Unconsume the character after "#"; "#" becomes the next input
      ## character again.
      unshift @{$self->{char}}, ($self->{next_input_character});
      $self->{next_input_character} = 0x0023; # #
      return undef;
    }
  } elsif ((0x0041 <= $self->{next_input_character} and # A
            $self->{next_input_character} <= 0x005A) or # Z
           (0x0061 <= $self->{next_input_character} and # a
            $self->{next_input_character} <= 0x007A)) { # z
    ## Named character reference.
    my $entity_name = chr $self->{next_input_character};
    $advance->();

    ## |$value| is the text to emit; |$match| records how it was found:
    ##    0  - no entity name has matched so far;
    ##    1  - an entity name terminated by ";" matched;
    ##   -1  - an entity name without ";" matched and nothing has been
    ##         read after it;
    ##  < -1 - an entity name without ";" matched and further name
    ##         characters have been read after it.
    my $value = $entity_name;
    my $match = 0;
    require Whatpm::_NamedEntityList;
    our $EntityChar;

    while (length ($entity_name) < 10 and
           ## NOTE: Some number greater than the maximum length of entity name
           ((0x0041 <= $self->{next_input_character} and # A
             $self->{next_input_character} <= 0x005A) or # Z
            (0x0061 <= $self->{next_input_character} and # a
             $self->{next_input_character} <= 0x007A) or # z
            (0x0030 <= $self->{next_input_character} and # 0
             $self->{next_input_character} <= 0x0039) or # 9
            $self->{next_input_character} == 0x003B)) { # ;
      my $c = $self->{next_input_character};
      $entity_name .= chr $c;
      $advance->();

      if (defined $EntityChar->{$entity_name}) {
        $value = $EntityChar->{$entity_name};
        if ($c == 0x003B) { # ;
          $match = 1;
          last;
        }
        $match = -1;
      } else {
        $value .= chr $c;
        $match *= 2;
      }
    }

    if ($match > 0) {
      return {type => 'character', data => $value};
    } elsif ($match < 0) {
      $self->{parse_error}-> (type => 'no refc');
      if ($in_attr and $match < -1) {
        ## In an attribute value, a name without ";" that is followed
        ## by more name characters is left as literal text.
        return {type => 'character', data => '&'.$entity_name};
      } else {
        return {type => 'character', data => $value};
      }
    } else {
      $self->{parse_error}-> (type => 'bare ero');
      ## NOTE: No characters are consumed in the spec.
      return {type => 'character', data => '&'.$value};
    }
  } else {
    ## no characters are consumed
    $self->{parse_error}-> (type => 'bare ero');
    return undef;
  }
} # _tokenize_attempt_to_consume_an_entity
2585    
## Prepare the document for the tree construction stage: strict error
## checking is switched off and the document is flagged as an HTML
## document.
sub _initialize_tree_constructor ($) {
  my ($self) = @_;
  ## NOTE: $self->{document} MUST be specified before this method is called
  my $doc = $self->{document};
  $doc->strict_error_checking (0);
  ## TODO: Turn mutation events off # MUST
  ## TODO: Turn loose Document option (manakai extension) on
  $doc->manakai_is_html (1); # MUST
} # _initialize_tree_constructor
2594    
## Finish the tree construction stage: strict error checking of the
## document is switched on again.
sub _terminate_tree_constructor ($) {
  my ($self) = @_;
  my $doc = $self->{document};
  $doc->strict_error_checking (1);
  ## TODO: Turn mutation events on
} # _terminate_tree_constructor
2600    
2601     ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
2602    
2603 wakaba 1.3 { # tree construction stage
2604     my $token;
2605    
## Run the tree construction stage: fetch the first token, reset the
## tree construction state kept in |$self|, then run the initial, root
## element and main phases in this order.
sub _construct_tree ($) {
  my $self = shift;

  ## NOTE: When an interactive UA makes $self->{document} available to
  ## the user, and when it begins accepting user input, are not defined.

  ## NOTE: Appending a character means collecting it together with all
  ## the consecutive characters that follow and inserting one Text node
  ## whose data is their concatenation. # MUST

  $token = $self->_get_next_token;

  ## Reset the state.
  $self->{insertion_mode} = 'before head';
  $self->{open_elements} = [];
  undef $self->{$_} for qw(form_element head_element inner_html_node);

  $self->_tree_construction_initial; # MUST
  $self->_tree_construction_root_element;
  $self->_tree_construction_main;
} # _construct_tree
2629    
## Tree construction: the initial phase.
##
## Skips leading white space, appends leading comments to the document
## and processes the DOCTYPE token, if any: a DocumentType node is
## appended to the document and the document's compatibility mode
## ("quirks" or "limited quirks") is chosen from the DOCTYPE name,
## public identifier and system identifier.  A start tag, end tag,
## end-of-file or non-white-space character token instead of a
## DOCTYPE is a "no DOCTYPE" parse error and selects quirks mode.
##
## Works on the file-scoped |$token|.  When the sub returns, |$token|
## is the token to be processed by the root element phase.  Dies on a
## token of unknown type.
sub _tree_construction_initial ($) {
  my $self = shift;
  INITIAL: {
    if ($token->{type} eq 'DOCTYPE') {
      ## NOTE: Conformance checkers MAY, instead of reporting "not HTML5"
      ## error, switch to a conformance checking mode for another
      ## language.
      my $doctype_name = $token->{name};
      $doctype_name = '' unless defined $doctype_name;
      $doctype_name =~ tr/a-z/A-Z/;
      if (not defined $token->{name} or # <!DOCTYPE>
          defined $token->{public_identifier} or
          defined $token->{system_identifier}) {
        $self->{parse_error}-> (type => 'not HTML5');
      } elsif ($doctype_name ne 'HTML') {
        ## ISSUE: ASCII case-insensitive? (in fact it does not matter)
        $self->{parse_error}-> (type => 'not HTML5');
      }

      my $doctype = $self->{document}->create_document_type_definition
          ($token->{name}); ## ISSUE: If name is missing (e.g. <!DOCTYPE>)?
      $doctype->public_id ($token->{public_identifier})
          if defined $token->{public_identifier};
      $doctype->system_id ($token->{system_identifier})
          if defined $token->{system_identifier};
      ## NOTE: Other DocumentType attributes are null or empty lists.
      ## ISSUE: internalSubset = null??
      $self->{document}->append_child ($doctype);

      if (not $token->{correct} or $doctype_name ne 'HTML') {
        $self->{document}->manakai_compat_mode ('quirks');
      } elsif (defined $token->{public_identifier}) {
        ## Public identifiers are compared ASCII case-insensitively:
        ## the identifier is upper-cased here, so every literal it is
        ## compared with below MUST be written in upper case.
        my $pubid = $token->{public_identifier};
        $pubid =~ tr/a-z/A-Z/;
        if ({
          "+//SILMARIL//DTD HTML PRO V0R11 19970101//EN" => 1,
          "-//ADVASOFT LTD//DTD HTML 3.0 ASWEDIT + EXTENSIONS//EN" => 1,
          "-//AS//DTD HTML 3.0 ASWEDIT + EXTENSIONS//EN" => 1,
          "-//IETF//DTD HTML 2.0 LEVEL 1//EN" => 1,
          "-//IETF//DTD HTML 2.0 LEVEL 2//EN" => 1,
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 1//EN" => 1,
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 2//EN" => 1,
          "-//IETF//DTD HTML 2.0 STRICT//EN" => 1,
          "-//IETF//DTD HTML 2.0//EN" => 1,
          "-//IETF//DTD HTML 2.1E//EN" => 1,
          "-//IETF//DTD HTML 3.0//EN" => 1,
          "-//IETF//DTD HTML 3.0//EN//" => 1,
          "-//IETF//DTD HTML 3.2 FINAL//EN" => 1,
          "-//IETF//DTD HTML 3.2//EN" => 1,
          "-//IETF//DTD HTML 3//EN" => 1,
          "-//IETF//DTD HTML LEVEL 0//EN" => 1,
          "-//IETF//DTD HTML LEVEL 0//EN//2.0" => 1,
          "-//IETF//DTD HTML LEVEL 1//EN" => 1,
          "-//IETF//DTD HTML LEVEL 1//EN//2.0" => 1,
          "-//IETF//DTD HTML LEVEL 2//EN" => 1,
          "-//IETF//DTD HTML LEVEL 2//EN//2.0" => 1,
          "-//IETF//DTD HTML LEVEL 3//EN" => 1,
          "-//IETF//DTD HTML LEVEL 3//EN//3.0" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 0//EN" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 0//EN//2.0" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 1//EN" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 1//EN//2.0" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 2//EN" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 2//EN//2.0" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 3//EN" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 3//EN//3.0" => 1,
          "-//IETF//DTD HTML STRICT//EN" => 1,
          "-//IETF//DTD HTML STRICT//EN//2.0" => 1,
          "-//IETF//DTD HTML STRICT//EN//3.0" => 1,
          "-//IETF//DTD HTML//EN" => 1,
          "-//IETF//DTD HTML//EN//2.0" => 1,
          "-//IETF//DTD HTML//EN//3.0" => 1,
          "-//METRIUS//DTD METRIUS PRESENTATIONAL//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML STRICT//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 TABLES//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML STRICT//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 TABLES//EN" => 1,
          "-//NETSCAPE COMM. CORP.//DTD HTML//EN" => 1,
          "-//NETSCAPE COMM. CORP.//DTD STRICT HTML//EN" => 1,
          "-//O'REILLY AND ASSOCIATES//DTD HTML 2.0//EN" => 1,
          "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED 1.0//EN" => 1,
          "-//SPYGLASS//DTD HTML 2.0 EXTENDED//EN" => 1,
          "-//SQ//DTD HTML 2.0 HOTMETAL + EXTENSIONS//EN" => 1,
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA HTML//EN" => 1,
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA STRICT HTML//EN" => 1,
          "-//W3C//DTD HTML 3 1995-03-24//EN" => 1,
          "-//W3C//DTD HTML 3.2 DRAFT//EN" => 1,
          "-//W3C//DTD HTML 3.2 FINAL//EN" => 1,
          "-//W3C//DTD HTML 3.2//EN" => 1,
          "-//W3C//DTD HTML 3.2S DRAFT//EN" => 1,
          "-//W3C//DTD HTML 4.0 FRAMESET//EN" => 1,
          "-//W3C//DTD HTML 4.0 TRANSITIONAL//EN" => 1,
          "-//W3C//DTD HTML EXPERIMENTAL 19960712//EN" => 1,
          "-//W3C//DTD HTML EXPERIMENTAL 970421//EN" => 1,
          "-//W3C//DTD W3 HTML//EN" => 1,
          "-//W3O//DTD W3 HTML 3.0//EN" => 1,
          "-//W3O//DTD W3 HTML 3.0//EN//" => 1,
          "-//W3O//DTD W3 HTML STRICT 3.0//EN//" => 1,
          "-//WEBTECHS//DTD MOZILLA HTML 2.0//EN" => 1,
          "-//WEBTECHS//DTD MOZILLA HTML//EN" => 1,
          "-/W3C/DTD HTML 4.0 TRANSITIONAL/EN" => 1,
          "HTML" => 1,
        }->{$pubid}) {
          $self->{document}->manakai_compat_mode ('quirks');
        } elsif ($pubid eq "-//W3C//DTD HTML 4.01 FRAMESET//EN" or
                 $pubid eq "-//W3C//DTD HTML 4.01 TRANSITIONAL//EN") {
          ## HTML 4.01 Frameset/Transitional: limited quirks when a
          ## system identifier is given, quirks when it is missing.
          if (defined $token->{system_identifier}) {
            $self->{document}->manakai_compat_mode ('limited quirks');
          } else {
            $self->{document}->manakai_compat_mode ('quirks');
          }
        } elsif ($pubid eq "-//W3C//DTD XHTML 1.0 FRAMESET//EN" or
                 $pubid eq "-//W3C//DTD XHTML 1.0 TRANSITIONAL//EN") {
          $self->{document}->manakai_compat_mode ('limited quirks');
        }
      }
      if (defined $token->{system_identifier}) {
        ## The system identifier is lower-cased before comparison, so
        ## the literal below MUST be written in lower case.
        my $sysid = $token->{system_identifier};
        $sysid =~ tr/A-Z/a-z/;
        if ($sysid eq "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
          $self->{document}->manakai_compat_mode ('quirks');
        }
      }

      ## Go to the root element phase.
      $token = $self->_get_next_token;
      return;
    } elsif ({
              'start tag' => 1,
              'end tag' => 1,
              'end-of-file' => 1,
             }->{$token->{type}}) {
      $self->{parse_error}-> (type => 'no DOCTYPE');
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the root element phase
      ## reprocess
      return;
    } elsif ($token->{type} eq 'character') {
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D
        ## Ignore the leading white space of the token

        unless (length $token->{data}) {
          ## Nothing but white space: stay in the phase
          $token = $self->_get_next_token;
          redo INITIAL;
        }
      }

      $self->{parse_error}-> (type => 'no DOCTYPE');
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the root element phase
      ## reprocess
      return;
    } elsif ($token->{type} eq 'comment') {
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);

      ## Stay in the phase.
      $token = $self->_get_next_token;
      redo INITIAL;
    } else {
      die "$0: $token->{type}: Unknown token";
    }
  } # INITIAL
} # _tree_construction_initial
2797    
## Tree construction: the root element phase.
##
## Consumes tokens from the file-scoped |$token| until one is found
## that has to be processed by the main phase:
##   - DOCTYPE tokens are parse errors and are dropped;
##   - comments are appended to the document;
##   - leading white space of a character token is dropped.
## Then the |html| root element is created, appended to the document
## and pushed onto the stack of open elements.  The token that ended
## the scan is left in |$token| to be reprocessed.  Dies on a token of
## unknown type.
sub _tree_construction_root_element ($) {
  my ($self) = @_;

  SCAN: while (1) {
    my $type = $token->{type};

    if ($type eq 'DOCTYPE') {
      $self->{parse_error}-> (type => 'in html:#DOCTYPE');
      ## Ignore the token and stay in the phase
    } elsif ($type eq 'comment') {
      $self->{document}->append_child
          ($self->{document}->create_comment ($token->{data}));
      ## Stay in the phase
    } elsif ($type eq 'character') {
      ## Leading white space is ignored.  Anything that remains (or a
      ## token that had no leading white space at all) is reprocessed
      ## in the main phase.
      my $stripped = ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//); # \x0D
      last SCAN unless $stripped and not length $token->{data};
      ## Stay in the phase
    } elsif ($type eq 'start tag' or
             $type eq 'end tag' or
             $type eq 'end-of-file') {
      ## ISSUE: There is an issue in the spec
      last SCAN;
    } else {
      die "$0: $token->{type}: Unknown token";
    }

    $token = $self->_get_next_token;
  } # SCAN

  my $html_el = $self->{document}->create_element_ns
      (q<http://www.w3.org/1999/xhtml>, [undef, 'html']);
  $self->{document}->append_child ($html_el);
  push @{$self->{open_elements}}, [$html_el, 'html'];

  ## reprocess
  return; ## Go to the main phase.
} # _tree_construction_root_element
2846    
## Resets |$self->{insertion_mode}| according to the stack of open
## elements (|$self->{open_elements}|, a list of [node, tag name]
## pairs), walking from the current node towards the root.
##
## When the first node in the stack is reached and
## |$self->{inner_html_node}| is defined and is neither |td| nor |th|,
## that context element is examined instead of the node.
sub _reset_insertion_mode ($) {
  my ($self) = @_;
  my $stack = $self->{open_elements};

  ## Step 4..13: tag name -> insertion mode
  my %mode_of = (
    select => 'in select',
    td => 'in cell',
    th => 'in cell',
    tr => 'in row',
    tbody => 'in table body',
    thead => 'in table head',
    tfoot => 'in table foot',
    caption => 'in caption',
    colgroup => 'in column group',
    table => 'in table',
    head => 'in body', # not in head!
    body => 'in body',
    frameset => 'in frameset',
  );

  ## Step 1
  my $is_last;

  ## Step 2, 16, 17: start at the current node and move up
  for (my $pos = -1; ; $pos--) {
    my $node = $stack->[$pos];

    ## Step 3
    ## ISSUE: Oops! "If node is the first node in the stack of open
    ## elements, then set last to true. If the context element of the
    ## HTML fragment parsing algorithm is neither a td element nor a
    ## th element, then set node to the context element. (fragment case)":
    ## The second "if" is in the scope of the first "if"!?
    if ($stack->[0]->[0] eq $node->[0]) {
      $is_last = 1;
      my $context = $self->{inner_html_node};
      if (defined $context) {
        $node = $context
            unless $context->[1] eq 'td' or $context->[1] eq 'th';
      }
    }

    ## Step 4..13
    my $mode = $mode_of{$node->[1]};
    if (defined $mode) {
      $self->{insertion_mode} = $mode;
      return;
    }

    ## Step 14
    if ($node->[1] eq 'html') {
      $self->{insertion_mode}
          = defined $self->{head_element} ? 'after head' : 'before head';
      return;
    }

    ## Step 15
    if ($is_last) {
      $self->{insertion_mode} = 'in body';
      return;
    }
  }
} # _reset_insertion_mode
2915    
2916     sub _tree_construction_main ($) {
2917     my $self = shift;
2918    
2919 wakaba 1.35 my $previous_insertion_mode;
2920 wakaba 1.1
2921     my $active_formatting_elements = [];
2922    
## Reconstructs the active formatting elements.
##
## Argument: a code reference that inserts a node into the tree.
##
## Nothing is done if the list of active formatting elements is empty,
## or if its last entry is a marker or is on the stack of open
## elements.  Otherwise the list is rewound to the entry after the
## last marker / last entry that is on the stack (or to the first
## entry), and from there on each entry is cloned; the clone is
## inserted via the given code reference, pushed onto the stack of
## open elements and put in the list in place of the original entry.
my $reconstruct_active_formatting_elements = sub { # MUST
  my ($insert) = @_;

  ## True if the given node is on the stack of open elements.
  my $is_open = sub {
    my $el = shift;
    for my $open (@{$self->{open_elements}}) {
      return 1 if $el eq $open->[0];
    }
    return 0;
  };

  ## Step 1
  return unless @$active_formatting_elements;

  ## Step 3
  ## NOTE: The position is a negative index, counted from the end of
  ## the list.
  my $pos = -1;
  my $entry = $active_formatting_elements->[$pos];

  ## Step 2
  return if $entry->[0] eq '#marker';
  return if $is_open->($entry->[0]);

  ## Step 4: stop rewinding at the first entry of the list
  until ($active_formatting_elements->[0]->[0] eq $entry->[0]) {
    ## Step 5
    $pos--;
    $entry = $active_formatting_elements->[$pos];

    ## Step 6: keep rewinding unless this is a marker or an open element
    next unless $entry->[0] eq '#marker' or $is_open->($entry->[0]);

    ## Step 7
    $pos++;
    $entry = $active_formatting_elements->[$pos];
    last;
  }

  while (1) {
    ## Step 8
    my $clone = [$entry->[0]->clone_node (0), $entry->[1]];

    ## Step 9
    $insert->($clone->[0]);
    push @{$self->{open_elements}}, $clone;

    ## Step 10
    $active_formatting_elements->[$pos] = $self->{open_elements}->[-1];

    ## Step 11
    last if $clone->[0] eq $active_formatting_elements->[-1]->[0];

    ## Step 7'
    $pos++;
    $entry = $active_formatting_elements->[$pos];
  }
}; # $reconstruct_active_formatting_elements
2993    
## Clears the list of active formatting elements up to the last
## marker: the last '#marker' entry and everything after it are
## removed.  The list is left untouched if it contains no marker.
my $clear_up_to_marker = sub {
  for (my $idx = $#$active_formatting_elements; $idx >= 0; $idx--) {
    next unless $active_formatting_elements->[$idx]->[0] eq '#marker';
    splice @$active_formatting_elements, $idx;
    last;
  }
  return;
}; # $clear_up_to_marker
3002    
## Generic CDATA / RCDATA element parsing for the start tag in |$token|.
##
## Arguments:
##   $content_model_flag  CDATA_CONTENT_MODEL or RCDATA_CONTENT_MODEL
##   $insert              code reference that inserts the new element
##
## Creates the element with the attributes of the start tag, inserts
## it, collects all following character tokens under the given content
## model into a single Text node child and then expects the matching
## end tag (anything else is a parse error).  |$token| is advanced to
## the token after the one that ended the character run.  Dies if the
## content model flag is neither of the two values above and the end
## tag does not match.
my $parse_rcdata = sub ($$) {
  my ($content_model_flag, $insert) = @_;

  ## Step 1
  my $start_tag_name = $token->{tag_name};
  my $el = $self->{document}->create_element_ns
      (q<http://www.w3.org/1999/xhtml>, [undef, $start_tag_name]);
  for my $name (keys %{ $token->{attributes}}) {
    $el->set_attribute_ns
        (undef, [undef, $name], $token->{attributes}->{$name}->{value});
  }

  ## Step 2
  $insert->($el); # /context node/->append_child ($el)

  ## Step 3
  $self->{content_model} = $content_model_flag; # CDATA or RCDATA
  delete $self->{escape}; # MUST

  ## Step 4
  my $data = '';
  for ($token = $self->_get_next_token;
       $token->{type} eq 'character'; # or until stop tokenizing
       $token = $self->_get_next_token) {
    $data .= $token->{data};
  }

  ## Step 5
  $el->append_child ($self->{document}->create_text_node ($data))
      if length $data;

  ## Step 6
  $self->{content_model} = PCDATA_CONTENT_MODEL;

  ## Step 7
  my $is_matching_end_tag = ($token->{type} eq 'end tag' and
                             $token->{tag_name} eq $start_tag_name);
  unless ($is_matching_end_tag) {
    if ($content_model_flag == CDATA_CONTENT_MODEL) {
      $self->{parse_error}-> (type => 'in CDATA:#'.$token->{type});
    } elsif ($content_model_flag == RCDATA_CONTENT_MODEL) {
      $self->{parse_error}-> (type => 'in RCDATA:#'.$token->{type});
    } else {
      die "$0: $content_model_flag in parse_rcdata";
    }
  } ## else: Ignore the token

  $token = $self->_get_next_token;
}; # $parse_rcdata
3055 wakaba 1.1
## Processes a |script| start tag held in |$token|.
##
## Argument: a code reference that inserts the new element.
##
## Creates the |script| element with the attributes of the start tag,
## collects the following character tokens (tokenized under the CDATA
## content model) as the element's text and expects the |script| end
## tag; anything else is a parse error.  The element is inserted only
## when |$self->{inner_html_node}| is not defined.  |$token| is
## advanced to the token after the one that ended the character run.
my $script_start_tag = sub ($) {
  my ($insert) = @_;

  my $script_el = $self->{document}->create_element_ns
      (q<http://www.w3.org/1999/xhtml>, [undef, 'script']);
  for my $name (keys %{ $token->{attributes}}) {
    $script_el->set_attribute_ns
        (undef, [undef, $name], $token->{attributes}->{$name}->{value});
  }

  ## TODO: mark as "parser-inserted"

  $self->{content_model} = CDATA_CONTENT_MODEL;
  delete $self->{escape}; # MUST

  ## Collect character tokens; stop at a non-character token or when
  ## the tokenizer stops tokenising.
  my $script_text = '';
  for ($token = $self->_get_next_token;
       $token->{type} eq 'character';
       $token = $self->_get_next_token) {
    $script_text .= $token->{data};
  }
  $script_el->manakai_append_text ($script_text) if length $script_text;

  $self->{content_model} = PCDATA_CONTENT_MODEL;

  unless ($token->{type} eq 'end tag' and
          $token->{tag_name} eq 'script') {
    $self->{parse_error}-> (type => 'in CDATA:#'.$token->{type});
    ## ISSUE: And ignore?
    ## TODO: mark as "already executed"
  } ## else: Ignore the token

  unless (defined $self->{inner_html_node}) {
    ## TODO: $old_insertion_point = current insertion point
    ## TODO: insertion point = just before the next input character

    $insert->($script_el);

    ## TODO: insertion point = $old_insertion_point (might be "undefined")

    ## TODO: if there is a script that will execute as soon as the parser resume, then...
  } else {
    ## TODO: mark as "already executed"
  }

  $token = $self->_get_next_token;
}; # $script_start_tag
3109    
## Handles an end tag whose tag name is that of a formatting element
## (the "adoption agency" steps).
##
## Argument: the tag name.
##
## Works on |$active_formatting_elements| and |$self->{open_elements}|
## (both lists of [node, tag name] pairs; markers in the former are
## entries whose first item is '#marker') and on the category tables
## |$formatting_category|, |$special_category| and |$scoping_category|
## of the enclosing scope.  On every exit path |$token| has been
## advanced to the next token.
my $formatting_end_tag = sub {
  my $tag_name = shift;

  FET: {
    ## Step 1: the formatting element is the last entry with the given
    ## tag name after the last marker, if any.
    my $formatting_element;
    my $formatting_element_i_in_active;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[1] eq $tag_name) {
        $formatting_element = $active_formatting_elements->[$_];
        $formatting_element_i_in_active = $_;
        last AFE;
      } elsif ($active_formatting_elements->[$_]->[0] eq '#marker') {
        last AFE;
      }
    } # AFE
    unless (defined $formatting_element) {
      $self->{parse_error}-> (type => 'unmatched end tag:'.$tag_name);
      ## Ignore the token
      $token = $self->_get_next_token;
      return;
    }
    ## has an element in scope
    my $in_scope = 1;
    my $formatting_element_i_in_open;
    INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if ($node->[0] eq $formatting_element->[0]) {
        if ($in_scope) {
          $formatting_element_i_in_open = $_;
          last INSCOPE;
        } else { # in open elements but not in scope
          $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
          ## Ignore the token
          $token = $self->_get_next_token;
          return;
        }
      } elsif ({
                table => 1, caption => 1, td => 1, th => 1,
                button => 1, marquee => 1, object => 1, html => 1,
               }->{$node->[1]}) {
        $in_scope = 0;
      }
    } # INSCOPE
    unless (defined $formatting_element_i_in_open) {
      ## The formatting element is not on the stack of open elements:
      ## remove it from the list of active formatting elements.
      ## NOTE: It is not necessarily the last entry of the list, so
      ## it has to be removed by index rather than by |pop|.
      $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      $token = $self->_get_next_token; ## TODO: ok?
      return;
    }
    if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
      $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
    }

    ## Step 2: the furthest block is the topmost node below the
    ## formatting element in the stack that is special or scoping and
    ## is not a formatting element.
    my $furthest_block;
    my $furthest_block_i_in_open;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if (not $formatting_category->{$node->[1]} and
          #not $phrasing_category->{$node->[1]} and
          ($special_category->{$node->[1]} or
           $scoping_category->{$node->[1]})) {
        $furthest_block = $node;
        $furthest_block_i_in_open = $_;
      } elsif ($node->[0] eq $formatting_element->[0]) {
        last OE;
      }
    } # OE

    ## Step 3: no furthest block; pop up to and including the
    ## formatting element and drop it from the active list.
    unless (defined $furthest_block) { # MUST
      splice @{$self->{open_elements}}, $formatting_element_i_in_open;
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      $token = $self->_get_next_token;
      return;
    }

    ## Step 4
    my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1];

    ## Step 5
    my $furthest_block_parent = $furthest_block->[0]->parent_node;
    if (defined $furthest_block_parent) {
      $furthest_block_parent->remove_child ($furthest_block->[0]);
    }

    ## Step 6: the bookmark is remembered as the node of the entry
    ## that precedes the formatting element in the active list.
    my $bookmark_prev_el
        = $active_formatting_elements->[$formatting_element_i_in_active - 1]
            ->[0];

    ## Step 7
    my $node = $furthest_block;
    my $node_i_in_open = $furthest_block_i_in_open;
    my $last_node = $furthest_block;
    S7: {
      ## Step 1
      $node_i_in_open--;
      $node = $self->{open_elements}->[$node_i_in_open];

      ## Step 2: a node that is not in the active list is removed from
      ## the stack and the previous node is examined.
      my $node_i_in_active;
      S7S2: {
        for (reverse 0..$#$active_formatting_elements) {
          if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
            $node_i_in_active = $_;
            last S7S2;
          }
        }
        splice @{$self->{open_elements}}, $node_i_in_open, 1;
        redo S7;
      } # S7S2

      ## Step 3
      last S7 if $node->[0] eq $formatting_element->[0];

      ## Step 4
      if ($last_node->[0] eq $furthest_block->[0]) {
        $bookmark_prev_el = $node->[0];
      }

      ## Step 5: a node that has children is replaced by a shallow
      ## clone, both in the active list and on the stack.
      if ($node->[0]->has_child_nodes ()) {
        my $clone = [$node->[0]->clone_node (0), $node->[1]];
        $active_formatting_elements->[$node_i_in_active] = $clone;
        $self->{open_elements}->[$node_i_in_open] = $clone;
        $node = $clone;
      }

      ## Step 6
      $node->[0]->append_child ($last_node->[0]);

      ## Step 7
      $last_node = $node;

      ## Step 8
      redo S7;
    } # S7

    ## Step 8
    $common_ancestor_node->[0]->append_child ($last_node->[0]);

    ## Step 9
    my $clone = [$formatting_element->[0]->clone_node (0),
                 $formatting_element->[1]];

    ## Step 10
    ## NOTE: The child list is copied first, since the children are
    ## moved while iterating.
    my @cn = @{$furthest_block->[0]->child_nodes};
    $clone->[0]->append_child ($_) for @cn;

    ## Step 11
    $furthest_block->[0]->append_child ($clone->[0]);

    ## Step 12: remove the formatting element from the active list and
    ## insert the clone after the bookmark.
    my $i;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
        splice @$active_formatting_elements, $_, 1;
        $i-- and last AFE if defined $i;
      } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
        $i = $_;
      }
    } # AFE
    splice @$active_formatting_elements, $i + 1, 0, $clone;

    ## Step 13: remove the formatting element from the stack of open
    ## elements and insert the clone immediately after the furthest
    ## block.
    ## NOTE: The clone is inserted (length 0), as in Step 12; it must
    ## not overwrite the entry that follows the furthest block.
    undef $i;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
        splice @{$self->{open_elements}}, $_, 1;
        $i-- and last OE if defined $i;
      } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
        $i = $_;
      }
    } # OE
    splice @{$self->{open_elements}}, $i + 1, 0, $clone;

    ## Step 14
    redo FET;
  } # FET
}; # $formatting_end_tag
3292    
## Inserts the given node as the last child of the current node (the
## last entry on the stack of open elements).
my $insert_to_current = sub {
  my $current_node = $self->{open_elements}->[-1]->[0];
  $current_node->append_child ($_[0]);
}; # $insert_to_current
3296    
## Inserts the given node, foster-parenting it when the current node
## is a table-structure element (|table|, |tbody|, |tfoot|, |thead| or
## |tr|).
##
## In the foster-parenting case the last |table| on the stack of open
## elements is looked up.  If it has a parent whose node type is 1,
## the node is inserted into that parent just before the table;
## otherwise it is appended to the stack entry preceding the table.
## If there is no |table| on the stack, the node is appended to the
## first entry of the stack.  In any other case the node is simply
## appended to the current node.
my $insert_to_foster = sub {
  my ($child) = @_;
  my $stack = $self->{open_elements};
  my %needs_foster_parent = (
    table => 1, tbody => 1, tfoot => 1,
    thead => 1, tr => 1,
  );

  unless ($needs_foster_parent{$stack->[-1]->[1]}) {
    return $stack->[-1]->[0]->append_child ($child);
  }

  # MUST
  my ($foster_parent, $before);
  for my $idx (reverse 0..$#$stack) {
    next unless $stack->[$idx]->[1] eq 'table';

    my $table_el = $stack->[$idx]->[0];
    my $table_parent = $table_el->parent_node;
    if (defined $table_parent and $table_parent->node_type == 1) {
      ($foster_parent, $before) = ($table_parent, $table_el);
    } else {
      $foster_parent = $stack->[$idx - 1]->[0];
    }
    last;
  }
  $foster_parent = $stack->[0]->[0] unless defined $foster_parent;

  ## NOTE: With an undefined reference child this is an append.
  $foster_parent->insert_before ($child, $before);
}; # $insert_to_foster
3327    
3328     my $in_body = sub {
3329     my $insert = shift;
3330     if ($token->{type} eq 'start tag') {
3331     if ($token->{tag_name} eq 'script') {
3332 wakaba 1.25 ## NOTE: This is an "as if in head" code clone
3333     $script_start_tag->($insert);
3334 wakaba 1.1 return;
3335     } elsif ($token->{tag_name} eq 'style') {
3336 wakaba 1.25 ## NOTE: This is an "as if in head" code clone
3337 wakaba 1.41 $parse_rcdata->(CDATA_CONTENT_MODEL, $insert);
3338 wakaba 1.1 return;
3339     } elsif ({
3340 wakaba 1.35 base => 1, link => 1,
3341 wakaba 1.1 }->{$token->{tag_name}}) {
3342 wakaba 1.25 ## NOTE: This is an "as if in head" code clone, only "-t" differs
3343 wakaba 1.1
3344 wakaba 1.25 {
3345     my $el;
3346    
3347 wakaba 1.1 $el = $self->{document}->create_element_ns
3348     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3349    
3350 wakaba 1.25 for my $attr_name (keys %{ $token->{attributes}}) {
3351 wakaba 1.1 $el->set_attribute_ns (undef, [undef, $attr_name],
3352 wakaba 1.25 $token->{attributes} ->{$attr_name}->{value});
3353 wakaba 1.1 }
3354    
3355 wakaba 1.25 $insert->($el);
3356     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3357     }
3358    
3359     pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
3360 wakaba 1.1 $token = $self->_get_next_token;
3361     return;
3362 wakaba 1.34 } elsif ($token->{tag_name} eq 'meta') {
3363     ## NOTE: This is an "as if in head" code clone, only "-t" differs
3364    
3365     {
3366     my $el;
3367    
3368     $el = $self->{document}->create_element_ns
3369     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3370    
3371     for my $attr_name (keys %{ $token->{attributes}}) {
3372     $el->set_attribute_ns (undef, [undef, $attr_name],
3373     $token->{attributes} ->{$attr_name}->{value});
3374     }
3375    
3376     $insert->($el);
3377     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3378     }
3379    
3380     pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
3381    
3382     unless ($self->{confident}) {
3383     my $charset;
3384     if ($token->{attributes}->{charset}) { ## TODO: And if supported
3385     $charset = $token->{attributes}->{charset}->{value};
3386     }
3387     if ($token->{attributes}->{'http-equiv'}) {
3388 wakaba 1.35 ## ISSUE: Algorithm name in the spec was incorrect so that not linked to the definition.
3389 wakaba 1.34 if ($token->{attributes}->{'http-equiv'}->{value}
3390     =~ /\A[^;]*;[\x09-\x0D\x20]*charset[\x09-\x0D\x20]*=
3391     [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
3392     ([^"'\x09-\x0D\x20][^\x09-\x0D\x20]*))/x) {
3393     $charset = defined $1 ? $1 : defined $2 ? $2 : $3;
3394     } ## TODO: And if supported
3395     }
3396     ## TODO: Change the encoding
3397     }
3398    
3399     $token = $self->_get_next_token;
3400     return;
3401 wakaba 1.1 } elsif ($token->{tag_name} eq 'title') {
3402 wakaba 1.3 $self->{parse_error}-> (type => 'in body:title');
3403 wakaba 1.25 ## NOTE: This is an "as if in head" code clone
3404 wakaba 1.41 $parse_rcdata->(RCDATA_CONTENT_MODEL, sub {
3405 wakaba 1.31 if (defined $self->{head_element}) {
3406     $self->{head_element}->append_child ($_[0]);
3407     } else {
3408     $insert->($_[0]);
3409     }
3410     });
3411 wakaba 1.1 return;
3412     } elsif ($token->{tag_name} eq 'body') {
3413 wakaba 1.3 $self->{parse_error}-> (type => 'in body:body');
3414 wakaba 1.1
3415 wakaba 1.3 if (@{$self->{open_elements}} == 1 or
3416     $self->{open_elements}->[1]->[1] ne 'body') {
3417 wakaba 1.1 ## Ignore the token
3418     } else {
3419 wakaba 1.3 my $body_el = $self->{open_elements}->[1]->[0];
3420 wakaba 1.1 for my $attr_name (keys %{$token->{attributes}}) {
3421     unless ($body_el->has_attribute_ns (undef, $attr_name)) {
3422     $body_el->set_attribute_ns
3423     (undef, [undef, $attr_name],
3424     $token->{attributes}->{$attr_name}->{value});
3425     }
3426     }
3427     }
3428     $token = $self->_get_next_token;
3429     return;
3430     } elsif ({
3431     address => 1, blockquote => 1, center => 1, dir => 1,
3432     div => 1, dl => 1, fieldset => 1, listing => 1,
3433     menu => 1, ol => 1, p => 1, ul => 1,
3434     pre => 1,
3435     }->{$token->{tag_name}}) {
3436     ## has a p element in scope
3437 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
3438 wakaba 1.1 if ($_->[1] eq 'p') {
3439     unshift @{$self->{token}}, $token;
3440     $token = {type => 'end tag', tag_name => 'p'};
3441     return;
3442     } elsif ({
3443     table => 1, caption => 1, td => 1, th => 1,
3444     button => 1, marquee => 1, object => 1, html => 1,
3445     }->{$_->[1]}) {
3446     last INSCOPE;
3447     }
3448     } # INSCOPE
3449    
3450    
3451     {
3452     my $el;
3453    
3454     $el = $self->{document}->create_element_ns
3455     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3456    
3457     for my $attr_name (keys %{ $token->{attributes}}) {
3458     $el->set_attribute_ns (undef, [undef, $attr_name],
3459     $token->{attributes} ->{$attr_name}->{value});
3460     }
3461    
3462     $insert->($el);
3463 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3464 wakaba 1.1 }
3465    
3466     if ($token->{tag_name} eq 'pre') {
3467     $token = $self->_get_next_token;
3468     if ($token->{type} eq 'character') {
3469     $token->{data} =~ s/^\x0A//;
3470     unless (length $token->{data}) {
3471     $token = $self->_get_next_token;
3472     }
3473     }
3474     } else {
3475     $token = $self->_get_next_token;
3476     }
3477     return;
3478     } elsif ($token->{tag_name} eq 'form') {
3479 wakaba 1.3 if (defined $self->{form_element}) {
3480     $self->{parse_error}-> (type => 'in form:form');
3481 wakaba 1.1 ## Ignore the token
3482 wakaba 1.7 $token = $self->_get_next_token;
3483     return;
3484 wakaba 1.1 } else {
3485     ## has a p element in scope
3486 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
3487 wakaba 1.1 if ($_->[1] eq 'p') {
3488     unshift @{$self->{token}}, $token;
3489     $token = {type => 'end tag', tag_name => 'p'};
3490     return;
3491     } elsif ({
3492     table => 1, caption => 1, td => 1, th => 1,
3493     button => 1, marquee => 1, object => 1, html => 1,
3494     }->{$_->[1]}) {
3495     last INSCOPE;
3496     }
3497     } # INSCOPE
3498    
3499    
3500     {
3501     my $el;
3502    
3503     $el = $self->{document}->create_element_ns
3504     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3505    
3506     for my $attr_name (keys %{ $token->{attributes}}) {
3507     $el->set_attribute_ns (undef, [undef, $attr_name],
3508     $token->{attributes} ->{$attr_name}->{value});
3509     }
3510    
3511     $insert->($el);
3512 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3513 wakaba 1.1 }
3514    
3515 wakaba 1.3 $self->{form_element} = $self->{open_elements}->[-1]->[0];
3516 wakaba 1.1 $token = $self->_get_next_token;
3517     return;
3518     }
3519     } elsif ($token->{tag_name} eq 'li') {
3520     ## has a p element in scope
3521 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
3522 wakaba 1.1 if ($_->[1] eq 'p') {
3523     unshift @{$self->{token}}, $token;
3524     $token = {type => 'end tag', tag_name => 'p'};
3525     return;
3526     } elsif ({
3527     table => 1, caption => 1, td => 1, th => 1,
3528     button => 1, marquee => 1, object => 1, html => 1,
3529     }->{$_->[1]}) {
3530     last INSCOPE;
3531     }
3532     } # INSCOPE
3533    
3534     ## Step 1
3535     my $i = -1;
3536 wakaba 1.3 my $node = $self->{open_elements}->[$i];
3537 wakaba 1.1 LI: {
3538     ## Step 2
3539     if ($node->[1] eq 'li') {
3540 wakaba 1.8 if ($i != -1) {
3541     $self->{parse_error}-> (type => 'end tag missing:'.
3542     $self->{open_elements}->[-1]->[1]);
3543     }
3544 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3545 wakaba 1.1 last LI;
3546     }
3547    
3548     ## Step 3
3549     if (not $formatting_category->{$node->[1]} and
3550     #not $phrasing_category->{$node->[1]} and
3551     ($special_category->{$node->[1]} or
3552     $scoping_category->{$node->[1]}) and
3553     $node->[1] ne 'address' and $node->[1] ne 'div') {
3554     last LI;
3555     }
3556    
3557     ## Step 4
3558     $i--;
3559 wakaba 1.3 $node = $self->{open_elements}->[$i];
3560 wakaba 1.1 redo LI;
3561     } # LI
3562    
3563    
3564     {
3565     my $el;
3566    
3567     $el = $self->{document}->create_element_ns
3568     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3569    
3570     for my $attr_name (keys %{ $token->{attributes}}) {
3571     $el->set_attribute_ns (undef, [undef, $attr_name],
3572     $token->{attributes} ->{$attr_name}->{value});
3573     }
3574    
3575     $insert->($el);
3576 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3577 wakaba 1.1 }
3578    
3579     $token = $self->_get_next_token;
3580     return;
3581     } elsif ($token->{tag_name} eq 'dd' or $token->{tag_name} eq 'dt') {
3582     ## has a p element in scope
3583 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
3584 wakaba 1.1 if ($_->[1] eq 'p') {
3585     unshift @{$self->{token}}, $token;
3586     $token = {type => 'end tag', tag_name => 'p'};
3587     return;
3588     } elsif ({
3589     table => 1, caption => 1, td => 1, th => 1,
3590     button => 1, marquee => 1, object => 1, html => 1,
3591     }->{$_->[1]}) {
3592     last INSCOPE;
3593     }
3594     } # INSCOPE
3595    
3596     ## Step 1
3597     my $i = -1;
3598 wakaba 1.3 my $node = $self->{open_elements}->[$i];
3599 wakaba 1.1 LI: {
3600     ## Step 2
3601     if ($node->[1] eq 'dt' or $node->[1] eq 'dd') {
3602 wakaba 1.8 if ($i != -1) {
3603     $self->{parse_error}-> (type => 'end tag missing:'.
3604     $self->{open_elements}->[-1]->[1]);
3605     }
3606 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3607 wakaba 1.1 last LI;
3608     }
3609    
3610     ## Step 3
3611     if (not $formatting_category->{$node->[1]} and
3612     #not $phrasing_category->{$node->[1]} and
3613     ($special_category->{$node->[1]} or
3614     $scoping_category->{$node->[1]}) and
3615     $node->[1] ne 'address' and $node->[1] ne 'div') {
3616     last LI;
3617     }
3618    
3619     ## Step 4
3620     $i--;
3621 wakaba 1.3 $node = $self->{open_elements}->[$i];
3622 wakaba 1.1 redo LI;
3623     } # LI
3624    
3625    
3626     {
3627     my $el;
3628    
3629     $el = $self->{document}->create_element_ns
3630     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3631    
3632     for my $attr_name (keys %{ $token->{attributes}}) {
3633     $el->set_attribute_ns (undef, [undef, $attr_name],
3634     $token->{attributes} ->{$attr_name}->{value});
3635     }
3636    
3637     $insert->($el);
3638 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3639 wakaba 1.1 }
3640    
3641     $token = $self->_get_next_token;
3642     return;
3643     } elsif ($token->{tag_name} eq 'plaintext') {
3644     ## has a p element in scope
3645 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
3646 wakaba 1.1 if ($_->[1] eq 'p') {
3647     unshift @{$self->{token}}, $token;
3648     $token = {type => 'end tag', tag_name => 'p'};
3649     return;
3650     } elsif ({
3651     table => 1, caption => 1, td => 1, th => 1,
3652     button => 1, marquee => 1, object => 1, html => 1,
3653     }->{$_->[1]}) {
3654     last INSCOPE;
3655     }
3656     } # INSCOPE
3657    
3658    
3659     {
3660     my $el;
3661    
3662     $el = $self->{document}->create_element_ns
3663     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3664    
3665     for my $attr_name (keys %{ $token->{attributes}}) {
3666     $el->set_attribute_ns (undef, [undef, $attr_name],
3667     $token->{attributes} ->{$attr_name}->{value});
3668     }
3669    
3670     $insert->($el);
3671 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3672 wakaba 1.1 }
3673    
3674    
3675 wakaba 1.41 $self->{content_model} = PLAINTEXT_CONTENT_MODEL;
3676 wakaba 1.1
3677     $token = $self->_get_next_token;
3678     return;
3679     } elsif ({
3680     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
3681     }->{$token->{tag_name}}) {
3682     ## has a p element in scope
3683 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3684     my $node = $self->{open_elements}->[$_];
3685 wakaba 1.1 if ($node->[1] eq 'p') {
3686     unshift @{$self->{token}}, $token;
3687     $token = {type => 'end tag', tag_name => 'p'};
3688     return;
3689     } elsif ({
3690     table => 1, caption => 1, td => 1, th => 1,
3691     button => 1, marquee => 1, object => 1, html => 1,
3692     }->{$node->[1]}) {
3693     last INSCOPE;
3694     }
3695     } # INSCOPE
3696    
3697 wakaba 1.23 ## NOTE: See <http://html5.org/tools/web-apps-tracker?from=925&to=926>
3698 wakaba 1.1 ## has an element in scope
3699 wakaba 1.23 #my $i;
3700     #INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3701     # my $node = $self->{open_elements}->[$_];
3702     # if ({
3703     # h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
3704     # }->{$node->[1]}) {
3705     # $i = $_;
3706     # last INSCOPE;
3707     # } elsif ({
3708     # table => 1, caption => 1, td => 1, th => 1,
3709     # button => 1, marquee => 1, object => 1, html => 1,
3710     # }->{$node->[1]}) {
3711     # last INSCOPE;
3712     # }
3713     #} # INSCOPE
3714     #
3715     #if (defined $i) {
3716     # !!! parse-error (type => 'in hn:hn');
3717     # splice @{$self->{open_elements}}, $i;
3718     #}
3719 wakaba 1.1
3720    
3721     {
3722     my $el;
3723    
3724     $el = $self->{document}->create_element_ns
3725     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3726    
3727     for my $attr_name (keys %{ $token->{attributes}}) {
3728     $el->set_attribute_ns (undef, [undef, $attr_name],
3729     $token->{attributes} ->{$attr_name}->{value});
3730     }
3731    
3732     $insert->($el);
3733 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3734 wakaba 1.1 }
3735    
3736    
3737     $token = $self->_get_next_token;
3738     return;
3739     } elsif ($token->{tag_name} eq 'a') {
3740     AFE: for my $i (reverse 0..$#$active_formatting_elements) {
3741     my $node = $active_formatting_elements->[$i];
3742     if ($node->[1] eq 'a') {
3743 wakaba 1.3 $self->{parse_error}-> (type => 'in a:a');
3744 wakaba 1.1
3745     unshift @{$self->{token}}, $token;
3746     $token = {type => 'end tag', tag_name => 'a'};
3747     $formatting_end_tag->($token->{tag_name});
3748    
3749     AFE2: for (reverse 0..$#$active_formatting_elements) {
3750     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
3751     splice @$active_formatting_elements, $_, 1;
3752     last AFE2;
3753     }
3754     } # AFE2
3755 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3756     if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
3757     splice @{$self->{open_elements}}, $_, 1;
3758 wakaba 1.1 last OE;
3759     }
3760     } # OE
3761     last AFE;
3762     } elsif ($node->[0] eq '#marker') {
3763     last AFE;
3764     }
3765     } # AFE
3766    
3767     $reconstruct_active_formatting_elements->($insert_to_current);
3768    
3769    
3770     {
3771     my $el;
3772    
3773     $el = $self->{document}->create_element_ns
3774     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3775    
3776     for my $attr_name (keys %{ $token->{attributes}}) {
3777     $el->set_attribute_ns (undef, [undef, $attr_name],
3778     $token->{attributes} ->{$attr_name}->{value});
3779     }
3780    
3781     $insert->($el);
3782 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3783 wakaba 1.1 }
3784    
3785 wakaba 1.3 push @$active_formatting_elements, $self->{open_elements}->[-1];
3786 wakaba 1.1
3787     $token = $self->_get_next_token;
3788     return;
3789     } elsif ({
3790     b => 1, big => 1, em => 1, font => 1, i => 1,
3791 wakaba 1.19 s => 1, small => 1, strike => 1,
3792 wakaba 1.1 strong => 1, tt => 1, u => 1,
3793     }->{$token->{tag_name}}) {
3794     $reconstruct_active_formatting_elements->($insert_to_current);
3795     ## Formatting element start tag: insert the element, then append its
3796     ## open_elements entry to the list of active formatting elements.
3797     {
3798     my $el;
3799    
3800     $el = $self->{document}->create_element_ns
3801     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3802    
3803     for my $attr_name (keys %{ $token->{attributes}}) {
3804     $el->set_attribute_ns (undef, [undef, $attr_name],
3805     $token->{attributes} ->{$attr_name}->{value});
3806     }
3807    
3808     $insert->($el);
3809 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3810 wakaba 1.1 }
3811    
3812 wakaba 1.3 push @$active_formatting_elements, $self->{open_elements}->[-1];
3813 wakaba 1.1
3814     $token = $self->_get_next_token;
3815     return;
3816 wakaba 1.19 } elsif ($token->{tag_name} eq 'nobr') {
3817     $reconstruct_active_formatting_elements->($insert_to_current);
3818    
3819     ## has a |nobr| element in scope
3820     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3821     my $node = $self->{open_elements}->[$_];
3822     if ($node->[1] eq 'nobr') {
3823 wakaba 1.31 $self->{parse_error}-> (type => 'not closed:nobr');
3824 wakaba 1.19 unshift @{$self->{token}}, $token;
3825     $token = {type => 'end tag', tag_name => 'nobr'};
3826     return;
3827     } elsif ({
3828     table => 1, caption => 1, td => 1, th => 1,
3829     button => 1, marquee => 1, object => 1, html => 1,
3830     }->{$node->[1]}) {
3831     last INSCOPE;
3832     }
3833     } # INSCOPE
3834    
3835    
3836     {
3837     my $el;
3838    
3839     $el = $self->{document}->create_element_ns
3840     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3841    
3842     for my $attr_name (keys %{ $token->{attributes}}) {
3843     $el->set_attribute_ns (undef, [undef, $attr_name],
3844     $token->{attributes} ->{$attr_name}->{value});
3845     }
3846    
3847     $insert->($el);
3848     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3849     }
3850    
3851     push @$active_formatting_elements, $self->{open_elements}->[-1];
3852    
3853     $token = $self->_get_next_token;
3854     return;
3855 wakaba 1.1 } elsif ($token->{tag_name} eq 'button') {
3856     ## has a button element in scope
3857 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3858     my $node = $self->{open_elements}->[$_];
3859 wakaba 1.1 if ($node->[1] eq 'button') {
3860 wakaba 1.3 $self->{parse_error}-> (type => 'in button:button');
3861 wakaba 1.1 unshift @{$self->{token}}, $token;
3862     $token = {type => 'end tag', tag_name => 'button'};
3863     return;
3864     } elsif ({
3865     table => 1, caption => 1, td => 1, th => 1,
3866     button => 1, marquee => 1, object => 1, html => 1,
3867     }->{$node->[1]}) {
3868     last INSCOPE;
3869     }
3870     } # INSCOPE
3871    
3872     $reconstruct_active_formatting_elements->($insert_to_current);
3873    
3874    
3875     {
3876     my $el;
3877    
3878     $el = $self->{document}->create_element_ns
3879     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3880    
3881     for my $attr_name (keys %{ $token->{attributes}}) {
3882     $el->set_attribute_ns (undef, [undef, $attr_name],
3883     $token->{attributes} ->{$attr_name}->{value});
3884     }
3885    
3886     $insert->($el);
3887 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3888 wakaba 1.1 }
3889    
3890     push @$active_formatting_elements, ['#marker', ''];
3891    
3892     $token = $self->_get_next_token;
3893     return;
3894     } elsif ($token->{tag_name} eq 'marquee' or
3895     $token->{tag_name} eq 'object') {
3896     $reconstruct_active_formatting_elements->($insert_to_current);
3897    
3898    
3899     {
3900     my $el;
3901    
3902     $el = $self->{document}->create_element_ns
3903     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3904    
3905     for my $attr_name (keys %{ $token->{attributes}}) {
3906     $el->set_attribute_ns (undef, [undef, $attr_name],
3907     $token->{attributes} ->{$attr_name}->{value});
3908     }
3909    
3910     $insert->($el);
3911 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3912 wakaba 1.1 }
3913    
3914     push @$active_formatting_elements, ['#marker', ''];
3915    
3916     $token = $self->_get_next_token;
3917     return;
3918     } elsif ($token->{tag_name} eq 'xmp') {
3919     $reconstruct_active_formatting_elements->($insert_to_current);
3920 wakaba 1.41 $parse_rcdata->(CDATA_CONTENT_MODEL, $insert);
3921 wakaba 1.1 return;
3922     } elsif ($token->{tag_name} eq 'table') {
3923     ## has a p element in scope
3924 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
3925 wakaba 1.1 if ($_->[1] eq 'p') {
3926     unshift @{$self->{token}}, $token;
3927     $token = {type => 'end tag', tag_name => 'p'};
3928     return;
3929     } elsif ({
3930     table => 1, caption => 1, td => 1, th => 1,
3931     button => 1, marquee => 1, object => 1, html => 1,
3932     }->{$_->[1]}) {
3933     last INSCOPE;
3934     }
3935     } # INSCOPE
3936    
3937    
3938     {
3939     my $el;
3940    
3941     $el = $self->{document}->create_element_ns
3942     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3943    
3944     for my $attr_name (keys %{ $token->{attributes}}) {
3945     $el->set_attribute_ns (undef, [undef, $attr_name],
3946     $token->{attributes} ->{$attr_name}->{value});
3947     }
3948    
3949     $insert->($el);
3950 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3951 wakaba 1.1 }
3952    
3953    
3954 wakaba 1.3 $self->{insertion_mode} = 'in table';
3955 wakaba 1.1
3956     $token = $self->_get_next_token;
3957     return;
3958     } elsif ({
3959     area => 1, basefont => 1, bgsound => 1, br => 1,
3960     embed => 1, img => 1, param => 1, spacer => 1, wbr => 1,
3961     image => 1,
3962     }->{$token->{tag_name}}) {
3963     if ($token->{tag_name} eq 'image') {
3964 wakaba 1.3 $self->{parse_error}-> (type => 'image');
3965 wakaba 1.1 $token->{tag_name} = 'img';
3966     }
3967 wakaba 1.31
3968     ## NOTE: There is an "as if <br>" code clone.
3969 wakaba 1.1 $reconstruct_active_formatting_elements->($insert_to_current);
3970    
3971    
3972     {
3973     my $el;
3974    
3975     $el = $self->{document}->create_element_ns
3976     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3977    
3978     for my $attr_name (keys %{ $token->{attributes}}) {
3979     $el->set_attribute_ns (undef, [undef, $attr_name],
3980     $token->{attributes} ->{$attr_name}->{value});
3981     }
3982    
3983     $insert->($el);
3984 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3985 wakaba 1.1 }
3986    
3987 wakaba 1.3 pop @{$self->{open_elements}};
3988 wakaba 1.1
3989     $token = $self->_get_next_token;
3990     return;
3991     } elsif ($token->{tag_name} eq 'hr') {
3992     ## has a p element in scope
3993 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
3994 wakaba 1.1 if ($_->[1] eq 'p') {
3995     unshift @{$self->{token}}, $token;
3996     $token = {type => 'end tag', tag_name => 'p'};
3997     return;
3998     } elsif ({
3999     table => 1, caption => 1, td => 1, th => 1,
4000     button => 1, marquee => 1, object => 1, html => 1,
4001     }->{$_->[1]}) {
4002     last INSCOPE;
4003     }
4004     } # INSCOPE
4005    
4006    
4007     {
4008     my $el;
4009    
4010     $el = $self->{document}->create_element_ns
4011     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
4012    
4013     for my $attr_name (keys %{ $token->{attributes}}) {
4014     $el->set_attribute_ns (undef, [undef, $attr_name],
4015     $token->{attributes} ->{$attr_name}->{value});
4016     }
4017    
4018     $insert->($el);
4019 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
4020 wakaba 1.1 }
4021    
4022 wakaba 1.3 pop @{$self->{open_elements}};
4023 wakaba 1.1
4024     $token = $self->_get_next_token;
4025     return;
4026     } elsif ($token->{tag_name} eq 'input') {
4027     $reconstruct_active_formatting_elements->($insert_to_current);
4028    
4029    
4030     {
4031     my $el;
4032    
4033     $el = $self->{document}->create_element_ns
4034     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
4035    
4036     for my $attr_name (keys %{ $token->{attributes}}) {
4037     $el->set_attribute_ns (undef, [undef, $attr_name],
4038     $token->{attributes} ->{$attr_name}->{value});
4039     }
4040    
4041     $insert->($el);
4042 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
4043 wakaba 1.1 }
4044    
4045 wakaba 1.3 ## TODO: associate with $self->{form_element} if defined
4046     pop @{$self->{open_elements}};
4047 wakaba 1.1
4048     $token = $self->_get_next_token;
4049     return;
4050     } elsif ($token->{tag_name} eq 'isindex') {
4051 wakaba 1.3 $self->{parse_error}-> (type => 'isindex');
4052 wakaba 1.1
4053 wakaba 1.3 if (defined $self->{form_element}) {
4054 wakaba 1.1 ## Ignore the token
4055     $token = $self->_get_next_token;
4056     return;
4057     } else {
4058     my $at = $token->{attributes};
4059 wakaba 1.22 my $form_attrs;
4060     $form_attrs->{action} = $at->{action} if $at->{action};
4061     my $prompt_attr = $at->{prompt};
4062 wakaba 1.1 $at->{name} = {name => 'name', value => 'isindex'};
4063 wakaba 1.22 delete $at->{action};
4064     delete $at->{prompt};
4065 wakaba 1.1 my @tokens = (
4066 wakaba 1.22 {type => 'start tag', tag_name => 'form',
4067     attributes => $form_attrs},
4068 wakaba 1.1 {type => 'start tag', tag_name => 'hr'},
4069     {type => 'start tag', tag_name => 'p'},
4070     {type => 'start tag', tag_name => 'label'},
4071 wakaba 1.22 );
4072     if ($prompt_attr) {
4073     push @tokens, {type => 'character', data => $prompt_attr->{value}};
4074     } else {
4075     push @tokens, {type => 'character',
4076     data => 'This is a searchable index. Insert your search keywords here: '}; # SHOULD
4077     ## TODO: make this configurable
4078     }
4079     push @tokens,
4080 wakaba 1.1 {type => 'start tag', tag_name => 'input', attributes => $at},
4081     #{type => 'character', data => ''}, # SHOULD
4082     {type => 'end tag', tag_name => 'label'},
4083     {type => 'end tag', tag_name => 'p'},
4084     {type => 'start tag', tag_name => 'hr'},
4085 wakaba 1.22 {type => 'end tag', tag_name => 'form'};
4086 wakaba 1.1 $token = shift @tokens;
4087     unshift @{$self->{token}}, (@tokens);
4088     return;
4089     }
4090 wakaba 1.25 } elsif ($token->{tag_name} eq 'textarea') {
4091 wakaba 1.1 my $tag_name = $token->{tag_name};
4092     my $el;
4093    
4094     $el = $self->{document}->create_element_ns
4095     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
4096    
4097     for my $attr_name (keys %{ $token->{attributes}}) {
4098     $el->set_attribute_ns (undef, [undef, $attr_name],
4099     $token->{attributes} ->{$attr_name}->{value});
4100     }
4101    
4102    
4103 wakaba 1.25 ## TODO: $self->{form_element} if defined
4104 wakaba 1.41 $self->{content_model} = RCDATA_CONTENT_MODEL;
4105 wakaba 1.13 delete $self->{escape}; # MUST
4106 wakaba 1.1
4107     $insert->($el);
4108    
4109     my $text = '';
4110 wakaba 1.25 $token = $self->_get_next_token;
4111     if ($token->{type} eq 'character') {
4112     $token->{data} =~ s/^\x0A//;
4113     unless (length $token->{data}) {
4114     $token = $self->_get_next_token;
4115 wakaba 1.8 }
4116     }
4117 wakaba 1.1 while ($token->{type} eq 'character') {
4118     $text .= $token->{data};
4119     $token = $self->_get_next_token;
4120     }
4121     if (length $text) {
4122     $el->manakai_append_text ($text);
4123     }
4124    
4125 wakaba 1.41 $self->{content_model} = PCDATA_CONTENT_MODEL;
4126 wakaba 1.1
4127     if ($token->{type} eq 'end tag' and
4128     $token->{tag_name} eq $tag_name) {
4129     ## Ignore the token
4130     } else {
4131 wakaba 1.25 $self->{parse_error}-> (type => 'in RCDATA:#'.$token->{type});
4132 wakaba 1.1 }
4133     $token = $self->_get_next_token;
4134     return;
4135 wakaba 1.25 } elsif ({
4136     iframe => 1,
4137     noembed => 1,
4138     noframes => 1,
4139     noscript => 0, ## TODO: 1 if scripting is enabled
4140     }->{$token->{tag_name}}) {
4141 wakaba 1.41 $parse_rcdata->(CDATA_CONTENT_MODEL, $insert);
4142 wakaba 1.25 return;
4143 wakaba 1.1 } elsif ($token->{tag_name} eq 'select') {
4144     $reconstruct_active_formatting_elements->($insert_to_current);
4145    
4146    
4147     {
4148     my $el;
4149    
4150     $el = $self->{document}->create_element_ns
4151     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
4152    
4153     for my $attr_name (keys %{ $token->{attributes}}) {
4154     $el->set_attribute_ns (undef, [undef, $attr_name],
4155     $token->{attributes} ->{$attr_name}->{value});
4156     }
4157    
4158     $insert->($el);
4159 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
4160 wakaba 1.1 }
4161    
4162    
4163 wakaba 1.3 $self->{insertion_mode} = 'in select';
4164 wakaba 1.1 $token = $self->_get_next_token;
4165     return;
4166     } elsif ({
4167     caption => 1, col => 1, colgroup => 1, frame => 1,
4168     frameset => 1, head => 1, option => 1, optgroup => 1,
4169     tbody => 1, td => 1, tfoot => 1, th => 1,
4170     thead => 1, tr => 1,
4171     }->{$token->{tag_name}}) {
4172 wakaba 1.3 $self->{parse_error}-> (type => 'in body:'.$token->{tag_name});
4173 wakaba 1.1 ## Ignore the token
4174     $token = $self->_get_next_token;
4175     return;
4176    
4177     ## ISSUE: An issue on HTML5 new elements in the spec.
4178     } else {
4179     $reconstruct_active_formatting_elements->($insert_to_current);
4180    
4181    
4182     {
4183     my $el;
4184    
4185     $el = $self->{document}->create_element_ns
4186     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
4187    
4188     for my $attr_name (keys %{ $token->{attributes}}) {
4189     $el->set_attribute_ns (undef, [undef, $attr_name],
4190     $token->{attributes} ->{$attr_name}->{value});
4191     }
4192    
4193     $insert->($el);
4194 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
4195 wakaba 1.1 }
4196    
4197    
4198     $token = $self->_get_next_token;
4199     return;
4200     }
4201     } elsif ($token->{type} eq 'end tag') {
4202     if ($token->{tag_name} eq 'body') {
4203 wakaba 1.20 if (@{$self->{open_elements}} > 1 and
4204     $self->{open_elements}->[1]->[1] eq 'body') {
4205     for (@{$self->{open_elements}}) {
4206     unless ({
4207     dd => 1, dt => 1, li => 1, p => 1, td => 1,
4208     th => 1, tr => 1, body => 1, html => 1,
4209 wakaba 1.31 tbody => 1, tfoot => 1, thead => 1,
4210 wakaba 1.20 }->{$_->[1]}) {
4211     $self->{parse_error}-> (type => 'not closed:'.$_->[1]);
4212     }
4213 wakaba 1.1 }
4214 wakaba 1.20
4215 wakaba 1.3 $self->{insertion_mode} = 'after body';
4216 wakaba 1.1 $token = $self->_get_next_token;
4217     return;
4218     } else {
4219 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4220 wakaba 1.1 ## Ignore the token
4221     $token = $self->_get_next_token;
4222     return;
4223     }
4224     } elsif ($token->{tag_name} eq 'html') {
4225 wakaba 1.3 if (@{$self->{open_elements}} > 1 and $self->{open_elements}->[1]->[1] eq 'body') {
4226 wakaba 1.1 ## ISSUE: There is an issue in the spec.
4227 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'body') {
4228     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[1]->[1]);
4229 wakaba 1.1 }
4230 wakaba 1.3 $self->{insertion_mode} = 'after body';
4231 wakaba 1.1 ## reprocess
4232     return;
4233     } else {
4234 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4235 wakaba 1.1 ## Ignore the token
4236     $token = $self->_get_next_token;
4237     return;
4238     }
4239     } elsif ({
4240     address => 1, blockquote => 1, center => 1, dir => 1,
4241     div => 1, dl => 1, fieldset => 1, listing => 1,
4242     menu => 1, ol => 1, pre => 1, ul => 1,
4243     p => 1,
4244     dd => 1, dt => 1, li => 1,
4245     button => 1, marquee => 1, object => 1,
4246     }->{$token->{tag_name}}) {
4247     ## has an element in scope
4248     my $i;
4249 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4250     my $node = $self->{open_elements}->[$_];
4251 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4252     ## generate implied end tags
4253     if ({
4254     dd => ($token->{tag_name} ne 'dd'),
4255     dt => ($token->{tag_name} ne 'dt'),
4256     li => ($token->{tag_name} ne 'li'),
4257     p => ($token->{tag_name} ne 'p'),
4258     td => 1, th => 1, tr => 1,
4259 wakaba 1.31 tbody => 1, tfoot=> 1, thead => 1,
4260 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4261 wakaba 1.1 unshift @{$self->{token}}, $token;
4262     $token = {type => 'end tag',
4263 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4264 wakaba 1.1 return;
4265     }
4266     $i = $_;
4267     last INSCOPE unless $token->{tag_name} eq 'p';
4268     } elsif ({
4269     table => 1, caption => 1, td => 1, th => 1,
4270     button => 1, marquee => 1, object => 1, html => 1,
4271     }->{$node->[1]}) {
4272     last INSCOPE;
4273     }
4274     } # INSCOPE
4275    
4276 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
4277 wakaba 1.32 if (defined $i) {
4278     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4279     } else {
4280     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4281     }
4282 wakaba 1.1 }
4283    
4284 wakaba 1.31 if (defined $i) {
4285     splice @{$self->{open_elements}}, $i;
4286     } elsif ($token->{tag_name} eq 'p') {
4287     ## As if <p>, then reprocess the current token
4288     my $el;
4289    
4290     $el = $self->{document}->create_element_ns
4291     (q<http://www.w3.org/1999/xhtml>, [undef, 'p']);
4292    
4293     $insert->($el);
4294     }
4295 wakaba 1.1 $clear_up_to_marker->()
4296     if {
4297     button => 1, marquee => 1, object => 1,
4298     }->{$token->{tag_name}};
4299 wakaba 1.12 $token = $self->_get_next_token;
4300     return;
4301     } elsif ($token->{tag_name} eq 'form') {
4302     ## has an element in scope
4303     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4304     my $node = $self->{open_elements}->[$_];
4305     if ($node->[1] eq $token->{tag_name}) {
4306     ## generate implied end tags
4307     if ({
4308     dd => 1, dt => 1, li => 1, p => 1,
4309     td => 1, th => 1, tr => 1,
4310 wakaba 1.31 tbody => 1, tfoot=> 1, thead => 1,
4311 wakaba 1.12 }->{$self->{open_elements}->[-1]->[1]}) {
4312     unshift @{$self->{token}}, $token;
4313     $token = {type => 'end tag',
4314     tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4315     return;
4316     }
4317     last INSCOPE;
4318     } elsif ({
4319     table => 1, caption => 1, td => 1, th => 1,
4320     button => 1, marquee => 1, object => 1, html => 1,
4321     }->{$node->[1]}) {
4322     last INSCOPE;
4323     }
4324     } # INSCOPE
4325    
4326     if ($self->{open_elements}->[-1]->[1] eq $token->{tag_name}) {
4327     pop @{$self->{open_elements}};
4328     } else {
4329     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4330     }
4331    
4332     undef $self->{form_element};
4333 wakaba 1.1 $token = $self->_get_next_token;
4334     return;
4335     } elsif ({
4336     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
4337     }->{$token->{tag_name}}) {
4338     ## has an element in scope
4339     my $i;
4340 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4341     my $node = $self->{open_elements}->[$_];
4342 wakaba 1.1 if ({
4343     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
4344     }->{$node->[1]}) {
4345     ## generate implied end tags
4346     if ({
4347     dd => 1, dt => 1, li => 1, p => 1,
4348     td => 1, th => 1, tr => 1,
4349 wakaba 1.31 tbody => 1, tfoot=> 1, thead => 1,
4350 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4351 wakaba 1.1 unshift @{$self->{token}}, $token;
4352     $token = {type => 'end tag',
4353 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4354 wakaba 1.1 return;
4355     }
4356     $i = $_;
4357     last INSCOPE;
4358     } elsif ({
4359     table => 1, caption => 1, td => 1, th => 1,
4360     button => 1, marquee => 1, object => 1, html => 1,
4361     }->{$node->[1]}) {
4362     last INSCOPE;
4363     }
4364     } # INSCOPE
4365    
4366 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
4367     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4368 wakaba 1.1 }
4369    
4370 wakaba 1.3 splice @{$self->{open_elements}}, $i if defined $i;
4371 wakaba 1.1 $token = $self->_get_next_token;
4372     return;
4373     } elsif ({
4374     a => 1, ## end tags of formatting elements are passed to $formatting_end_tag
4375     b => 1, big => 1, em => 1, font => 1, i => 1,
4376     nobr => 1, s => 1, small => 1, strike => 1,
4377     strong => 1, tt => 1, u => 1,
4378     }->{$token->{tag_name}}) {
4379     $formatting_end_tag->($token->{tag_name});
4380 wakaba 1.31 return;
4381     } elsif ($token->{tag_name} eq 'br') {
4382     $self->{parse_error}-> (type => 'unmatched end tag:br');
4383    
4384     ## As if <br>
4385     $reconstruct_active_formatting_elements->($insert_to_current);
4386    
4387     my $el;
4388    
4389     $el = $self->{document}->create_element_ns
4390     (q<http://www.w3.org/1999/xhtml>, [undef, 'br']);
4391    
4392     $insert->($el);
4393    
4394     ## Ignore the token.
4395     $token = $self->_get_next_token;
4396 wakaba 1.1 return;
4397     } elsif ({
4398     caption => 1, col => 1, colgroup => 1, frame => 1,
4399     frameset => 1, head => 1, option => 1, optgroup => 1,
4400     tbody => 1, td => 1, tfoot => 1, th => 1,
4401     thead => 1, tr => 1,
4402 wakaba 1.31 area => 1, basefont => 1, bgsound => 1,
4403 wakaba 1.1 embed => 1, hr => 1, iframe => 1, image => 1,
4404 wakaba 1.5 img => 1, input => 1, isindex => 1, noembed => 1,
4405 wakaba 1.1 noframes => 1, param => 1, select => 1, spacer => 1,
4406     table => 1, textarea => 1, wbr => 1,
4407     noscript => 0, ## TODO: if scripting is enabled
4408     }->{$token->{tag_name}}) {
4409 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4410 wakaba 1.1 ## Ignore the token
4411     $token = $self->_get_next_token;
4412     return;
4413    
4414     ## ISSUE: Issue on HTML5 new elements in spec
4415    
4416     } else {
4417     ## Step 1
4418     my $node_i = -1;
4419 wakaba 1.3 my $node = $self->{open_elements}->[$node_i];
4420 wakaba 1.1
4421     ## Step 2
4422     S2: {
4423     if ($node->[1] eq $token->{tag_name}) {
4424     ## Step 1
4425     ## generate implied end tags
4426     if ({
4427     dd => 1, dt => 1, li => 1, p => 1,
4428     td => 1, th => 1, tr => 1,
4429 wakaba 1.31 tbody => 1, tfoot=> 1, thead => 1,
4430 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4431 wakaba 1.1 unshift @{$self->{token}}, $token;
4432     $token = {type => 'end tag',
4433 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4434 wakaba 1.1 return;
4435     }
4436    
4437     ## Step 2
4438 wakaba 1.3 if ($token->{tag_name} ne $self->{open_elements}->[-1]->[1]) {
4439     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4440 wakaba 1.1 }
4441    
4442     ## Step 3
4443 wakaba 1.3 splice @{$self->{open_elements}}, $node_i;
4444    
4445     $token = $self->_get_next_token;
4446 wakaba 1.1 last S2;
4447     } else {
4448     ## Step 3
4449     if (not $formatting_category->{$node->[1]} and
4450     #not $phrasing_category->{$node->[1]} and
4451     ($special_category->{$node->[1]} or
4452     $scoping_category->{$node->[1]})) {
4453 wakaba 1.25 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4454 wakaba 1.1 ## Ignore the token
4455     $token = $self->_get_next_token;
4456     last S2;
4457     }
4458     }
4459    
4460     ## Step 4
4461     $node_i--;
4462 wakaba 1.3 $node = $self->{open_elements}->[$node_i];
4463 wakaba 1.1
4464     ## Step 5;
4465     redo S2;
4466     } # S2
4467 wakaba 1.3 return;
4468 wakaba 1.1 }
4469     }
4470     }; # $in_body
4471    
4472     B: {
4473 wakaba 1.36 if ($token->{type} eq 'DOCTYPE') {
4474     $self->{parse_error}-> (type => 'DOCTYPE in the middle');
4475     ## Ignore the token
4476     ## Stay in the phase
4477     $token = $self->_get_next_token;
4478     redo B;
4479     } elsif ($token->{type} eq 'end-of-file') {
4480     if ($token->{insertion_mode} ne 'trailing end') {
4481 wakaba 1.1 ## Generate implied end tags
4482     if ({
4483     dd => 1, dt => 1, li => 1, p => 1, td => 1, th => 1, tr => 1,
4484 wakaba 1.31 tbody => 1, tfoot=> 1, thead => 1,
4485 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4486 wakaba 1.1 unshift @{$self->{token}}, $token;
4487 wakaba 1.3 $token = {type => 'end tag', tag_name => $self->{open_elements}->[-1]->[1]};
4488 wakaba 1.1 redo B;
4489     }
4490    
4491 wakaba 1.3 if (@{$self->{open_elements}} > 2 or
4492     (@{$self->{open_elements}} == 2 and $self->{open_elements}->[1]->[1] ne 'body')) {
4493     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4494     } elsif (defined $self->{inner_html_node} and
4495     @{$self->{open_elements}} > 1 and
4496     $self->{open_elements}->[1]->[1] ne 'body') {
4497     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4498 wakaba 1.1 }
4499    
4500 wakaba 1.36 ## ISSUE: There is an issue in the spec.
4501     }
4502 wakaba 1.1
4503 wakaba 1.36 ## Stop parsing
4504     last B;
4505     } elsif ($token->{type} eq 'start tag' and
4506     $token->{tag_name} eq 'html') {
4507     if ($self->{insertion_mode} eq 'trailing end') {
4508     ## Turn into the main phase
4509     $self->{parse_error}-> (type => 'after html:html');
4510     $self->{insertion_mode} = $previous_insertion_mode;
4511     }
4512    
4513     ## ISSUE: "aa<html>" is not a parse error.
4514     ## ISSUE: "<html>" in fragment is not a parse error.
4515     unless ($token->{first_start_tag}) {
4516     $self->{parse_error}-> (type => 'not first start tag');
4517     }
4518     my $top_el = $self->{open_elements}->[0]->[0];
4519     for my $attr_name (keys %{$token->{attributes}}) {
4520     unless ($top_el->has_attribute_ns (undef, $attr_name)) {
4521     $top_el->set_attribute_ns
4522     (undef, [undef, $attr_name],
4523     $token->{attributes}->{$attr_name}->{value});
4524     }
4525     }
4526     $token = $self->_get_next_token;
4527     redo B;
4528     } elsif ($token->{type} eq 'comment') {
4529     my $comment = $self->{document}->create_comment ($token->{data});
4530     if ($self->{insertion_mode} eq 'trailing end') {
4531     $self->{document}->append_child ($comment);
4532     } elsif ($self->{insertion_mode} eq 'after body') {
4533     $self->{open_elements}->[0]->[0]->append_child ($comment);
4534 wakaba 1.1 } else {
4535 wakaba 1.36 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4536     }
4537     $token = $self->_get_next_token;
4538     redo B;
4539     } elsif ($self->{insertion_mode} eq 'before head') {
4540 wakaba 1.1 if ($token->{type} eq 'character') {
4541     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4542 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4543 wakaba 1.1 unless (length $token->{data}) {
4544     $token = $self->_get_next_token;
4545     redo B;
4546     }
4547     }
4548     ## As if <head>
4549    
4550 wakaba 1.3 $self->{head_element} = $self->{document}->create_element_ns
4551 wakaba 1.1 (q<http://www.w3.org/1999/xhtml>, [undef, 'head']);
4552    
4553 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4554     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
4555     $self->{insertion_mode} = 'in head';
4556 wakaba 1.1 ## reprocess
4557     redo B;
4558     } elsif ($token->{type} eq 'start tag') {
4559     my $attr = $token->{tag_name} eq 'head' ? $token->{attributes} : {};
4560    
4561 wakaba 1.3 $self->{head_element} = $self->{document}->create_element_ns
4562 wakaba 1.1 (q<http://www.w3.org/1999/xhtml>, [undef, 'head']);
4563    
4564     for my $attr_name (keys %{ $attr}) {
4565 wakaba 1.3 $self->{head_element}->set_attribute_ns (undef, [undef, $attr_name],
4566 wakaba 1.1 $attr ->{$attr_name}->{value});
4567     }
4568    
4569 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4570     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
4571     $self->{insertion_mode} = 'in head';
4572 wakaba 1.1 if ($token->{tag_name} eq 'head') {
4573     $token = $self->_get_next_token;
4574     #} elsif ({
4575     # base => 1, link => 1, meta => 1,
4576     # script => 1, style => 1, title => 1,
4577     # }->{$token->{tag_name}}) {
4578     # ## reprocess
4579     } else {
4580     ## reprocess
4581     }
4582     redo B;
4583     } elsif ($token->{type} eq 'end tag') {
4584 wakaba 1.31 if ({
4585     head => 1, body => 1, html => 1,
4586     p => 1, br => 1,
4587     }->{$token->{tag_name}}) {
4588 wakaba 1.1 ## As if <head>
4589    
4590 wakaba 1.3 $self->{head_element} = $self->{document}->create_element_ns
4591 wakaba 1.1 (q<http://www.w3.org/1999/xhtml>, [undef, 'head']);
4592    
4593 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4594     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
4595     $self->{insertion_mode} = 'in head';
4596 wakaba 1.1 ## reprocess
4597     redo B;
4598     } else {
4599 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4600 wakaba 1.21 ## Ignore the token ## ISSUE: An issue in the spec.
4601 wakaba 1.1 $token = $self->_get_next_token;
4602     redo B;
4603     }
4604     } else {
4605     die "$0: $token->{type}: Unknown type";
4606     }
4607 wakaba 1.25 } elsif ($self->{insertion_mode} eq 'in head' or
4608     $self->{insertion_mode} eq 'in head noscript' or
4609     $self->{insertion_mode} eq 'after head') {
4610 wakaba 1.1 if ($token->{type} eq 'character') {
4611     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4612 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4613 wakaba 1.1 unless (length $token->{data}) {
4614     $token = $self->_get_next_token;
4615     redo B;
4616     }
4617     }
4618    
4619     #
4620     } elsif ($token->{type} eq 'start tag') {
4621 wakaba 1.25 if ({base => ($self->{insertion_mode} eq 'in head' or
4622     $self->{insertion_mode} eq 'after head'),
4623 wakaba 1.34 link => 1}->{$token->{tag_name}}) {
4624 wakaba 1.25 ## NOTE: There is a "as if in head" code clone.
4625     if ($self->{insertion_mode} eq 'after head') {
4626     $self->{parse_error}-> (type => 'after head:'.$token->{tag_name});
4627     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
4628     }
4629 wakaba 1.1
4630 wakaba 1.25 {
4631     my $el;
4632    
4633     $el = $self->{document}->create_element_ns
4634     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
4635 wakaba 1.1
4636 wakaba 1.25 for my $attr_name (keys %{ $token->{attributes}}) {
4637     $el->set_attribute_ns (undef, [undef, $attr_name],
4638     $token->{attributes} ->{$attr_name}->{value});
4639 wakaba 1.1 }
4640    
4641 wakaba 1.25 $self->{open_elements}->[-1]->[0]->append_child ($el);
4642     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
4643     }
4644    
4645     pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
4646     pop @{$self->{open_elements}}
4647     if $self->{insertion_mode} eq 'after head';
4648 wakaba 1.1 $token = $self->_get_next_token;
4649 wakaba 1.25 redo B;
4650 wakaba 1.34 } elsif ($token->{tag_name} eq 'meta') {
4651     ## NOTE: There is a "as if in head" code clone.
4652     if ($self->{insertion_mode} eq 'after head') {
4653     $self->{parse_error}-> (type => 'after head:'.$token->{tag_name});
4654     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
4655     }
4656    
4657     {
4658     my $el;
4659    
4660     $el = $self->{document}->create_element_ns
4661     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
4662    
4663     for my $attr_name (keys %{ $token->{attributes}}) {
4664     $el->set_attribute_ns (undef, [undef, $attr_name],
4665     $token->{attributes} ->{$attr_name}->{value});
4666     }
4667    
4668     $self->{open_elements}->[-1]->[0]->append_child ($el);
4669     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
4670     }
4671    
4672     pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
4673    
4674     unless ($self->{confident}) {
4675     my $charset;
4676     if ($token->{attributes}->{charset}) { ## TODO: And if supported
4677     $charset = $token->{attributes}->{charset}->{value};
4678     }
4679     if ($token->{attributes}->{'http-equiv'}) {
4680 wakaba 1.35 ## ISSUE: The algorithm name in the spec was incorrect, so it was not linked to the definition.
4681 wakaba 1.34 if ($token->{attributes}->{'http-equiv'}->{value}
4682     =~ /\A[^;]*;[\x09-\x0D\x20]*charset[\x09-\x0D\x20]*=
4683     [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
4684     ([^"'\x09-\x0D\x20][^\x09-\x0D\x20]*))/x) {
4685     $charset = defined $1 ? $1 : defined $2 ? $2 : $3;
4686     } ## TODO: And if supported
4687     }
4688     ## TODO: Change the encoding
4689     }
4690    
4691     ## TODO: Extracting |charset| from |meta|.
4692     pop @{$self->{open_elements}}
4693     if $self->{insertion_mode} eq 'after head';
4694     $token = $self->_get_next_token;
4695     redo B;
4696 wakaba 1.25 } elsif ($token->{tag_name} eq 'title' and
4697     $self->{insertion_mode} eq 'in head') {
4698     ## NOTE: There is a "as if in head" code clone.
4699     if ($self->{insertion_mode} eq 'after head') {
4700     $self->{parse_error}-> (type => 'after head:'.$token->{tag_name});
4701     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
4702     }
4703 wakaba 1.31 my $parent = defined $self->{head_element} ? $self->{head_element}
4704     : $self->{open_elements}->[-1]->[0];
4705 wakaba 1.41 $parse_rcdata->(RCDATA_CONTENT_MODEL,
4706     sub { $parent->append_child ($_[0]) });
4707 wakaba 1.25 pop @{$self->{open_elements}}
4708     if $self->{insertion_mode} eq 'after head';
4709 wakaba 1.1 redo B;
4710     } elsif ($token->{tag_name} eq 'style') {
4711 wakaba 1.25 ## NOTE: Or (scripting is enabled and tag_name eq 'noscript' and
4712     ## insertion mode 'in head')
4713     ## NOTE: There is a "as if in head" code clone.
4714     if ($self->{insertion_mode} eq 'after head') {
4715     $self->{parse_error}-> (type => 'after head:'.$token->{tag_name});
4716     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
4717     }
4718 wakaba 1.41 $parse_rcdata->(CDATA_CONTENT_MODEL, $insert_to_current);
4719 wakaba 1.25 pop @{$self->{open_elements}}
4720     if $self->{insertion_mode} eq 'after head';
4721     redo B;
4722     } elsif ($token->{tag_name} eq 'noscript') {
4723     if ($self->{insertion_mode} eq 'in head') {
4724     ## NOTE: and scripting is disabled
4725    
4726     {
4727     my $el;
4728    
4729 wakaba 1.1 $el = $self->{document}->create_element_ns
4730     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
4731    
4732 wakaba 1.25 for my $attr_name (keys %{ $token->{attributes}}) {
4733 wakaba 1.1 $el->set_attribute_ns (undef, [undef, $attr_name],
4734 wakaba 1.25 $token->{attributes} ->{$attr_name}->{value});
4735 wakaba 1.1 }
4736    
4737 wakaba 1.25 $self->{open_elements}->[-1]->[0]->append_child ($el);
4738     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
4739     }
4740    
4741     $self->{insertion_mode} = 'in head noscript';
4742     $token = $self->_get_next_token;
4743     redo B;
4744     } elsif ($self->{insertion_mode} eq 'in head noscript') {
4745 wakaba 1.30 $self->{parse_error}-> (type => 'in noscript:noscript');
4746 wakaba 1.25 ## Ignore the token
4747 wakaba 1.42 $token = $self->_get_next_token;
4748 wakaba 1.25 redo B;
4749 wakaba 1.24 } else {
4750 wakaba 1.25 #
4751 wakaba 1.24 }
4752 wakaba 1.25 } elsif ($token->{tag_name} eq 'head' and
4753     $self->{insertion_mode} ne 'after head') {
4754     $self->{parse_error}-> (type => 'in head:head'); # or in head noscript
4755 wakaba 1.1 ## Ignore the token
4756     $token = $self->_get_next_token;
4757     redo B;
4758 wakaba 1.25 } elsif ($self->{insertion_mode} ne 'in head noscript' and
4759     $token->{tag_name} eq 'script') {
4760     if ($self->{insertion_mode} eq 'after head') {
4761     $self->{parse_error}-> (type => 'after head:'.$token->{tag_name});
4762     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
4763     }
4764     ## NOTE: There is a "as if in head" code clone.
4765     $script_start_tag->($insert_to_current);
4766     pop @{$self->{open_elements}}
4767     if $self->{insertion_mode} eq 'after head';
4768 wakaba 1.1 redo B;
4769 wakaba 1.25 } elsif ($self->{insertion_mode} eq 'after head' and
4770     $token->{tag_name} eq 'body') {
4771 wakaba 1.1
4772     {
4773     my $el;
4774    
4775     $el = $self->{document}->create_element_ns
4776     (q<http://www.w3.org/1999/xhtml>, [undef, 'body']);
4777    
4778     for my $attr_name (keys %{ $token->{attributes}}) {
4779     $el->set_attribute_ns (undef, [undef, $attr_name],
4780     $token->{attributes} ->{$attr_name}->{value});
4781     }
4782    
4783 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
4784     push @{$self->{open_elements}}, [$el, 'body'];
4785 wakaba 1.1 }
4786    
4787 wakaba 1.3 $self->{insertion_mode} = 'in body';
4788 wakaba 1.1 $token = $self->_get_next_token;
4789     redo B;
4790 wakaba 1.25 } elsif ($self->{insertion_mode} eq 'after head' and
4791     $token->{tag_name} eq 'frameset') {
4792 wakaba 1.1
4793     {
4794     my $el;
4795    
4796     $el = $self->{document}->create_element_ns
4797     (q<http://www.w3.org/1999/xhtml>, [undef, 'frameset']);
4798    
4799     for my $attr_name (keys %{ $token->{attributes}}) {
4800     $el->set_attribute_ns (undef, [undef, $attr_name],
4801     $token->{attributes} ->{$attr_name}->{value});
4802     }
4803    
4804 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
4805     push @{$self->{open_elements}}, [$el, 'frameset'];
4806 wakaba 1.1 }
4807    
4808 wakaba 1.3 $self->{insertion_mode} = 'in frameset';
4809 wakaba 1.1 $token = $self->_get_next_token;
4810     redo B;
4811 wakaba 1.25 } else {
4812     #
4813     }
4814     } elsif ($token->{type} eq 'end tag') {
4815     if ($self->{insertion_mode} eq 'in head' and
4816     $token->{tag_name} eq 'head') {
4817     pop @{$self->{open_elements}};
4818     $self->{insertion_mode} = 'after head';
4819     $token = $self->_get_next_token;
4820     redo B;
4821     } elsif ($self->{insertion_mode} eq 'in head noscript' and
4822     $token->{tag_name} eq 'noscript') {
4823     pop @{$self->{open_elements}};
4824 wakaba 1.3 $self->{insertion_mode} = 'in head';
4825 wakaba 1.25 $token = $self->_get_next_token;
4826     redo B;
4827     } elsif ($self->{insertion_mode} eq 'in head' and
4828 wakaba 1.31 {
4829     body => 1, html => 1,
4830     p => 1, br => 1,
4831     }->{$token->{tag_name}}) {
4832     #
4833     } elsif ($self->{insertion_mode} eq 'in head noscript' and
4834     {
4835     p => 1, br => 1,
4836     }->{$token->{tag_name}}) {
4837 wakaba 1.25 #
4838     } elsif ($self->{insertion_mode} ne 'after head') {
4839     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4840     ## Ignore the token
4841     $token = $self->_get_next_token;
4842 wakaba 1.1 redo B;
4843     } else {
4844 wakaba 1.25 #
4845     }
4846 wakaba 1.1 } else {
4847     #
4848     }
4849 wakaba 1.25
4850     ## As if </head> or </noscript> or <body>
4851     if ($self->{insertion_mode} eq 'in head') {
4852     pop @{$self->{open_elements}};
4853     $self->{insertion_mode} = 'after head';
4854     } elsif ($self->{insertion_mode} eq 'in head noscript') {
4855     pop @{$self->{open_elements}};
4856     $self->{parse_error}-> (type => 'in noscript:'.(defined $token->{tag_name} ? ($token->{type} eq 'end tag' ? '/' : '') . $token->{tag_name} : '#' . $token->{type}));
4857     $self->{insertion_mode} = 'in head';
4858     } else { # 'after head'
4859    
4860 wakaba 1.1 {
4861     my $el;
4862    
4863     $el = $self->{document}->create_element_ns
4864     (q<http://www.w3.org/1999/xhtml>, [undef, 'body']);
4865    
4866 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
4867     push @{$self->{open_elements}}, [$el, 'body'];
4868 wakaba 1.1 }
4869    
4870 wakaba 1.25 $self->{insertion_mode} = 'in body';
4871     }
4872 wakaba 1.1 ## reprocess
4873     redo B;
4874 wakaba 1.25
4875     ## ISSUE: An issue in the spec.
4876 wakaba 1.43 } elsif ($self->{insertion_mode} eq 'in body' or
4877     $self->{insertion_mode} eq 'in caption') {
4878 wakaba 1.1 if ($token->{type} eq 'character') {
4879     ## NOTE: There is a code clone of "character in body".
4880     $reconstruct_active_formatting_elements->($insert_to_current);
4881    
4882 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4883 wakaba 1.1
4884     $token = $self->_get_next_token;
4885     redo B;
4886 wakaba 1.43 } elsif ($token->{type} eq 'start tag') {
4887     if ({
4888     caption => 1, col => 1, colgroup => 1, tbody => 1,
4889     td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
4890     }->{$token->{tag_name}} and
4891     $self->{insertion_mode} eq 'in caption') {
4892     $self->{parse_error}-> (type => 'not closed:caption');
4893    
4894     ## As if </caption>
4895     ## have a caption element in table scope
4896     my $i;
4897     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4898     my $node = $self->{open_elements}->[$_];
4899     if ($node->[1] eq 'caption') {
4900     $i = $_;
4901     last INSCOPE;
4902     } elsif ({
4903     table => 1, html => 1,
4904     }->{$node->[1]}) {
4905     last INSCOPE;
4906     }
4907     } # INSCOPE
4908     unless (defined $i) {
4909     $self->{parse_error}-> (type => 'unmatched end tag:caption');
4910     ## Ignore the token
4911     $token = $self->_get_next_token;
4912     redo B;
4913     }
4914    
4915     ## generate implied end tags
4916     if ({
4917     dd => 1, dt => 1, li => 1, p => 1,
4918     td => 1, th => 1, tr => 1,
4919     tbody => 1, tfoot=> 1, thead => 1,
4920     }->{$self->{open_elements}->[-1]->[1]}) {
4921     unshift @{$self->{token}}, $token; # <?>
4922     $token = {type => 'end tag', tag_name => 'caption'};
4923     unshift @{$self->{token}}, $token;
4924     $token = {type => 'end tag',
4925     tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4926     redo B;
4927     }
4928    
4929     if ($self->{open_elements}->[-1]->[1] ne 'caption') {
4930     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4931     }
4932    
4933     splice @{$self->{open_elements}}, $i;
4934    
4935     $clear_up_to_marker->();
4936    
4937     $self->{insertion_mode} = 'in table';
4938    
4939     ## reprocess
4940     redo B;
4941     } else {
4942     #
4943     }
4944     } elsif ($token->{type} eq 'end tag') {
4945     if ($token->{tag_name} eq 'caption' and
4946     $self->{insertion_mode} eq 'in caption') {
4947     ## have a caption element in table scope
4948     my $i;
4949     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4950     my $node = $self->{open_elements}->[$_];
4951     if ($node->[1] eq $token->{tag_name}) {
4952     $i = $_;
4953     last INSCOPE;
4954     } elsif ({
4955     table => 1, html => 1,
4956     }->{$node->[1]}) {
4957     last INSCOPE;
4958     }
4959     } # INSCOPE
4960     unless (defined $i) {
4961     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4962     ## Ignore the token
4963     $token = $self->_get_next_token;
4964     redo B;
4965     }
4966    
4967     ## generate implied end tags
4968     if ({
4969     dd => 1, dt => 1, li => 1, p => 1,
4970     td => 1, th => 1, tr => 1,
4971     tbody => 1, tfoot=> 1, thead => 1,
4972     }->{$self->{open_elements}->[-1]->[1]}) {
4973     unshift @{$self->{token}}, $token;
4974     $token = {type => 'end tag',
4975     tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4976     redo B;
4977     }
4978    
4979     if ($self->{open_elements}->[-1]->[1] ne 'caption') {
4980     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4981     }
4982    
4983     splice @{$self->{open_elements}}, $i;
4984    
4985     $clear_up_to_marker->();
4986    
4987     $self->{insertion_mode} = 'in table';
4988    
4989     $token = $self->_get_next_token;
4990     redo B;
4991     } elsif ($token->{tag_name} eq 'table' and
4992     $self->{insertion_mode} eq 'in caption') {
4993     $self->{parse_error}-> (type => 'not closed:caption');
4994    
4995     ## As if </caption>
4996     ## have a caption element in table scope
4997     my $i;
4998     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4999     my $node = $self->{open_elements}->[$_];
5000     if ($node->[1] eq 'caption') {
5001     $i = $_;
5002     last INSCOPE;
5003     } elsif ({
5004     table => 1, html => 1,
5005     }->{$node->[1]}) {
5006     last INSCOPE;
5007     }
5008     } # INSCOPE
5009     unless (defined $i) {
5010     $self->{parse_error}-> (type => 'unmatched end tag:caption');
5011     ## Ignore the token
5012     $token = $self->_get_next_token;
5013     redo B;
5014     }
5015    
5016     ## generate implied end tags
5017     if ({
5018     dd => 1, dt => 1, li => 1, p => 1,
5019     td => 1, th => 1, tr => 1,
5020     tbody => 1, tfoot=> 1, thead => 1,
5021     }->{$self->{open_elements}->[-1]->[1]}) {
5022     unshift @{$self->{token}}, $token; # </table>
5023     $token = {type => 'end tag', tag_name => 'caption'};
5024     unshift @{$self->{token}}, $token;
5025     $token = {type => 'end tag',
5026     tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
5027     redo B;
5028     }
5029    
5030     if ($self->{open_elements}->[-1]->[1] ne 'caption') {
5031     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5032     }
5033    
5034     splice @{$self->{open_elements}}, $i;
5035    
5036     $clear_up_to_marker->();
5037    
5038     $self->{insertion_mode} = 'in table';
5039    
5040     ## reprocess
5041     redo B;
5042     } elsif ({
5043     body => 1, col => 1, colgroup => 1,
5044     html => 1, tbody => 1, td => 1, tfoot => 1,
5045     th => 1, thead => 1, tr => 1,
5046     }->{$token->{tag_name}} and
5047     $self->{insertion_mode} eq 'in caption') {
5048     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5049     ## Ignore the token
5050     $token = $self->_get_next_token;
5051     redo B;
5052     } else {
5053     #
5054     }
5055 wakaba 1.1 } else {
5056 wakaba 1.43 #
5057 wakaba 1.1 }
5058 wakaba 1.43
5059     $in_body->($insert_to_current);
5060     redo B;
5061 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in table') {
5062 wakaba 1.1 if ($token->{type} eq 'character') {
5063     ## NOTE: There are "character in table" code clones.
5064     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5065 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5066 wakaba 1.1
5067     unless (length $token->{data}) {
5068     $token = $self->_get_next_token;
5069     redo B;
5070     }
5071     }
5072    
5073 wakaba 1.3 $self->{parse_error}-> (type => 'in table:#character');
5074    
5075 wakaba 1.1 ## As if in body, but insert into foster parent element
5076     ## ISSUE: Spec says that "whenever a node would be inserted
5077     ## into the current node" while characters might not
5078     ## result in a new Text node.
5079     $reconstruct_active_formatting_elements->($insert_to_foster);
5080    
5081     if ({
5082     table => 1, tbody => 1, tfoot => 1,
5083     thead => 1, tr => 1,
5084 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5085 wakaba 1.1 # MUST
5086     my $foster_parent_element;
5087     my $next_sibling;
5088     my $prev_sibling;
5089 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
5090     if ($self->{open_elements}->[$_]->[1] eq 'table') {
5091     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
5092 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
5093     $foster_parent_element = $parent;
5094 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
5095 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
5096     } else {
5097 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
5098 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
5099     }
5100     last OE;
5101     }
5102     } # OE
5103 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
5104 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
5105     unless defined $foster_parent_element;
5106     if (defined $prev_sibling and
5107     $prev_sibling->node_type == 3) {
5108     $prev_sibling->manakai_append_text ($token->{data});
5109     } else {
5110     $foster_parent_element->insert_before
5111     ($self->{document}->create_text_node ($token->{data}),
5112     $next_sibling);
5113     }
5114     } else {
5115 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
5116 wakaba 1.1 }
5117    
5118     $token = $self->_get_next_token;
5119     redo B;
5120     } elsif ($token->{type} eq 'start tag') {
5121     if ({
5122     caption => 1,
5123     colgroup => 1,
5124     tbody => 1, tfoot => 1, thead => 1,
5125     }->{$token->{tag_name}}) {
5126     ## Clear back to table context
5127 wakaba 1.3 while ($self->{open_elements}->[-1]->[1] ne 'table' and
5128     $self->{open_elements}->[-1]->[1] ne 'html') {
5129     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5130     pop @{$self->{open_elements}};
5131 wakaba 1.1 }
5132    
5133     push @$active_formatting_elements, ['#marker', '']
5134     if $token->{tag_name} eq 'caption';
5135    
5136    
5137     {
5138     my $el;
5139    
5140     $el = $self->{document}->create_element_ns
5141     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
5142    
5143     for my $attr_name (keys %{ $token->{attributes}}) {
5144     $el->set_attribute_ns (undef, [undef, $attr_name],
5145     $token->{attributes} ->{$attr_name}->{value});
5146     }
5147    
5148 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
5149     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
5150 wakaba 1.1 }
5151    
5152 wakaba 1.3 $self->{insertion_mode} = {
5153 wakaba 1.1 caption => 'in caption',
5154     colgroup => 'in column group',
5155     tbody => 'in table body',
5156     tfoot => 'in table body',
5157     thead => 'in table body',
5158     }->{$token->{tag_name}};
5159     $token = $self->_get_next_token;
5160     redo B;
5161     } elsif ({
5162     col => 1,
5163     td => 1, th => 1, tr => 1,
5164     }->{$token->{tag_name}}) {
5165     ## Clear back to table context
5166 wakaba 1.3 while ($self->{open_elements}->[-1]->[1] ne 'table' and
5167     $self->{open_elements}->[-1]->[1] ne 'html') {
5168     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5169     pop @{$self->{open_elements}};
5170 wakaba 1.1 }
5171    
5172    
5173     {
5174     my $el;
5175    
5176     $el = $self->{document}->create_element_ns
5177     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name} eq 'col' ? 'colgroup' : 'tbody']);
5178    
5179 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
5180     push @{$self->{open_elements}}, [$el, $token->{tag_name} eq 'col' ? 'colgroup' : 'tbody'];
5181 wakaba 1.1 }
5182    
5183 wakaba 1.3 $self->{insertion_mode} = $token->{tag_name} eq 'col'
5184 wakaba 1.1 ? 'in column group' : 'in table body';
5185     ## reprocess
5186     redo B;
5187     } elsif ($token->{tag_name} eq 'table') {
5188     ## NOTE: There are code clones for this "table in table"
5189 wakaba 1.3 $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5190 wakaba 1.1
5191     ## As if </table>
5192     ## have a table element in table scope
5193     my $i;
5194 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5195     my $node = $self->{open_elements}->[$_];
5196 wakaba 1.1 if ($node->[1] eq 'table') {
5197     $i = $_;
5198     last INSCOPE;
5199     } elsif ({
5200     table => 1, html => 1,
5201     }->{$node->[1]}) {
5202     last INSCOPE;
5203     }
5204     } # INSCOPE
5205     unless (defined $i) {
5206 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:table');
5207 wakaba 1.1 ## Ignore tokens </table><table>
5208     $token = $self->_get_next_token;
5209     redo B;
5210     }
5211    
5212     ## generate implied end tags
5213     if ({
5214     dd => 1, dt => 1, li => 1, p => 1,
5215     td => 1, th => 1, tr => 1,
5216 wakaba 1.31 tbody => 1, tfoot=> 1, thead => 1,
5217 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5218 wakaba 1.1 unshift @{$self->{token}}, $token; # <table>
5219     $token = {type => 'end tag', tag_name => 'table'};
5220     unshift @{$self->{token}}, $token;
5221     $token = {type => 'end tag',
5222 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
5223 wakaba 1.1 redo B;
5224     }
5225    
5226 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
5227     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5228 wakaba 1.1 }
5229    
5230 wakaba 1.3 splice @{$self->{open_elements}}, $i;
5231 wakaba 1.1
5232 wakaba 1.3 $self->_reset_insertion_mode;
5233 wakaba 1.1
5234     ## reprocess
5235     redo B;
5236     } else {
5237     #
5238     }
5239     } elsif ($token->{type} eq 'end tag') {
5240     if ($token->{tag_name} eq 'table') {
5241     ## have a table element in table scope
5242     my $i;
5243 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5244     my $node = $self->{open_elements}->[$_];
5245 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
5246     $i = $_;
5247     last INSCOPE;
5248     } elsif ({
5249     table => 1, html => 1,
5250     }->{$node->[1]}) {
5251     last INSCOPE;
5252     }
5253     } # INSCOPE
5254     unless (defined $i) {
5255 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5256 wakaba 1.1 ## Ignore the token
5257     $token = $self->_get_next_token;
5258     redo B;
5259     }
5260    
5261     ## generate implied end tags
5262     if ({
5263     dd => 1, dt => 1, li => 1, p => 1,
5264     td => 1, th => 1, tr => 1,
5265 wakaba 1.31 tbody => 1, tfoot=> 1, thead => 1,
5266 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5267 wakaba 1.1 unshift @{$self->{token}}, $token;
5268     $token = {type => 'end tag',
5269 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
5270 wakaba 1.1 redo B;
5271     }
5272    
5273 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
5274     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5275 wakaba 1.1 }
5276    
5277 wakaba 1.3 splice @{$self->{open_elements}}, $i;
5278 wakaba 1.1
5279 wakaba 1.3 $self->_reset_insertion_mode;
5280 wakaba 1.1
5281     $token = $self->_get_next_token;
5282     redo B;
5283     } elsif ({
5284     body => 1, caption => 1, col => 1, colgroup => 1,
5285     html => 1, tbody => 1, td => 1, tfoot => 1, th => 1,
5286     thead => 1, tr => 1,
5287     }->{$token->{tag_name}}) {
5288 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5289 wakaba 1.1 ## Ignore the token
5290     $token = $self->_get_next_token;
5291     redo B;
5292     } else {
5293     #
5294     }
5295     } else {
5296     #
5297     }
5298    
5299 wakaba 1.3 $self->{parse_error}-> (type => 'in table:'.$token->{tag_name});
5300 wakaba 1.1 $in_body->($insert_to_foster);
5301     redo B;
5302 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in column group') {
5303 wakaba 1.1 if ($token->{type} eq 'character') {
5304     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5305 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5306 wakaba 1.1 unless (length $token->{data}) {
5307     $token = $self->_get_next_token;
5308     redo B;
5309     }
5310     }
5311    
5312     #
5313     } elsif ($token->{type} eq 'start tag') {
5314     if ($token->{tag_name} eq 'col') {
5315    
5316     {
5317     my $el;
5318    
5319     $el = $self->{document}->create_element_ns
5320     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
5321    
5322     for my $attr_name (keys %{ $token->{attributes}}) {
5323     $el->set_attribute_ns (undef, [undef, $attr_name],
5324     $token->{attributes} ->{$attr_name}->{value});
5325     }
5326    
5327 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
5328     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
5329 wakaba 1.1 }
5330    
5331 wakaba 1.3 pop @{$self->{open_elements}};
5332 wakaba 1.1 $token = $self->_get_next_token;
5333     redo B;
5334     } else {
5335     #
5336     }
5337     } elsif ($token->{type} eq 'end tag') {
5338     if ($token->{tag_name} eq 'colgroup') {
5339 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html') {
5340     $self->{parse_error}-> (type => 'unmatched end tag:colgroup');
5341 wakaba 1.1 ## Ignore the token
5342     $token = $self->_get_next_token;
5343     redo B;
5344     } else {
5345 wakaba 1.3 pop @{$self->{open_elements}}; # colgroup
5346     $self->{insertion_mode} = 'in table';
5347 wakaba 1.1 $token = $self->_get_next_token;
5348     redo B;
5349     }
5350     } elsif ($token->{tag_name} eq 'col') {
5351 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:col');
5352 wakaba 1.1 ## Ignore the token
5353     $token = $self->_get_next_token;
5354     redo B;
5355     } else {
5356     #
5357     }
5358     } else {
5359     #
5360     }
5361    
5362     ## As if </colgroup>
5363 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html') {
5364     $self->{parse_error}-> (type => 'unmatched end tag:colgroup');
5365 wakaba 1.1 ## Ignore the token
5366     $token = $self->_get_next_token;
5367     redo B;
5368     } else {
5369 wakaba 1.3 pop @{$self->{open_elements}}; # colgroup
5370     $self->{insertion_mode} = 'in table';
5371 wakaba 1.1 ## reprocess
5372     redo B;
5373     }
5374 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in table body') {
5375 wakaba 1.1 if ($token->{type} eq 'character') {
5376     ## NOTE: This is a "character in table" code clone.
5377     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5378 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5379 wakaba 1.1
5380     unless (length $token->{data}) {
5381     $token = $self->_get_next_token;
5382     redo B;
5383     }
5384     }
5385    
5386 wakaba 1.3 $self->{parse_error}-> (type => 'in table:#character');
5387    
5388 wakaba 1.1 ## As if in body, but insert into foster parent element
5389     ## ISSUE: Spec says that "whenever a node would be inserted
5390     ## into the current node" while characters might not be
5391     ## result in a new Text node.
5392     $reconstruct_active_formatting_elements->($insert_to_foster);
5393    
5394     if ({
5395     table => 1, tbody => 1, tfoot => 1,
5396     thead => 1, tr => 1,
5397 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5398 wakaba 1.1 # MUST
5399     my $foster_parent_element;
5400     my $next_sibling;
5401     my $prev_sibling;
5402 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
5403     if ($self->{open_elements}->[$_]->[1] eq 'table') {
5404     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
5405 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
5406     $foster_parent_element = $parent;
5407 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
5408 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
5409     } else {
5410 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
5411 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
5412     }
5413     last OE;
5414     }
5415     } # OE
5416 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
5417 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
5418     unless defined $foster_parent_element;
5419     if (defined $prev_sibling and
5420     $prev_sibling->node_type == 3) {
5421     $prev_sibling->manakai_append_text ($token->{data});
5422     } else {
5423     $foster_parent_element->insert_before
5424     ($self->{document}->create_text_node ($token->{data}),
5425     $next_sibling);
5426     }
5427     } else {
5428 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
5429 wakaba 1.1 }
5430    
5431     $token = $self->_get_next_token;
5432     redo B;
5433     } elsif ($token->{type} eq 'start tag') {
5434     if ({
5435     tr => 1,
5436     th => 1, td => 1,
5437     }->{$token->{tag_name}}) {
5438 wakaba 1.3 unless ($token->{tag_name} eq 'tr') {
5439     $self->{parse_error}-> (type => 'missing start tag:tr');
5440     }
5441    
5442 wakaba 1.1 ## Clear back to table body context
5443     while (not {
5444     tbody => 1, tfoot => 1, thead => 1, html => 1,
5445 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5446     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5447     pop @{$self->{open_elements}};
5448 wakaba 1.1 }
5449    
5450 wakaba 1.3 $self->{insertion_mode} = 'in row';
5451 wakaba 1.1 if ($token->{tag_name} eq 'tr') {
5452    
5453     {
5454     my $el;
5455    
5456     $el = $self->{document}->create_element_ns
5457     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
5458    
5459     for my $attr_name (keys %{ $token->{attributes}}) {
5460     $el->set_attribute_ns (undef, [undef, $attr_name],
5461     $token->{attributes} ->{$attr_name}->{value});
5462     }
5463    
5464 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
5465     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
5466 wakaba 1.1 }
5467    
5468     $token = $self->_get_next_token;
5469     } else {
5470    
5471     {
5472     my $el;
5473    
5474     $el = $self->{document}->create_element_ns
5475     (q<http://www.w3.org/1999/xhtml>, [undef, 'tr']);
5476    
5477 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
5478     push @{$self->{open_elements}}, [$el, 'tr'];
5479 wakaba 1.1 }
5480    
5481     ## reprocess
5482     }
5483     redo B;
5484     } elsif ({
5485     caption => 1, col => 1, colgroup => 1,
5486     tbody => 1, tfoot => 1, thead => 1,
5487     }->{$token->{tag_name}}) {
5488     ## have an element in table scope
5489     my $i;
5490 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5491     my $node = $self->{open_elements}->[$_];
5492 wakaba 1.1 if ({
5493     tbody => 1, thead => 1, tfoot => 1,
5494     }->{$node->[1]}) {
5495     $i = $_;
5496     last INSCOPE;
5497     } elsif ({
5498     table => 1, html => 1,
5499     }->{$node->[1]}) {
5500     last INSCOPE;
5501     }
5502     } # INSCOPE
5503     unless (defined $i) {
5504 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5505 wakaba 1.1 ## Ignore the token
5506     $token = $self->_get_next_token;
5507     redo B;
5508     }
5509    
5510     ## Clear back to table body context
5511     while (not {
5512     tbody => 1, tfoot => 1, thead => 1, html => 1,
5513 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5514     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5515     pop @{$self->{open_elements}};
5516 wakaba 1.1 }
5517    
5518     ## As if <{current node}>
5519     ## have an element in table scope
5520     ## true by definition
5521    
5522     ## Clear back to table body context
5523     ## nop by definition
5524    
5525 wakaba 1.3 pop @{$self->{open_elements}};
5526     $self->{insertion_mode} = 'in table';
5527 wakaba 1.1 ## reprocess
5528     redo B;
5529     } elsif ($token->{tag_name} eq 'table') {
5530     ## NOTE: This is a code clone of "table in table"
5531 wakaba 1.3 $self->{parse_error}-> (type => 'not closed:table');
5532 wakaba 1.1
5533     ## As if </table>
5534     ## have a table element in table scope
5535     my $i;
5536 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5537     my $node = $self->{open_elements}->[$_];
5538 wakaba 1.1 if ($node->[1] eq 'table') {
5539     $i = $_;
5540     last INSCOPE;
5541     } elsif ({
5542     table => 1, html => 1,
5543     }->{$node->[1]}) {
5544     last INSCOPE;
5545     }
5546     } # INSCOPE
5547     unless (defined $i) {
5548 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:table');
5549 wakaba 1.1 ## Ignore tokens </table><table>
5550     $token = $self->_get_next_token;
5551     redo B;
5552     }
5553    
5554     ## generate implied end tags
5555     if ({
5556     dd => 1, dt => 1, li => 1, p => 1,
5557     td => 1, th => 1, tr => 1,
5558 wakaba 1.31 tbody => 1, tfoot=> 1, thead => 1,
5559 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5560 wakaba 1.1 unshift @{$self->{token}}, $token; # <table>
5561     $token = {type => 'end tag', tag_name => 'table'};
5562     unshift @{$self->{token}}, $token;
5563     $token = {type => 'end tag',
5564 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
5565 wakaba 1.1 redo B;
5566     }
5567    
5568 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
5569     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5570 wakaba 1.1 }
5571    
5572 wakaba 1.3 splice @{$self->{open_elements}}, $i;
5573 wakaba 1.1
5574 wakaba 1.3 $self->_reset_insertion_mode;
5575 wakaba 1.1
5576     ## reprocess
5577     redo B;
5578     } else {
5579     #
5580     }
5581     } elsif ($token->{type} eq 'end tag') {
5582     if ({
5583     tbody => 1, tfoot => 1, thead => 1,
5584     }->{$token->{tag_name}}) {
5585     ## have an element in table scope
5586     my $i;
5587 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5588     my $node = $self->{open_elements}->[$_];
5589 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
5590     $i = $_;
5591     last INSCOPE;
5592     } elsif ({
5593     table => 1, html => 1,
5594     }->{$node->[1]}) {
5595     last INSCOPE;
5596     }
5597     } # INSCOPE
5598     unless (defined $i) {
5599 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5600 wakaba 1.1 ## Ignore the token
5601     $token = $self->_get_next_token;
5602     redo B;
5603     }
5604    
5605     ## Clear back to table body context
5606     while (not {
5607     tbody => 1, tfoot => 1, thead => 1, html => 1,
5608 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5609     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5610     pop @{$self->{open_elements}};
5611 wakaba 1.1 }
5612    
5613 wakaba 1.3 pop @{$self->{open_elements}};
5614     $self->{insertion_mode} = 'in table';
5615 wakaba 1.1 $token = $self->_get_next_token;
5616     redo B;
5617     } elsif ($token->{tag_name} eq 'table') {
5618     ## have an element in table scope
5619     my $i;
5620 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5621     my $node = $self->{open_elements}->[$_];
5622 wakaba 1.1 if ({
5623     tbody => 1, thead => 1, tfoot => 1,
5624     }->{$node->[1]}) {
5625     $i = $_;
5626     last INSCOPE;
5627     } elsif ({
5628     table => 1, html => 1,
5629     }->{$node->[1]}) {
5630     last INSCOPE;
5631     }
5632     } # INSCOPE
5633     unless (defined $i) {
5634 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5635 wakaba 1.1 ## Ignore the token
5636     $token = $self->_get_next_token;
5637     redo B;
5638     }
5639    
5640     ## Clear back to table body context
5641     while (not {
5642     tbody => 1, tfoot => 1, thead => 1, html => 1,
5643 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5644     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5645     pop @{$self->{open_elements}};
5646 wakaba 1.1 }
5647    
5648     ## As if <{current node}>
5649     ## have an element in table scope
5650     ## true by definition
5651    
5652     ## Clear back to table body context
5653     ## nop by definition
5654    
5655 wakaba 1.3 pop @{$self->{open_elements}};
5656     $self->{insertion_mode} = 'in table';
5657 wakaba 1.1 ## reprocess
5658     redo B;
5659     } elsif ({
5660     body => 1, caption => 1, col => 1, colgroup => 1,
5661     html => 1, td => 1, th => 1, tr => 1,
5662     }->{$token->{tag_name}}) {
5663 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5664 wakaba 1.1 ## Ignore the token
5665     $token = $self->_get_next_token;
5666     redo B;
5667     } else {
5668     #
5669     }
5670     } else {
5671     #
5672     }
5673    
5674     ## As if in table
5675 wakaba 1.3 $self->{parse_error}-> (type => 'in table:'.$token->{tag_name});
5676 wakaba 1.1 $in_body->($insert_to_foster);
5677     redo B;
5678 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in row') {
5679 wakaba 1.1 if ($token->{type} eq 'character') {
5680     ## NOTE: This is a "character in table" code clone.
5681     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5682 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5683 wakaba 1.1
5684     unless (length $token->{data}) {
5685     $token = $self->_get_next_token;
5686     redo B;
5687     }
5688     }
5689    
5690 wakaba 1.3 $self->{parse_error}-> (type => 'in table:#character');
5691    
5692 wakaba 1.1 ## As if in body, but insert into foster parent element
5693     ## ISSUE: Spec says that "whenever a node would be inserted
5694     ## into the current node" while characters might not be
5695     ## result in a new Text node.
5696     $reconstruct_active_formatting_elements->($insert_to_foster);
5697    
5698     if ({
5699     table => 1, tbody => 1, tfoot => 1,
5700     thead => 1, tr => 1,
5701 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5702 wakaba 1.1 # MUST
5703     my $foster_parent_element;
5704     my $next_sibling;
5705     my $prev_sibling;
5706 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
5707     if ($self->{open_elements}->[$_]->[1] eq 'table') {
5708     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
5709 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
5710     $foster_parent_element = $parent;
5711 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
5712 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
5713     } else {
5714 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
5715 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
5716     }
5717     last OE;
5718     }
5719     } # OE
5720 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
5721 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
5722     unless defined $foster_parent_element;
5723     if (defined $prev_sibling and
5724     $prev_sibling->node_type == 3) {
5725     $prev_sibling->manakai_append_text ($token->{data});
5726     } else {
5727     $foster_parent_element->insert_before
5728     ($self->{document}->create_text_node ($token->{data}),
5729     $next_sibling);
5730     }
5731     } else {
5732 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
5733 wakaba 1.1 }
5734    
5735     $token = $self->_get_next_token;
5736     redo B;
5737     } elsif ($token->{type} eq 'start tag') {
5738     if ($token->{tag_name} eq 'th' or
5739     $token->{tag_name} eq 'td') {
5740     ## Clear back to table row context
5741     while (not {
5742     tr => 1, html => 1,
5743 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5744     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5745     pop @{$self->{open_elements}};
5746 wakaba 1.1 }
5747    
5748    
5749     {
5750     my $el;
5751    
5752     $el = $self->{document}->create_element_ns
5753     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
5754    
5755     for my $attr_name (keys %{ $token->{attributes}}) {
5756     $el->set_attribute_ns (undef, [undef, $attr_name],
5757     $token->{attributes} ->{$attr_name}->{value});
5758     }
5759    
5760 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
5761     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
5762 wakaba 1.1 }
5763    
5764 wakaba 1.3 $self->{insertion_mode} = 'in cell';
5765 wakaba 1.1
5766     push @$active_formatting_elements, ['#marker', ''];
5767    
5768     $token = $self->_get_next_token;
5769     redo B;
5770     } elsif ({
5771     caption => 1, col => 1, colgroup => 1,
5772     tbody => 1, tfoot => 1, thead => 1, tr => 1,
5773     }->{$token->{tag_name}}) {
5774     ## As if </tr>
5775     ## have an element in table scope
5776     my $i;
5777 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5778     my $node = $self->{open_elements}->[$_];
5779 wakaba 1.1 if ($node->[1] eq 'tr') {
5780     $i = $_;
5781     last INSCOPE;
5782     } elsif ({
5783     table => 1, html => 1,
5784     }->{$node->[1]}) {
5785     last INSCOPE;
5786     }
5787     } # INSCOPE
5788     unless (defined $i) {
5789 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5790 wakaba 1.1 ## No |tr| was found in table scope, so the implied </tr> cannot be processed: report it and ignore the token
5791     $token = $self->_get_next_token;
5792     redo B;
5793     }
5794    
5795     ## Clear back to table row context
5796     while (not {
5797     tr => 1, html => 1,
5798 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5799     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5800     pop @{$self->{open_elements}};
5801 wakaba 1.1 }
5802    
5803 wakaba 1.3 pop @{$self->{open_elements}}; # tr
5804     $self->{insertion_mode} = 'in table body';
5805 wakaba 1.1 ## reprocess
5806     redo B;
5807     } elsif ($token->{tag_name} eq 'table') {
5808     ## NOTE: This is a code clone of "table in table"
5809 wakaba 1.3 $self->{parse_error}-> (type => 'not closed:table');
5810 wakaba 1.1
5811     ## As if </table>
5812     ## have a table element in table scope
5813     my $i;
5814 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5815     my $node = $self->{open_elements}->[$_];
5816 wakaba 1.1 if ($node->[1] eq 'table') {
5817     $i = $_;
5818     last INSCOPE;
5819     } elsif ({
5820     table => 1, html => 1,
5821     }->{$node->[1]}) {
5822     last INSCOPE;
5823     }
5824     } # INSCOPE
5825     unless (defined $i) {
5826 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:table');
5827 wakaba 1.1 ## Ignore tokens </table><table>
5828     $token = $self->_get_next_token;
5829     redo B;
5830     }
5831    
5832     ## generate implied end tags
5833     if ({
5834     dd => 1, dt => 1, li => 1, p => 1,
5835     td => 1, th => 1, tr => 1,
5836 wakaba 1.31 tbody => 1, tfoot=> 1, thead => 1,
5837 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5838 wakaba 1.1 unshift @{$self->{token}}, $token; # <table>
5839     $token = {type => 'end tag', tag_name => 'table'};
5840     unshift @{$self->{token}}, $token;
5841     $token = {type => 'end tag',
5842 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
5843 wakaba 1.1 redo B;
5844     }
5845    
5846 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
5847     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5848 wakaba 1.1 }
5849    
5850 wakaba 1.3 splice @{$self->{open_elements}}, $i;
5851 wakaba 1.1
5852 wakaba 1.3 $self->_reset_insertion_mode;
5853 wakaba 1.1
5854     ## reprocess
5855     redo B;
5856     } else {
5857     #
5858     }
5859     } elsif ($token->{type} eq 'end tag') {
5860     if ($token->{tag_name} eq 'tr') {
5861     ## have an element in table scope
5862     my $i;
5863 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5864     my $node = $self->{open_elements}->[$_];
5865 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
5866     $i = $_;
5867     last INSCOPE;
5868     } elsif ({
5869     table => 1, html => 1,
5870     }->{$node->[1]}) {
5871     last INSCOPE;
5872     }
5873     } # INSCOPE
5874     unless (defined $i) {
5875 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5876 wakaba 1.1 ## Ignore the token
5877     $token = $self->_get_next_token;
5878     redo B;
5879     }
5880    
5881     ## Clear back to table row context
5882     while (not {
5883     tr => 1, html => 1,
5884 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5885     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5886     pop @{$self->{open_elements}};
5887 wakaba 1.1 }
5888    
5889 wakaba 1.3 pop @{$self->{open_elements}}; # tr
5890     $self->{insertion_mode} = 'in table body';
5891 wakaba 1.1 $token = $self->_get_next_token;
5892     redo B;
5893     } elsif ($token->{tag_name} eq 'table') {
5894     ## As if </tr>
5895     ## have an element in table scope
5896     my $i;
5897 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5898     my $node = $self->{open_elements}->[$_];
5899 wakaba 1.1 if ($node->[1] eq 'tr') {
5900     $i = $_;
5901     last INSCOPE;
5902     } elsif ({
5903     table => 1, html => 1,
5904     }->{$node->[1]}) {
5905     last INSCOPE;
5906     }
5907     } # INSCOPE
5908     unless (defined $i) {
5909 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5910 wakaba 1.1 ## No |tr| was found in table scope, so the implied </tr> cannot be processed: report the offending tag name (not the token type) and ignore the token
5911     $token = $self->_get_next_token;
5912     redo B;
5913     }
5914    
5915     ## Clear back to table row context
5916     while (not {
5917     tr => 1, html => 1,
5918 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5919     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5920     pop @{$self->{open_elements}};
5921 wakaba 1.1 }
5922    
5923 wakaba 1.3 pop @{$self->{open_elements}}; # tr
5924     $self->{insertion_mode} = 'in table body';
5925 wakaba 1.1 ## reprocess
5926     redo B;
5927     } elsif ({
5928     tbody => 1, tfoot => 1, thead => 1,
5929     }->{$token->{tag_name}}) {
5930     ## have an element in table scope
5931     my $i;
5932 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5933     my $node = $self->{open_elements}->[$_];
5934 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
5935     $i = $_;
5936     last INSCOPE;
5937     } elsif ({
5938     table => 1, html => 1,
5939     }->{$node->[1]}) {
5940     last INSCOPE;
5941     }
5942     } # INSCOPE
5943     unless (defined $i) {
5944 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5945 wakaba 1.1 ## Ignore the token
5946     $token = $self->_get_next_token;
5947     redo B;
5948     }
5949    
5950     ## As if </tr>
5951     ## have an element in table scope
5952     my $i;
5953 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5954     my $node = $self->{open_elements}->[$_];
5955 wakaba 1.1 if ($node->[1] eq 'tr') {
5956     $i = $_;
5957     last INSCOPE;
5958     } elsif ({
5959     table => 1, html => 1,
5960     }->{$node->[1]}) {
5961     last INSCOPE;
5962     }
5963     } # INSCOPE
5964     unless (defined $i) {
5965 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:tr');
5966 wakaba 1.1 ## Ignore the token
5967     $token = $self->_get_next_token;
5968     redo B;
5969     }
5970    
5971     ## Clear back to table row context
5972     while (not {
5973     tr => 1, html => 1,
5974 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5975     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5976     pop @{$self->{open_elements}};
5977 wakaba 1.1 }
5978    
5979 wakaba 1.3 pop @{$self->{open_elements}}; # tr
5980     $self->{insertion_mode} = 'in table body';
5981 wakaba 1.1 ## reprocess
5982     redo B;
5983     } elsif ({
5984     body => 1, caption => 1, col => 1,
5985     colgroup => 1, html => 1, td => 1, th => 1,
5986     }->{$token->{tag_name}}) {
5987 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5988 wakaba 1.1 ## Ignore the token
5989     $token = $self->_get_next_token;
5990     redo B;
5991     } else {
5992     #
5993     }
5994     } else {
5995     #
5996     }
5997    
5998     ## As if in table
5999 wakaba 1.3 $self->{parse_error}-> (type => 'in table:'.$token->{tag_name});
6000 wakaba 1.1 $in_body->($insert_to_foster);
6001     redo B;
6002 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in cell') {
6003 wakaba 1.1 if ($token->{type} eq 'character') {
6004     ## NOTE: This is a code clone of "character in body".
6005     $reconstruct_active_formatting_elements->($insert_to_current);
6006    
6007 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
6008 wakaba 1.1
6009     $token = $self->_get_next_token;
6010     redo B;
6011     } elsif ($token->{type} eq 'start tag') {
6012     if ({
6013     caption => 1, col => 1, colgroup => 1,
6014     tbody => 1, td => 1, tfoot => 1, th => 1,
6015     thead => 1, tr => 1,
6016     }->{$token->{tag_name}}) {
6017     ## have an element in table scope
6018     my $tn;
6019 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6020     my $node = $self->{open_elements}->[$_];
6021 wakaba 1.1 if ($node->[1] eq 'td' or $node->[1] eq 'th') {
6022     $tn = $node->[1];
6023     last INSCOPE;
6024     } elsif ({
6025     table => 1, html => 1,
6026     }->{$node->[1]}) {
6027     last INSCOPE;
6028     }
6029     } # INSCOPE
6030     unless (defined $tn) {
6031 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
6032 wakaba 1.1 ## Ignore the token
6033     $token = $self->_get_next_token;
6034     redo B;
6035     }
6036    
6037     ## Close the cell
6038     unshift @{$self->{token}}, $token; # <?>
6039     $token = {type => 'end tag', tag_name => $tn};
6040     redo B;
6041     } else {
6042     #
6043     }
6044     } elsif ($token->{type} eq 'end tag') {
6045     if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
6046     ## have an element in table scope
6047     my $i;
6048 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6049     my $node = $self->{open_elements}->[$_];
6050 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
6051     $i = $_;
6052     last INSCOPE;
6053     } elsif ({
6054     table => 1, html => 1,
6055     }->{$node->[1]}) {
6056     last INSCOPE;
6057     }
6058     } # INSCOPE
6059     unless (defined $i) {
6060 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
6061 wakaba 1.1 ## Ignore the token
6062     $token = $self->_get_next_token;
6063     redo B;
6064     }
6065    
6066     ## generate implied end tags
6067     if ({
6068     dd => 1, dt => 1, li => 1, p => 1,
6069     td => ($token->{tag_name} eq 'th'),
6070     th => ($token->{tag_name} eq 'td'),
6071     tr => 1,
6072 wakaba 1.31 tbody => 1, tfoot=> 1, thead => 1,
6073 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
6074 wakaba 1.1 unshift @{$self->{token}}, $token;
6075     $token = {type => 'end tag',
6076 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
6077 wakaba 1.1 redo B;
6078     }
6079    
6080 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
6081     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
6082 wakaba 1.1 }
6083    
6084 wakaba 1.3 splice @{$self->{open_elements}}, $i;
6085 wakaba 1.1
6086     $clear_up_to_marker->();
6087    
6088 wakaba 1.3 $self->{insertion_mode} = 'in row';
6089 wakaba 1.1
6090     $token = $self->_get_next_token;
6091     redo B;
6092     } elsif ({
6093     body => 1, caption => 1, col => 1,
6094     colgroup => 1, html => 1,
6095     }->{$token->{tag_name}}) {
6096 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
6097 wakaba 1.1 ## Ignore the token
6098     $token = $self->_get_next_token;
6099     redo B;
6100     } elsif ({
6101     table => 1, tbody => 1, tfoot => 1,
6102     thead => 1, tr => 1,
6103     }->{$token->{tag_name}}) {
6104     ## have an element in table scope
6105     my $i;
6106     my $tn;
6107 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6108     my $node = $self->{open_elements}->[$_];
6109 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
6110     $i = $_;
6111     last INSCOPE;
6112     } elsif ($node->[1] eq 'td' or $node->[1] eq 'th') {
6113     $tn = $node->[1];
6114     ## NOTE: There is exactly one |td| or |th| element
6115     ## in scope in the stack of open elements by definition.
6116     } elsif ({
6117     table => 1, html => 1,
6118     }->{$node->[1]}) {
6119     last INSCOPE;
6120     }
6121     } # INSCOPE
6122     unless (defined $i) {
6123 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
6124 wakaba 1.1 ## Ignore the token
6125     $token = $self->_get_next_token;
6126     redo B;
6127     }
6128    
6129     ## Close the cell
6130     unshift @{$self->{token}}, $token; # </?>
6131     $token = {type => 'end tag', tag_name => $tn};
6132     redo B;
6133     } else {
6134     #
6135     }
6136     } else {
6137     #
6138     }
6139    
6140     $in_body->($insert_to_current);
6141     redo B;
6142 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in select') {
6143 wakaba 1.1 if ($token->{type} eq 'character') {
6144 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
6145 wakaba 1.1 $token = $self->_get_next_token;
6146     redo B;
6147     } elsif ($token->{type} eq 'start tag') {
6148     if ($token->{tag_name} eq 'option') {
6149 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
6150 wakaba 1.1 ## As if </option>
6151 wakaba 1.3 pop @{$self->{open_elements}};
6152 wakaba 1.1 }
6153    
6154    
6155     {
6156     my $el;
6157    
6158     $el = $self->{document}->create_element_ns
6159     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
6160    
6161     for my $attr_name (keys %{ $token->{attributes}}) {
6162     $el->set_attribute_ns (undef, [undef, $attr_name],
6163     $token->{attributes} ->{$attr_name}->{value});
6164     }
6165    
6166 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
6167     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
6168 wakaba 1.1 }
6169    
6170     $token = $self->_get_next_token;
6171     redo B;
6172     } elsif ($token->{tag_name} eq 'optgroup') {
6173 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
6174 wakaba 1.1 ## As if </option>
6175 wakaba 1.3 pop @{$self->{open_elements}};
6176 wakaba 1.1 }
6177    
6178 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
6179 wakaba 1.1 ## As if </optgroup>
6180 wakaba 1.3 pop @{$self->{open_elements}};
6181 wakaba 1.1 }
6182    
6183    
6184     {
6185     my $el;
6186    
6187     $el = $self->{document}->create_element_ns
6188     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
6189    
6190     for my $attr_name (keys %{ $token->{attributes}}) {
6191     $el->set_attribute_ns (undef, [undef, $attr_name],
6192     $token->{attributes} ->{$attr_name}->{value});
6193     }
6194    
6195 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
6196     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
6197 wakaba 1.1 }
6198    
6199     $token = $self->_get_next_token;
6200     redo B;
6201     } elsif ($token->{tag_name} eq 'select') {
6202 wakaba 1.3 $self->{parse_error}-> (type => 'not closed:select');
6203 wakaba 1.1 ## As if </select> instead
6204     ## have an element in table scope
6205     my $i;
6206 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6207     my $node = $self->{open_elements}->[$_];
6208 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
6209     $i = $_;
6210     last INSCOPE;
6211     } elsif ({
6212     table => 1, html => 1,
6213     }->{$node->[1]}) {
6214     last INSCOPE;
6215     }
6216     } # INSCOPE
6217     unless (defined $i) {
6218 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:select');
6219 wakaba 1.1 ## Ignore the token
6220     $token = $self->_get_next_token;
6221     redo B;
6222     }
6223    
6224 wakaba 1.3 splice @{$self->{open_elements}}, $i;
6225 wakaba 1.1
6226 wakaba 1.3 $self->_reset_insertion_mode;
6227 wakaba 1.1
6228     $token = $self->_get_next_token;
6229     redo B;
6230     } else {
6231     #
6232     }
6233     } elsif ($token->{type} eq 'end tag') {
6234     if ($token->{tag_name} eq 'optgroup') {
6235 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option' and
6236     $self->{open_elements}->[-2]->[1] eq 'optgroup') {
6237 wakaba 1.1 ## As if </option>
6238 wakaba 1.3 splice @{$self->{open_elements}}, -2;
6239     } elsif ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
6240     pop @{$self->{open_elements}};
6241 wakaba 1.1 } else {
6242 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
6243 wakaba 1.1 ## Ignore the token
6244     }
6245     $token = $self->_get_next_token;
6246     redo B;
6247     } elsif ($token->{tag_name} eq 'option') {
6248 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
6249     pop @{$self->{open_elements}};
6250 wakaba 1.1 } else {
6251 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
6252 wakaba 1.1 ## Ignore the token
6253     }
6254     $token = $self->_get_next_token;
6255     redo B;
6256     } elsif ($token->{tag_name} eq 'select') {
6257     ## have an element in table scope
6258     my $i;
6259 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6260     my $node = $self->{open_elements}->[$_];
6261 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
6262     $i = $_;
6263     last INSCOPE;
6264     } elsif ({
6265     table => 1, html => 1,
6266     }->{$node->[1]}) {
6267     last INSCOPE;
6268     }
6269     } # INSCOPE
6270     unless (defined $i) {
6271 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
6272 wakaba 1.1 ## Ignore the token
6273     $token = $self->_get_next_token;
6274     redo B;
6275     }
6276    
6277 wakaba 1.3 splice @{$self->{open_elements}}, $i;
6278 wakaba 1.1
6279 wakaba 1.3 $self->_reset_insertion_mode;
6280 wakaba 1.1
6281     $token = $self->_get_next_token;
6282     redo B;
6283     } elsif ({
6284     caption => 1, table => 1, tbody => 1,
6285     tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
6286     }->{$token->{tag_name}}) {
6287 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
6288 wakaba 1.1
6289     ## have an element in table scope
6290     my $i;
6291 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6292     my $node = $self->{open_elements}->[$_];
6293 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
6294     $i = $_;
6295     last INSCOPE;
6296     } elsif ({
6297     table => 1, html => 1,
6298     }->{$node->[1]}) {
6299     last INSCOPE;
6300     }
6301     } # INSCOPE
6302     unless (defined $i) {
6303     ## Ignore the token
6304     $token = $self->_get_next_token;
6305     redo B;
6306     }
6307    
6308     ## As if </select>
6309     ## have an element in table scope
6310     undef $i;
6311 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6312     my $node = $self->{open_elements}->[$_];
6313 wakaba 1.1 if ($node->[1] eq 'select') {
6314     $i = $_;
6315     last INSCOPE;
6316     } elsif ({
6317     table => 1, html => 1,
6318     }->{$node->[1]}) {
6319     last INSCOPE;
6320     }
6321     } # INSCOPE
6322     unless (defined $i) {
6323 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:select');
6324 wakaba 1.1 ## Ignore the </select> token
6325     $token = $self->_get_next_token; ## TODO: ok?
6326     redo B;
6327     }
6328    
6329 wakaba 1.3 splice @{$self->{open_elements}}, $i;
6330 wakaba 1.1
6331 wakaba 1.3 $self->_reset_insertion_mode;
6332 wakaba 1.1
6333     ## reprocess
6334     redo B;
6335     } else {
6336     #
6337     }
6338     } else {
6339     #
6340     }
6341    
6342 wakaba 1.3 $self->{parse_error}-> (type => 'in select:'.$token->{tag_name});
6343 wakaba 1.1 ## Ignore the token
6344     $token = $self->_get_next_token;
6345     redo B;
6346 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after body') {
6347 wakaba 1.1 if ($token->{type} eq 'character') {
6348     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
6349 wakaba 1.35 my $data = $1;
6350 wakaba 1.1 ## As if in body
6351     $reconstruct_active_formatting_elements->($insert_to_current);
6352    
6353 wakaba 1.35 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
6354 wakaba 1.1
6355     unless (length $token->{data}) {
6356     $token = $self->_get_next_token;
6357     redo B;
6358     }
6359     }
6360    
6361     #
6362 wakaba 1.36 $self->{parse_error}-> (type => 'after body:#character');
6363 wakaba 1.3 } elsif ($token->{type} eq 'start tag') {
6364     $self->{parse_error}-> (type => 'after body:'.$token->{tag_name});
6365     #
6366 wakaba 1.1 } elsif ($token->{type} eq 'end tag') {
6367     if ($token->{tag_name} eq 'html') {
6368 wakaba 1.3 if (defined $self->{inner_html_node}) {
6369     $self->{parse_error}-> (type => 'unmatched end tag:html');
6370     ## Ignore the token
6371     $token = $self->_get_next_token;
6372     redo B;
6373     } else {
6374 wakaba 1.35 $previous_insertion_mode = $self->{insertion_mode};
6375     $self->{insertion_mode} = 'trailing end';
6376 wakaba 1.3 $token = $self->_get_next_token;
6377     redo B;
6378     }
6379 wakaba 1.1 } else {
6380 wakaba 1.3 $self->{parse_error}-> (type => 'after body:/'.$token->{tag_name});
6381 wakaba 1.1 }
6382     } else {
6383 wakaba 1.36 die "$0: $token->{type}: Unknown token type";
6384 wakaba 1.1 }
6385    
6386 wakaba 1.3 $self->{insertion_mode} = 'in body';
6387 wakaba 1.1 ## reprocess
6388     redo B;
6389 wakaba 1.36 } elsif ($self->{insertion_mode} eq 'in frameset') {
6390     if ($token->{type} eq 'character') {
6391     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
6392     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
6393 wakaba 1.1
6394 wakaba 1.36 unless (length $token->{data}) {
6395 wakaba 1.1 $token = $self->_get_next_token;
6396     redo B;
6397 wakaba 1.36 }
6398     }
6399    
6400     $self->{parse_error}-> (type => 'in frameset:#character');
6401     ## Ignore the token
6402     $token = $self->_get_next_token;
6403     redo B;
6404     } elsif ($token->{type} eq 'start tag') {
6405     if ($token->{tag_name} eq 'frameset') {
6406    
6407 wakaba 1.1 {
6408     my $el;
6409    
6410     $el = $self->{document}->create_element_ns
6411     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
6412    
6413     for my $attr_name (keys %{ $token->{attributes}}) {
6414     $el->set_attribute_ns (undef, [undef, $attr_name],
6415     $token->{attributes} ->{$attr_name}->{value});
6416     }
6417    
6418 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
6419     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
6420 wakaba 1.1 }
6421    
6422 wakaba 1.36 $token = $self->_get_next_token;
6423     redo B;
6424     } elsif ($token->{tag_name} eq 'frame') {
6425    
6426 wakaba 1.1 {
6427     my $el;
6428    
6429     $el = $self->{document}->create_element_ns
6430     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
6431    
6432     for my $attr_name (keys %{ $token->{attributes}}) {
6433     $el->set_attribute_ns (undef, [undef, $attr_name],
6434     $token->{attributes} ->{$attr_name}->{value});
6435     }
6436    
6437 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
6438     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
6439 wakaba 1.1 }
6440    
6441 wakaba 1.36 pop @{$self->{open_elements}};
6442     $token = $self->_get_next_token;
6443     redo B;
6444     } elsif ($token->{tag_name} eq 'noframes') {
6445     $in_body->($insert_to_current);
6446     redo B;
6447     } else {
6448     $self->{parse_error}-> (type => 'in frameset:'.$token->{tag_name});
6449     ## Ignore the token
6450     $token = $self->_get_next_token;
6451     redo B;
6452     }
6453     } elsif ($token->{type} eq 'end tag') {
6454     if ($token->{tag_name} eq 'frameset') {
6455     if ($self->{open_elements}->[-1]->[1] eq 'html' and
6456     @{$self->{open_elements}} == 1) {
6457     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
6458     ## Ignore the token
6459     $token = $self->_get_next_token;
6460 wakaba 1.1 } else {
6461 wakaba 1.36 pop @{$self->{open_elements}};
6462     $token = $self->_get_next_token;
6463 wakaba 1.1 }
6464 wakaba 1.36
6465     if (not defined $self->{inner_html_node} and
6466     $self->{open_elements}->[-1]->[1] ne 'frameset') {
6467     $self->{insertion_mode} = 'after frameset';
6468 wakaba 1.3 }
6469 wakaba 1.36 redo B;
6470     } else {
6471     $self->{parse_error}-> (type => 'in frameset:/'.$token->{tag_name});
6472 wakaba 1.1 ## Ignore the token
6473     $token = $self->_get_next_token;
6474     redo B;
6475 wakaba 1.36 }
6476     } else {
6477     die "$0: $token->{type}: Unknown token type";
6478     }
6479     } elsif ($self->{insertion_mode} eq 'after frameset') {
6480     if ($token->{type} eq 'character') {
6481 wakaba 1.1 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
6482 wakaba 1.35 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
6483 wakaba 1.1
6484     unless (length $token->{data}) {
6485     $token = $self->_get_next_token;
6486     redo B;
6487     }
6488     }
6489    
6490 wakaba 1.35 if ($token->{data} =~ s/^[^\x09\x0A\x0B\x0C\x20]+//) {
6491     $self->{parse_error}-> (type => 'after frameset:#character');
6492    
6493     ## Ignore the token.
6494     if (length $token->{data}) {
6495     ## reprocess the rest of characters
6496     } else {
6497     $token = $self->_get_next_token;
6498     }
6499     redo B;
6500     }
6501 wakaba 1.36
6502     die qq[$0: Character "$token->{data}"];
6503     } elsif ($token->{type} eq 'start tag') {
6504     if ($token->{tag_name} eq 'noframes') {
6505     $in_body->($insert_to_current);
6506     redo B;
6507     } else {
6508     $self->{parse_error}-> (type => 'after frameset:'.$token->{tag_name});
6509 wakaba 1.1 ## Ignore the token
6510     $token = $self->_get_next_token;
6511     redo B;
6512 wakaba 1.36 }
6513     } elsif ($token->{type} eq 'end tag') {
6514     if ($token->{tag_name} eq 'html') {
6515     $previous_insertion_mode = $self->{insertion_mode};
6516     $self->{insertion_mode} = 'trailing end';
6517     $token = $self->_get_next_token;
6518     redo B;
6519 wakaba 1.1 } else {
6520 wakaba 1.36 $self->{parse_error}-> (type => 'after frameset:/'.$token->{tag_name});
6521     ## Ignore the token
6522     $token = $self->_get_next_token;
6523     redo B;
6524 wakaba 1.1 }
6525 wakaba 1.36 } else {
6526     die "$0: $token->{type}: Unknown token type";
6527 wakaba 1.1 }
6528 wakaba 1.36
6529     ## ISSUE: An issue in spec here
6530 wakaba 1.35 } elsif ($self->{insertion_mode} eq 'trailing end') {
6531 wakaba 1.1 ## states in the main stage is preserved yet # MUST
6532    
6533 wakaba 1.36 if ($token->{type} eq 'character') {
6534 wakaba 1.1 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
6535     my $data = $1;
6536     ## As if in the main phase.
6537     ## NOTE: The insertion mode in the main phase
6538     ## just before the phase has been changed to the trailing
6539     ## end phase is either "after body" or "after frameset".
6540 wakaba 1.35 $reconstruct_active_formatting_elements->($insert_to_current);
6541 wakaba 1.1
6542 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($data);
6543 wakaba 1.1
6544     unless (length $token->{data}) {
6545     $token = $self->_get_next_token;
6546     redo B;
6547     }
6548     }
6549    
6550 wakaba 1.3 $self->{parse_error}-> (type => 'after html:#character');
6551 wakaba 1.35 $self->{insertion_mode} = $previous_insertion_mode;
6552 wakaba 1.1 ## reprocess
6553     redo B;
6554 wakaba 1.36 } elsif ($token->{type} eq 'start tag') {
6555     $self->{parse_error}-> (type => 'after html:'.$token->{tag_name});
6556     $self->{insertion_mode} = $previous_insertion_mode;
6557     ## reprocess
6558     redo B;
6559     } elsif ($token->{type} eq 'end tag') {
6560     $self->{parse_error}-> (type => 'after html:/'.$token->{tag_name});
6561 wakaba 1.35 $self->{insertion_mode} = $previous_insertion_mode;
6562 wakaba 1.1 ## reprocess
6563     redo B;
6564     } else {
6565     die "$0: $token->{type}: Unknown token";
6566     }
6567 wakaba 1.36 } else {
6568     die "$0: $self->{insertion_mode}: Unknown insertion mode";
6569 wakaba 1.1 }
6570     } # B
6571    
6572     ## Stop parsing # MUST
6573    
6574     ## TODO: script stuffs
6575 wakaba 1.3 } # _tree_construct_main
6576    
## Replaces the children of |$node| with the result of parsing an HTML
## string.
##
## Invoked as a class method:
##   $class->set_inner_html ($node, $string, $onerror)
##
##   $node    - a node whose |node_type| is 9 (Document) or 1 (Element)
##   $string  - the markup to parse; it is accessed through a reference
##              to the caller's argument rather than copied
##   $onerror - optional code reference that receives parse error
##              information; in the element case a handler that |warn|s
##              is used when it is omitted
##
## Dies if |$node| has any other node type.
sub set_inner_html ($$$) {
  my $class = shift;
  my $node = shift;
  my ($s, $onerror) = (\$_[0], $_[1]);

  my $nt = $node->node_type;

  if ($nt == 9) { # Document
    # MUST

    ## Step 1 # MUST
    ## TODO: If the document has an active parser, ...
    ## ISSUE: There is an issue in the spec.

    ## Step 2 # MUST
    ## Take a snapshot of the child list before removing anything.
    my @old_children = @{$node->child_nodes};
    $node->remove_child ($_) for @old_children;

    ## Step 3, 4, 5 # MUST
    return $class->parse_string ($$s => $node, $onerror);
  }

  die "$0: |set_inner_html| is not defined for node of type $nt"
      unless $nt == 1;

  ## From here on |$node| is an element.
  ## TODO: If non-html element

  ## NOTE: Most of this code is copied from |parse_string|

  ## Step 1 # MUST
  my $this_doc = $node->owner_document;
  my $doc = $this_doc->implementation->create_document;
  $doc->manakai_is_html (1);
  my $p = $class->new;
  $p->{document} = $doc;

  ## Step 9 # MUST
  ## |$pos| is the index of the next character to read from |$$s|;
  ## |$line| and |$column| are reported with every parse error.
  my ($pos, $line, $column) = (0, 1, 0);
  $p->{set_next_input_character} = sub {
    my $self = shift;

    ## Drop the oldest remembered character and remember the current one.
    pop @{$self->{prev_input_character}};
    unshift @{$self->{prev_input_character}}, $self->{next_input_character};

    if ($pos >= length $$s) {
      $self->{next_input_character} = -1; # nothing left to read
      return;
    }

    $self->{next_input_character} = ord substr $$s, $pos++, 1;
    $column++;

    my $c = $self->{next_input_character};
    if ($c == 0x000A) { # LF
      $line++;
      $column = 0;
    } elsif ($c == 0x000D) { # CR
      ## A CR LF pair is consumed as a single line break.
      $pos++ if substr ($$s, $pos, 1) eq "\x0A";
      $self->{next_input_character} = 0x000A; # LF # MUST
      $line++;
      $column = 0;
    } elsif ($c > 0x10FFFF) {
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    } elsif ($c == 0x0000) { # NULL
      $self->{parse_error}-> (type => 'NULL');
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    }
  };
  $p->{prev_input_character} = [-1, -1, -1];
  $p->{next_input_character} = -1;

  ## Errors go to the caller's handler, or to |warn| when none was given;
  ## the current line and column are appended in either case.
  my $ponerror = $onerror || sub {
    my (%opt) = @_;
    warn "Parse error ($opt{type}) at line $opt{line} column $opt{column}\n";
  };
  $p->{parse_error} = sub {
    $ponerror->(@_, line => $line, column => $column);
  };

  $p->_initialize_tokenizer;
  $p->_initialize_tree_constructor;

  ## Step 2
  ## The initial content model depends on the local name of the context
  ## element; anything not listed falls back to PCDATA.
  my $node_ln = $node->local_name;
  my %content_model_of = (
    (map { ($_ => RCDATA_CONTENT_MODEL) } qw(title textarea)),
    (map { ($_ => CDATA_CONTENT_MODEL) }
         qw(style script xmp iframe noembed noframes noscript)),
    plaintext => PLAINTEXT_CONTENT_MODEL,
  );
  $p->{content_model} = $content_model_of{$node_ln};
  $p->{content_model} = PCDATA_CONTENT_MODEL
      unless defined $p->{content_model};
      ## ISSUE: What is "the name of the element"? local name?

  $p->{inner_html_node} = [$node, $node_ln];

  ## Step 4
  my $root = $doc->create_element_ns
      ('http://www.w3.org/1999/xhtml', [undef, 'html']);

  ## Step 5 # MUST
  $doc->append_child ($root);

  ## Step 6 # MUST
  push @{$p->{open_elements}}, [$root, 'html'];

  undef $p->{head_element};

  ## Step 7 # MUST
  $p->_reset_insertion_mode;

  ## Step 8 # MUST
  ## Walk from the context element towards the root and remember the
  ## nearest XHTML |form| element, if there is one.
  AN: for (my $anode = $node; defined $anode; $anode = $anode->parent_node) {
    next AN unless $anode->node_type == 1;
    my $nsuri = $anode->namespace_uri;
    next AN unless defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml';
    next AN unless $anode->local_name eq 'form'; ## TODO: case?
    $p->{form_element} = $anode;
    last AN;
  } # AN

  ## Step 3 # MUST
  ## Step 10 # MUST
  $token = $p->_get_next_token;
  $p->_tree_construction_main;

  ## Step 11 # MUST
  my @stale_children = @{$node->child_nodes};
  $node->remove_child ($_) for @stale_children;
  ## ISSUE: mutation events? read-only?

  ## Step 12 # MUST
  ## Move every child of the temporary root into the context element.
  my @parsed_children = @{$root->child_nodes};
  for my $parsed (@parsed_children) {
    $this_doc->adopt_node ($parsed);
    $node->append_child ($parsed);
  }
  ## ISSUE: mutation events?

  return $p->_terminate_tree_constructor;
} # set_inner_html
6732    
6733     } # tree construction stage
6734 wakaba 1.1
## Serializes the children of |$node| as an HTML fragment.
##
## Invoked as a method; the invocant is ignored.
##
##   $node     - the node whose children are serialized
##   $on_error - optional code reference; it is called with each child
##               whose node type is not one of those handled below
##
## Returns a reference to the serialized string.
sub get_inner_html ($$$) {
  my (undef, $node, $on_error) = @_;

  ## Elements whose text content is written out literally (unescaped).
  ## The same table is used for the ancestors of |$node| and for the
  ## elements met while serializing, so that both agree.
  my %is_cdata_element = map { ($_ => 1) } qw(
    style script xmp iframe noembed noframes noscript plaintext
  );

  ## Elements that are serialized as a start tag only.
  my %is_void_element = map { ($_ => 1) } qw(
    area base basefont bgsound br col embed frame hr
    img input link meta param spacer wbr
  );

  ## Escapes the characters & < > " in an attribute value or text.
  my $escape = sub {
    my $value = shift;
    $value =~ s/&/&amp;/g;
    $value =~ s/</&lt;/g;
    $value =~ s/>/&gt;/g;
    $value =~ s/"/&quot;/g;
    return $value;
  };

  ## Step 1
  my $s = '';

  ## Text is literal from the start if |$node| itself or one of its
  ## ancestors is an XHTML-namespace element listed in the table above.
  my $in_cdata;
  for (my $parent = $node; defined $parent; $parent = $parent->parent_node) {
    next unless $parent->node_type == 1;
    my $nsuri = $parent->namespace_uri; # may be undef for null-namespace nodes
    next unless defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml';
    if ($is_cdata_element{$parent->local_name}) { ## TODO: case thingy
      $in_cdata = 1;
      last;
    }
  }

  ## Step 2
  ## |@node| is a work list processed from the front.  Besides nodes it
  ## holds plain strings: an end tag to be appended to the output, or
  ## the marker 'cdata-out' which ends a literal-text section.
  my @node = @{$node->child_nodes};
  C: while (@node) {
    my $child = shift @node;
    unless (ref $child) {
      if ($child eq 'cdata-out') {
        $in_cdata = 0;
      } else {
        $s .= $child; # end tag
      }
      next C;
    }

    my $nt = $child->node_type;
    if ($nt == 1) { # Element
      my $tag_name = $child->tag_name; ## TODO: manakai_tag_name
      $s .= '<' . $tag_name;
      ## NOTE: Non-HTML case:
      ## <http://permalink.gmane.org/gmane.org.w3c.whatwg.discuss/11191>

      my @attrs = @{$child->attributes}; # sort order MUST be stable
      for my $attr (@attrs) { # order is implementation dependent
        my $attr_name = $attr->name; ## TODO: manakai_name
        $s .= ' ' . $attr_name . '="' . $escape->($attr->value) . '"';
      }
      $s .= '>';

      next C if $is_void_element{$tag_name};

      $s .= "\x0A" if $tag_name eq 'pre' or $tag_name eq 'textarea';

      if (not $in_cdata and $is_cdata_element{$tag_name}) {
        ## The marker is queued first so that it ends up behind the
        ## children and the end tag queued just below.
        unshift @node, 'cdata-out';
        $in_cdata = 1;
      }

      unshift @node, @{$child->child_nodes}, '</' . $tag_name . '>';
    } elsif ($nt == 3 or $nt == 4) { # Text or CDATA section
      $s .= $in_cdata ? $child->data : $escape->($child->data);
    } elsif ($nt == 8) { # Comment
      $s .= '<!--' . $child->data . '-->';
    } elsif ($nt == 10) { # Document type
      $s .= '<!DOCTYPE ' . $child->name . '>';
    } elsif ($nt == 5) { # entrefs
      ## The children are put at the front of the work list so that they
      ## are serialized where the reference occurs, not after everything
      ## else that is still pending.
      unshift @node, @{$child->child_nodes};
    } else {
      $on_error->($child) if defined $on_error;
    }
    ## ISSUE: This code does not support PIs.
  } # C

  ## Step 3
  return \$s;
} # get_inner_html
6834    
6835     1;
6836 wakaba 1.43 # $Date: 2007/07/21 06:04:07 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24