/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.32 - (hide annotations) (download)
Sun Jul 1 06:18:57 2007 UTC (17 years, 4 months ago) by wakaba
Branch: MAIN
Changes since 1.31: +7 -3 lines
++ whatpm/t/ChangeLog	1 Jul 2007 06:18:53 -0000
	* tree-test-1.dat: New tests for unmatched end tags.

2007-07-01  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	1 Jul 2007 06:18:08 -0000
	* HTML.pm.src: Report correct error message
	for |<body></div></body>|.

2007-07-01  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.2 package Whatpm::HTML;
2 wakaba 1.1 use strict;
3 wakaba 1.32 our $VERSION=do{my @r=(q$Revision: 1.31 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.1
5 wakaba 1.18 ## ISSUE:
6     ## var doc = implementation.createDocument (null, null, null);
7     ## doc.write ('');
8     ## alert (doc.compatMode);
9 wakaba 1.1
10 wakaba 1.31 ## ISSUE: HTML5 revision 967 says that the encoding layer MUST NOT
11     ## strip BOM and the HTML layer MUST ignore it. Whether we can do it
12     ## is not yet clear.
13     ## "{U+FEFF}..." in UTF-16BE/UTF-16LE is three or four characters?
14     ## "{U+FEFF}..." in GB18030?
15    
## Tag names whose start tag may end with "/>" (a "permitted slash").
## The tokenizer reports a |nestc| parse error for a "/" inside any tag
## that is not a start tag with one of these names immediately followed
## by ">".
my $permitted_slash_tag_name = {
  map { ($_ => 1) } qw(
    base link meta hr br img embed param area col input
  )
};
29    
## Replacement code points for the 32 code points 0x80..0x9F, keyed by
## the original code point. Entries without a replacement map to
## U+FFFD REPLACEMENT CHARACTER.
## NOTE(review): the values appear to follow the windows-1252
## assignments, and the table is presumably consulted when resolving
## numeric character references -- the consumer is outside this part of
## the file, so confirm there.
my $c1_entity_char = do {
  my $code = 0x80;
  my %replacement_of = map { ($code++ => $_) } (
    0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, # 80..87
    0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD, # 88..8F
    0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, # 90..97
    0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178, # 98..9F
  );
  \%replacement_of;
}; # $c1_entity_char
64 wakaba 1.1
## Element categories used by the tree construction stage. Each table
## maps a lowercase tag name to 1; an element that appears in none of
## the three tables belongs to the "phrasing" category.
my $special_category = {
  address => 1, area => 1, base => 1, basefont => 1, bgsound => 1,
  blockquote => 1, body => 1, br => 1, center => 1, col => 1, colgroup => 1,
  dd => 1, dir => 1, div => 1, dl => 1, dt => 1, embed => 1, fieldset => 1,
  form => 1, frame => 1, frameset => 1, h1 => 1, h2 => 1, h3 => 1,
  h4 => 1, h5 => 1, h6 => 1, head => 1, hr => 1, iframe => 1, image => 1,
  img => 1, input => 1, isindex => 1, li => 1, link => 1, listing => 1,
  menu => 1, meta => 1, noembed => 1, noframes => 1, noscript => 1,
  ol => 1, optgroup => 1, option => 1, p => 1, param => 1, plaintext => 1,
  pre => 1, script => 1, select => 1, spacer => 1, style => 1, tbody => 1,
  textarea => 1, tfoot => 1, thead => 1, title => 1, tr => 1, ul => 1, wbr => 1,
};
my $scoping_category = {
  button => 1, caption => 1, html => 1, marquee => 1, object => 1,
  table => 1, td => 1, th => 1,
};
## NOTE: The key |strike| was previously misspelled |strile|, so
## |strike| elements were never recognized as formatting elements.
my $formatting_category = {
  a => 1, b => 1, big => 1, em => 1, font => 1, i => 1, nobr => 1,
  s => 1, small => 1, strike => 1, strong => 1, tt => 1, u => 1,
};
# $phrasing_category: all other elements
86    
## Parse a string into a document.
##
## Invoked as a method; a fresh parser object is created through |new|
## on the invocant. Arguments after the invocant:
##   1. the input string (read in place through a reference),
##   2. the document object, stored as |$self->{document}|,
##   3. optional error callback, called with |type|, |line| and
##      |column| key/value pairs. When omitted, each error is |warn|ed.
## Returns the document object.
sub parse_string ($$$;$) {
  my $self = shift->new;
  my $input = \$_[0];
  $self->{document} = $_[1];

  ## NOTE: |set_inner_html| copies most of this method's code

  my $pos = 0;     # index of the next unread character in |$$input|
  my $line = 1;
  my $column = 0;
  $self->{set_next_input_character} = sub {
    my $self = shift;

    ## Shift the three-entry history; index 0 is the most recent one.
    pop @{$self->{prev_input_character}};
    unshift @{$self->{prev_input_character}}, $self->{next_input_character};

    if ($pos >= length $$input) {
      $self->{next_input_character} = -1; # end of input
      return;
    }

    my $code = $self->{next_input_character} = ord substr $$input, $pos++, 1;
    $column++;

    if ($code == 0x000D) { # CR
      ## CR and CR LF are both normalized to a single LF.
      $pos++ if substr ($$input, $pos, 1) eq "\x0A";
      $self->{next_input_character} = 0x000A; # LF # MUST
      $line++;
      $column = 0;
    } elsif ($code == 0x000A) { # LF
      $line++;
      $column = 0;
    } elsif ($code > 0x10FFFF) {
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    } elsif ($code == 0x0000) { # NULL
      $self->{parse_error}-> (type => 'NULL');
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    }
  };
  $self->{prev_input_character} = [-1, -1, -1];
  $self->{next_input_character} = -1;

  my $onerror = $_[2] || sub {
    my %opt = @_;
    warn "Parse error ($opt{type}) at line $opt{line} column $opt{column}\n";
  };
  ## Every reported error is tagged with the current input position.
  $self->{parse_error} = sub {
    $onerror->(@_, line => $line, column => $column);
  };

  $self->_initialize_tokenizer;
  $self->_initialize_tree_constructor;
  $self->_construct_tree;
  $self->_terminate_tree_constructor;

  return $self->{document};
} # parse_string
140    
## Create a parser object with inert default callbacks:
##   |set_next_input_character| always reports end of input (-1), and
##   |parse_error| silently ignores every error.
## |parse_string| replaces both callbacks before parsing.
sub new ($) {
  my $class = shift;
  my $self = bless {}, $class;

  ## The callback is stored inside |$self|, so it must not hold a strong
  ## reference to |$self|: that would form a reference cycle and the
  ## object would never be freed unless the callback were replaced.
  require Scalar::Util;
  my $weak_self = $self;
  Scalar::Util::weaken ($weak_self);
  $self->{set_next_input_character} = sub {
    $weak_self->{next_input_character} = -1;
  };
  $self->{parse_error} = sub {
    #
  };
  return $self;
} # new
152    
153     ## Implementations MUST act as if they used the state machine in the spec
154    
## Reset the tokenizer to its initial condition and read the first input
## character through the |set_next_input_character| callback.
sub _initialize_tokenizer ($) {
  my $self = shift;
  $self->{state} = 'data'; # MUST be the initial state
  $self->{content_model_flag} = 'PCDATA'; # MUST be the initial flag
  undef $self->{current_token}; # start tag, end tag, comment, or DOCTYPE
  undef $self->{current_attribute};
  undef $self->{last_emitted_start_tag_name};
  undef $self->{last_attribute_value_state};

  ## The data state consults |escape| in the RCDATA and CDATA content
  ## models; a flag left over from an earlier run would keep "<" from
  ## opening a tag there, so it is cleared as well.
  delete $self->{escape};

  ## Pushed-back characters waiting to be reconsumed; none initially, so
  ## the first character always comes from the input callback.
  $self->{char} = [];
  $self->{set_next_input_character}->($self); # sets |next_input_character|

  ## Tokens waiting to be returned by |_get_next_token|.
  $self->{token} = [];
} # _initialize_tokenizer
175    
176     ## A token has:
177     ## ->{type} eq 'DOCTYPE', 'start tag', 'end tag', 'comment',
178     ## 'character', or 'end-of-file'
179 wakaba 1.18 ## ->{name} (DOCTYPE); ->{tag_name} (start tag, end tag)
180     ## ->{public_identifier} (DOCTYPE)
181     ## ->{system_identifier} (DOCTYPE)
182     ## ->{correct} == 1 or 0 (DOCTYPE)
183 wakaba 1.1 ## ->{attributes} isa HASH (start tag, end tag)
184     ## ->{data} (comment, character)
185    
186     ## Emitted token MUST immediately be handled by the tree construction state.
187    
188     ## Before each step, UA MAY check to see if either one of the scripts in
189     ## "list of scripts that will execute as soon as possible" or the first
190     ## script in the "list of scripts that will execute asynchronously",
191     ## has completed loading. If one has, then it MUST be executed
192     ## and removed from the list.
193    
194     sub _get_next_token ($) {
195     my $self = shift;
196     if (@{$self->{token}}) {
197     return shift @{$self->{token}};
198     }
199    
200     A: {
201     if ($self->{state} eq 'data') {
202     if ($self->{next_input_character} == 0x0026) { # &
203     if ($self->{content_model_flag} eq 'PCDATA' or
204     $self->{content_model_flag} eq 'RCDATA') {
205     $self->{state} = 'entity data';
206    
207     if (@{$self->{char}}) {
208     $self->{next_input_character} = shift @{$self->{char}};
209     } else {
210     $self->{set_next_input_character}->($self);
211     }
212    
213     redo A;
214     } else {
215     #
216     }
217 wakaba 1.13 } elsif ($self->{next_input_character} == 0x002D) { # -
218     if ($self->{content_model_flag} eq 'RCDATA' or
219     $self->{content_model_flag} eq 'CDATA') {
220     unless ($self->{escape}) {
221     if ($self->{prev_input_character}->[0] == 0x002D and # -
222     $self->{prev_input_character}->[1] == 0x0021 and # !
223     $self->{prev_input_character}->[2] == 0x003C) { # <
224     $self->{escape} = 1;
225     }
226     }
227     }
228    
229     #
230 wakaba 1.1 } elsif ($self->{next_input_character} == 0x003C) { # <
231 wakaba 1.13 if ($self->{content_model_flag} eq 'PCDATA' or
232     (($self->{content_model_flag} eq 'CDATA' or
233     $self->{content_model_flag} eq 'RCDATA') and
234     not $self->{escape})) {
235 wakaba 1.1 $self->{state} = 'tag open';
236    
237     if (@{$self->{char}}) {
238     $self->{next_input_character} = shift @{$self->{char}};
239     } else {
240     $self->{set_next_input_character}->($self);
241     }
242    
243     redo A;
244     } else {
245     #
246     }
247 wakaba 1.13 } elsif ($self->{next_input_character} == 0x003E) { # >
248     if ($self->{escape} and
249     ($self->{content_model_flag} eq 'RCDATA' or
250     $self->{content_model_flag} eq 'CDATA')) {
251     if ($self->{prev_input_character}->[0] == 0x002D and # -
252     $self->{prev_input_character}->[1] == 0x002D) { # -
253     delete $self->{escape};
254     }
255     }
256    
257     #
258 wakaba 1.1 } elsif ($self->{next_input_character} == -1) {
259     return ({type => 'end-of-file'});
260     last A; ## TODO: ok?
261     }
262     # Anything else
263     my $token = {type => 'character',
264     data => chr $self->{next_input_character}};
265     ## Stay in the data state
266    
267     if (@{$self->{char}}) {
268     $self->{next_input_character} = shift @{$self->{char}};
269     } else {
270     $self->{set_next_input_character}->($self);
271     }
272    
273    
274     return ($token);
275    
276     redo A;
277     } elsif ($self->{state} eq 'entity data') {
278     ## (cannot happen in CDATA state)
279    
280 wakaba 1.26 my $token = $self->_tokenize_attempt_to_consume_an_entity (0);
281 wakaba 1.1
282     $self->{state} = 'data';
283     # next-input-character is already done
284    
285     unless (defined $token) {
286     return ({type => 'character', data => '&'});
287     } else {
288     return ($token);
289     }
290    
291     redo A;
292     } elsif ($self->{state} eq 'tag open') {
293     if ($self->{content_model_flag} eq 'RCDATA' or
294     $self->{content_model_flag} eq 'CDATA') {
295     if ($self->{next_input_character} == 0x002F) { # /
296    
297     if (@{$self->{char}}) {
298     $self->{next_input_character} = shift @{$self->{char}};
299     } else {
300     $self->{set_next_input_character}->($self);
301     }
302    
303     $self->{state} = 'close tag open';
304     redo A;
305     } else {
306     ## reconsume
307     $self->{state} = 'data';
308    
309     return ({type => 'character', data => '<'});
310    
311     redo A;
312     }
313     } elsif ($self->{content_model_flag} eq 'PCDATA') {
314     if ($self->{next_input_character} == 0x0021) { # !
315     $self->{state} = 'markup declaration open';
316    
317     if (@{$self->{char}}) {
318     $self->{next_input_character} = shift @{$self->{char}};
319     } else {
320     $self->{set_next_input_character}->($self);
321     }
322    
323     redo A;
324     } elsif ($self->{next_input_character} == 0x002F) { # /
325     $self->{state} = 'close tag open';
326    
327     if (@{$self->{char}}) {
328     $self->{next_input_character} = shift @{$self->{char}};
329     } else {
330     $self->{set_next_input_character}->($self);
331     }
332    
333     redo A;
334     } elsif (0x0041 <= $self->{next_input_character} and
335     $self->{next_input_character} <= 0x005A) { # A..Z
336     $self->{current_token}
337     = {type => 'start tag',
338     tag_name => chr ($self->{next_input_character} + 0x0020)};
339     $self->{state} = 'tag name';
340    
341     if (@{$self->{char}}) {
342     $self->{next_input_character} = shift @{$self->{char}};
343     } else {
344     $self->{set_next_input_character}->($self);
345     }
346    
347     redo A;
348     } elsif (0x0061 <= $self->{next_input_character} and
349     $self->{next_input_character} <= 0x007A) { # a..z
350     $self->{current_token} = {type => 'start tag',
351     tag_name => chr ($self->{next_input_character})};
352     $self->{state} = 'tag name';
353    
354     if (@{$self->{char}}) {
355     $self->{next_input_character} = shift @{$self->{char}};
356     } else {
357     $self->{set_next_input_character}->($self);
358     }
359    
360     redo A;
361     } elsif ($self->{next_input_character} == 0x003E) { # >
362 wakaba 1.3 $self->{parse_error}-> (type => 'empty start tag');
363 wakaba 1.1 $self->{state} = 'data';
364    
365     if (@{$self->{char}}) {
366     $self->{next_input_character} = shift @{$self->{char}};
367     } else {
368     $self->{set_next_input_character}->($self);
369     }
370    
371    
372     return ({type => 'character', data => '<>'});
373    
374     redo A;
375     } elsif ($self->{next_input_character} == 0x003F) { # ?
376 wakaba 1.3 $self->{parse_error}-> (type => 'pio');
377 wakaba 1.1 $self->{state} = 'bogus comment';
378     ## $self->{next_input_character} is intentionally left as is
379     redo A;
380     } else {
381 wakaba 1.3 $self->{parse_error}-> (type => 'bare stago');
382 wakaba 1.1 $self->{state} = 'data';
383     ## reconsume
384    
385     return ({type => 'character', data => '<'});
386    
387     redo A;
388     }
389     } else {
390     die "$0: $self->{content_model_flag}: Unknown content model flag";
391     }
392     } elsif ($self->{state} eq 'close tag open') {
393     if ($self->{content_model_flag} eq 'RCDATA' or
394     $self->{content_model_flag} eq 'CDATA') {
395 wakaba 1.23 if (defined $self->{last_emitted_start_tag_name}) {
396 wakaba 1.30 ## NOTE: <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>
397 wakaba 1.23 my @next_char;
398     TAGNAME: for (my $i = 0; $i < length $self->{last_emitted_start_tag_name}; $i++) {
399     push @next_char, $self->{next_input_character};
400     my $c = ord substr ($self->{last_emitted_start_tag_name}, $i, 1);
401     my $C = 0x0061 <= $c && $c <= 0x007A ? $c - 0x0020 : $c;
402     if ($self->{next_input_character} == $c or $self->{next_input_character} == $C) {
403    
404 wakaba 1.1 if (@{$self->{char}}) {
405     $self->{next_input_character} = shift @{$self->{char}};
406     } else {
407     $self->{set_next_input_character}->($self);
408     }
409    
410 wakaba 1.23 next TAGNAME;
411     } else {
412     $self->{next_input_character} = shift @next_char; # reconsume
413     unshift @{$self->{char}}, (@next_char);
414     $self->{state} = 'data';
415    
416     return ({type => 'character', data => '</'});
417    
418     redo A;
419     }
420     }
421     push @next_char, $self->{next_input_character};
422    
423     unless ($self->{next_input_character} == 0x0009 or # HT
424     $self->{next_input_character} == 0x000A or # LF
425     $self->{next_input_character} == 0x000B or # VT
426     $self->{next_input_character} == 0x000C or # FF
427     $self->{next_input_character} == 0x0020 or # SP
428     $self->{next_input_character} == 0x003E or # >
429     $self->{next_input_character} == 0x002F or # /
430     $self->{next_input_character} == -1) {
431 wakaba 1.1 $self->{next_input_character} = shift @next_char; # reconsume
432     unshift @{$self->{char}}, (@next_char);
433     $self->{state} = 'data';
434     return ({type => 'character', data => '</'});
435     redo A;
436 wakaba 1.23 } else {
437     $self->{next_input_character} = shift @next_char;
438     unshift @{$self->{char}}, (@next_char);
439     # and consume...
440 wakaba 1.1 }
441 wakaba 1.23 } else {
442     ## No start tag token has ever been emitted
443     # next-input-character is already done
444 wakaba 1.1 $self->{state} = 'data';
445     return ({type => 'character', data => '</'});
446     redo A;
447     }
448     }
449    
450     if (0x0041 <= $self->{next_input_character} and
451     $self->{next_input_character} <= 0x005A) { # A..Z
452     $self->{current_token} = {type => 'end tag',
453     tag_name => chr ($self->{next_input_character} + 0x0020)};
454     $self->{state} = 'tag name';
455    
456     if (@{$self->{char}}) {
457     $self->{next_input_character} = shift @{$self->{char}};
458     } else {
459     $self->{set_next_input_character}->($self);
460     }
461    
462     redo A;
463     } elsif (0x0061 <= $self->{next_input_character} and
464     $self->{next_input_character} <= 0x007A) { # a..z
465     $self->{current_token} = {type => 'end tag',
466     tag_name => chr ($self->{next_input_character})};
467     $self->{state} = 'tag name';
468    
469     if (@{$self->{char}}) {
470     $self->{next_input_character} = shift @{$self->{char}};
471     } else {
472     $self->{set_next_input_character}->($self);
473     }
474    
475     redo A;
476     } elsif ($self->{next_input_character} == 0x003E) { # >
477 wakaba 1.3 $self->{parse_error}-> (type => 'empty end tag');
478 wakaba 1.1 $self->{state} = 'data';
479    
480     if (@{$self->{char}}) {
481     $self->{next_input_character} = shift @{$self->{char}};
482     } else {
483     $self->{set_next_input_character}->($self);
484     }
485    
486     redo A;
487     } elsif ($self->{next_input_character} == -1) {
488 wakaba 1.3 $self->{parse_error}-> (type => 'bare etago');
489 wakaba 1.1 $self->{state} = 'data';
490     # reconsume
491    
492     return ({type => 'character', data => '</'});
493    
494     redo A;
495     } else {
496 wakaba 1.3 $self->{parse_error}-> (type => 'bogus end tag');
497 wakaba 1.1 $self->{state} = 'bogus comment';
498     ## $self->{next_input_character} is intentionally left as is
499     redo A;
500     }
501     } elsif ($self->{state} eq 'tag name') {
502     if ($self->{next_input_character} == 0x0009 or # HT
503     $self->{next_input_character} == 0x000A or # LF
504     $self->{next_input_character} == 0x000B or # VT
505     $self->{next_input_character} == 0x000C or # FF
506     $self->{next_input_character} == 0x0020) { # SP
507     $self->{state} = 'before attribute name';
508    
509     if (@{$self->{char}}) {
510     $self->{next_input_character} = shift @{$self->{char}};
511     } else {
512     $self->{set_next_input_character}->($self);
513     }
514    
515     redo A;
516     } elsif ($self->{next_input_character} == 0x003E) { # >
517     if ($self->{current_token}->{type} eq 'start tag') {
518 wakaba 1.28 $self->{current_token}->{first_start_tag}
519     = not defined $self->{last_emitted_start_tag_name};
520 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
521     } elsif ($self->{current_token}->{type} eq 'end tag') {
522     $self->{content_model_flag} = 'PCDATA'; # MUST
523     if ($self->{current_token}->{attributes}) {
524 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
525 wakaba 1.1 }
526     } else {
527     die "$0: $self->{current_token}->{type}: Unknown token type";
528     }
529     $self->{state} = 'data';
530    
531     if (@{$self->{char}}) {
532     $self->{next_input_character} = shift @{$self->{char}};
533     } else {
534     $self->{set_next_input_character}->($self);
535     }
536    
537    
538     return ($self->{current_token}); # start tag or end tag
539    
540     redo A;
541     } elsif (0x0041 <= $self->{next_input_character} and
542     $self->{next_input_character} <= 0x005A) { # A..Z
543     $self->{current_token}->{tag_name} .= chr ($self->{next_input_character} + 0x0020);
544     # start tag or end tag
545     ## Stay in this state
546    
547     if (@{$self->{char}}) {
548     $self->{next_input_character} = shift @{$self->{char}};
549     } else {
550     $self->{set_next_input_character}->($self);
551     }
552    
553     redo A;
554 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
555 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed tag');
556 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
557 wakaba 1.28 $self->{current_token}->{first_start_tag}
558     = not defined $self->{last_emitted_start_tag_name};
559 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
560     } elsif ($self->{current_token}->{type} eq 'end tag') {
561     $self->{content_model_flag} = 'PCDATA'; # MUST
562     if ($self->{current_token}->{attributes}) {
563 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
564 wakaba 1.1 }
565     } else {
566     die "$0: $self->{current_token}->{type}: Unknown token type";
567     }
568     $self->{state} = 'data';
569     # reconsume
570    
571     return ($self->{current_token}); # start tag or end tag
572    
573     redo A;
574     } elsif ($self->{next_input_character} == 0x002F) { # /
575    
576     if (@{$self->{char}}) {
577     $self->{next_input_character} = shift @{$self->{char}};
578     } else {
579     $self->{set_next_input_character}->($self);
580     }
581    
582     if ($self->{next_input_character} == 0x003E and # >
583     $self->{current_token}->{type} eq 'start tag' and
584     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
585     # permitted slash
586     #
587     } else {
588 wakaba 1.3 $self->{parse_error}-> (type => 'nestc');
589 wakaba 1.1 }
590     $self->{state} = 'before attribute name';
591     # next-input-character is already done
592     redo A;
593     } else {
594     $self->{current_token}->{tag_name} .= chr $self->{next_input_character};
595     # start tag or end tag
596     ## Stay in the state
597    
598     if (@{$self->{char}}) {
599     $self->{next_input_character} = shift @{$self->{char}};
600     } else {
601     $self->{set_next_input_character}->($self);
602     }
603    
604     redo A;
605     }
606     } elsif ($self->{state} eq 'before attribute name') {
607     if ($self->{next_input_character} == 0x0009 or # HT
608     $self->{next_input_character} == 0x000A or # LF
609     $self->{next_input_character} == 0x000B or # VT
610     $self->{next_input_character} == 0x000C or # FF
611     $self->{next_input_character} == 0x0020) { # SP
612     ## Stay in the state
613    
614     if (@{$self->{char}}) {
615     $self->{next_input_character} = shift @{$self->{char}};
616     } else {
617     $self->{set_next_input_character}->($self);
618     }
619    
620     redo A;
621     } elsif ($self->{next_input_character} == 0x003E) { # >
622     if ($self->{current_token}->{type} eq 'start tag') {
623 wakaba 1.28 $self->{current_token}->{first_start_tag}
624     = not defined $self->{last_emitted_start_tag_name};
625 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
626     } elsif ($self->{current_token}->{type} eq 'end tag') {
627     $self->{content_model_flag} = 'PCDATA'; # MUST
628     if ($self->{current_token}->{attributes}) {
629 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
630 wakaba 1.1 }
631     } else {
632     die "$0: $self->{current_token}->{type}: Unknown token type";
633     }
634     $self->{state} = 'data';
635    
636     if (@{$self->{char}}) {
637     $self->{next_input_character} = shift @{$self->{char}};
638     } else {
639     $self->{set_next_input_character}->($self);
640     }
641    
642    
643     return ($self->{current_token}); # start tag or end tag
644    
645     redo A;
646     } elsif (0x0041 <= $self->{next_input_character} and
647     $self->{next_input_character} <= 0x005A) { # A..Z
648     $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
649     value => ''};
650     $self->{state} = 'attribute name';
651    
652     if (@{$self->{char}}) {
653     $self->{next_input_character} = shift @{$self->{char}};
654     } else {
655     $self->{set_next_input_character}->($self);
656     }
657    
658     redo A;
659     } elsif ($self->{next_input_character} == 0x002F) { # /
660    
661     if (@{$self->{char}}) {
662     $self->{next_input_character} = shift @{$self->{char}};
663     } else {
664     $self->{set_next_input_character}->($self);
665     }
666    
667     if ($self->{next_input_character} == 0x003E and # >
668     $self->{current_token}->{type} eq 'start tag' and
669     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
670     # permitted slash
671     #
672     } else {
673 wakaba 1.3 $self->{parse_error}-> (type => 'nestc');
674 wakaba 1.1 }
675     ## Stay in the state
676     # next-input-character is already done
677     redo A;
678 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
679 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed tag');
680 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
681 wakaba 1.28 $self->{current_token}->{first_start_tag}
682     = not defined $self->{last_emitted_start_tag_name};
683 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
684     } elsif ($self->{current_token}->{type} eq 'end tag') {
685     $self->{content_model_flag} = 'PCDATA'; # MUST
686     if ($self->{current_token}->{attributes}) {
687 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
688 wakaba 1.1 }
689     } else {
690     die "$0: $self->{current_token}->{type}: Unknown token type";
691     }
692     $self->{state} = 'data';
693     # reconsume
694    
695     return ($self->{current_token}); # start tag or end tag
696    
697     redo A;
698     } else {
699     $self->{current_attribute} = {name => chr ($self->{next_input_character}),
700     value => ''};
701     $self->{state} = 'attribute name';
702    
703     if (@{$self->{char}}) {
704     $self->{next_input_character} = shift @{$self->{char}};
705     } else {
706     $self->{set_next_input_character}->($self);
707     }
708    
709     redo A;
710     }
711     } elsif ($self->{state} eq 'attribute name') {
712     my $before_leave = sub {
713     if (exists $self->{current_token}->{attributes} # start tag or end tag
714     ->{$self->{current_attribute}->{name}}) { # MUST
715 wakaba 1.3 $self->{parse_error}-> (type => 'dupulicate attribute');
716 wakaba 1.1 ## Discard $self->{current_attribute} # MUST
717     } else {
718     $self->{current_token}->{attributes}->{$self->{current_attribute}->{name}}
719     = $self->{current_attribute};
720     }
721     }; # $before_leave
722    
723     if ($self->{next_input_character} == 0x0009 or # HT
724     $self->{next_input_character} == 0x000A or # LF
725     $self->{next_input_character} == 0x000B or # VT
726     $self->{next_input_character} == 0x000C or # FF
727     $self->{next_input_character} == 0x0020) { # SP
728     $before_leave->();
729     $self->{state} = 'after attribute name';
730    
731     if (@{$self->{char}}) {
732     $self->{next_input_character} = shift @{$self->{char}};
733     } else {
734     $self->{set_next_input_character}->($self);
735     }
736    
737     redo A;
738     } elsif ($self->{next_input_character} == 0x003D) { # =
739     $before_leave->();
740     $self->{state} = 'before attribute value';
741    
742     if (@{$self->{char}}) {
743     $self->{next_input_character} = shift @{$self->{char}};
744     } else {
745     $self->{set_next_input_character}->($self);
746     }
747    
748     redo A;
749     } elsif ($self->{next_input_character} == 0x003E) { # >
750     $before_leave->();
751     if ($self->{current_token}->{type} eq 'start tag') {
752 wakaba 1.28 $self->{current_token}->{first_start_tag}
753     = not defined $self->{last_emitted_start_tag_name};
754 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
755     } elsif ($self->{current_token}->{type} eq 'end tag') {
756     $self->{content_model_flag} = 'PCDATA'; # MUST
757     if ($self->{current_token}->{attributes}) {
758 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
759 wakaba 1.1 }
760     } else {
761     die "$0: $self->{current_token}->{type}: Unknown token type";
762     }
763     $self->{state} = 'data';
764    
765     if (@{$self->{char}}) {
766     $self->{next_input_character} = shift @{$self->{char}};
767     } else {
768     $self->{set_next_input_character}->($self);
769     }
770    
771    
772     return ($self->{current_token}); # start tag or end tag
773    
774     redo A;
775     } elsif (0x0041 <= $self->{next_input_character} and
776     $self->{next_input_character} <= 0x005A) { # A..Z
777     $self->{current_attribute}->{name} .= chr ($self->{next_input_character} + 0x0020);
778     ## Stay in the state
779    
780     if (@{$self->{char}}) {
781     $self->{next_input_character} = shift @{$self->{char}};
782     } else {
783     $self->{set_next_input_character}->($self);
784     }
785    
786     redo A;
787     } elsif ($self->{next_input_character} == 0x002F) { # /
788     $before_leave->();
789    
790     if (@{$self->{char}}) {
791     $self->{next_input_character} = shift @{$self->{char}};
792     } else {
793     $self->{set_next_input_character}->($self);
794     }
795    
796     if ($self->{next_input_character} == 0x003E and # >
797     $self->{current_token}->{type} eq 'start tag' and
798     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
799     # permitted slash
800     #
801     } else {
802 wakaba 1.3 $self->{parse_error}-> (type => 'nestc');
803 wakaba 1.1 }
804     $self->{state} = 'before attribute name';
805     # next-input-character is already done
806     redo A;
807 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
808 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed tag');
809 wakaba 1.1 $before_leave->();
810     if ($self->{current_token}->{type} eq 'start tag') {
811 wakaba 1.28 $self->{current_token}->{first_start_tag}
812     = not defined $self->{last_emitted_start_tag_name};
813 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
814     } elsif ($self->{current_token}->{type} eq 'end tag') {
815     $self->{content_model_flag} = 'PCDATA'; # MUST
816     if ($self->{current_token}->{attributes}) {
817 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
818 wakaba 1.1 }
819     } else {
820     die "$0: $self->{current_token}->{type}: Unknown token type";
821     }
822     $self->{state} = 'data';
823     # reconsume
824    
825     return ($self->{current_token}); # start tag or end tag
826    
827     redo A;
828     } else {
829     $self->{current_attribute}->{name} .= chr ($self->{next_input_character});
830     ## Stay in the state
831    
832     if (@{$self->{char}}) {
833     $self->{next_input_character} = shift @{$self->{char}};
834     } else {
835     $self->{set_next_input_character}->($self);
836     }
837    
838     redo A;
839     }
840     } elsif ($self->{state} eq 'after attribute name') {
841     if ($self->{next_input_character} == 0x0009 or # HT
842     $self->{next_input_character} == 0x000A or # LF
843     $self->{next_input_character} == 0x000B or # VT
844     $self->{next_input_character} == 0x000C or # FF
845     $self->{next_input_character} == 0x0020) { # SP
846     ## Stay in the state
847    
848     if (@{$self->{char}}) {
849     $self->{next_input_character} = shift @{$self->{char}};
850     } else {
851     $self->{set_next_input_character}->($self);
852     }
853    
854     redo A;
855     } elsif ($self->{next_input_character} == 0x003D) { # =
856     $self->{state} = 'before attribute value';
857    
858     if (@{$self->{char}}) {
859     $self->{next_input_character} = shift @{$self->{char}};
860     } else {
861     $self->{set_next_input_character}->($self);
862     }
863    
864     redo A;
865     } elsif ($self->{next_input_character} == 0x003E) { # >
866     if ($self->{current_token}->{type} eq 'start tag') {
867 wakaba 1.28 $self->{current_token}->{first_start_tag}
868     = not defined $self->{last_emitted_start_tag_name};
869 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
870     } elsif ($self->{current_token}->{type} eq 'end tag') {
871     $self->{content_model_flag} = 'PCDATA'; # MUST
872     if ($self->{current_token}->{attributes}) {
873 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
874 wakaba 1.1 }
875     } else {
876     die "$0: $self->{current_token}->{type}: Unknown token type";
877     }
878     $self->{state} = 'data';
879    
880     if (@{$self->{char}}) {
881     $self->{next_input_character} = shift @{$self->{char}};
882     } else {
883     $self->{set_next_input_character}->($self);
884     }
885    
886    
887     return ($self->{current_token}); # start tag or end tag
888    
889     redo A;
890     } elsif (0x0041 <= $self->{next_input_character} and
891     $self->{next_input_character} <= 0x005A) { # A..Z
892     $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
893     value => ''};
894     $self->{state} = 'attribute name';
895    
896     if (@{$self->{char}}) {
897     $self->{next_input_character} = shift @{$self->{char}};
898     } else {
899     $self->{set_next_input_character}->($self);
900     }
901    
902     redo A;
903     } elsif ($self->{next_input_character} == 0x002F) { # /
904    
905     if (@{$self->{char}}) {
906     $self->{next_input_character} = shift @{$self->{char}};
907     } else {
908     $self->{set_next_input_character}->($self);
909     }
910    
911     if ($self->{next_input_character} == 0x003E and # >
912     $self->{current_token}->{type} eq 'start tag' and
913     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
914     # permitted slash
915     #
916     } else {
917 wakaba 1.3 $self->{parse_error}-> (type => 'nestc');
918 wakaba 1.1 }
919     $self->{state} = 'before attribute name';
920     # next-input-character is already done
921     redo A;
922 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
923 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed tag');
924 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
925 wakaba 1.28 $self->{current_token}->{first_start_tag}
926     = not defined $self->{last_emitted_start_tag_name};
927 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
928     } elsif ($self->{current_token}->{type} eq 'end tag') {
929     $self->{content_model_flag} = 'PCDATA'; # MUST
930     if ($self->{current_token}->{attributes}) {
931 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
932 wakaba 1.1 }
933     } else {
934     die "$0: $self->{current_token}->{type}: Unknown token type";
935     }
936     $self->{state} = 'data';
937     # reconsume
938    
939     return ($self->{current_token}); # start tag or end tag
940    
941     redo A;
942     } else {
943     $self->{current_attribute} = {name => chr ($self->{next_input_character}),
944     value => ''};
945     $self->{state} = 'attribute name';
946    
947     if (@{$self->{char}}) {
948     $self->{next_input_character} = shift @{$self->{char}};
949     } else {
950     $self->{set_next_input_character}->($self);
951     }
952    
953     redo A;
954     }
955     } elsif ($self->{state} eq 'before attribute value') {
956     if ($self->{next_input_character} == 0x0009 or # HT
957     $self->{next_input_character} == 0x000A or # LF
958     $self->{next_input_character} == 0x000B or # VT
959     $self->{next_input_character} == 0x000C or # FF
960     $self->{next_input_character} == 0x0020) { # SP
961     ## Stay in the state
962    
963     if (@{$self->{char}}) {
964     $self->{next_input_character} = shift @{$self->{char}};
965     } else {
966     $self->{set_next_input_character}->($self);
967     }
968    
969     redo A;
970     } elsif ($self->{next_input_character} == 0x0022) { # "
971     $self->{state} = 'attribute value (double-quoted)';
972    
973     if (@{$self->{char}}) {
974     $self->{next_input_character} = shift @{$self->{char}};
975     } else {
976     $self->{set_next_input_character}->($self);
977     }
978    
979     redo A;
980     } elsif ($self->{next_input_character} == 0x0026) { # &
981     $self->{state} = 'attribute value (unquoted)';
982     ## reconsume
983     redo A;
984     } elsif ($self->{next_input_character} == 0x0027) { # '
985     $self->{state} = 'attribute value (single-quoted)';
986    
987     if (@{$self->{char}}) {
988     $self->{next_input_character} = shift @{$self->{char}};
989     } else {
990     $self->{set_next_input_character}->($self);
991     }
992    
993     redo A;
994     } elsif ($self->{next_input_character} == 0x003E) { # >
995     if ($self->{current_token}->{type} eq 'start tag') {
996 wakaba 1.28 $self->{current_token}->{first_start_tag}
997     = not defined $self->{last_emitted_start_tag_name};
998 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
999     } elsif ($self->{current_token}->{type} eq 'end tag') {
1000     $self->{content_model_flag} = 'PCDATA'; # MUST
1001     if ($self->{current_token}->{attributes}) {
1002 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
1003 wakaba 1.1 }
1004     } else {
1005     die "$0: $self->{current_token}->{type}: Unknown token type";
1006     }
1007     $self->{state} = 'data';
1008    
1009     if (@{$self->{char}}) {
1010     $self->{next_input_character} = shift @{$self->{char}};
1011     } else {
1012     $self->{set_next_input_character}->($self);
1013     }
1014    
1015    
1016     return ($self->{current_token}); # start tag or end tag
1017    
1018     redo A;
1019 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
1020 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed tag');
1021 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1022 wakaba 1.28 $self->{current_token}->{first_start_tag}
1023     = not defined $self->{last_emitted_start_tag_name};
1024 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1025     } elsif ($self->{current_token}->{type} eq 'end tag') {
1026     $self->{content_model_flag} = 'PCDATA'; # MUST
1027     if ($self->{current_token}->{attributes}) {
1028 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
1029 wakaba 1.1 }
1030     } else {
1031     die "$0: $self->{current_token}->{type}: Unknown token type";
1032     }
1033     $self->{state} = 'data';
1034     ## reconsume
1035    
1036     return ($self->{current_token}); # start tag or end tag
1037    
1038     redo A;
1039     } else {
1040     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1041     $self->{state} = 'attribute value (unquoted)';
1042    
1043     if (@{$self->{char}}) {
1044     $self->{next_input_character} = shift @{$self->{char}};
1045     } else {
1046     $self->{set_next_input_character}->($self);
1047     }
1048    
1049     redo A;
1050     }
1051     } elsif ($self->{state} eq 'attribute value (double-quoted)') {
1052     if ($self->{next_input_character} == 0x0022) { # "
1053     $self->{state} = 'before attribute name';
1054    
1055     if (@{$self->{char}}) {
1056     $self->{next_input_character} = shift @{$self->{char}};
1057     } else {
1058     $self->{set_next_input_character}->($self);
1059     }
1060    
1061     redo A;
1062     } elsif ($self->{next_input_character} == 0x0026) { # &
1063     $self->{last_attribute_value_state} = 'attribute value (double-quoted)';
1064     $self->{state} = 'entity in attribute value';
1065    
1066     if (@{$self->{char}}) {
1067     $self->{next_input_character} = shift @{$self->{char}};
1068     } else {
1069     $self->{set_next_input_character}->($self);
1070     }
1071    
1072     redo A;
1073     } elsif ($self->{next_input_character} == -1) {
1074 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed attribute value');
1075 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1076 wakaba 1.28 $self->{current_token}->{first_start_tag}
1077     = not defined $self->{last_emitted_start_tag_name};
1078 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1079     } elsif ($self->{current_token}->{type} eq 'end tag') {
1080     $self->{content_model_flag} = 'PCDATA'; # MUST
1081     if ($self->{current_token}->{attributes}) {
1082 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
1083 wakaba 1.1 }
1084     } else {
1085     die "$0: $self->{current_token}->{type}: Unknown token type";
1086     }
1087     $self->{state} = 'data';
1088     ## reconsume
1089    
1090     return ($self->{current_token}); # start tag or end tag
1091    
1092     redo A;
1093     } else {
1094     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1095     ## Stay in the state
1096    
1097     if (@{$self->{char}}) {
1098     $self->{next_input_character} = shift @{$self->{char}};
1099     } else {
1100     $self->{set_next_input_character}->($self);
1101     }
1102    
1103     redo A;
1104     }
1105     } elsif ($self->{state} eq 'attribute value (single-quoted)') {
1106     if ($self->{next_input_character} == 0x0027) { # '
1107     $self->{state} = 'before attribute name';
1108    
1109     if (@{$self->{char}}) {
1110     $self->{next_input_character} = shift @{$self->{char}};
1111     } else {
1112     $self->{set_next_input_character}->($self);
1113     }
1114    
1115     redo A;
1116     } elsif ($self->{next_input_character} == 0x0026) { # &
1117     $self->{last_attribute_value_state} = 'attribute value (single-quoted)';
1118     $self->{state} = 'entity in attribute value';
1119    
1120     if (@{$self->{char}}) {
1121     $self->{next_input_character} = shift @{$self->{char}};
1122     } else {
1123     $self->{set_next_input_character}->($self);
1124     }
1125    
1126     redo A;
1127     } elsif ($self->{next_input_character} == -1) {
1128 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed attribute value');
1129 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1130 wakaba 1.28 $self->{current_token}->{first_start_tag}
1131     = not defined $self->{last_emitted_start_tag_name};
1132 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1133     } elsif ($self->{current_token}->{type} eq 'end tag') {
1134     $self->{content_model_flag} = 'PCDATA'; # MUST
1135     if ($self->{current_token}->{attributes}) {
1136 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
1137 wakaba 1.1 }
1138     } else {
1139     die "$0: $self->{current_token}->{type}: Unknown token type";
1140     }
1141     $self->{state} = 'data';
1142     ## reconsume
1143    
1144     return ($self->{current_token}); # start tag or end tag
1145    
1146     redo A;
1147     } else {
1148     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1149     ## Stay in the state
1150    
1151     if (@{$self->{char}}) {
1152     $self->{next_input_character} = shift @{$self->{char}};
1153     } else {
1154     $self->{set_next_input_character}->($self);
1155     }
1156    
1157     redo A;
1158     }
1159     } elsif ($self->{state} eq 'attribute value (unquoted)') {
1160     if ($self->{next_input_character} == 0x0009 or # HT
1161     $self->{next_input_character} == 0x000A or # LF
1162     $self->{next_input_character} == 0x000B or # VT
1163     $self->{next_input_character} == 0x000C or # FF
1164     $self->{next_input_character} == 0x0020) { # SP
1165     $self->{state} = 'before attribute name';
1166    
1167     if (@{$self->{char}}) {
1168     $self->{next_input_character} = shift @{$self->{char}};
1169     } else {
1170     $self->{set_next_input_character}->($self);
1171     }
1172    
1173     redo A;
1174     } elsif ($self->{next_input_character} == 0x0026) { # &
1175     $self->{last_attribute_value_state} = 'attribute value (unquoted)';
1176     $self->{state} = 'entity in attribute value';
1177    
1178     if (@{$self->{char}}) {
1179     $self->{next_input_character} = shift @{$self->{char}};
1180     } else {
1181     $self->{set_next_input_character}->($self);
1182     }
1183    
1184     redo A;
1185     } elsif ($self->{next_input_character} == 0x003E) { # >
1186     if ($self->{current_token}->{type} eq 'start tag') {
1187 wakaba 1.28 $self->{current_token}->{first_start_tag}
1188     = not defined $self->{last_emitted_start_tag_name};
1189 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1190     } elsif ($self->{current_token}->{type} eq 'end tag') {
1191     $self->{content_model_flag} = 'PCDATA'; # MUST
1192     if ($self->{current_token}->{attributes}) {
1193 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
1194 wakaba 1.1 }
1195     } else {
1196     die "$0: $self->{current_token}->{type}: Unknown token type";
1197     }
1198     $self->{state} = 'data';
1199    
1200     if (@{$self->{char}}) {
1201     $self->{next_input_character} = shift @{$self->{char}};
1202     } else {
1203     $self->{set_next_input_character}->($self);
1204     }
1205    
1206    
1207     return ($self->{current_token}); # start tag or end tag
1208    
1209     redo A;
1210 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
1211 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed tag');
1212 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1213 wakaba 1.28 $self->{current_token}->{first_start_tag}
1214     = not defined $self->{last_emitted_start_tag_name};
1215 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1216     } elsif ($self->{current_token}->{type} eq 'end tag') {
1217     $self->{content_model_flag} = 'PCDATA'; # MUST
1218     if ($self->{current_token}->{attributes}) {
1219 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
1220 wakaba 1.1 }
1221     } else {
1222     die "$0: $self->{current_token}->{type}: Unknown token type";
1223     }
1224     $self->{state} = 'data';
1225     ## reconsume
1226    
1227     return ($self->{current_token}); # start tag or end tag
1228    
1229     redo A;
1230     } else {
1231     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1232     ## Stay in the state
1233    
1234     if (@{$self->{char}}) {
1235     $self->{next_input_character} = shift @{$self->{char}};
1236     } else {
1237     $self->{set_next_input_character}->($self);
1238     }
1239    
1240     redo A;
1241     }
1242     } elsif ($self->{state} eq 'entity in attribute value') {
1243 wakaba 1.26 my $token = $self->_tokenize_attempt_to_consume_an_entity (1);
1244 wakaba 1.1
1245     unless (defined $token) {
1246     $self->{current_attribute}->{value} .= '&';
1247     } else {
1248     $self->{current_attribute}->{value} .= $token->{data};
1249     ## ISSUE: spec says "append the returned character token to the current attribute's value"
1250     }
1251    
1252     $self->{state} = $self->{last_attribute_value_state};
1253     # next-input-character is already done
1254     redo A;
1255     } elsif ($self->{state} eq 'bogus comment') {
1256     ## (only happen if PCDATA state)
1257    
1258     my $token = {type => 'comment', data => ''};
1259    
1260     BC: {
1261     if ($self->{next_input_character} == 0x003E) { # >
1262     $self->{state} = 'data';
1263    
1264     if (@{$self->{char}}) {
1265     $self->{next_input_character} = shift @{$self->{char}};
1266     } else {
1267     $self->{set_next_input_character}->($self);
1268     }
1269    
1270    
1271     return ($token);
1272    
1273     redo A;
1274     } elsif ($self->{next_input_character} == -1) {
1275     $self->{state} = 'data';
1276     ## reconsume
1277    
1278     return ($token);
1279    
1280     redo A;
1281     } else {
1282     $token->{data} .= chr ($self->{next_input_character});
1283    
1284     if (@{$self->{char}}) {
1285     $self->{next_input_character} = shift @{$self->{char}};
1286     } else {
1287     $self->{set_next_input_character}->($self);
1288     }
1289    
1290     redo BC;
1291     }
1292     } # BC
1293     } elsif ($self->{state} eq 'markup declaration open') {
1294     ## (only happen if PCDATA state)
1295    
1296     my @next_char;
1297     push @next_char, $self->{next_input_character};
1298    
1299     if ($self->{next_input_character} == 0x002D) { # -
1300    
1301     if (@{$self->{char}}) {
1302     $self->{next_input_character} = shift @{$self->{char}};
1303     } else {
1304     $self->{set_next_input_character}->($self);
1305     }
1306    
1307     push @next_char, $self->{next_input_character};
1308     if ($self->{next_input_character} == 0x002D) { # -
1309     $self->{current_token} = {type => 'comment', data => ''};
1310 wakaba 1.23 $self->{state} = 'comment start';
1311 wakaba 1.1
1312     if (@{$self->{char}}) {
1313     $self->{next_input_character} = shift @{$self->{char}};
1314     } else {
1315     $self->{set_next_input_character}->($self);
1316     }
1317    
1318     redo A;
1319     }
1320     } elsif ($self->{next_input_character} == 0x0044 or # D
1321     $self->{next_input_character} == 0x0064) { # d
1322    
1323     if (@{$self->{char}}) {
1324     $self->{next_input_character} = shift @{$self->{char}};
1325     } else {
1326     $self->{set_next_input_character}->($self);
1327     }
1328    
1329     push @next_char, $self->{next_input_character};
1330     if ($self->{next_input_character} == 0x004F or # O
1331     $self->{next_input_character} == 0x006F) { # o
1332    
1333     if (@{$self->{char}}) {
1334     $self->{next_input_character} = shift @{$self->{char}};
1335     } else {
1336     $self->{set_next_input_character}->($self);
1337     }
1338    
1339     push @next_char, $self->{next_input_character};
1340     if ($self->{next_input_character} == 0x0043 or # C
1341     $self->{next_input_character} == 0x0063) { # c
1342    
1343     if (@{$self->{char}}) {
1344     $self->{next_input_character} = shift @{$self->{char}};
1345     } else {
1346     $self->{set_next_input_character}->($self);
1347     }
1348    
1349     push @next_char, $self->{next_input_character};
1350     if ($self->{next_input_character} == 0x0054 or # T
1351     $self->{next_input_character} == 0x0074) { # t
1352    
1353     if (@{$self->{char}}) {
1354     $self->{next_input_character} = shift @{$self->{char}};
1355     } else {
1356     $self->{set_next_input_character}->($self);
1357     }
1358    
1359     push @next_char, $self->{next_input_character};
1360     if ($self->{next_input_character} == 0x0059 or # Y
1361     $self->{next_input_character} == 0x0079) { # y
1362    
1363     if (@{$self->{char}}) {
1364     $self->{next_input_character} = shift @{$self->{char}};
1365     } else {
1366     $self->{set_next_input_character}->($self);
1367     }
1368    
1369     push @next_char, $self->{next_input_character};
1370     if ($self->{next_input_character} == 0x0050 or # P
1371     $self->{next_input_character} == 0x0070) { # p
1372    
1373     if (@{$self->{char}}) {
1374     $self->{next_input_character} = shift @{$self->{char}};
1375     } else {
1376     $self->{set_next_input_character}->($self);
1377     }
1378    
1379     push @next_char, $self->{next_input_character};
1380     if ($self->{next_input_character} == 0x0045 or # E
1381     $self->{next_input_character} == 0x0065) { # e
1382     ## ISSUE: What a stupid code this is!
1383     $self->{state} = 'DOCTYPE';
1384    
1385     if (@{$self->{char}}) {
1386     $self->{next_input_character} = shift @{$self->{char}};
1387     } else {
1388     $self->{set_next_input_character}->($self);
1389     }
1390    
1391     redo A;
1392     }
1393     }
1394     }
1395     }
1396     }
1397     }
1398     }
1399    
1400 wakaba 1.30 $self->{parse_error}-> (type => 'bogus comment');
1401 wakaba 1.1 $self->{next_input_character} = shift @next_char;
1402     unshift @{$self->{char}}, (@next_char);
1403     $self->{state} = 'bogus comment';
1404     redo A;
1405    
1406     ## ISSUE: typos in spec: chacacters, is is a parse error
1407     ## ISSUE: spec is somewhat unclear on "is the first character that will be in the comment"; what is "that will be in the comment" is what the algorithm defines, isn't it?
1408 wakaba 1.23 } elsif ($self->{state} eq 'comment start') {
1409     if ($self->{next_input_character} == 0x002D) { # -
1410     $self->{state} = 'comment start dash';
1411    
1412     if (@{$self->{char}}) {
1413     $self->{next_input_character} = shift @{$self->{char}};
1414     } else {
1415     $self->{set_next_input_character}->($self);
1416     }
1417    
1418     redo A;
1419     } elsif ($self->{next_input_character} == 0x003E) { # >
1420     $self->{parse_error}-> (type => 'bogus comment');
1421     $self->{state} = 'data';
1422    
1423     if (@{$self->{char}}) {
1424     $self->{next_input_character} = shift @{$self->{char}};
1425     } else {
1426     $self->{set_next_input_character}->($self);
1427     }
1428    
1429    
1430     return ($self->{current_token}); # comment
1431    
1432     redo A;
1433     } elsif ($self->{next_input_character} == -1) {
1434     $self->{parse_error}-> (type => 'unclosed comment');
1435     $self->{state} = 'data';
1436     ## reconsume
1437    
1438     return ($self->{current_token}); # comment
1439    
1440     redo A;
1441     } else {
1442     $self->{current_token}->{data} # comment
1443     .= chr ($self->{next_input_character});
1444     $self->{state} = 'comment';
1445    
1446     if (@{$self->{char}}) {
1447     $self->{next_input_character} = shift @{$self->{char}};
1448     } else {
1449     $self->{set_next_input_character}->($self);
1450     }
1451    
1452     redo A;
1453     }
1454     } elsif ($self->{state} eq 'comment start dash') {
1455     if ($self->{next_input_character} == 0x002D) { # -
1456     $self->{state} = 'comment end';
1457    
1458     if (@{$self->{char}}) {
1459     $self->{next_input_character} = shift @{$self->{char}};
1460     } else {
1461     $self->{set_next_input_character}->($self);
1462     }
1463    
1464     redo A;
1465     } elsif ($self->{next_input_character} == 0x003E) { # >
1466     $self->{parse_error}-> (type => 'bogus comment');
1467     $self->{state} = 'data';
1468    
1469     if (@{$self->{char}}) {
1470     $self->{next_input_character} = shift @{$self->{char}};
1471     } else {
1472     $self->{set_next_input_character}->($self);
1473     }
1474    
1475    
1476     return ($self->{current_token}); # comment
1477    
1478     redo A;
1479     } elsif ($self->{next_input_character} == -1) {
1480     $self->{parse_error}-> (type => 'unclosed comment');
1481     $self->{state} = 'data';
1482     ## reconsume
1483    
1484     return ($self->{current_token}); # comment
1485    
1486     redo A;
1487     } else {
1488     $self->{current_token}->{data} # comment
1489     .= chr ($self->{next_input_character});
1490     $self->{state} = 'comment';
1491    
1492     if (@{$self->{char}}) {
1493     $self->{next_input_character} = shift @{$self->{char}};
1494     } else {
1495     $self->{set_next_input_character}->($self);
1496     }
1497    
1498     redo A;
1499     }
1500 wakaba 1.1 } elsif ($self->{state} eq 'comment') {
1501     if ($self->{next_input_character} == 0x002D) { # -
1502 wakaba 1.23 $self->{state} = 'comment end dash';
1503 wakaba 1.1
1504     if (@{$self->{char}}) {
1505     $self->{next_input_character} = shift @{$self->{char}};
1506     } else {
1507     $self->{set_next_input_character}->($self);
1508     }
1509    
1510     redo A;
1511     } elsif ($self->{next_input_character} == -1) {
1512 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed comment');
1513 wakaba 1.1 $self->{state} = 'data';
1514     ## reconsume
1515    
1516     return ($self->{current_token}); # comment
1517    
1518     redo A;
1519     } else {
1520     $self->{current_token}->{data} .= chr ($self->{next_input_character}); # comment
1521     ## Stay in the state
1522    
1523     if (@{$self->{char}}) {
1524     $self->{next_input_character} = shift @{$self->{char}};
1525     } else {
1526     $self->{set_next_input_character}->($self);
1527     }
1528    
1529     redo A;
1530     }
1531 wakaba 1.23 } elsif ($self->{state} eq 'comment end dash') {
1532 wakaba 1.1 if ($self->{next_input_character} == 0x002D) { # -
1533     $self->{state} = 'comment end';
1534    
1535     if (@{$self->{char}}) {
1536     $self->{next_input_character} = shift @{$self->{char}};
1537     } else {
1538     $self->{set_next_input_character}->($self);
1539     }
1540    
1541     redo A;
1542     } elsif ($self->{next_input_character} == -1) {
1543 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed comment');
1544 wakaba 1.1 $self->{state} = 'data';
1545     ## reconsume
1546    
1547     return ($self->{current_token}); # comment
1548    
1549     redo A;
1550     } else {
1551     $self->{current_token}->{data} .= '-' . chr ($self->{next_input_character}); # comment
1552     $self->{state} = 'comment';
1553    
1554     if (@{$self->{char}}) {
1555     $self->{next_input_character} = shift @{$self->{char}};
1556     } else {
1557     $self->{set_next_input_character}->($self);
1558     }
1559    
1560     redo A;
1561     }
1562     } elsif ($self->{state} eq 'comment end') {
1563     if ($self->{next_input_character} == 0x003E) { # >
1564     $self->{state} = 'data';
1565    
1566     if (@{$self->{char}}) {
1567     $self->{next_input_character} = shift @{$self->{char}};
1568     } else {
1569     $self->{set_next_input_character}->($self);
1570     }
1571    
1572    
1573     return ($self->{current_token}); # comment
1574    
1575     redo A;
1576     } elsif ($self->{next_input_character} == 0x002D) { # -
1577 wakaba 1.3 $self->{parse_error}-> (type => 'dash in comment');
1578 wakaba 1.1 $self->{current_token}->{data} .= '-'; # comment
1579     ## Stay in the state
1580    
1581     if (@{$self->{char}}) {
1582     $self->{next_input_character} = shift @{$self->{char}};
1583     } else {
1584     $self->{set_next_input_character}->($self);
1585     }
1586    
1587     redo A;
1588     } elsif ($self->{next_input_character} == -1) {
1589 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed comment');
1590 wakaba 1.1 $self->{state} = 'data';
1591     ## reconsume
1592    
1593     return ($self->{current_token}); # comment
1594    
1595     redo A;
1596     } else {
1597 wakaba 1.3 $self->{parse_error}-> (type => 'dash in comment');
1598 wakaba 1.1 $self->{current_token}->{data} .= '--' . chr ($self->{next_input_character}); # comment
1599     $self->{state} = 'comment';
1600    
1601     if (@{$self->{char}}) {
1602     $self->{next_input_character} = shift @{$self->{char}};
1603     } else {
1604     $self->{set_next_input_character}->($self);
1605     }
1606    
1607     redo A;
1608     }
1609     } elsif ($self->{state} eq 'DOCTYPE') {
1610     if ($self->{next_input_character} == 0x0009 or # HT
1611     $self->{next_input_character} == 0x000A or # LF
1612     $self->{next_input_character} == 0x000B or # VT
1613     $self->{next_input_character} == 0x000C or # FF
1614     $self->{next_input_character} == 0x0020) { # SP
1615     $self->{state} = 'before DOCTYPE name';
1616    
1617     if (@{$self->{char}}) {
1618     $self->{next_input_character} = shift @{$self->{char}};
1619     } else {
1620     $self->{set_next_input_character}->($self);
1621     }
1622    
1623     redo A;
1624     } else {
1625 wakaba 1.3 $self->{parse_error}-> (type => 'no space before DOCTYPE name');
1626 wakaba 1.1 $self->{state} = 'before DOCTYPE name';
1627     ## reconsume
1628     redo A;
1629     }
1630     } elsif ($self->{state} eq 'before DOCTYPE name') {
1631     if ($self->{next_input_character} == 0x0009 or # HT
1632     $self->{next_input_character} == 0x000A or # LF
1633     $self->{next_input_character} == 0x000B or # VT
1634     $self->{next_input_character} == 0x000C or # FF
1635     $self->{next_input_character} == 0x0020) { # SP
1636     ## Stay in the state
1637    
1638     if (@{$self->{char}}) {
1639     $self->{next_input_character} = shift @{$self->{char}};
1640     } else {
1641     $self->{set_next_input_character}->($self);
1642     }
1643    
1644     redo A;
1645 wakaba 1.18 } elsif ($self->{next_input_character} == 0x003E) { # >
1646     $self->{parse_error}-> (type => 'no DOCTYPE name');
1647     $self->{state} = 'data';
1648    
1649     if (@{$self->{char}}) {
1650     $self->{next_input_character} = shift @{$self->{char}};
1651     } else {
1652     $self->{set_next_input_character}->($self);
1653     }
1654    
1655    
1656     return ({type => 'DOCTYPE'}); # incorrect
1657    
1658     redo A;
1659     } elsif ($self->{next_input_character} == -1) {
1660     $self->{parse_error}-> (type => 'no DOCTYPE name');
1661     $self->{state} = 'data';
1662     ## reconsume
1663    
1664     return ({type => 'DOCTYPE'}); # incorrect
1665    
1666     redo A;
1667     } else {
1668     $self->{current_token}
1669     = {type => 'DOCTYPE',
1670     name => chr ($self->{next_input_character}),
1671     correct => 1};
1672 wakaba 1.4 ## ISSUE: "Set the token's name name to the" in the spec
1673 wakaba 1.1 $self->{state} = 'DOCTYPE name';
1674    
1675     if (@{$self->{char}}) {
1676     $self->{next_input_character} = shift @{$self->{char}};
1677     } else {
1678     $self->{set_next_input_character}->($self);
1679     }
1680    
1681     redo A;
1682 wakaba 1.18 }
1683     } elsif ($self->{state} eq 'DOCTYPE name') {
1684     ## ISSUE: Redundant "First," in the spec.
1685     if ($self->{next_input_character} == 0x0009 or # HT
1686     $self->{next_input_character} == 0x000A or # LF
1687     $self->{next_input_character} == 0x000B or # VT
1688     $self->{next_input_character} == 0x000C or # FF
1689     $self->{next_input_character} == 0x0020) { # SP
1690     $self->{state} = 'after DOCTYPE name';
1691    
1692     if (@{$self->{char}}) {
1693     $self->{next_input_character} = shift @{$self->{char}};
1694     } else {
1695     $self->{set_next_input_character}->($self);
1696     }
1697    
1698     redo A;
1699 wakaba 1.1 } elsif ($self->{next_input_character} == 0x003E) { # >
1700     $self->{state} = 'data';
1701    
1702     if (@{$self->{char}}) {
1703     $self->{next_input_character} = shift @{$self->{char}};
1704     } else {
1705     $self->{set_next_input_character}->($self);
1706     }
1707    
1708    
1709 wakaba 1.18 return ($self->{current_token}); # DOCTYPE
1710 wakaba 1.1
1711     redo A;
1712 wakaba 1.18 } elsif ($self->{next_input_character} == -1) {
1713     $self->{parse_error}-> (type => 'unclosed DOCTYPE');
1714 wakaba 1.1 $self->{state} = 'data';
1715     ## reconsume
1716    
1717 wakaba 1.18 delete $self->{current_token}->{correct};
1718     return ($self->{current_token}); # DOCTYPE
1719 wakaba 1.1
1720     redo A;
1721     } else {
1722 wakaba 1.18 $self->{current_token}->{name}
1723     .= chr ($self->{next_input_character}); # DOCTYPE
1724     ## Stay in the state
1725 wakaba 1.1
1726     if (@{$self->{char}}) {
1727     $self->{next_input_character} = shift @{$self->{char}};
1728     } else {
1729     $self->{set_next_input_character}->($self);
1730     }
1731    
1732     redo A;
1733     }
1734 wakaba 1.18 } elsif ($self->{state} eq 'after DOCTYPE name') {
1735 wakaba 1.1 if ($self->{next_input_character} == 0x0009 or # HT
1736     $self->{next_input_character} == 0x000A or # LF
1737     $self->{next_input_character} == 0x000B or # VT
1738     $self->{next_input_character} == 0x000C or # FF
1739     $self->{next_input_character} == 0x0020) { # SP
1740 wakaba 1.18 ## Stay in the state
1741 wakaba 1.1
1742     if (@{$self->{char}}) {
1743     $self->{next_input_character} = shift @{$self->{char}};
1744     } else {
1745     $self->{set_next_input_character}->($self);
1746     }
1747    
1748     redo A;
1749     } elsif ($self->{next_input_character} == 0x003E) { # >
1750     $self->{state} = 'data';
1751    
1752     if (@{$self->{char}}) {
1753     $self->{next_input_character} = shift @{$self->{char}};
1754     } else {
1755     $self->{set_next_input_character}->($self);
1756     }
1757    
1758    
1759     return ($self->{current_token}); # DOCTYPE
1760    
1761     redo A;
1762 wakaba 1.18 } elsif ($self->{next_input_character} == -1) {
1763     $self->{parse_error}-> (type => 'unclosed DOCTYPE');
1764     $self->{state} = 'data';
1765     ## reconsume
1766    
1767     delete $self->{current_token}->{correct};
1768     return ($self->{current_token}); # DOCTYPE
1769    
1770     redo A;
1771     } elsif ($self->{next_input_character} == 0x0050 or # P
1772     $self->{next_input_character} == 0x0070) { # p
1773    
1774     if (@{$self->{char}}) {
1775     $self->{next_input_character} = shift @{$self->{char}};
1776     } else {
1777     $self->{set_next_input_character}->($self);
1778     }
1779    
1780     if ($self->{next_input_character} == 0x0055 or # U
1781     $self->{next_input_character} == 0x0075) { # u
1782    
1783     if (@{$self->{char}}) {
1784     $self->{next_input_character} = shift @{$self->{char}};
1785     } else {
1786     $self->{set_next_input_character}->($self);
1787     }
1788    
1789     if ($self->{next_input_character} == 0x0042 or # B
1790     $self->{next_input_character} == 0x0062) { # b
1791    
1792     if (@{$self->{char}}) {
1793     $self->{next_input_character} = shift @{$self->{char}};
1794     } else {
1795     $self->{set_next_input_character}->($self);
1796     }
1797    
1798     if ($self->{next_input_character} == 0x004C or # L
1799     $self->{next_input_character} == 0x006C) { # l
1800    
1801     if (@{$self->{char}}) {
1802     $self->{next_input_character} = shift @{$self->{char}};
1803     } else {
1804     $self->{set_next_input_character}->($self);
1805     }
1806    
1807     if ($self->{next_input_character} == 0x0049 or # I
1808     $self->{next_input_character} == 0x0069) { # i
1809    
1810     if (@{$self->{char}}) {
1811     $self->{next_input_character} = shift @{$self->{char}};
1812     } else {
1813     $self->{set_next_input_character}->($self);
1814     }
1815    
1816     if ($self->{next_input_character} == 0x0043 or # C
1817     $self->{next_input_character} == 0x0063) { # c
1818     $self->{state} = 'before DOCTYPE public identifier';
1819    
1820     if (@{$self->{char}}) {
1821     $self->{next_input_character} = shift @{$self->{char}};
1822     } else {
1823     $self->{set_next_input_character}->($self);
1824     }
1825    
1826     redo A;
1827     }
1828     }
1829     }
1830     }
1831     }
1832    
1833     #
1834     } elsif ($self->{next_input_character} == 0x0053 or # S
1835     $self->{next_input_character} == 0x0073) { # s
1836    
1837     if (@{$self->{char}}) {
1838     $self->{next_input_character} = shift @{$self->{char}};
1839     } else {
1840     $self->{set_next_input_character}->($self);
1841     }
1842    
1843     if ($self->{next_input_character} == 0x0059 or # Y
1844     $self->{next_input_character} == 0x0079) { # y
1845    
1846     if (@{$self->{char}}) {
1847     $self->{next_input_character} = shift @{$self->{char}};
1848     } else {
1849     $self->{set_next_input_character}->($self);
1850     }
1851    
1852     if ($self->{next_input_character} == 0x0053 or # S
1853     $self->{next_input_character} == 0x0073) { # s
1854    
1855     if (@{$self->{char}}) {
1856     $self->{next_input_character} = shift @{$self->{char}};
1857     } else {
1858     $self->{set_next_input_character}->($self);
1859     }
1860    
1861     if ($self->{next_input_character} == 0x0054 or # T
1862     $self->{next_input_character} == 0x0074) { # t
1863    
1864     if (@{$self->{char}}) {
1865     $self->{next_input_character} = shift @{$self->{char}};
1866     } else {
1867     $self->{set_next_input_character}->($self);
1868     }
1869    
1870     if ($self->{next_input_character} == 0x0045 or # E
1871     $self->{next_input_character} == 0x0065) { # e
1872    
1873     if (@{$self->{char}}) {
1874     $self->{next_input_character} = shift @{$self->{char}};
1875     } else {
1876     $self->{set_next_input_character}->($self);
1877     }
1878    
1879     if ($self->{next_input_character} == 0x004D or # M
1880     $self->{next_input_character} == 0x006D) { # m
1881     $self->{state} = 'before DOCTYPE system identifier';
1882    
1883     if (@{$self->{char}}) {
1884     $self->{next_input_character} = shift @{$self->{char}};
1885     } else {
1886     $self->{set_next_input_character}->($self);
1887     }
1888    
1889     redo A;
1890     }
1891     }
1892     }
1893     }
1894     }
1895    
1896     #
1897     } else {
1898    
1899     if (@{$self->{char}}) {
1900     $self->{next_input_character} = shift @{$self->{char}};
1901     } else {
1902     $self->{set_next_input_character}->($self);
1903     }
1904    
1905     #
1906     }
1907    
1908     $self->{parse_error}-> (type => 'string after DOCTYPE name');
1909     $self->{state} = 'bogus DOCTYPE';
1910     # next-input-character is already done
1911     redo A;
1912     } elsif ($self->{state} eq 'before DOCTYPE public identifier') {
1913     if ({
1914     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
1915     #0x000D => 1, # HT, LF, VT, FF, SP, CR
1916     }->{$self->{next_input_character}}) {
1917 wakaba 1.1 ## Stay in the state
1918    
1919     if (@{$self->{char}}) {
1920     $self->{next_input_character} = shift @{$self->{char}};
1921     } else {
1922     $self->{set_next_input_character}->($self);
1923     }
1924    
1925     redo A;
1926 wakaba 1.18 } elsif ($self->{next_input_character} eq 0x0022) { # "
1927     $self->{current_token}->{public_identifier} = ''; # DOCTYPE
1928     $self->{state} = 'DOCTYPE public identifier (double-quoted)';
1929    
1930     if (@{$self->{char}}) {
1931     $self->{next_input_character} = shift @{$self->{char}};
1932     } else {
1933     $self->{set_next_input_character}->($self);
1934     }
1935    
1936     redo A;
1937     } elsif ($self->{next_input_character} eq 0x0027) { # '
1938     $self->{current_token}->{public_identifier} = ''; # DOCTYPE
1939     $self->{state} = 'DOCTYPE public identifier (single-quoted)';
1940    
1941     if (@{$self->{char}}) {
1942     $self->{next_input_character} = shift @{$self->{char}};
1943     } else {
1944     $self->{set_next_input_character}->($self);
1945     }
1946    
1947     redo A;
1948     } elsif ($self->{next_input_character} eq 0x003E) { # >
1949     $self->{parse_error}-> (type => 'no PUBLIC literal');
1950    
1951     $self->{state} = 'data';
1952    
1953     if (@{$self->{char}}) {
1954     $self->{next_input_character} = shift @{$self->{char}};
1955     } else {
1956     $self->{set_next_input_character}->($self);
1957     }
1958    
1959    
1960     delete $self->{current_token}->{correct};
1961     return ($self->{current_token}); # DOCTYPE
1962    
1963     redo A;
1964 wakaba 1.1 } elsif ($self->{next_input_character} == -1) {
1965 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed DOCTYPE');
1966 wakaba 1.18
1967 wakaba 1.1 $self->{state} = 'data';
1968     ## reconsume
1969    
1970 wakaba 1.18 delete $self->{current_token}->{correct};
1971     return ($self->{current_token}); # DOCTYPE
1972 wakaba 1.1
1973     redo A;
1974     } else {
1975 wakaba 1.18 $self->{parse_error}-> (type => 'string after PUBLIC');
1976     $self->{state} = 'bogus DOCTYPE';
1977    
1978     if (@{$self->{char}}) {
1979     $self->{next_input_character} = shift @{$self->{char}};
1980     } else {
1981     $self->{set_next_input_character}->($self);
1982     }
1983    
1984     redo A;
1985     }
1986     } elsif ($self->{state} eq 'DOCTYPE public identifier (double-quoted)') {
1987     if ($self->{next_input_character} == 0x0022) { # "
1988     $self->{state} = 'after DOCTYPE public identifier';
1989    
1990     if (@{$self->{char}}) {
1991     $self->{next_input_character} = shift @{$self->{char}};
1992     } else {
1993     $self->{set_next_input_character}->($self);
1994     }
1995    
1996     redo A;
1997     } elsif ($self->{next_input_character} == -1) {
1998     $self->{parse_error}-> (type => 'unclosed PUBLIC literal');
1999    
2000     $self->{state} = 'data';
2001     ## reconsume
2002    
2003     delete $self->{current_token}->{correct};
2004     return ($self->{current_token}); # DOCTYPE
2005    
2006     redo A;
2007     } else {
2008     $self->{current_token}->{public_identifier} # DOCTYPE
2009     .= chr $self->{next_input_character};
2010     ## Stay in the state
2011    
2012     if (@{$self->{char}}) {
2013     $self->{next_input_character} = shift @{$self->{char}};
2014     } else {
2015     $self->{set_next_input_character}->($self);
2016     }
2017    
2018     redo A;
2019     }
2020     } elsif ($self->{state} eq 'DOCTYPE public identifier (single-quoted)') {
2021     if ($self->{next_input_character} == 0x0027) { # '
2022     $self->{state} = 'after DOCTYPE public identifier';
2023    
2024     if (@{$self->{char}}) {
2025     $self->{next_input_character} = shift @{$self->{char}};
2026     } else {
2027     $self->{set_next_input_character}->($self);
2028     }
2029    
2030     redo A;
2031     } elsif ($self->{next_input_character} == -1) {
2032     $self->{parse_error}-> (type => 'unclosed PUBLIC literal');
2033    
2034     $self->{state} = 'data';
2035     ## reconsume
2036    
2037     delete $self->{current_token}->{correct};
2038     return ($self->{current_token}); # DOCTYPE
2039    
2040     redo A;
2041     } else {
2042     $self->{current_token}->{public_identifier} # DOCTYPE
2043     .= chr $self->{next_input_character};
2044     ## Stay in the state
2045    
2046     if (@{$self->{char}}) {
2047     $self->{next_input_character} = shift @{$self->{char}};
2048     } else {
2049     $self->{set_next_input_character}->($self);
2050     }
2051    
2052     redo A;
2053     }
2054     } elsif ($self->{state} eq 'after DOCTYPE public identifier') {
2055     if ({
2056     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2057     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2058     }->{$self->{next_input_character}}) {
2059 wakaba 1.1 ## Stay in the state
2060    
2061     if (@{$self->{char}}) {
2062     $self->{next_input_character} = shift @{$self->{char}};
2063     } else {
2064     $self->{set_next_input_character}->($self);
2065     }
2066    
2067     redo A;
2068 wakaba 1.18 } elsif ($self->{next_input_character} == 0x0022) { # "
2069     $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2070     $self->{state} = 'DOCTYPE system identifier (double-quoted)';
2071    
2072     if (@{$self->{char}}) {
2073     $self->{next_input_character} = shift @{$self->{char}};
2074     } else {
2075     $self->{set_next_input_character}->($self);
2076     }
2077    
2078     redo A;
2079     } elsif ($self->{next_input_character} == 0x0027) { # '
2080     $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2081     $self->{state} = 'DOCTYPE system identifier (single-quoted)';
2082    
2083     if (@{$self->{char}}) {
2084     $self->{next_input_character} = shift @{$self->{char}};
2085     } else {
2086     $self->{set_next_input_character}->($self);
2087     }
2088    
2089     redo A;
2090     } elsif ($self->{next_input_character} == 0x003E) { # >
2091     $self->{state} = 'data';
2092    
2093     if (@{$self->{char}}) {
2094     $self->{next_input_character} = shift @{$self->{char}};
2095     } else {
2096     $self->{set_next_input_character}->($self);
2097     }
2098    
2099    
2100     return ($self->{current_token}); # DOCTYPE
2101    
2102     redo A;
2103     } elsif ($self->{next_input_character} == -1) {
2104     $self->{parse_error}-> (type => 'unclosed DOCTYPE');
2105    
2106     $self->{state} = 'data';
2107 wakaba 1.26 ## reconsume
2108 wakaba 1.18
2109     delete $self->{current_token}->{correct};
2110     return ($self->{current_token}); # DOCTYPE
2111    
2112     redo A;
2113     } else {
2114     $self->{parse_error}-> (type => 'string after PUBLIC literal');
2115     $self->{state} = 'bogus DOCTYPE';
2116    
2117     if (@{$self->{char}}) {
2118     $self->{next_input_character} = shift @{$self->{char}};
2119     } else {
2120     $self->{set_next_input_character}->($self);
2121     }
2122    
2123     redo A;
2124 wakaba 1.1 }
2125 wakaba 1.18 } elsif ($self->{state} eq 'before DOCTYPE system identifier') {
2126     if ({
2127     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2128     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2129     }->{$self->{next_input_character}}) {
2130 wakaba 1.1 ## Stay in the state
2131    
2132     if (@{$self->{char}}) {
2133     $self->{next_input_character} = shift @{$self->{char}};
2134     } else {
2135     $self->{set_next_input_character}->($self);
2136     }
2137    
2138     redo A;
2139 wakaba 1.18 } elsif ($self->{next_input_character} == 0x0022) { # "
2140     $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2141     $self->{state} = 'DOCTYPE system identifier (double-quoted)';
2142    
2143     if (@{$self->{char}}) {
2144     $self->{next_input_character} = shift @{$self->{char}};
2145     } else {
2146     $self->{set_next_input_character}->($self);
2147     }
2148    
2149     redo A;
2150     } elsif ($self->{next_input_character} == 0x0027) { # '
2151     $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2152     $self->{state} = 'DOCTYPE system identifier (single-quoted)';
2153    
2154     if (@{$self->{char}}) {
2155     $self->{next_input_character} = shift @{$self->{char}};
2156     } else {
2157     $self->{set_next_input_character}->($self);
2158     }
2159    
2160     redo A;
2161 wakaba 1.1 } elsif ($self->{next_input_character} == 0x003E) { # >
2162 wakaba 1.18 $self->{parse_error}-> (type => 'no SYSTEM literal');
2163 wakaba 1.1 $self->{state} = 'data';
2164    
2165     if (@{$self->{char}}) {
2166     $self->{next_input_character} = shift @{$self->{char}};
2167     } else {
2168     $self->{set_next_input_character}->($self);
2169     }
2170    
2171    
2172 wakaba 1.18 delete $self->{current_token}->{correct};
2173 wakaba 1.1 return ($self->{current_token}); # DOCTYPE
2174    
2175     redo A;
2176     } elsif ($self->{next_input_character} == -1) {
2177 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed DOCTYPE');
2178 wakaba 1.18
2179     $self->{state} = 'data';
2180 wakaba 1.26 ## reconsume
2181 wakaba 1.18
2182     delete $self->{current_token}->{correct};
2183     return ($self->{current_token}); # DOCTYPE
2184    
2185     redo A;
2186     } else {
2187 wakaba 1.30 $self->{parse_error}-> (type => 'string after SYSTEM');
2188 wakaba 1.18 $self->{state} = 'bogus DOCTYPE';
2189    
2190     if (@{$self->{char}}) {
2191     $self->{next_input_character} = shift @{$self->{char}};
2192     } else {
2193     $self->{set_next_input_character}->($self);
2194     }
2195    
2196     redo A;
2197     }
2198     } elsif ($self->{state} eq 'DOCTYPE system identifier (double-quoted)') {
2199     if ($self->{next_input_character} == 0x0022) { # "
2200     $self->{state} = 'after DOCTYPE system identifier';
2201    
2202     if (@{$self->{char}}) {
2203     $self->{next_input_character} = shift @{$self->{char}};
2204     } else {
2205     $self->{set_next_input_character}->($self);
2206     }
2207    
2208     redo A;
2209     } elsif ($self->{next_input_character} == -1) {
2210     $self->{parse_error}-> (type => 'unclosed SYSTEM literal');
2211    
2212 wakaba 1.1 $self->{state} = 'data';
2213     ## reconsume
2214    
2215 wakaba 1.18 delete $self->{current_token}->{correct};
2216 wakaba 1.1 return ($self->{current_token}); # DOCTYPE
2217    
2218     redo A;
2219     } else {
2220 wakaba 1.18 $self->{current_token}->{system_identifier} # DOCTYPE
2221     .= chr $self->{next_input_character};
2222     ## Stay in the state
2223    
2224     if (@{$self->{char}}) {
2225     $self->{next_input_character} = shift @{$self->{char}};
2226     } else {
2227     $self->{set_next_input_character}->($self);
2228     }
2229    
2230     redo A;
2231     }
2232     } elsif ($self->{state} eq 'DOCTYPE system identifier (single-quoted)') {
2233     if ($self->{next_input_character} == 0x0027) { # '
2234     $self->{state} = 'after DOCTYPE system identifier';
2235    
2236     if (@{$self->{char}}) {
2237     $self->{next_input_character} = shift @{$self->{char}};
2238     } else {
2239     $self->{set_next_input_character}->($self);
2240     }
2241    
2242     redo A;
2243     } elsif ($self->{next_input_character} == -1) {
2244     $self->{parse_error}-> (type => 'unclosed SYSTEM literal');
2245    
2246     $self->{state} = 'data';
2247     ## reconsume
2248    
2249     delete $self->{current_token}->{correct};
2250     return ($self->{current_token}); # DOCTYPE
2251    
2252     redo A;
2253     } else {
2254     $self->{current_token}->{system_identifier} # DOCTYPE
2255     .= chr $self->{next_input_character};
2256     ## Stay in the state
2257    
2258     if (@{$self->{char}}) {
2259     $self->{next_input_character} = shift @{$self->{char}};
2260     } else {
2261     $self->{set_next_input_character}->($self);
2262     }
2263    
2264     redo A;
2265     }
2266     } elsif ($self->{state} eq 'after DOCTYPE system identifier') {
2267     if ({
2268     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2269     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2270     }->{$self->{next_input_character}}) {
2271     ## Stay in the state
2272    
2273     if (@{$self->{char}}) {
2274     $self->{next_input_character} = shift @{$self->{char}};
2275     } else {
2276     $self->{set_next_input_character}->($self);
2277     }
2278    
2279     redo A;
2280     } elsif ($self->{next_input_character} == 0x003E) { # >
2281     $self->{state} = 'data';
2282    
2283     if (@{$self->{char}}) {
2284     $self->{next_input_character} = shift @{$self->{char}};
2285     } else {
2286     $self->{set_next_input_character}->($self);
2287     }
2288    
2289    
2290     return ($self->{current_token}); # DOCTYPE
2291    
2292     redo A;
2293     } elsif ($self->{next_input_character} == -1) {
2294     $self->{parse_error}-> (type => 'unclosed DOCTYPE');
2295    
2296     $self->{state} = 'data';
2297 wakaba 1.26 ## reconsume
2298 wakaba 1.18
2299     delete $self->{current_token}->{correct};
2300     return ($self->{current_token}); # DOCTYPE
2301    
2302     redo A;
2303     } else {
2304     $self->{parse_error}-> (type => 'string after SYSTEM literal');
2305 wakaba 1.1 $self->{state} = 'bogus DOCTYPE';
2306    
2307     if (@{$self->{char}}) {
2308     $self->{next_input_character} = shift @{$self->{char}};
2309     } else {
2310     $self->{set_next_input_character}->($self);
2311     }
2312    
2313     redo A;
2314     }
2315     } elsif ($self->{state} eq 'bogus DOCTYPE') {
2316     if ($self->{next_input_character} == 0x003E) { # >
2317     $self->{state} = 'data';
2318    
2319     if (@{$self->{char}}) {
2320     $self->{next_input_character} = shift @{$self->{char}};
2321     } else {
2322     $self->{set_next_input_character}->($self);
2323     }
2324    
2325    
2326 wakaba 1.18 delete $self->{current_token}->{correct};
2327 wakaba 1.1 return ($self->{current_token}); # DOCTYPE
2328    
2329     redo A;
2330     } elsif ($self->{next_input_character} == -1) {
2331 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed DOCTYPE');
2332 wakaba 1.1 $self->{state} = 'data';
2333     ## reconsume
2334    
2335 wakaba 1.18 delete $self->{current_token}->{correct};
2336 wakaba 1.1 return ($self->{current_token}); # DOCTYPE
2337    
2338     redo A;
2339     } else {
2340     ## Stay in the state
2341    
2342     if (@{$self->{char}}) {
2343     $self->{next_input_character} = shift @{$self->{char}};
2344     } else {
2345     $self->{set_next_input_character}->($self);
2346     }
2347    
2348     redo A;
2349     }
2350     } else {
2351     die "$0: $self->{state}: Unknown state";
2352     }
2353     } # A
2354    
2355     die "$0: _get_next_token: unexpected case";
2356     } # _get_next_token
2357    
## Attempt to consume a character reference ("entity").
##
## On entry, |$self->{next_input_character}| holds the code point that
## follows the "&".  Characters are read from the pushback buffer
## |$self->{char}| first and from the |set_next_input_character|
## callback once that buffer is empty.
##
## Arguments:
##   $self    - The tokenizer state (hash reference).
##   $in_attr - True if the reference occurs in an attribute value.  In
##              that case a named reference is only expanded when it is
##              terminated by ";".
##
## Returns a |{type => 'character', data => ...}| token, or |undef| if
## no reference was consumed.  In the |undef| case the input position
## is restored such that |next_input_character| is again the character
## after the "&".
##
## Parse errors are reported via the |$self->{parse_error}| callback.
sub _tokenize_attempt_to_consume_an_entity ($$) {
  my ($self, $in_attr) = @_;

  ## Consume one character: take it from the pushback buffer if there
  ## is any, otherwise ask the input callback for the next one.
  my $advance = sub {
    if (@{$self->{char}}) {
      $self->{next_input_character} = shift @{$self->{char}};
    } else {
      $self->{set_next_input_character}->($self);
    }
  };

  ## Finish a numeric character reference whose digits have all been
  ## consumed: eat the optional ";", replace code points that must not
  ## be emitted as is, and build the character token.
  my $finish_numeric = sub {
    my $code = shift;

    if ($self->{next_input_character} == 0x003B) { # ;
      $advance->();
    } else {
      $self->{parse_error}-> (type => 'no refc');
    }

    if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) {
      $self->{parse_error}-> (type => sprintf 'invalid character reference:U+%04X', $code);
      $code = 0xFFFD;
    } elsif ($code > 0x10FFFF) {
      $self->{parse_error}-> (type => sprintf 'invalid character reference:U-%08X', $code);
      $code = 0xFFFD;
    } elsif ($code == 0x000D) {
      $self->{parse_error}-> (type => 'CR character reference');
      $code = 0x000A;
    } elsif (0x80 <= $code and $code <= 0x9F) {
      $self->{parse_error}-> (type => sprintf 'C1 character reference:U+%04X', $code);
      $code = $c1_entity_char->{$code};
    }

    return {type => 'character', data => chr $code};
  };

  my $first = $self->{next_input_character};

  ## HT, LF, VT, FF, SP, <, &, and EOF (-1): not a reference at all.
  ## (CR, 0x000D, is intentionally not in this list.)
  my %not_a_reference = (
    0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1,
    0x0020 => 1, 0x003C => 1, 0x0026 => 1, -1 => 1,
  );
  if ($not_a_reference{$first}) {
    ## Don't consume
    ## No error
    return undef;
  }

  if ($first == 0x0023) { # #
    $advance->();

    if ($self->{next_input_character} == 0x0078 or # x
        $self->{next_input_character} == 0x0058) { # X
      ## Hexadecimal character reference
      my $x_char = $self->{next_input_character};
      $advance->();

      my $code;
      while (1) {
        my $c = $self->{next_input_character};
        my $digit;
        if (0x0030 <= $c and $c <= 0x0039) { # 0..9
          $digit = $c - 0x0030;
        } elsif (0x0061 <= $c and $c <= 0x0066) { # a..f
          $digit = $c - 0x0061 + 10;
        } elsif (0x0041 <= $c and $c <= 0x0046) { # A..F
          $digit = $c - 0x0041 + 10;
        } else {
          last;
        }
        $code = ($code || 0) * 0x10 + $digit;
        $advance->();
      }

      unless (defined $code) { # no hexadecimal digit
        $self->{parse_error}-> (type => 'bare hcro');
        ## Push back both the "x" and the character that follows it.
        ## The latter has to be pushed back too, otherwise it would be
        ## lost when |next_input_character| is reset to "#" below.
        unshift @{$self->{char}}, ($x_char, $self->{next_input_character});
        $self->{next_input_character} = 0x0023; # #
        return undef;
      }

      return $finish_numeric->($code);
    } elsif (0x0030 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x0039) { # 0..9
      ## Decimal character reference
      my $code = 0;
      while (0x0030 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x0039) { # 0..9
        $code = $code * 10 + $self->{next_input_character} - 0x0030;
        $advance->();
      }

      return $finish_numeric->($code);
    } else {
      $self->{parse_error}-> (type => 'bare nero');
      unshift @{$self->{char}}, ($self->{next_input_character});
      $self->{next_input_character} = 0x0023; # #
      return undef;
    }
  }

  if ((0x0041 <= $first and $first <= 0x005A) or # A..Z
      (0x0061 <= $first and $first <= 0x007A)) { # a..z
    ## Named character reference
    my $entity_name = chr $first;
    $advance->();

    ## |$value| is the text to be emitted: the raw name as consumed so
    ## far, until it is replaced by the expansion of a matched entity.
    my $value = $entity_name;
    ## 1: matched with ";", -1: matched without ";", 0: no match.
    my $match = 0;
    require Whatpm::_NamedEntityList;
    our $EntityChar;

    ## NOTE: 10 is some number greater than the maximum length of
    ## entity name.
    while (length ($entity_name) < 10) {
      my $c = $self->{next_input_character};
      last unless ((0x0041 <= $c and $c <= 0x005A) or # A..Z
                   (0x0061 <= $c and $c <= 0x007A) or # a..z
                   (0x0030 <= $c and $c <= 0x0039) or # 0..9
                   $c == 0x003B); # ;

      $entity_name .= chr $c;
      my $replacement = $EntityChar->{$entity_name};
      if (defined $replacement and $c == 0x003B) { # ;
        $value = $replacement;
        $match = 1;
        $advance->();
        last;
      } elsif (defined $replacement and not $in_attr) {
        ## Matched without ";".  Keep scanning, since a longer name
        ## might still match.
        $value = $replacement;
        $match = -1;
      } else {
        $value .= chr $c;
      }

      $advance->();
    }

    if ($match > 0) {
      return {type => 'character', data => $value};
    } elsif ($match < 0) {
      $self->{parse_error}-> (type => 'no refc');
      return {type => 'character', data => $value};
    } else {
      $self->{parse_error}-> (type => 'bare ero');
      ## NOTE: No characters are consumed in the spec.
      return {type => 'character', data => '&'.$value};
    }
  }

  ## no characters are consumed
  $self->{parse_error}-> (type => 'bare ero');
  return undef;
} # _tokenize_attempt_to_consume_an_entity
2571    
## Prepare |$self->{document}| for the tree construction stage.
##
## NOTE: $self->{document} MUST be specified before this method is called
sub _initialize_tree_constructor ($) {
  my ($self) = @_;
  my $doc = $self->{document};

  ## Strict error checking is switched off while the tree is built;
  ## |_terminate_tree_constructor| switches it back on.
  $doc->strict_error_checking (0);
  ## TODO: Turn mutation events off # MUST
  ## TODO: Turn loose Document option (manakai extension) on

  ## Flag the document as an HTML document. # MUST
  $doc->manakai_is_html (1);
} # _initialize_tree_constructor
2580    
## Finish the tree construction stage: switch strict error checking of
## |$self->{document}| back on.
sub _terminate_tree_constructor ($) {
  my ($self) = @_;
  ## TODO: Turn mutation events on
  return $self->{document}->strict_error_checking (1);
} # _terminate_tree_constructor
2586    
2587     ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
2588    
2589 wakaba 1.3 { # tree construction stage
2590     my $token;
2591    
## Run the tree construction stage: fetch the first token into the
## shared |$token|, reset the tree constructor state, and then run the
## initial, root element, and main phases in this order.
sub _construct_tree ($) {
  my $self = shift;

  ## It is not defined when an interactive UA makes $self->{document}
  ## available to the user, or when it begins accepting user input.

  ## Append a character: collect it and all subsequent consecutive
  ## characters and insert one Text node whose data is concatenation
  ## of all those characters. # MUST

  $token = $self->_get_next_token;

  ## Reset the state of the tree constructor.
  $self->{insertion_mode} = 'before head';
  $self->{open_elements} = [];
  $self->{$_} = undef for qw(form_element head_element inner_html_node);

  $self->_tree_construction_initial; # MUST
  $self->_tree_construction_root_element;
  $self->_tree_construction_main;
} # _construct_tree
2615    
## Tree construction: the "initial" phase.
##
## Processes tokens from the shared |$token| variable (declared outside
## this sub and refilled through |$self->_get_next_token|) until either
## a DOCTYPE token has been handled or a token that cannot precede a
## DOCTYPE has been seen.  Leading white space characters are dropped
## and comments are appended to |$self->{document}| on the way.
##
## Side effects: may report 'not HTML5' or 'no DOCTYPE' through
## |$self->{parse_error}|, may append a document type node and comment
## nodes to the document, and may set the document's compat mode to
## 'quirks' or 'limited quirks'.
##
## On return, |$token| holds the first token to be handled by the root
## element phase.  Dies on a token whose type is not recognized.
sub _tree_construction_initial ($) {
  my $self = shift;

  INITIAL: {
    if ($token->{type} eq 'DOCTYPE') {
      ## NOTE: Conformance checkers MAY, instead of reporting "not HTML5"
      ## error, switch to a conformance checking mode for another
      ## language.
      my $doctype_name = $token->{name};
      $doctype_name = '' unless defined $doctype_name;
      $doctype_name =~ tr/a-z/A-Z/;
      if (not defined $token->{name} or # <!DOCTYPE>
          defined $token->{public_identifier} or
          defined $token->{system_identifier}) {
        $self->{parse_error}-> (type => 'not HTML5');
      } elsif ($doctype_name ne 'HTML') {
        ## ISSUE: ASCII case-insensitive? (in fact it does not matter)
        $self->{parse_error}-> (type => 'not HTML5');
      }

      my $doctype = $self->{document}->create_document_type_definition
          ($token->{name}); ## ISSUE: If name is missing (e.g. <!DOCTYPE>)?
      $doctype->public_id ($token->{public_identifier})
          if defined $token->{public_identifier};
      $doctype->system_id ($token->{system_identifier})
          if defined $token->{system_identifier};
      ## NOTE: Other DocumentType attributes are null or empty lists.
      ## ISSUE: internalSubset = null??
      $self->{document}->append_child ($doctype);

      if (not $token->{correct} or $doctype_name ne 'HTML') {
        $self->{document}->manakai_compat_mode ('quirks');
      } elsif (defined $token->{public_identifier}) {
        ## Public identifiers are matched case-insensitively: the
        ## identifier is upper-cased here, so every literal it is
        ## compared with below MUST be written in upper case too.
        my $pubid = $token->{public_identifier};
        $pubid =~ tr/a-z/A-Z/;
        if ({
          "+//SILMARIL//DTD HTML PRO V0R11 19970101//EN" => 1,
          "-//ADVASOFT LTD//DTD HTML 3.0 ASWEDIT + EXTENSIONS//EN" => 1,
          "-//AS//DTD HTML 3.0 ASWEDIT + EXTENSIONS//EN" => 1,
          "-//IETF//DTD HTML 2.0 LEVEL 1//EN" => 1,
          "-//IETF//DTD HTML 2.0 LEVEL 2//EN" => 1,
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 1//EN" => 1,
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 2//EN" => 1,
          "-//IETF//DTD HTML 2.0 STRICT//EN" => 1,
          "-//IETF//DTD HTML 2.0//EN" => 1,
          "-//IETF//DTD HTML 2.1E//EN" => 1,
          "-//IETF//DTD HTML 3.0//EN" => 1,
          "-//IETF//DTD HTML 3.0//EN//" => 1,
          "-//IETF//DTD HTML 3.2 FINAL//EN" => 1,
          "-//IETF//DTD HTML 3.2//EN" => 1,
          "-//IETF//DTD HTML 3//EN" => 1,
          "-//IETF//DTD HTML LEVEL 0//EN" => 1,
          "-//IETF//DTD HTML LEVEL 0//EN//2.0" => 1,
          "-//IETF//DTD HTML LEVEL 1//EN" => 1,
          "-//IETF//DTD HTML LEVEL 1//EN//2.0" => 1,
          "-//IETF//DTD HTML LEVEL 2//EN" => 1,
          "-//IETF//DTD HTML LEVEL 2//EN//2.0" => 1,
          "-//IETF//DTD HTML LEVEL 3//EN" => 1,
          "-//IETF//DTD HTML LEVEL 3//EN//3.0" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 0//EN" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 0//EN//2.0" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 1//EN" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 1//EN//2.0" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 2//EN" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 2//EN//2.0" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 3//EN" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 3//EN//3.0" => 1,
          "-//IETF//DTD HTML STRICT//EN" => 1,
          "-//IETF//DTD HTML STRICT//EN//2.0" => 1,
          "-//IETF//DTD HTML STRICT//EN//3.0" => 1,
          "-//IETF//DTD HTML//EN" => 1,
          "-//IETF//DTD HTML//EN//2.0" => 1,
          "-//IETF//DTD HTML//EN//3.0" => 1,
          "-//METRIUS//DTD METRIUS PRESENTATIONAL//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML STRICT//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 TABLES//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML STRICT//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 TABLES//EN" => 1,
          "-//NETSCAPE COMM. CORP.//DTD HTML//EN" => 1,
          "-//NETSCAPE COMM. CORP.//DTD STRICT HTML//EN" => 1,
          "-//O'REILLY AND ASSOCIATES//DTD HTML 2.0//EN" => 1,
          "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED 1.0//EN" => 1,
          "-//SPYGLASS//DTD HTML 2.0 EXTENDED//EN" => 1,
          "-//SQ//DTD HTML 2.0 HOTMETAL + EXTENSIONS//EN" => 1,
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA HTML//EN" => 1,
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA STRICT HTML//EN" => 1,
          "-//W3C//DTD HTML 3 1995-03-24//EN" => 1,
          "-//W3C//DTD HTML 3.2 DRAFT//EN" => 1,
          "-//W3C//DTD HTML 3.2 FINAL//EN" => 1,
          "-//W3C//DTD HTML 3.2//EN" => 1,
          "-//W3C//DTD HTML 3.2S DRAFT//EN" => 1,
          "-//W3C//DTD HTML 4.0 FRAMESET//EN" => 1,
          "-//W3C//DTD HTML 4.0 TRANSITIONAL//EN" => 1,
          "-//W3C//DTD HTML EXPERIMENTAL 19960712//EN" => 1,
          "-//W3C//DTD HTML EXPERIMENTAL 970421//EN" => 1,
          "-//W3C//DTD W3 HTML//EN" => 1,
          "-//W3O//DTD W3 HTML 3.0//EN" => 1,
          "-//W3O//DTD W3 HTML 3.0//EN//" => 1,
          "-//W3O//DTD W3 HTML STRICT 3.0//EN//" => 1,
          "-//WEBTECHS//DTD MOZILLA HTML 2.0//EN" => 1,
          "-//WEBTECHS//DTD MOZILLA HTML//EN" => 1,
          "-/W3C/DTD HTML 4.0 TRANSITIONAL/EN" => 1,
          "HTML" => 1,
        }->{$pubid}) {
          $self->{document}->manakai_compat_mode ('quirks');
        } elsif ($pubid eq "-//W3C//DTD HTML 4.01 FRAMESET//EN" or
                 $pubid eq "-//W3C//DTD HTML 4.01 TRANSITIONAL//EN") {
          ## HTML 4.01 Frameset/Transitional: a DOCTYPE without a
          ## system identifier selects quirks mode; with a system
          ## identifier it selects limited quirks mode.
          if (defined $token->{system_identifier}) {
            $self->{document}->manakai_compat_mode ('limited quirks');
          } else {
            $self->{document}->manakai_compat_mode ('quirks');
          }
        } elsif ($pubid eq "-//W3C//DTD XHTML 1.0 FRAMESET//EN" or
                 $pubid eq "-//W3C//DTD XHTML 1.0 TRANSITIONAL//EN") {
          $self->{document}->manakai_compat_mode ('limited quirks');
        }
      }
      if (defined $token->{system_identifier}) {
        ## The system identifier is lower-cased before the comparison.
        my $sysid = $token->{system_identifier};
        $sysid =~ tr/A-Z/a-z/;
        if ($sysid eq "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
          $self->{document}->manakai_compat_mode ('quirks');
        }
      }

      ## Go to the root element phase.
      $token = $self->_get_next_token;
      return;
    } elsif ({
      'start tag' => 1,
      'end tag' => 1,
      'end-of-file' => 1,
    }->{$token->{type}}) {
      $self->{parse_error}-> (type => 'no DOCTYPE');
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the root element phase
      ## reprocess
      return;
    } elsif ($token->{type} eq 'character') {
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D
        ## Ignore the token

        unless (length $token->{data}) {
          ## Stay in the phase
          $token = $self->_get_next_token;
          redo INITIAL;
        }
      }

      ## Non-space character data remains: there is no DOCTYPE.
      $self->{parse_error}-> (type => 'no DOCTYPE');
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the root element phase
      ## reprocess
      return;
    } elsif ($token->{type} eq 'comment') {
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);

      ## Stay in the phase.
      $token = $self->_get_next_token;
      redo INITIAL;
    } else {
      die "$0: $token->{type}: Unknown token";
    }
  } # INITIAL
} # _tree_construction_initial
2783    
## Tree construction: the "root element" phase.
##
## Skips DOCTYPE tokens (reported as 'in html:#DOCTYPE'), appends
## comments to the document and drops leading white space.  At the
## first other token an |html| element is created, appended to
## |$self->{document}| and pushed onto |$self->{open_elements}|; that
## token is left in the shared |$token| variable for reprocessing.
## Dies on a token whose type is not recognized.
sub _tree_construction_root_element ($) {
  my $self = shift;

  TOKEN: while (1) {
    my $type = $token->{type};

    if ($type eq 'DOCTYPE') {
      $self->{parse_error}-> (type => 'in html:#DOCTYPE');
      ## Ignore the token and stay in the phase.
      $token = $self->_get_next_token;
      next TOKEN;
    }

    if ($type eq 'comment') {
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);
      ## Stay in the phase.
      $token = $self->_get_next_token;
      next TOKEN;
    }

    if ($type eq 'character') {
      ## Leading white space is ignored; a token consisting only of
      ## white space does not leave the phase.
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)// and # \x0D
          not length $token->{data}) {
        $token = $self->_get_next_token;
        next TOKEN;
      }
      last TOKEN;
    }

    ## ISSUE: There is an issue in the spec
    last TOKEN if $type eq 'start tag' or
                  $type eq 'end tag' or
                  $type eq 'end-of-file';

    die "$0: $token->{type}: Unknown token";
  } # TOKEN

  my $root_element = $self->{document}->create_element_ns
      ('http://www.w3.org/1999/xhtml', [undef, 'html']);
  $self->{document}->append_child ($root_element);
  push @{$self->{open_elements}}, [$root_element, 'html'];

  ## The current token is reprocessed by the main phase.
  return;
} # _tree_construction_root_element
2833    
## Reset |$self->{insertion_mode}| from the stack of open elements.
##
## Walks |$self->{open_elements}| from the last entry toward the first
## and selects the insertion mode implied by the first entry whose
## local name (entry |[1]|) is listed in the table below.  When the
## first entry of the stack is reached and |$self->{inner_html_node}|
## is defined with a name other than |td| or |th|, that node is
## examined instead of the stack entry.
sub _reset_insertion_mode ($) {
  my $self = shift;

  ## Step 4..13: local name => insertion mode
  my %mode_for = (
    select => 'in select',
    td => 'in cell',
    th => 'in cell',
    tr => 'in row',
    tbody => 'in table body',
    thead => 'in table head',
    tfoot => 'in table foot',
    caption => 'in caption',
    colgroup => 'in column group',
    table => 'in table',
    head => 'in body', # not in head!
    body => 'in body',
    frameset => 'in frameset',
  );

  ## Step 1
  my $last;

  ## Step 2, 16, 17: examine the stack entries from the end
  for (my $i = -1; ; $i--) {
    my $node = $self->{open_elements}->[$i];

    ## Step 3
    ## ISSUE: Oops! "If node is the first node in the stack of open
    ## elements, then set last to true. If the context element of the
    ## HTML fragment parsing algorithm is neither a td element nor a
    ## th element, then set node to the context element. (fragment case)":
    ## The second "if" is in the scope of the first "if"!?
    if ($self->{open_elements}->[0]->[0] eq $node->[0]) {
      $last = 1;
      my $context = $self->{inner_html_node};
      if (defined $context and
          not ($context->[1] eq 'td' or $context->[1] eq 'th')) {
        $node = $context;
      }
    }

    ## Step 4..13
    my $new_mode = $mode_for{$node->[1]};
    if (defined $new_mode) {
      $self->{insertion_mode} = $new_mode;
      return;
    }

    ## Step 14
    if ($node->[1] eq 'html') {
      $self->{insertion_mode}
          = defined $self->{head_element} ? 'after head' : 'before head';
      return;
    }

    ## Step 15
    if ($last) {
      $self->{insertion_mode} = 'in body';
      return;
    }
  }
} # _reset_insertion_mode
2902    
2903     sub _tree_construction_main ($) {
2904     my $self = shift;
2905    
2906     my $phase = 'main';
2907 wakaba 1.1
2908     my $active_formatting_elements = [];
2909    
## Reconstruct the active formatting elements.
##
## Argument: |$insert|, a code reference called with each newly cloned
## element node.
##
## Does nothing when the list is empty or when its last entry is a
## marker or is on |$self->{open_elements}|.  Otherwise it rewinds to
## the entry following the nearest earlier marker / open element (or
## to the first entry) and, from there to the end of the list, clones
## each entry, hands the clone to |$insert|, pushes it onto the stack
## of open elements and replaces the list entry with the clone.
my $reconstruct_active_formatting_elements = sub { # MUST
  my $insert = shift;

  ## True if the given node is on the stack of open elements.
  my $is_open = sub {
    my $el = shift;
    for my $open (@{$self->{open_elements}}) {
      return 1 if $el eq $open->[0];
    }
    return 0;
  };

  ## Step 1
  return unless @$active_formatting_elements;

  ## Step 3
  my $i = -1;
  my $entry = $active_formatting_elements->[$i];

  ## Step 2
  return if $entry->[0] eq '#marker';
  return if $is_open->($entry->[0]);

  ## Step 4: stop rewinding when there is no entry before |$entry|
  REWIND: until ($active_formatting_elements->[0]->[0] eq $entry->[0]) {
    ## Step 5
    $i--;
    $entry = $active_formatting_elements->[$i];

    ## Step 6: keep rewinding while the entry is neither a marker nor
    ## an open element
    next REWIND
        unless $entry->[0] eq '#marker' or $is_open->($entry->[0]);

    ## Step 7
    $i++;
    $entry = $active_formatting_elements->[$i];
    last REWIND;
  } # REWIND

  CREATE: while (1) {
    ## Step 8
    my $clone = [$entry->[0]->clone_node (0), $entry->[1]];

    ## Step 9
    $insert->($clone->[0]);
    push @{$self->{open_elements}}, $clone;

    ## Step 10
    $active_formatting_elements->[$i] = $self->{open_elements}->[-1];

    ## Step 11
    last CREATE if $clone->[0] eq $active_formatting_elements->[-1]->[0];

    ## Step 7'
    $i++;
    $entry = $active_formatting_elements->[$i];
  } # CREATE
}; # $reconstruct_active_formatting_elements
2980    
## Clear the list of active formatting elements up to the last marker:
## the last '#marker' entry and every entry after it are removed.  The
## list is left untouched when it contains no marker.
my $clear_up_to_marker = sub {
  my $marker_i = $#$active_formatting_elements;
  $marker_i--
      while $marker_i >= 0 and
            not $active_formatting_elements->[$marker_i]->[0] eq '#marker';
  splice @$active_formatting_elements, $marker_i if $marker_i >= 0;
  return;
}; # $clear_up_to_marker
2989    
## Generic CDATA/RCDATA element parsing for the start tag currently
## held in the shared |$token| variable.
##
## Arguments:
##   $content_model_flag - value stored into
##                         |$self->{content_model_flag}| while the
##                         element content is read (CDATA or RCDATA)
##   $insert             - code reference called with the new element
##
## All following character tokens are concatenated into one Text node
## appended to the element.  The content model flag is then set back to
## 'PCDATA'.  If the token after the text is not the matching end tag,
## an 'in <flag>:#<token type>' parse error is reported.  In both cases
## that token is consumed.
my $parse_rcdata = sub ($$) {
  my ($content_model_flag, $insert) = @_;

  ## Step 1
  my $start_tag_name = $token->{tag_name};
  my $el = $self->{document}->create_element_ns
      ('http://www.w3.org/1999/xhtml', [undef, $start_tag_name]);
  my $attrs = $token->{attributes};
  for my $attr_name (keys %$attrs) {
    $el->set_attribute_ns
        (undef, [undef, $attr_name], $attrs->{$attr_name}->{value});
  }

  ## Step 2
  $insert->($el); # /context node/->append_child ($el)

  ## Step 3
  $self->{content_model_flag} = $content_model_flag; # CDATA or RCDATA
  delete $self->{escape}; # MUST

  ## Step 4
  my $data = '';
  for ($token = $self->_get_next_token;
       $token->{type} eq 'character'; # or until stop tokenizing
       $token = $self->_get_next_token) {
    $data .= $token->{data};
  }

  ## Step 5
  $el->append_child ($self->{document}->create_text_node ($data))
      if length $data;

  ## Step 6
  $self->{content_model_flag} = 'PCDATA';

  ## Step 7
  unless ($token->{type} eq 'end tag' and
          $token->{tag_name} eq $start_tag_name) {
    $self->{parse_error}-> (type => 'in '.$content_model_flag.':#'.$token->{type});
  }
  ## The token is consumed whether or not it was the matching end tag.
  $token = $self->_get_next_token;
}; # $parse_rcdata
3038 wakaba 1.1
## Handle a |script| start tag held in the shared |$token| variable.
##
## Argument: |$insert|, a code reference called with the new |script|
## element, but only when |$self->{inner_html_node}| is not defined.
##
## The element content is read with the content model flag set to
## 'CDATA' and appended to the element as text; the flag is then set
## back to 'PCDATA'.  If the token after the text is not a |script| end
## tag, an 'in CDATA:#<token type>' parse error is reported.  In both
## cases that token is consumed.
my $script_start_tag = sub ($) {
  my ($insert) = @_;

  my $script_el = $self->{document}->create_element_ns
      ('http://www.w3.org/1999/xhtml', [undef, 'script']);
  my $attrs = $token->{attributes};
  for my $attr_name (keys %$attrs) {
    $script_el->set_attribute_ns
        (undef, [undef, $attr_name], $attrs->{$attr_name}->{value});
  }

  ## TODO: mark as "parser-inserted"

  $self->{content_model_flag} = 'CDATA';
  delete $self->{escape}; # MUST

  ## Collect character tokens until a non-character token is seen or
  ## the tokenizer stops tokenising.
  my $data = '';
  for ($token = $self->_get_next_token;
       $token->{type} eq 'character';
       $token = $self->_get_next_token) {
    $data .= $token->{data};
  }
  $script_el->manakai_append_text ($data) if length $data;

  $self->{content_model_flag} = 'PCDATA';

  unless ($token->{type} eq 'end tag' and
          $token->{tag_name} eq 'script') {
    $self->{parse_error}-> (type => 'in CDATA:#'.$token->{type});
    ## ISSUE: And ignore?
    ## TODO: mark as "already executed"
  }

  unless (defined $self->{inner_html_node}) {
    ## TODO: $old_insertion_point = current insertion point
    ## TODO: insertion point = just before the next input character

    $insert->($script_el);

    ## TODO: insertion point = $old_insertion_point (might be "undefined")

    ## TODO: if there is a script that will execute as soon as the parser resume, then...
  } else {
    ## TODO: mark as "already executed"
  }

  $token = $self->_get_next_token;
}; # $script_start_tag
3092    
## Handle an end tag of a formatting element.
##
## Argument: |$tag_name|, the local name of the end tag.
##
## Looks the element up in |$active_formatting_elements| (searching
## back to the last marker) and on |$self->{open_elements}|.  If there
## is no non-formatting "furthest block" below it on the stack, the
## stack is simply cut at the formatting element.  Otherwise the nodes
## between the formatting element and the furthest block are
## re-parented and cloned as coded in steps 4..13, and the whole
## procedure is repeated.
##
## Every exit path consumes the current token by fetching the next one
## into the shared |$token| variable.
my $formatting_end_tag = sub {
  my $tag_name = shift;

  FET: {
    ## Step 1
    my $formatting_element;
    my $formatting_element_i_in_active;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[1] eq $tag_name) {
        $formatting_element = $active_formatting_elements->[$_];
        $formatting_element_i_in_active = $_;
        last AFE;
      } elsif ($active_formatting_elements->[$_]->[0] eq '#marker') {
        last AFE;
      }
    } # AFE
    unless (defined $formatting_element) {
      $self->{parse_error}-> (type => 'unmatched end tag:'.$tag_name);
      ## Ignore the token
      $token = $self->_get_next_token;
      return;
    }
    ## has an element in scope
    my $in_scope = 1;
    my $formatting_element_i_in_open;
    INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if ($node->[0] eq $formatting_element->[0]) {
        if ($in_scope) {
          $formatting_element_i_in_open = $_;
          last INSCOPE;
        } else { # in open elements but not in scope
          $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
          ## Ignore the token
          $token = $self->_get_next_token;
          return;
        }
      } elsif ({
                table => 1, caption => 1, td => 1, th => 1,
                button => 1, marquee => 1, object => 1, html => 1,
               }->{$node->[1]}) {
        $in_scope = 0;
      }
    } # INSCOPE
    unless (defined $formatting_element_i_in_open) {
      $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
      ## Remove the formatting element itself from the list.  It is not
      ## necessarily the last entry, so |pop| would drop the wrong one.
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      $token = $self->_get_next_token; ## TODO: ok?
      return;
    }
    if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
      $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
    }

    ## Step 2
    ## Scanning from the end of the stack toward the formatting element,
    ## the last matching node seen (i.e. the one nearest to the
    ## formatting element) is kept as the furthest block.
    my $furthest_block;
    my $furthest_block_i_in_open;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if (not $formatting_category->{$node->[1]} and
          #not $phrasing_category->{$node->[1]} and
          ($special_category->{$node->[1]} or
           $scoping_category->{$node->[1]})) {
        $furthest_block = $node;
        $furthest_block_i_in_open = $_;
      } elsif ($node->[0] eq $formatting_element->[0]) {
        last OE;
      }
    } # OE

    ## Step 3
    unless (defined $furthest_block) { # MUST
      splice @{$self->{open_elements}}, $formatting_element_i_in_open;
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      $token = $self->_get_next_token;
      return;
    }

    ## Step 4
    my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1];

    ## Step 5
    my $furthest_block_parent = $furthest_block->[0]->parent_node;
    if (defined $furthest_block_parent) {
      $furthest_block_parent->remove_child ($furthest_block->[0]);
    }

    ## Step 6
    my $bookmark_prev_el
        = $active_formatting_elements->[$formatting_element_i_in_active - 1]
            ->[0];

    ## Step 7
    my $node = $furthest_block;
    my $node_i_in_open = $furthest_block_i_in_open;
    my $last_node = $furthest_block;
    S7: {
      ## Step 1
      $node_i_in_open--;
      $node = $self->{open_elements}->[$node_i_in_open];

      ## Step 2
      ## A node that is not in the list of active formatting elements
      ## is removed from the stack and the previous node is examined.
      my $node_i_in_active;
      S7S2: {
        for (reverse 0..$#$active_formatting_elements) {
          if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
            $node_i_in_active = $_;
            last S7S2;
          }
        }
        splice @{$self->{open_elements}}, $node_i_in_open, 1;
        redo S7;
      } # S7S2

      ## Step 3
      last S7 if $node->[0] eq $formatting_element->[0];

      ## Step 4
      if ($last_node->[0] eq $furthest_block->[0]) {
        $bookmark_prev_el = $node->[0];
      }

      ## Step 5
      if ($node->[0]->has_child_nodes ()) {
        my $clone = [$node->[0]->clone_node (0), $node->[1]];
        $active_formatting_elements->[$node_i_in_active] = $clone;
        $self->{open_elements}->[$node_i_in_open] = $clone;
        $node = $clone;
      }

      ## Step 6
      $node->[0]->append_child ($last_node->[0]);

      ## Step 7
      $last_node = $node;

      ## Step 8
      redo S7;
    } # S7

    ## Step 8
    $common_ancestor_node->[0]->append_child ($last_node->[0]);

    ## Step 9
    my $clone = [$formatting_element->[0]->clone_node (0),
                 $formatting_element->[1]];

    ## Step 10
    ## The child list is copied first because the children are moved
    ## while iterating.
    my @cn = @{$furthest_block->[0]->child_nodes};
    $clone->[0]->append_child ($_) for @cn;

    ## Step 11
    $furthest_block->[0]->append_child ($clone->[0]);

    ## Step 12
    my $i;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
        splice @$active_formatting_elements, $_, 1;
        $i-- and last AFE if defined $i;
      } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
        $i = $_;
      }
    } # AFE
    splice @$active_formatting_elements, $i + 1, 0, $clone;

    ## Step 13
    undef $i;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
        splice @{$self->{open_elements}}, $_, 1;
        $i-- and last OE if defined $i;
      } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
        $i = $_;
      }
    } # OE
    ## NOTE(review): this |splice| replaces one entry (length 1) whereas
    ## step 12 inserts (length 0) - confirm that this is intended.
    splice @{$self->{open_elements}}, $i + 1, 1, $clone;

    ## Step 14
    redo FET;
  } # FET
}; # $formatting_end_tag
3275    
## Insertion callback: append the given node to the current node, i.e.
## the node of the last entry of |$self->{open_elements}|.
my $insert_to_current = sub {
  my ($child) = @_;
  my $current_node = $self->{open_elements}->[-1]->[0];
  $current_node->append_child ($child);
}; # $insert_to_current
3279    
## Insertion callback with foster parenting.
##
## Unless the current node is |table|, |tbody|, |tfoot|, |thead| or
## |tr|, the node is simply appended to the current node.  Otherwise
## the last |table| entry on the stack of open elements is looked up:
## if its node has a parent whose |node_type| is 1, the child is
## inserted into that parent just before the table; if not, it is
## appended to the node of the stack entry preceding the table.  When
## the stack has no |table| entry, the child is appended to the node of
## the first stack entry.
my $insert_to_foster = sub {
  my ($child) = @_;
  my $open = $self->{open_elements};

  my %table_context = map { $_ => 1 } qw(table tbody tfoot thead tr);
  unless ($table_context{$open->[-1]->[1]}) {
    return $open->[-1]->[0]->append_child ($child);
  }

  # MUST
  ## Index of the last |table| on the stack, or -1 if there is none.
  my $table_i = $#$open;
  $table_i-- while $table_i >= 0 and $open->[$table_i]->[1] ne 'table';

  my $foster_parent_element;
  my $next_sibling;
  if ($table_i >= 0) {
    my $table_el = $open->[$table_i]->[0];
    my $parent = $table_el->parent_node;
    if (defined $parent and $parent->node_type == 1) {
      $foster_parent_element = $parent;
      $next_sibling = $table_el;
    } else {
      $foster_parent_element = $open->[$table_i - 1]->[0];
    }
  }
  $foster_parent_element = $open->[0]->[0]
      unless defined $foster_parent_element;

  ## An undefined |$next_sibling| is passed through unchanged.
  $foster_parent_element->insert_before ($child, $next_sibling);
}; # $insert_to_foster
3310    
3311     my $in_body = sub {
3312     my $insert = shift;
3313     if ($token->{type} eq 'start tag') {
3314     if ($token->{tag_name} eq 'script') {
3315 wakaba 1.25 ## NOTE: This is an "as if in head" code clone
3316     $script_start_tag->($insert);
3317 wakaba 1.1 return;
3318     } elsif ($token->{tag_name} eq 'style') {
3319 wakaba 1.25 ## NOTE: This is an "as if in head" code clone
3320     $parse_rcdata->('CDATA', $insert);
3321 wakaba 1.1 return;
3322     } elsif ({
3323     base => 1, link => 1, meta => 1,
3324     }->{$token->{tag_name}}) {
3325 wakaba 1.25 ## NOTE: This is an "as if in head" code clone, only "-t" differs
3326 wakaba 1.1
3327 wakaba 1.25 {
3328     my $el;
3329    
3330 wakaba 1.1 $el = $self->{document}->create_element_ns
3331     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3332    
3333 wakaba 1.25 for my $attr_name (keys %{ $token->{attributes}}) {
3334 wakaba 1.1 $el->set_attribute_ns (undef, [undef, $attr_name],
3335 wakaba 1.25 $token->{attributes} ->{$attr_name}->{value});
3336 wakaba 1.1 }
3337    
3338 wakaba 1.25 $insert->($el);
3339     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3340     }
3341    
3342     pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
3343 wakaba 1.1 $token = $self->_get_next_token;
3344 wakaba 1.26 ## TODO: Extracting |charset| from |meta|.
3345 wakaba 1.1 return;
3346     } elsif ($token->{tag_name} eq 'title') {
3347 wakaba 1.3 $self->{parse_error}-> (type => 'in body:title');
3348 wakaba 1.25 ## NOTE: This is an "as if in head" code clone
3349 wakaba 1.31 $parse_rcdata->('RCDATA', sub {
3350     if (defined $self->{head_element}) {
3351     $self->{head_element}->append_child ($_[0]);
3352     } else {
3353     $insert->($_[0]);
3354     }
3355     });
3356 wakaba 1.1 return;
3357     } elsif ($token->{tag_name} eq 'body') {
3358 wakaba 1.3 $self->{parse_error}-> (type => 'in body:body');
3359 wakaba 1.1
3360 wakaba 1.3 if (@{$self->{open_elements}} == 1 or
3361     $self->{open_elements}->[1]->[1] ne 'body') {
3362 wakaba 1.1 ## Ignore the token
3363     } else {
3364 wakaba 1.3 my $body_el = $self->{open_elements}->[1]->[0];
3365 wakaba 1.1 for my $attr_name (keys %{$token->{attributes}}) {
3366     unless ($body_el->has_attribute_ns (undef, $attr_name)) {
3367     $body_el->set_attribute_ns
3368     (undef, [undef, $attr_name],
3369     $token->{attributes}->{$attr_name}->{value});
3370     }
3371     }
3372     }
3373     $token = $self->_get_next_token;
3374     return;
3375     } elsif ({
3376     address => 1, blockquote => 1, center => 1, dir => 1,
3377     div => 1, dl => 1, fieldset => 1, listing => 1,
3378     menu => 1, ol => 1, p => 1, ul => 1,
3379     pre => 1,
3380     }->{$token->{tag_name}}) {
3381     ## has a p element in scope
3382 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
3383 wakaba 1.1 if ($_->[1] eq 'p') {
3384     unshift @{$self->{token}}, $token;
3385     $token = {type => 'end tag', tag_name => 'p'};
3386     return;
3387     } elsif ({
3388     table => 1, caption => 1, td => 1, th => 1,
3389     button => 1, marquee => 1, object => 1, html => 1,
3390     }->{$_->[1]}) {
3391     last INSCOPE;
3392     }
3393     } # INSCOPE
3394    
3395    
3396     {
3397     my $el;
3398    
3399     $el = $self->{document}->create_element_ns
3400     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3401    
3402     for my $attr_name (keys %{ $token->{attributes}}) {
3403     $el->set_attribute_ns (undef, [undef, $attr_name],
3404     $token->{attributes} ->{$attr_name}->{value});
3405     }
3406    
3407     $insert->($el);
3408 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3409 wakaba 1.1 }
3410    
3411     if ($token->{tag_name} eq 'pre') {
3412     $token = $self->_get_next_token;
3413     if ($token->{type} eq 'character') {
3414     $token->{data} =~ s/^\x0A//;
3415     unless (length $token->{data}) {
3416     $token = $self->_get_next_token;
3417     }
3418     }
3419     } else {
3420     $token = $self->_get_next_token;
3421     }
3422     return;
3423     } elsif ($token->{tag_name} eq 'form') {
3424 wakaba 1.3 if (defined $self->{form_element}) {
3425     $self->{parse_error}-> (type => 'in form:form');
3426 wakaba 1.1 ## Ignore the token
3427 wakaba 1.7 $token = $self->_get_next_token;
3428     return;
3429 wakaba 1.1 } else {
3430     ## has a p element in scope
3431 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
3432 wakaba 1.1 if ($_->[1] eq 'p') {
3433     unshift @{$self->{token}}, $token;
3434     $token = {type => 'end tag', tag_name => 'p'};
3435     return;
3436     } elsif ({
3437     table => 1, caption => 1, td => 1, th => 1,
3438     button => 1, marquee => 1, object => 1, html => 1,
3439     }->{$_->[1]}) {
3440     last INSCOPE;
3441     }
3442     } # INSCOPE
3443    
3444    
3445     {
3446     my $el;
3447    
3448     $el = $self->{document}->create_element_ns
3449     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3450    
3451     for my $attr_name (keys %{ $token->{attributes}}) {
3452     $el->set_attribute_ns (undef, [undef, $attr_name],
3453     $token->{attributes} ->{$attr_name}->{value});
3454     }
3455    
3456     $insert->($el);
3457 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3458 wakaba 1.1 }
3459    
3460 wakaba 1.3 $self->{form_element} = $self->{open_elements}->[-1]->[0];
3461 wakaba 1.1 $token = $self->_get_next_token;
3462     return;
3463     }
3464     } elsif ($token->{tag_name} eq 'li') {
3465     ## has a p element in scope
3466 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
3467 wakaba 1.1 if ($_->[1] eq 'p') {
3468     unshift @{$self->{token}}, $token;
3469     $token = {type => 'end tag', tag_name => 'p'};
3470     return;
3471     } elsif ({
3472     table => 1, caption => 1, td => 1, th => 1,
3473     button => 1, marquee => 1, object => 1, html => 1,
3474     }->{$_->[1]}) {
3475     last INSCOPE;
3476     }
3477     } # INSCOPE
3478    
3479     ## Step 1
3480     my $i = -1;
3481 wakaba 1.3 my $node = $self->{open_elements}->[$i];
3482 wakaba 1.1 LI: {
3483     ## Step 2
3484     if ($node->[1] eq 'li') {
3485 wakaba 1.8 if ($i != -1) {
3486     $self->{parse_error}-> (type => 'end tag missing:'.
3487     $self->{open_elements}->[-1]->[1]);
3488     }
3489 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3490 wakaba 1.1 last LI;
3491     }
3492    
3493     ## Step 3
3494     if (not $formatting_category->{$node->[1]} and
3495     #not $phrasing_category->{$node->[1]} and
3496     ($special_category->{$node->[1]} or
3497     $scoping_category->{$node->[1]}) and
3498     $node->[1] ne 'address' and $node->[1] ne 'div') {
3499     last LI;
3500     }
3501    
3502     ## Step 4
3503     $i--;
3504 wakaba 1.3 $node = $self->{open_elements}->[$i];
3505 wakaba 1.1 redo LI;
3506     } # LI
3507    
3508    
3509     {
3510     my $el;
3511    
3512     $el = $self->{document}->create_element_ns
3513     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3514    
3515     for my $attr_name (keys %{ $token->{attributes}}) {
3516     $el->set_attribute_ns (undef, [undef, $attr_name],
3517     $token->{attributes} ->{$attr_name}->{value});
3518     }
3519    
3520     $insert->($el);
3521 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3522 wakaba 1.1 }
3523    
3524     $token = $self->_get_next_token;
3525     return;
3526     } elsif ($token->{tag_name} eq 'dd' or $token->{tag_name} eq 'dt') {
3527     ## has a p element in scope
3528 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
3529 wakaba 1.1 if ($_->[1] eq 'p') {
3530     unshift @{$self->{token}}, $token;
3531     $token = {type => 'end tag', tag_name => 'p'};
3532     return;
3533     } elsif ({
3534     table => 1, caption => 1, td => 1, th => 1,
3535     button => 1, marquee => 1, object => 1, html => 1,
3536     }->{$_->[1]}) {
3537     last INSCOPE;
3538     }
3539     } # INSCOPE
3540    
3541     ## Step 1
3542     my $i = -1;
3543 wakaba 1.3 my $node = $self->{open_elements}->[$i];
3544 wakaba 1.1 LI: {
3545     ## Step 2
3546     if ($node->[1] eq 'dt' or $node->[1] eq 'dd') {
3547 wakaba 1.8 if ($i != -1) {
3548     $self->{parse_error}-> (type => 'end tag missing:'.
3549     $self->{open_elements}->[-1]->[1]);
3550     }
3551 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3552 wakaba 1.1 last LI;
3553     }
3554    
3555     ## Step 3
3556     if (not $formatting_category->{$node->[1]} and
3557     #not $phrasing_category->{$node->[1]} and
3558     ($special_category->{$node->[1]} or
3559     $scoping_category->{$node->[1]}) and
3560     $node->[1] ne 'address' and $node->[1] ne 'div') {
3561     last LI;
3562     }
3563    
3564     ## Step 4
3565     $i--;
3566 wakaba 1.3 $node = $self->{open_elements}->[$i];
3567 wakaba 1.1 redo LI;
3568     } # LI
3569    
3570    
3571     {
3572     my $el;
3573    
3574     $el = $self->{document}->create_element_ns
3575     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3576    
3577     for my $attr_name (keys %{ $token->{attributes}}) {
3578     $el->set_attribute_ns (undef, [undef, $attr_name],
3579     $token->{attributes} ->{$attr_name}->{value});
3580     }
3581    
3582     $insert->($el);
3583 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3584 wakaba 1.1 }
3585    
3586     $token = $self->_get_next_token;
3587     return;
3588     } elsif ($token->{tag_name} eq 'plaintext') {
3589     ## has a p element in scope
3590 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
3591 wakaba 1.1 if ($_->[1] eq 'p') {
3592     unshift @{$self->{token}}, $token;
3593     $token = {type => 'end tag', tag_name => 'p'};
3594     return;
3595     } elsif ({
3596     table => 1, caption => 1, td => 1, th => 1,
3597     button => 1, marquee => 1, object => 1, html => 1,
3598     }->{$_->[1]}) {
3599     last INSCOPE;
3600     }
3601     } # INSCOPE
3602    
3603    
3604     {
3605     my $el;
3606    
3607     $el = $self->{document}->create_element_ns
3608     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3609    
3610     for my $attr_name (keys %{ $token->{attributes}}) {
3611     $el->set_attribute_ns (undef, [undef, $attr_name],
3612     $token->{attributes} ->{$attr_name}->{value});
3613     }
3614    
3615     $insert->($el);
3616 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3617 wakaba 1.1 }
3618    
3619    
3620     $self->{content_model_flag} = 'PLAINTEXT';
3621    
3622     $token = $self->_get_next_token;
3623     return;
3624     } elsif ({
3625     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
3626     }->{$token->{tag_name}}) {
3627     ## has a p element in scope
3628 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3629     my $node = $self->{open_elements}->[$_];
3630 wakaba 1.1 if ($node->[1] eq 'p') {
3631     unshift @{$self->{token}}, $token;
3632     $token = {type => 'end tag', tag_name => 'p'};
3633     return;
3634     } elsif ({
3635     table => 1, caption => 1, td => 1, th => 1,
3636     button => 1, marquee => 1, object => 1, html => 1,
3637     }->{$node->[1]}) {
3638     last INSCOPE;
3639     }
3640     } # INSCOPE
3641    
3642 wakaba 1.23 ## NOTE: See <http://html5.org/tools/web-apps-tracker?from=925&to=926>
3643 wakaba 1.1 ## has an element in scope
3644 wakaba 1.23 #my $i;
3645     #INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3646     # my $node = $self->{open_elements}->[$_];
3647     # if ({
3648     # h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
3649     # }->{$node->[1]}) {
3650     # $i = $_;
3651     # last INSCOPE;
3652     # } elsif ({
3653     # table => 1, caption => 1, td => 1, th => 1,
3654     # button => 1, marquee => 1, object => 1, html => 1,
3655     # }->{$node->[1]}) {
3656     # last INSCOPE;
3657     # }
3658     #} # INSCOPE
3659     #
3660     #if (defined $i) {
3661     # !!! parse-error (type => 'in hn:hn');
3662     # splice @{$self->{open_elements}}, $i;
3663     #}
3664 wakaba 1.1
3665    
3666     {
3667     my $el;
3668    
3669     $el = $self->{document}->create_element_ns
3670     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3671    
3672     for my $attr_name (keys %{ $token->{attributes}}) {
3673     $el->set_attribute_ns (undef, [undef, $attr_name],
3674     $token->{attributes} ->{$attr_name}->{value});
3675     }
3676    
3677     $insert->($el);
3678 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3679 wakaba 1.1 }
3680    
3681    
3682     $token = $self->_get_next_token;
3683     return;
3684     } elsif ($token->{tag_name} eq 'a') {
3685     AFE: for my $i (reverse 0..$#$active_formatting_elements) {
3686     my $node = $active_formatting_elements->[$i];
3687     if ($node->[1] eq 'a') {
3688 wakaba 1.3 $self->{parse_error}-> (type => 'in a:a');
3689 wakaba 1.1
3690     unshift @{$self->{token}}, $token;
3691     $token = {type => 'end tag', tag_name => 'a'};
3692     $formatting_end_tag->($token->{tag_name});
3693    
3694     AFE2: for (reverse 0..$#$active_formatting_elements) {
3695     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
3696     splice @$active_formatting_elements, $_, 1;
3697     last AFE2;
3698     }
3699     } # AFE2
3700 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3701     if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
3702     splice @{$self->{open_elements}}, $_, 1;
3703 wakaba 1.1 last OE;
3704     }
3705     } # OE
3706     last AFE;
3707     } elsif ($node->[0] eq '#marker') {
3708     last AFE;
3709     }
3710     } # AFE
3711    
3712     $reconstruct_active_formatting_elements->($insert_to_current);
3713    
3714    
3715     {
3716     my $el;
3717    
3718     $el = $self->{document}->create_element_ns
3719     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3720    
3721     for my $attr_name (keys %{ $token->{attributes}}) {
3722     $el->set_attribute_ns (undef, [undef, $attr_name],
3723     $token->{attributes} ->{$attr_name}->{value});
3724     }
3725    
3726     $insert->($el);
3727 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3728 wakaba 1.1 }
3729    
3730 wakaba 1.3 push @$active_formatting_elements, $self->{open_elements}->[-1];
3731 wakaba 1.1
3732     $token = $self->_get_next_token;
3733     return;
3734     } elsif ({
3735     b => 1, big => 1, em => 1, font => 1, i => 1,
3736 wakaba 1.19 s => 1, small => 1, strile => 1, ## NOTE(review): |strile| is presumably a typo for |strike|; the end-tag formatting table later in this handler uses the same spelling -- change both together so start and end tags stay paired
3737 wakaba 1.1 strong => 1, tt => 1, u => 1,
3738     }->{$token->{tag_name}}) {
3739     $reconstruct_active_formatting_elements->($insert_to_current);
3740    
3741    
3742     {
3743     my $el;
3744    
3745     $el = $self->{document}->create_element_ns
3746     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3747    
3748     for my $attr_name (keys %{ $token->{attributes}}) {
3749     $el->set_attribute_ns (undef, [undef, $attr_name],
3750     $token->{attributes} ->{$attr_name}->{value});
3751     }
3752    
3753     $insert->($el);
3754 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3755 wakaba 1.1 }
3756    
3757 wakaba 1.3 push @$active_formatting_elements, $self->{open_elements}->[-1];
3758 wakaba 1.1
3759     $token = $self->_get_next_token;
3760     return;
3761 wakaba 1.19 } elsif ($token->{tag_name} eq 'nobr') {
3762     $reconstruct_active_formatting_elements->($insert_to_current);
3763    
3764     ## has a |nobr| element in scope
3765     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3766     my $node = $self->{open_elements}->[$_];
3767     if ($node->[1] eq 'nobr') {
3768 wakaba 1.31 $self->{parse_error}-> (type => 'not closed:nobr');
3769 wakaba 1.19 unshift @{$self->{token}}, $token;
3770     $token = {type => 'end tag', tag_name => 'nobr'};
3771     return;
3772     } elsif ({
3773     table => 1, caption => 1, td => 1, th => 1,
3774     button => 1, marquee => 1, object => 1, html => 1,
3775     }->{$node->[1]}) {
3776     last INSCOPE;
3777     }
3778     } # INSCOPE
3779    
3780    
3781     {
3782     my $el;
3783    
3784     $el = $self->{document}->create_element_ns
3785     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3786    
3787     for my $attr_name (keys %{ $token->{attributes}}) {
3788     $el->set_attribute_ns (undef, [undef, $attr_name],
3789     $token->{attributes} ->{$attr_name}->{value});
3790     }
3791    
3792     $insert->($el);
3793     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3794     }
3795    
3796     push @$active_formatting_elements, $self->{open_elements}->[-1];
3797    
3798     $token = $self->_get_next_token;
3799     return;
3800 wakaba 1.1 } elsif ($token->{tag_name} eq 'button') {
3801     ## has a button element in scope
3802 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3803     my $node = $self->{open_elements}->[$_];
3804 wakaba 1.1 if ($node->[1] eq 'button') {
3805 wakaba 1.3 $self->{parse_error}-> (type => 'in button:button');
3806 wakaba 1.1 unshift @{$self->{token}}, $token;
3807     $token = {type => 'end tag', tag_name => 'button'};
3808     return;
3809     } elsif ({
3810     table => 1, caption => 1, td => 1, th => 1,
3811     button => 1, marquee => 1, object => 1, html => 1,
3812     }->{$node->[1]}) {
3813     last INSCOPE;
3814     }
3815     } # INSCOPE
3816    
3817     $reconstruct_active_formatting_elements->($insert_to_current);
3818    
3819    
3820     {
3821     my $el;
3822    
3823     $el = $self->{document}->create_element_ns
3824     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3825    
3826     for my $attr_name (keys %{ $token->{attributes}}) {
3827     $el->set_attribute_ns (undef, [undef, $attr_name],
3828     $token->{attributes} ->{$attr_name}->{value});
3829     }
3830    
3831     $insert->($el);
3832 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3833 wakaba 1.1 }
3834    
3835     push @$active_formatting_elements, ['#marker', ''];
3836    
3837     $token = $self->_get_next_token;
3838     return;
3839     } elsif ($token->{tag_name} eq 'marquee' or
3840     $token->{tag_name} eq 'object') {
3841     $reconstruct_active_formatting_elements->($insert_to_current);
3842    
3843    
3844     {
3845     my $el;
3846    
3847     $el = $self->{document}->create_element_ns
3848     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3849    
3850     for my $attr_name (keys %{ $token->{attributes}}) {
3851     $el->set_attribute_ns (undef, [undef, $attr_name],
3852     $token->{attributes} ->{$attr_name}->{value});
3853     }
3854    
3855     $insert->($el);
3856 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3857 wakaba 1.1 }
3858    
3859     push @$active_formatting_elements, ['#marker', ''];
3860    
3861     $token = $self->_get_next_token;
3862     return;
3863     } elsif ($token->{tag_name} eq 'xmp') {
3864     $reconstruct_active_formatting_elements->($insert_to_current);
3865 wakaba 1.25 $parse_rcdata->('CDATA', $insert);
3866 wakaba 1.1 return;
3867     } elsif ($token->{tag_name} eq 'table') {
3868     ## has a p element in scope
3869 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
3870 wakaba 1.1 if ($_->[1] eq 'p') {
3871     unshift @{$self->{token}}, $token;
3872     $token = {type => 'end tag', tag_name => 'p'};
3873     return;
3874     } elsif ({
3875     table => 1, caption => 1, td => 1, th => 1,
3876     button => 1, marquee => 1, object => 1, html => 1,
3877     }->{$_->[1]}) {
3878     last INSCOPE;
3879     }
3880     } # INSCOPE
3881    
3882    
3883     {
3884     my $el;
3885    
3886     $el = $self->{document}->create_element_ns
3887     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3888    
3889     for my $attr_name (keys %{ $token->{attributes}}) {
3890     $el->set_attribute_ns (undef, [undef, $attr_name],
3891     $token->{attributes} ->{$attr_name}->{value});
3892     }
3893    
3894     $insert->($el);
3895 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3896 wakaba 1.1 }
3897    
3898    
3899 wakaba 1.3 $self->{insertion_mode} = 'in table';
3900 wakaba 1.1
3901     $token = $self->_get_next_token;
3902     return;
3903     } elsif ({
3904     area => 1, basefont => 1, bgsound => 1, br => 1,
3905     embed => 1, img => 1, param => 1, spacer => 1, wbr => 1,
3906     image => 1,
3907     }->{$token->{tag_name}}) {
3908     if ($token->{tag_name} eq 'image') {
3909 wakaba 1.3 $self->{parse_error}-> (type => 'image');
3910 wakaba 1.1 $token->{tag_name} = 'img';
3911     }
3912 wakaba 1.31
3913     ## NOTE: There is an "as if <br>" code clone.
3914 wakaba 1.1 $reconstruct_active_formatting_elements->($insert_to_current);
3915    
3916    
3917     {
3918     my $el;
3919    
3920     $el = $self->{document}->create_element_ns
3921     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3922    
3923     for my $attr_name (keys %{ $token->{attributes}}) {
3924     $el->set_attribute_ns (undef, [undef, $attr_name],
3925     $token->{attributes} ->{$attr_name}->{value});
3926     }
3927    
3928     $insert->($el);
3929 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3930 wakaba 1.1 }
3931    
3932 wakaba 1.3 pop @{$self->{open_elements}};
3933 wakaba 1.1
3934     $token = $self->_get_next_token;
3935     return;
3936     } elsif ($token->{tag_name} eq 'hr') {
3937     ## has a p element in scope
3938 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
3939 wakaba 1.1 if ($_->[1] eq 'p') {
3940     unshift @{$self->{token}}, $token;
3941     $token = {type => 'end tag', tag_name => 'p'};
3942     return;
3943     } elsif ({
3944     table => 1, caption => 1, td => 1, th => 1,
3945     button => 1, marquee => 1, object => 1, html => 1,
3946     }->{$_->[1]}) {
3947     last INSCOPE;
3948     }
3949     } # INSCOPE
3950    
3951    
3952     {
3953     my $el;
3954    
3955     $el = $self->{document}->create_element_ns
3956     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3957    
3958     for my $attr_name (keys %{ $token->{attributes}}) {
3959     $el->set_attribute_ns (undef, [undef, $attr_name],
3960     $token->{attributes} ->{$attr_name}->{value});
3961     }
3962    
3963     $insert->($el);
3964 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3965 wakaba 1.1 }
3966    
3967 wakaba 1.3 pop @{$self->{open_elements}};
3968 wakaba 1.1
3969     $token = $self->_get_next_token;
3970     return;
3971     } elsif ($token->{tag_name} eq 'input') {
3972     $reconstruct_active_formatting_elements->($insert_to_current);
3973    
3974    
3975     {
3976     my $el;
3977    
3978     $el = $self->{document}->create_element_ns
3979     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3980    
3981     for my $attr_name (keys %{ $token->{attributes}}) {
3982     $el->set_attribute_ns (undef, [undef, $attr_name],
3983     $token->{attributes} ->{$attr_name}->{value});
3984     }
3985    
3986     $insert->($el);
3987 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3988 wakaba 1.1 }
3989    
3990 wakaba 1.3 ## TODO: associate with $self->{form_element} if defined
3991     pop @{$self->{open_elements}};
3992 wakaba 1.1
3993     $token = $self->_get_next_token;
3994     return;
3995     } elsif ($token->{tag_name} eq 'isindex') {
3996 wakaba 1.3 $self->{parse_error}-> (type => 'isindex');
3997 wakaba 1.1
3998 wakaba 1.3 if (defined $self->{form_element}) {
3999 wakaba 1.1 ## Ignore the token
4000     $token = $self->_get_next_token;
4001     return;
4002     } else {
4003     my $at = $token->{attributes};
4004 wakaba 1.22 my $form_attrs;
4005     $form_attrs->{action} = $at->{action} if $at->{action};
4006     my $prompt_attr = $at->{prompt};
4007 wakaba 1.1 $at->{name} = {name => 'name', value => 'isindex'};
4008 wakaba 1.22 delete $at->{action};
4009     delete $at->{prompt};
4010 wakaba 1.1 my @tokens = (
4011 wakaba 1.22 {type => 'start tag', tag_name => 'form',
4012     attributes => $form_attrs},
4013 wakaba 1.1 {type => 'start tag', tag_name => 'hr'},
4014     {type => 'start tag', tag_name => 'p'},
4015     {type => 'start tag', tag_name => 'label'},
4016 wakaba 1.22 );
4017     if ($prompt_attr) {
4018     push @tokens, {type => 'character', data => $prompt_attr->{value}};
4019     } else {
4020     push @tokens, {type => 'character',
4021     data => 'This is a searchable index. Insert your search keywords here: '}; # SHOULD
4022     ## TODO: make this configurable
4023     }
4024     push @tokens,
4025 wakaba 1.1 {type => 'start tag', tag_name => 'input', attributes => $at},
4026     #{type => 'character', data => ''}, # SHOULD
4027     {type => 'end tag', tag_name => 'label'},
4028     {type => 'end tag', tag_name => 'p'},
4029     {type => 'start tag', tag_name => 'hr'},
4030 wakaba 1.22 {type => 'end tag', tag_name => 'form'};
4031 wakaba 1.1 $token = shift @tokens;
4032     unshift @{$self->{token}}, (@tokens);
4033     return;
4034     }
4035 wakaba 1.25 } elsif ($token->{tag_name} eq 'textarea') {
4036 wakaba 1.1 my $tag_name = $token->{tag_name};
4037     my $el;
4038    
4039     $el = $self->{document}->create_element_ns
4040     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
4041    
4042     for my $attr_name (keys %{ $token->{attributes}}) {
4043     $el->set_attribute_ns (undef, [undef, $attr_name],
4044     $token->{attributes} ->{$attr_name}->{value});
4045     }
4046    
4047    
4048 wakaba 1.25 ## TODO: $self->{form_element} if defined
4049     $self->{content_model_flag} = 'RCDATA';
4050 wakaba 1.13 delete $self->{escape}; # MUST
4051 wakaba 1.1
4052     $insert->($el);
4053    
4054     my $text = '';
4055 wakaba 1.25 $token = $self->_get_next_token;
4056     if ($token->{type} eq 'character') {
4057     $token->{data} =~ s/^\x0A//;
4058     unless (length $token->{data}) {
4059     $token = $self->_get_next_token;
4060 wakaba 1.8 }
4061     }
4062 wakaba 1.1 while ($token->{type} eq 'character') {
4063     $text .= $token->{data};
4064     $token = $self->_get_next_token;
4065     }
4066     if (length $text) {
4067     $el->manakai_append_text ($text);
4068     }
4069    
4070     $self->{content_model_flag} = 'PCDATA';
4071    
4072     if ($token->{type} eq 'end tag' and
4073     $token->{tag_name} eq $tag_name) {
4074     ## Ignore the token
4075     } else {
4076 wakaba 1.25 $self->{parse_error}-> (type => 'in RCDATA:#'.$token->{type});
4077 wakaba 1.1 }
4078     $token = $self->_get_next_token;
4079     return;
4080 wakaba 1.25 } elsif ({
4081     iframe => 1,
4082     noembed => 1,
4083     noframes => 1,
4084     noscript => 0, ## TODO: 1 if scripting is enabled
4085     }->{$token->{tag_name}}) {
4086     $parse_rcdata->('CDATA', $insert);
4087     return;
4088 wakaba 1.1 } elsif ($token->{tag_name} eq 'select') {
4089     $reconstruct_active_formatting_elements->($insert_to_current);
4090    
4091    
4092     {
4093     my $el;
4094    
4095     $el = $self->{document}->create_element_ns
4096     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
4097    
4098     for my $attr_name (keys %{ $token->{attributes}}) {
4099     $el->set_attribute_ns (undef, [undef, $attr_name],
4100     $token->{attributes} ->{$attr_name}->{value});
4101     }
4102    
4103     $insert->($el);
4104 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
4105 wakaba 1.1 }
4106    
4107    
4108 wakaba 1.3 $self->{insertion_mode} = 'in select';
4109 wakaba 1.1 $token = $self->_get_next_token;
4110     return;
4111     } elsif ({
4112     caption => 1, col => 1, colgroup => 1, frame => 1,
4113     frameset => 1, head => 1, option => 1, optgroup => 1,
4114     tbody => 1, td => 1, tfoot => 1, th => 1,
4115     thead => 1, tr => 1,
4116     }->{$token->{tag_name}}) {
4117 wakaba 1.3 $self->{parse_error}-> (type => 'in body:'.$token->{tag_name});
4118 wakaba 1.1 ## Ignore the token
4119     $token = $self->_get_next_token;
4120     return;
4121    
4122     ## ISSUE: An issue on HTML5 new elements in the spec.
4123     } else {
4124     $reconstruct_active_formatting_elements->($insert_to_current);
4125    
4126    
4127     {
4128     my $el;
4129    
4130     $el = $self->{document}->create_element_ns
4131     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
4132    
4133     for my $attr_name (keys %{ $token->{attributes}}) {
4134     $el->set_attribute_ns (undef, [undef, $attr_name],
4135     $token->{attributes} ->{$attr_name}->{value});
4136     }
4137    
4138     $insert->($el);
4139 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
4140 wakaba 1.1 }
4141    
4142    
4143     $token = $self->_get_next_token;
4144     return;
4145     }
4146     } elsif ($token->{type} eq 'end tag') {
4147     if ($token->{tag_name} eq 'body') {
4148 wakaba 1.20 if (@{$self->{open_elements}} > 1 and
4149     $self->{open_elements}->[1]->[1] eq 'body') {
4150     for (@{$self->{open_elements}}) {
4151     unless ({
4152     dd => 1, dt => 1, li => 1, p => 1, td => 1,
4153     th => 1, tr => 1, body => 1, html => 1,
4154 wakaba 1.31 tbody => 1, tfoot => 1, thead => 1,
4155 wakaba 1.20 }->{$_->[1]}) {
4156     $self->{parse_error}-> (type => 'not closed:'.$_->[1]);
4157     }
4158 wakaba 1.1 }
4159 wakaba 1.20
4160 wakaba 1.3 $self->{insertion_mode} = 'after body';
4161 wakaba 1.1 $token = $self->_get_next_token;
4162     return;
4163     } else {
4164 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4165 wakaba 1.1 ## Ignore the token
4166     $token = $self->_get_next_token;
4167     return;
4168     }
4169     } elsif ($token->{tag_name} eq 'html') {
4170 wakaba 1.3 if (@{$self->{open_elements}} > 1 and $self->{open_elements}->[1]->[1] eq 'body') {
4171 wakaba 1.1 ## ISSUE: There is an issue in the spec.
4172 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'body') { ## current node is not |body|: something is still open
4173     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]); ## name the current node; ->[1] is always |body| here (checked by the enclosing condition)
4174 wakaba 1.1 }
4175 wakaba 1.3 $self->{insertion_mode} = 'after body';
4176 wakaba 1.1 ## reprocess
4177     return;
4178     } else {
4179 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4180 wakaba 1.1 ## Ignore the token
4181     $token = $self->_get_next_token;
4182     return;
4183     }
4184     } elsif ({
4185     address => 1, blockquote => 1, center => 1, dir => 1,
4186     div => 1, dl => 1, fieldset => 1, listing => 1,
4187     menu => 1, ol => 1, pre => 1, ul => 1,
4188     p => 1,
4189     dd => 1, dt => 1, li => 1,
4190     button => 1, marquee => 1, object => 1,
4191     }->{$token->{tag_name}}) {
4192     ## has an element in scope
4193     my $i;
4194 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4195     my $node = $self->{open_elements}->[$_];
4196 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4197     ## generate implied end tags
4198     if ({
4199     dd => ($token->{tag_name} ne 'dd'),
4200     dt => ($token->{tag_name} ne 'dt'),
4201     li => ($token->{tag_name} ne 'li'),
4202     p => ($token->{tag_name} ne 'p'),
4203     td => 1, th => 1, tr => 1,
4204 wakaba 1.31 tbody => 1, tfoot=> 1, thead => 1,
4205 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4206 wakaba 1.1 unshift @{$self->{token}}, $token;
4207     $token = {type => 'end tag',
4208 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4209 wakaba 1.1 return;
4210     }
4211     $i = $_;
4212     last INSCOPE unless $token->{tag_name} eq 'p';
4213     } elsif ({
4214     table => 1, caption => 1, td => 1, th => 1,
4215     button => 1, marquee => 1, object => 1, html => 1,
4216     }->{$node->[1]}) {
4217     last INSCOPE;
4218     }
4219     } # INSCOPE
4220    
4221 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
4222 wakaba 1.32 if (defined $i) {
4223     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4224     } else {
4225     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4226     }
4227 wakaba 1.1 }
4228    
4229 wakaba 1.31 if (defined $i) {
4230     splice @{$self->{open_elements}}, $i;
4231     } elsif ($token->{tag_name} eq 'p') {
4232     ## As if <p>, then reprocess the current token
4233     my $el;
4234    
4235     $el = $self->{document}->create_element_ns
4236     (q<http://www.w3.org/1999/xhtml>, [undef, 'p']);
4237    
4238     $insert->($el);
4239     }
4240 wakaba 1.1 $clear_up_to_marker->()
4241     if {
4242     button => 1, marquee => 1, object => 1,
4243     }->{$token->{tag_name}};
4244 wakaba 1.12 $token = $self->_get_next_token;
4245     return;
4246     } elsif ($token->{tag_name} eq 'form') {
4247     ## has an element in scope
4248     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4249     my $node = $self->{open_elements}->[$_];
4250     if ($node->[1] eq $token->{tag_name}) {
4251     ## generate implied end tags
4252     if ({
4253     dd => 1, dt => 1, li => 1, p => 1,
4254     td => 1, th => 1, tr => 1,
4255 wakaba 1.31 tbody => 1, tfoot=> 1, thead => 1,
4256 wakaba 1.12 }->{$self->{open_elements}->[-1]->[1]}) {
4257     unshift @{$self->{token}}, $token;
4258     $token = {type => 'end tag',
4259     tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4260     return;
4261     }
4262     last INSCOPE;
4263     } elsif ({
4264     table => 1, caption => 1, td => 1, th => 1,
4265     button => 1, marquee => 1, object => 1, html => 1,
4266     }->{$node->[1]}) {
4267     last INSCOPE;
4268     }
4269     } # INSCOPE
4270    
4271     if ($self->{open_elements}->[-1]->[1] eq $token->{tag_name}) {
4272     pop @{$self->{open_elements}};
4273     } else {
4274     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4275     }
4276    
4277     undef $self->{form_element};
4278 wakaba 1.1 $token = $self->_get_next_token;
4279     return;
4280     } elsif ({
4281     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
4282     }->{$token->{tag_name}}) {
4283     ## has an element in scope
4284     my $i;
4285 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4286     my $node = $self->{open_elements}->[$_];
4287 wakaba 1.1 if ({
4288     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
4289     }->{$node->[1]}) {
4290     ## generate implied end tags
4291     if ({
4292     dd => 1, dt => 1, li => 1, p => 1,
4293     td => 1, th => 1, tr => 1,
4294 wakaba 1.31 tbody => 1, tfoot=> 1, thead => 1,
4295 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4296 wakaba 1.1 unshift @{$self->{token}}, $token;
4297     $token = {type => 'end tag',
4298 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4299 wakaba 1.1 return;
4300     }
4301     $i = $_;
4302     last INSCOPE;
4303     } elsif ({
4304     table => 1, caption => 1, td => 1, th => 1,
4305     button => 1, marquee => 1, object => 1, html => 1,
4306     }->{$node->[1]}) {
4307     last INSCOPE;
4308     }
4309     } # INSCOPE
4310    
4311 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) { ## current node is not the heading being closed
4312     $self->{parse_error}-> (type => defined $i ? 'not closed:'.$self->{open_elements}->[-1]->[1] : 'unmatched end tag:'.$token->{tag_name}); ## $i is set only when an h1-h6 is in scope; otherwise the end tag itself is unmatched
4313 wakaba 1.1 }
4314    
4315 wakaba 1.3 splice @{$self->{open_elements}}, $i if defined $i;
4316 wakaba 1.1 $token = $self->_get_next_token;
4317     return;
4318     } elsif ({
4319     a => 1,
4320     b => 1, big => 1, em => 1, font => 1, i => 1,
4321     nobr => 1, s => 1, small => 1, strile => 1, ## NOTE(review): |strile| is presumably a typo for |strike|; the start-tag formatting table earlier in this handler uses the same spelling -- change both together so start and end tags stay paired
4322     strong => 1, tt => 1, u => 1,
4323     }->{$token->{tag_name}}) {
4324     $formatting_end_tag->($token->{tag_name});
4325 wakaba 1.31 return;
4326     } elsif ($token->{tag_name} eq 'br') {
4327     $self->{parse_error}-> (type => 'unmatched end tag:br');
4328    
4329     ## As if <br>
4330     $reconstruct_active_formatting_elements->($insert_to_current);
4331    
4332     my $el;
4333    
4334     $el = $self->{document}->create_element_ns
4335     (q<http://www.w3.org/1999/xhtml>, [undef, 'br']);
4336    
4337     $insert->($el);
4338    
4339     ## Ignore the token.
4340     $token = $self->_get_next_token;
4341 wakaba 1.1 return;
4342     } elsif ({
4343     caption => 1, col => 1, colgroup => 1, frame => 1,
4344     frameset => 1, head => 1, option => 1, optgroup => 1,
4345     tbody => 1, td => 1, tfoot => 1, th => 1,
4346     thead => 1, tr => 1,
4347 wakaba 1.31 area => 1, basefont => 1, bgsound => 1,
4348 wakaba 1.1 embed => 1, hr => 1, iframe => 1, image => 1,
4349 wakaba 1.5 img => 1, input => 1, isindex => 1, noembed => 1,
4350 wakaba 1.1 noframes => 1, param => 1, select => 1, spacer => 1,
4351     table => 1, textarea => 1, wbr => 1,
4352     noscript => 0, ## TODO: if scripting is enabled
4353     }->{$token->{tag_name}}) {
4354 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4355 wakaba 1.1 ## Ignore the token
4356     $token = $self->_get_next_token;
4357     return;
4358    
4359     ## ISSUE: Issue on HTML5 new elements in spec
4360    
4361     } else {
4362     ## Step 1
4363     my $node_i = -1;
4364 wakaba 1.3 my $node = $self->{open_elements}->[$node_i];
4365 wakaba 1.1
4366     ## Step 2
4367     S2: {
4368     if ($node->[1] eq $token->{tag_name}) {
4369     ## Step 1
4370     ## generate implied end tags
4371     if ({
4372     dd => 1, dt => 1, li => 1, p => 1,
4373     td => 1, th => 1, tr => 1,
4374 wakaba 1.31 tbody => 1, tfoot=> 1, thead => 1,
4375 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4376 wakaba 1.1 unshift @{$self->{token}}, $token;
4377     $token = {type => 'end tag',
4378 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4379 wakaba 1.1 return;
4380     }
4381    
4382     ## Step 2
4383 wakaba 1.3 if ($token->{tag_name} ne $self->{open_elements}->[-1]->[1]) {
4384     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4385 wakaba 1.1 }
4386    
4387     ## Step 3
4388 wakaba 1.3 splice @{$self->{open_elements}}, $node_i;
4389    
4390     $token = $self->_get_next_token;
4391 wakaba 1.1 last S2;
4392     } else {
4393     ## Step 3
4394     if (not $formatting_category->{$node->[1]} and
4395     #not $phrasing_category->{$node->[1]} and
4396     ($special_category->{$node->[1]} or
4397     $scoping_category->{$node->[1]})) {
4398 wakaba 1.25 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4399 wakaba 1.1 ## Ignore the token
4400     $token = $self->_get_next_token;
4401     last S2;
4402     }
4403     }
4404    
4405     ## Step 4
4406     $node_i--;
4407 wakaba 1.3 $node = $self->{open_elements}->[$node_i];
4408 wakaba 1.1
4409     ## Step 5;
4410     redo S2;
4411     } # S2
4412 wakaba 1.3 return;
4413 wakaba 1.1 }
4414     }
4415     }; # $in_body
4416    
4417     B: {
4418 wakaba 1.3 if ($phase eq 'main') {
4419 wakaba 1.1 if ($token->{type} eq 'DOCTYPE') {
4420 wakaba 1.3 $self->{parse_error}-> (type => 'in html:#DOCTYPE');
4421 wakaba 1.1 ## Ignore the token
4422     ## Stay in the phase
4423     $token = $self->_get_next_token;
4424     redo B;
4425     } elsif ($token->{type} eq 'start tag' and
4426     $token->{tag_name} eq 'html') {
4427 wakaba 1.29 ## ISSUE: "aa<html>" is not a parse error.
4428     ## ISSUE: "<html>" in fragment is not a parse error.
4429 wakaba 1.28 unless ($token->{first_start_tag}) {
4430     $self->{parse_error}-> (type => 'not first start tag');
4431     }
4432 wakaba 1.3 my $top_el = $self->{open_elements}->[0]->[0];
4433 wakaba 1.1 for my $attr_name (keys %{$token->{attributes}}) {
4434     unless ($top_el->has_attribute_ns (undef, $attr_name)) {
4435     $top_el->set_attribute_ns
4436     (undef, [undef, $attr_name],
4437     $token->{attributes}->{$attr_name}->{value});
4438     }
4439     }
4440     $token = $self->_get_next_token;
4441     redo B;
4442     } elsif ($token->{type} eq 'end-of-file') {
4443     ## Generate implied end tags
4444     if ({
4445     dd => 1, dt => 1, li => 1, p => 1, td => 1, th => 1, tr => 1,
4446 wakaba 1.31 tbody => 1, tfoot=> 1, thead => 1,
4447 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4448 wakaba 1.1 unshift @{$self->{token}}, $token;
4449 wakaba 1.3 $token = {type => 'end tag', tag_name => $self->{open_elements}->[-1]->[1]};
4450 wakaba 1.1 redo B;
4451     }
4452    
4453 wakaba 1.3 if (@{$self->{open_elements}} > 2 or
4454     (@{$self->{open_elements}} == 2 and $self->{open_elements}->[1]->[1] ne 'body')) {
4455     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4456     } elsif (defined $self->{inner_html_node} and
4457     @{$self->{open_elements}} > 1 and
4458     $self->{open_elements}->[1]->[1] ne 'body') {
4459     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4460 wakaba 1.1 }
4461    
4462     ## Stop parsing
4463     last B;
4464    
4465     ## ISSUE: There is an issue in the spec.
4466     } else {
4467 wakaba 1.3 if ($self->{insertion_mode} eq 'before head') {
4468 wakaba 1.1 if ($token->{type} eq 'character') {
4469     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4470 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4471 wakaba 1.1 unless (length $token->{data}) {
4472     $token = $self->_get_next_token;
4473     redo B;
4474     }
4475     }
4476     ## As if <head>
4477    
4478 wakaba 1.3 $self->{head_element} = $self->{document}->create_element_ns
4479 wakaba 1.1 (q<http://www.w3.org/1999/xhtml>, [undef, 'head']);
4480    
4481 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4482     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
4483     $self->{insertion_mode} = 'in head';
4484 wakaba 1.1 ## reprocess
4485     redo B;
4486     } elsif ($token->{type} eq 'comment') {
4487     my $comment = $self->{document}->create_comment ($token->{data});
4488 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4489 wakaba 1.1 $token = $self->_get_next_token;
4490     redo B;
4491     } elsif ($token->{type} eq 'start tag') {
4492     my $attr = $token->{tag_name} eq 'head' ? $token->{attributes} : {};
4493    
4494 wakaba 1.3 $self->{head_element} = $self->{document}->create_element_ns
4495 wakaba 1.1 (q<http://www.w3.org/1999/xhtml>, [undef, 'head']);
4496    
4497     for my $attr_name (keys %{ $attr}) {
4498 wakaba 1.3 $self->{head_element}->set_attribute_ns (undef, [undef, $attr_name],
4499 wakaba 1.1 $attr ->{$attr_name}->{value});
4500     }
4501    
4502 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4503     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
4504     $self->{insertion_mode} = 'in head';
4505 wakaba 1.1 if ($token->{tag_name} eq 'head') {
4506     $token = $self->_get_next_token;
4507     #} elsif ({
4508     # base => 1, link => 1, meta => 1,
4509     # script => 1, style => 1, title => 1,
4510     # }->{$token->{tag_name}}) {
4511     # ## reprocess
4512     } else {
4513     ## reprocess
4514     }
4515     redo B;
4516     } elsif ($token->{type} eq 'end tag') {
4517 wakaba 1.31 if ({
4518     head => 1, body => 1, html => 1,
4519     p => 1, br => 1,
4520     }->{$token->{tag_name}}) {
4521 wakaba 1.1 ## As if <head>
4522    
4523 wakaba 1.3 $self->{head_element} = $self->{document}->create_element_ns
4524 wakaba 1.1 (q<http://www.w3.org/1999/xhtml>, [undef, 'head']);
4525    
4526 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4527     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
4528     $self->{insertion_mode} = 'in head';
4529 wakaba 1.1 ## reprocess
4530     redo B;
4531     } else {
4532 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4533 wakaba 1.21 ## Ignore the token ## ISSUE: An issue in the spec.
4534 wakaba 1.1 $token = $self->_get_next_token;
4535     redo B;
4536     }
4537     } else {
4538     die "$0: $token->{type}: Unknown type";
4539     }
4540 wakaba 1.25 } elsif ($self->{insertion_mode} eq 'in head' or
4541     $self->{insertion_mode} eq 'in head noscript' or
4542     $self->{insertion_mode} eq 'after head') {
4543 wakaba 1.1 if ($token->{type} eq 'character') {
4544     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4545 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4546 wakaba 1.1 unless (length $token->{data}) {
4547     $token = $self->_get_next_token;
4548     redo B;
4549     }
4550     }
4551    
4552     #
4553     } elsif ($token->{type} eq 'comment') {
4554     my $comment = $self->{document}->create_comment ($token->{data});
4555 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4556 wakaba 1.1 $token = $self->_get_next_token;
4557     redo B;
4558     } elsif ($token->{type} eq 'start tag') {
4559 wakaba 1.25 if ({base => ($self->{insertion_mode} eq 'in head' or
4560     $self->{insertion_mode} eq 'after head'),
4561     link => 1, meta => 1}->{$token->{tag_name}}) {
4562     ## NOTE: There is a "as if in head" code clone.
4563     if ($self->{insertion_mode} eq 'after head') {
4564     $self->{parse_error}-> (type => 'after head:'.$token->{tag_name});
4565     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
4566     }
4567 wakaba 1.1
4568 wakaba 1.25 {
4569     my $el;
4570    
4571     $el = $self->{document}->create_element_ns
4572     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
4573 wakaba 1.1
4574 wakaba 1.25 for my $attr_name (keys %{ $token->{attributes}}) {
4575     $el->set_attribute_ns (undef, [undef, $attr_name],
4576     $token->{attributes} ->{$attr_name}->{value});
4577 wakaba 1.1 }
4578    
4579 wakaba 1.25 $self->{open_elements}->[-1]->[0]->append_child ($el);
4580     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
4581     }
4582    
4583     pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
4584 wakaba 1.26 ## TODO: Extracting |charset| from |meta|.
4585 wakaba 1.25 pop @{$self->{open_elements}}
4586     if $self->{insertion_mode} eq 'after head';
4587 wakaba 1.1 $token = $self->_get_next_token;
4588 wakaba 1.25 redo B;
4589     } elsif ($token->{tag_name} eq 'title' and
4590     $self->{insertion_mode} eq 'in head') {
4591     ## NOTE: There is a "as if in head" code clone.
4592     if ($self->{insertion_mode} eq 'after head') {
4593     $self->{parse_error}-> (type => 'after head:'.$token->{tag_name});
4594     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
4595     }
4596 wakaba 1.31 my $parent = defined $self->{head_element} ? $self->{head_element}
4597     : $self->{open_elements}->[-1]->[0];
4598     $parse_rcdata->('RCDATA', sub { $parent->append_child ($_[0]) });
4599 wakaba 1.25 pop @{$self->{open_elements}}
4600     if $self->{insertion_mode} eq 'after head';
4601 wakaba 1.1 redo B;
4602     } elsif ($token->{tag_name} eq 'style') {
4603 wakaba 1.25 ## NOTE: Or (scripting is enabled and tag_name eq 'noscript' and
4604     ## insertion mode 'in head')
4605     ## NOTE: There is a "as if in head" code clone.
4606     if ($self->{insertion_mode} eq 'after head') {
4607     $self->{parse_error}-> (type => 'after head:'.$token->{tag_name});
4608     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
4609     }
4610     $parse_rcdata->('CDATA', $insert_to_current);
4611     pop @{$self->{open_elements}}
4612     if $self->{insertion_mode} eq 'after head';
4613     redo B;
4614     } elsif ($token->{tag_name} eq 'noscript') {
4615     if ($self->{insertion_mode} eq 'in head') {
4616     ## NOTE: and scripting is disabled
4617    
4618     {
4619     my $el;
4620    
4621 wakaba 1.1 $el = $self->{document}->create_element_ns
4622     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
4623    
4624 wakaba 1.25 for my $attr_name (keys %{ $token->{attributes}}) {
4625 wakaba 1.1 $el->set_attribute_ns (undef, [undef, $attr_name],
4626 wakaba 1.25 $token->{attributes} ->{$attr_name}->{value});
4627 wakaba 1.1 }
4628    
4629 wakaba 1.25 $self->{open_elements}->[-1]->[0]->append_child ($el);
4630     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
4631     }
4632    
4633     $self->{insertion_mode} = 'in head noscript';
4634     $token = $self->_get_next_token;
4635     redo B;
4636     } elsif ($self->{insertion_mode} eq 'in head noscript') { ## nested <noscript> start tag inside <noscript>
4637 wakaba 1.30 $self->{parse_error}-> (type => 'in noscript:noscript');
4638 wakaba 1.25 $token = $self->_get_next_token; ## Ignore the token: advance first, otherwise |redo B| reprocesses the same token forever
4639     redo B;
4640 wakaba 1.24 } else {
4641 wakaba 1.25 #
4642 wakaba 1.24 }
4643 wakaba 1.25 } elsif ($token->{tag_name} eq 'head' and
4644     $self->{insertion_mode} ne 'after head') {
4645     $self->{parse_error}-> (type => 'in head:head'); # or in head noscript
4646 wakaba 1.1 ## Ignore the token
4647     $token = $self->_get_next_token;
4648     redo B;
4649 wakaba 1.25 } elsif ($self->{insertion_mode} ne 'in head noscript' and
4650     $token->{tag_name} eq 'script') {
4651     if ($self->{insertion_mode} eq 'after head') {
4652     $self->{parse_error}-> (type => 'after head:'.$token->{tag_name});
4653     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
4654     }
4655     ## NOTE: There is a "as if in head" code clone.
4656     $script_start_tag->($insert_to_current);
4657     pop @{$self->{open_elements}}
4658     if $self->{insertion_mode} eq 'after head';
4659 wakaba 1.1 redo B;
4660 wakaba 1.25 } elsif ($self->{insertion_mode} eq 'after head' and
4661     $token->{tag_name} eq 'body') {
4662 wakaba 1.1
4663     {
4664     my $el;
4665    
4666     $el = $self->{document}->create_element_ns
4667     (q<http://www.w3.org/1999/xhtml>, [undef, 'body']);
4668    
4669     for my $attr_name (keys %{ $token->{attributes}}) {
4670     $el->set_attribute_ns (undef, [undef, $attr_name],
4671     $token->{attributes} ->{$attr_name}->{value});
4672     }
4673    
4674 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
4675     push @{$self->{open_elements}}, [$el, 'body'];
4676 wakaba 1.1 }
4677    
4678 wakaba 1.3 $self->{insertion_mode} = 'in body';
4679 wakaba 1.1 $token = $self->_get_next_token;
4680     redo B;
4681 wakaba 1.25 } elsif ($self->{insertion_mode} eq 'after head' and
4682     $token->{tag_name} eq 'frameset') {
4683 wakaba 1.1
4684     {
4685     my $el;
4686    
4687     $el = $self->{document}->create_element_ns
4688     (q<http://www.w3.org/1999/xhtml>, [undef, 'frameset']);
4689    
4690     for my $attr_name (keys %{ $token->{attributes}}) {
4691     $el->set_attribute_ns (undef, [undef, $attr_name],
4692     $token->{attributes} ->{$attr_name}->{value});
4693     }
4694    
4695 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
4696     push @{$self->{open_elements}}, [$el, 'frameset'];
4697 wakaba 1.1 }
4698    
4699 wakaba 1.3 $self->{insertion_mode} = 'in frameset';
4700 wakaba 1.1 $token = $self->_get_next_token;
4701     redo B;
4702 wakaba 1.25 } else {
4703     #
4704     }
4705     } elsif ($token->{type} eq 'end tag') {
4706     if ($self->{insertion_mode} eq 'in head' and
4707     $token->{tag_name} eq 'head') {
4708     pop @{$self->{open_elements}};
4709     $self->{insertion_mode} = 'after head';
4710     $token = $self->_get_next_token;
4711     redo B;
4712     } elsif ($self->{insertion_mode} eq 'in head noscript' and
4713     $token->{tag_name} eq 'noscript') {
4714     pop @{$self->{open_elements}};
4715 wakaba 1.3 $self->{insertion_mode} = 'in head';
4716 wakaba 1.25 $token = $self->_get_next_token;
4717     redo B;
4718     } elsif ($self->{insertion_mode} eq 'in head' and
4719 wakaba 1.31 {
4720     body => 1, html => 1,
4721     p => 1, br => 1,
4722     }->{$token->{tag_name}}) {
4723     #
4724     } elsif ($self->{insertion_mode} eq 'in head noscript' and
4725     {
4726     p => 1, br => 1,
4727     }->{$token->{tag_name}}) {
4728 wakaba 1.25 #
4729     } elsif ($self->{insertion_mode} ne 'after head') {
4730     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4731     ## Ignore the token
4732     $token = $self->_get_next_token;
4733 wakaba 1.1 redo B;
4734     } else {
4735 wakaba 1.25 #
4736     }
4737 wakaba 1.1 } else {
4738     #
4739     }
4740 wakaba 1.25
4741     ## As if </head> or </noscript> or <body>
4742     if ($self->{insertion_mode} eq 'in head') {
4743     pop @{$self->{open_elements}};
4744     $self->{insertion_mode} = 'after head';
4745     } elsif ($self->{insertion_mode} eq 'in head noscript') {
4746     pop @{$self->{open_elements}};
4747     $self->{parse_error}-> (type => 'in noscript:'.(defined $token->{tag_name} ? ($token->{type} eq 'end tag' ? '/' : '') . $token->{tag_name} : '#' . $token->{type}));
4748     $self->{insertion_mode} = 'in head';
4749     } else { # 'after head'
4750    
4751 wakaba 1.1 {
4752     my $el;
4753    
4754     $el = $self->{document}->create_element_ns
4755     (q<http://www.w3.org/1999/xhtml>, [undef, 'body']);
4756    
4757 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
4758     push @{$self->{open_elements}}, [$el, 'body'];
4759 wakaba 1.1 }
4760    
4761 wakaba 1.25 $self->{insertion_mode} = 'in body';
4762     }
4763 wakaba 1.1 ## reprocess
4764     redo B;
4765 wakaba 1.25
4766     ## ISSUE: An issue in the spec.
4767 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in body') {
4768 wakaba 1.1 if ($token->{type} eq 'character') {
4769     ## NOTE: There is a code clone of "character in body".
4770     $reconstruct_active_formatting_elements->($insert_to_current);
4771    
4772 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4773 wakaba 1.1
4774     $token = $self->_get_next_token;
4775     redo B;
4776     } elsif ($token->{type} eq 'comment') {
4777     ## NOTE: There is a code clone of "comment in body".
4778     my $comment = $self->{document}->create_comment ($token->{data});
4779 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4780 wakaba 1.1 $token = $self->_get_next_token;
4781     redo B;
4782     } else {
4783     $in_body->($insert_to_current);
4784     redo B;
4785     }
4786 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in table') {
4787 wakaba 1.1 if ($token->{type} eq 'character') {
4788     ## NOTE: There are "character in table" code clones.
4789     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4790 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4791 wakaba 1.1
4792     unless (length $token->{data}) {
4793     $token = $self->_get_next_token;
4794     redo B;
4795     }
4796     }
4797    
4798 wakaba 1.3 $self->{parse_error}-> (type => 'in table:#character');
4799    
4800 wakaba 1.1 ## As if in body, but insert into foster parent element
4801     ## ISSUE: Spec says that "whenever a node would be inserted
4802     ## into the current node" while characters might not
4803     ## result in a new Text node.
4804     $reconstruct_active_formatting_elements->($insert_to_foster);
4805    
4806     if ({
4807     table => 1, tbody => 1, tfoot => 1,
4808     thead => 1, tr => 1,
4809 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4810 wakaba 1.1 # MUST
4811     my $foster_parent_element;
4812     my $next_sibling;
4813     my $prev_sibling;
4814 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
4815     if ($self->{open_elements}->[$_]->[1] eq 'table') {
4816     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
4817 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
4818     $foster_parent_element = $parent;
4819 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
4820 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
4821     } else {
4822 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
4823 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
4824     }
4825     last OE;
4826     }
4827     } # OE
4828 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
4829 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
4830     unless defined $foster_parent_element;
4831     if (defined $prev_sibling and
4832     $prev_sibling->node_type == 3) {
4833     $prev_sibling->manakai_append_text ($token->{data});
4834     } else {
4835     $foster_parent_element->insert_before
4836     ($self->{document}->create_text_node ($token->{data}),
4837     $next_sibling);
4838     }
4839     } else {
4840 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4841 wakaba 1.1 }
4842    
4843     $token = $self->_get_next_token;
4844     redo B;
4845     } elsif ($token->{type} eq 'comment') {
4846     my $comment = $self->{document}->create_comment ($token->{data});
4847 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4848 wakaba 1.1 $token = $self->_get_next_token;
4849     redo B;
4850     } elsif ($token->{type} eq 'start tag') {
4851     if ({
4852     caption => 1,
4853     colgroup => 1,
4854     tbody => 1, tfoot => 1, thead => 1,
4855     }->{$token->{tag_name}}) {
4856     ## Clear back to table context
4857 wakaba 1.3 while ($self->{open_elements}->[-1]->[1] ne 'table' and
4858     $self->{open_elements}->[-1]->[1] ne 'html') {
4859     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4860     pop @{$self->{open_elements}};
4861 wakaba 1.1 }
4862    
4863     push @$active_formatting_elements, ['#marker', '']
4864     if $token->{tag_name} eq 'caption';
4865    
4866    
4867     {
4868     my $el;
4869    
4870     $el = $self->{document}->create_element_ns
4871     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
4872    
4873     for my $attr_name (keys %{ $token->{attributes}}) {
4874     $el->set_attribute_ns (undef, [undef, $attr_name],
4875     $token->{attributes} ->{$attr_name}->{value});
4876     }
4877    
4878 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
4879     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
4880 wakaba 1.1 }
4881    
4882 wakaba 1.3 $self->{insertion_mode} = {
4883 wakaba 1.1 caption => 'in caption',
4884     colgroup => 'in column group',
4885     tbody => 'in table body',
4886     tfoot => 'in table body',
4887     thead => 'in table body',
4888     }->{$token->{tag_name}};
4889     $token = $self->_get_next_token;
4890     redo B;
4891     } elsif ({
4892     col => 1,
4893     td => 1, th => 1, tr => 1,
4894     }->{$token->{tag_name}}) {
4895     ## Clear back to table context
4896 wakaba 1.3 while ($self->{open_elements}->[-1]->[1] ne 'table' and
4897     $self->{open_elements}->[-1]->[1] ne 'html') {
4898     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4899     pop @{$self->{open_elements}};
4900 wakaba 1.1 }
4901    
4902    
4903     {
4904     my $el;
4905    
4906     $el = $self->{document}->create_element_ns
4907     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name} eq 'col' ? 'colgroup' : 'tbody']);
4908    
4909 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
4910     push @{$self->{open_elements}}, [$el, $token->{tag_name} eq 'col' ? 'colgroup' : 'tbody'];
4911 wakaba 1.1 }
4912    
4913 wakaba 1.3 $self->{insertion_mode} = $token->{tag_name} eq 'col'
4914 wakaba 1.1 ? 'in column group' : 'in table body';
4915     ## reprocess
4916     redo B;
4917     } elsif ($token->{tag_name} eq 'table') {
4918     ## NOTE: There are code clones for this "table in table"
4919 wakaba 1.3 $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4920 wakaba 1.1
4921     ## As if </table>
4922     ## have a table element in table scope
4923     my $i;
4924 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4925     my $node = $self->{open_elements}->[$_];
4926 wakaba 1.1 if ($node->[1] eq 'table') {
4927     $i = $_;
4928     last INSCOPE;
4929     } elsif ({
4930     table => 1, html => 1,
4931     }->{$node->[1]}) {
4932     last INSCOPE;
4933     }
4934     } # INSCOPE
4935     unless (defined $i) {
4936 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:table');
4937 wakaba 1.1 ## Ignore tokens </table><table>
4938     $token = $self->_get_next_token;
4939     redo B;
4940     }
4941    
4942     ## generate implied end tags
4943     if ({
4944     dd => 1, dt => 1, li => 1, p => 1,
4945     td => 1, th => 1, tr => 1,
4946 wakaba 1.31 tbody => 1, tfoot=> 1, thead => 1,
4947 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4948 wakaba 1.1 unshift @{$self->{token}}, $token; # <table>
4949     $token = {type => 'end tag', tag_name => 'table'};
4950     unshift @{$self->{token}}, $token;
4951     $token = {type => 'end tag',
4952 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4953 wakaba 1.1 redo B;
4954     }
4955    
4956 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
4957     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4958 wakaba 1.1 }
4959    
4960 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4961 wakaba 1.1
4962 wakaba 1.3 $self->_reset_insertion_mode;
4963 wakaba 1.1
4964     ## reprocess
4965     redo B;
4966     } else {
4967     #
4968     }
4969     } elsif ($token->{type} eq 'end tag') {
4970     if ($token->{tag_name} eq 'table') {
4971     ## have a table element in table scope
4972     my $i;
4973 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4974     my $node = $self->{open_elements}->[$_];
4975 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4976     $i = $_;
4977     last INSCOPE;
4978     } elsif ({
4979     table => 1, html => 1,
4980     }->{$node->[1]}) {
4981     last INSCOPE;
4982     }
4983     } # INSCOPE
4984     unless (defined $i) {
4985 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4986 wakaba 1.1 ## Ignore the token
4987     $token = $self->_get_next_token;
4988     redo B;
4989     }
4990    
4991     ## generate implied end tags
4992     if ({
4993     dd => 1, dt => 1, li => 1, p => 1,
4994     td => 1, th => 1, tr => 1,
4995 wakaba 1.31 tbody => 1, tfoot=> 1, thead => 1,
4996 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4997 wakaba 1.1 unshift @{$self->{token}}, $token;
4998     $token = {type => 'end tag',
4999 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
5000 wakaba 1.1 redo B;
5001     }
5002    
5003 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
5004     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5005 wakaba 1.1 }
5006    
5007 wakaba 1.3 splice @{$self->{open_elements}}, $i;
5008 wakaba 1.1
5009 wakaba 1.3 $self->_reset_insertion_mode;
5010 wakaba 1.1
5011     $token = $self->_get_next_token;
5012     redo B;
5013     } elsif ({
5014     body => 1, caption => 1, col => 1, colgroup => 1,
5015     html => 1, tbody => 1, td => 1, tfoot => 1, th => 1,
5016     thead => 1, tr => 1,
5017     }->{$token->{tag_name}}) {
5018 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5019 wakaba 1.1 ## Ignore the token
5020     $token = $self->_get_next_token;
5021     redo B;
5022     } else {
5023     #
5024     }
5025     } else {
5026     #
5027     }
5028    
5029 wakaba 1.3 $self->{parse_error}-> (type => 'in table:'.$token->{tag_name});
5030 wakaba 1.1 $in_body->($insert_to_foster);
5031     redo B;
5032 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in caption') {
5033 wakaba 1.1 if ($token->{type} eq 'character') {
5034     ## NOTE: This is a code clone of "character in body".
5035     $reconstruct_active_formatting_elements->($insert_to_current);
5036    
5037 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
5038 wakaba 1.1
5039     $token = $self->_get_next_token;
5040     redo B;
5041     } elsif ($token->{type} eq 'comment') {
5042     ## NOTE: This is a code clone of "comment in body".
5043     my $comment = $self->{document}->create_comment ($token->{data});
5044 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
5045 wakaba 1.1 $token = $self->_get_next_token;
5046     redo B;
5047     } elsif ($token->{type} eq 'start tag') {
5048     if ({
5049     caption => 1, col => 1, colgroup => 1, tbody => 1,
5050     td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
5051     }->{$token->{tag_name}}) {
5052 wakaba 1.3 $self->{parse_error}-> (type => 'not closed:caption');
5053 wakaba 1.1
5054     ## As if </caption>
5055     ## have a table element in table scope
5056     my $i;
5057 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5058     my $node = $self->{open_elements}->[$_];
5059 wakaba 1.1 if ($node->[1] eq 'caption') {
5060     $i = $_;
5061     last INSCOPE;
5062     } elsif ({
5063     table => 1, html => 1,
5064     }->{$node->[1]}) {
5065     last INSCOPE;
5066     }
5067     } # INSCOPE
5068     unless (defined $i) {
5069 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:caption');
5070 wakaba 1.1 ## Ignore the token
5071     $token = $self->_get_next_token;
5072     redo B;
5073     }
5074    
5075     ## generate implied end tags
5076     if ({
5077     dd => 1, dt => 1, li => 1, p => 1,
5078     td => 1, th => 1, tr => 1,
5079 wakaba 1.31 tbody => 1, tfoot=> 1, thead => 1,
5080 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5081 wakaba 1.1 unshift @{$self->{token}}, $token; # <?>
5082     $token = {type => 'end tag', tag_name => 'caption'};
5083     unshift @{$self->{token}}, $token;
5084     $token = {type => 'end tag',
5085 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
5086 wakaba 1.1 redo B;
5087     }
5088    
5089 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
5090     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5091 wakaba 1.1 }
5092    
5093 wakaba 1.3 splice @{$self->{open_elements}}, $i;
5094 wakaba 1.1
5095     $clear_up_to_marker->();
5096    
5097 wakaba 1.3 $self->{insertion_mode} = 'in table';
5098 wakaba 1.1
5099     ## reprocess
5100     redo B;
5101     } else {
5102     #
5103     }
5104     } elsif ($token->{type} eq 'end tag') {
5105     if ($token->{tag_name} eq 'caption') {
5106     ## have a table element in table scope
5107     my $i;
5108 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5109     my $node = $self->{open_elements}->[$_];
5110 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
5111     $i = $_;
5112     last INSCOPE;
5113     } elsif ({
5114     table => 1, html => 1,
5115     }->{$node->[1]}) {
5116     last INSCOPE;
5117     }
5118     } # INSCOPE
5119     unless (defined $i) {
5120 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5121 wakaba 1.1 ## Ignore the token
5122     $token = $self->_get_next_token;
5123     redo B;
5124     }
5125    
5126     ## generate implied end tags
5127     if ({
5128     dd => 1, dt => 1, li => 1, p => 1,
5129     td => 1, th => 1, tr => 1,
5130 wakaba 1.31 tbody => 1, tfoot=> 1, thead => 1,
5131 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5132 wakaba 1.1 unshift @{$self->{token}}, $token;
5133     $token = {type => 'end tag',
5134 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
5135 wakaba 1.1 redo B;
5136     }
5137    
5138 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
5139     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5140 wakaba 1.1 }
5141    
5142 wakaba 1.3 splice @{$self->{open_elements}}, $i;
5143 wakaba 1.1
5144     $clear_up_to_marker->();
5145    
5146 wakaba 1.3 $self->{insertion_mode} = 'in table';
5147 wakaba 1.1
5148     $token = $self->_get_next_token;
5149     redo B;
5150     } elsif ($token->{tag_name} eq 'table') {
5151 wakaba 1.3 $self->{parse_error}-> (type => 'not closed:caption');
5152 wakaba 1.1
5153     ## As if </caption>
5154     ## have a table element in table scope
5155     my $i;
5156 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5157     my $node = $self->{open_elements}->[$_];
5158 wakaba 1.1 if ($node->[1] eq 'caption') {
5159     $i = $_;
5160     last INSCOPE;
5161     } elsif ({
5162     table => 1, html => 1,
5163     }->{$node->[1]}) {
5164     last INSCOPE;
5165     }
5166     } # INSCOPE
5167     unless (defined $i) {
5168 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:caption');
5169 wakaba 1.1 ## Ignore the token
5170     $token = $self->_get_next_token;
5171     redo B;
5172     }
5173    
5174     ## generate implied end tags
5175     if ({
5176     dd => 1, dt => 1, li => 1, p => 1,
5177     td => 1, th => 1, tr => 1,
5178 wakaba 1.31 tbody => 1, tfoot=> 1, thead => 1,
5179 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5180 wakaba 1.1 unshift @{$self->{token}}, $token; # </table>
5181     $token = {type => 'end tag', tag_name => 'caption'};
5182     unshift @{$self->{token}}, $token;
5183     $token = {type => 'end tag',
5184 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
5185 wakaba 1.1 redo B;
5186     }
5187    
5188 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
5189     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5190 wakaba 1.1 }
5191    
5192 wakaba 1.3 splice @{$self->{open_elements}}, $i;
5193 wakaba 1.1
5194     $clear_up_to_marker->();
5195    
5196 wakaba 1.3 $self->{insertion_mode} = 'in table';
5197 wakaba 1.1
5198     ## reprocess
5199     redo B;
5200     } elsif ({ ## end tags that are a parse error and are simply dropped while in a caption
5201     body => 1, col => 1, colgroup => 1,
5202     html => 1, tbody => 1, td => 1, tfoot => 1,
5203     th => 1, thead => 1, tr => 1,
5204     }->{$token->{tag_name}}) {
5205 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5206 wakaba 1.1 $token = $self->_get_next_token; ## Ignore the token: advance first, otherwise |redo B| reprocesses the same end tag forever
5207     redo B;
5208     } else {
5209     #
5210     }
5211     } else {
5212     #
5213     }
5214    
5215     $in_body->($insert_to_current);
5216     redo B;
5217 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in column group') {
5218 wakaba 1.1 if ($token->{type} eq 'character') {
5219     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5220 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5221 wakaba 1.1 unless (length $token->{data}) {
5222     $token = $self->_get_next_token;
5223     redo B;
5224     }
5225     }
5226    
5227     #
5228     } elsif ($token->{type} eq 'comment') {
5229     my $comment = $self->{document}->create_comment ($token->{data});
5230 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
5231 wakaba 1.1 $token = $self->_get_next_token;
5232     redo B;
5233     } elsif ($token->{type} eq 'start tag') {
5234     if ($token->{tag_name} eq 'col') {
5235    
5236     {
5237     my $el;
5238    
5239     $el = $self->{document}->create_element_ns
5240     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
5241    
5242     for my $attr_name (keys %{ $token->{attributes}}) {
5243     $el->set_attribute_ns (undef, [undef, $attr_name],
5244     $token->{attributes} ->{$attr_name}->{value});
5245     }
5246    
5247 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
5248     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
5249 wakaba 1.1 }
5250    
5251 wakaba 1.3 pop @{$self->{open_elements}};
5252 wakaba 1.1 $token = $self->_get_next_token;
5253     redo B;
5254     } else {
5255     #
5256     }
5257     } elsif ($token->{type} eq 'end tag') {
5258     if ($token->{tag_name} eq 'colgroup') {
5259 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html') {
5260     $self->{parse_error}-> (type => 'unmatched end tag:colgroup');
5261 wakaba 1.1 ## Ignore the token
5262     $token = $self->_get_next_token;
5263     redo B;
5264     } else {
5265 wakaba 1.3 pop @{$self->{open_elements}}; # colgroup
5266     $self->{insertion_mode} = 'in table';
5267 wakaba 1.1 $token = $self->_get_next_token;
5268     redo B;
5269     }
5270     } elsif ($token->{tag_name} eq 'col') {
5271 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:col');
5272 wakaba 1.1 ## Ignore the token
5273     $token = $self->_get_next_token;
5274     redo B;
5275     } else {
5276     #
5277     }
5278     } else {
5279     #
5280     }
5281    
5282     ## As if </colgroup>
5283 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html') {
5284     $self->{parse_error}-> (type => 'unmatched end tag:colgroup');
5285 wakaba 1.1 ## Ignore the token
5286     $token = $self->_get_next_token;
5287     redo B;
5288     } else {
5289 wakaba 1.3 pop @{$self->{open_elements}}; # colgroup
5290     $self->{insertion_mode} = 'in table';
5291 wakaba 1.1 ## reprocess
5292     redo B;
5293     }
5294 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in table body') {
5295 wakaba 1.1 if ($token->{type} eq 'character') {
5296     ## NOTE: This is a "character in table" code clone.
5297     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5298 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5299 wakaba 1.1
5300     unless (length $token->{data}) {
5301     $token = $self->_get_next_token;
5302     redo B;
5303     }
5304     }
5305    
5306 wakaba 1.3 $self->{parse_error}-> (type => 'in table:#character');
5307    
5308 wakaba 1.1 ## As if in body, but insert into foster parent element
5309     ## ISSUE: Spec says that "whenever a node would be inserted
5310     ## into the current node" while characters might not
5311     ## result in a new Text node.
5312     $reconstruct_active_formatting_elements->($insert_to_foster);
5313    
5314     if ({
5315     table => 1, tbody => 1, tfoot => 1,
5316     thead => 1, tr => 1,
5317 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5318 wakaba 1.1 # MUST
5319     my $foster_parent_element;
5320     my $next_sibling;
5321     my $prev_sibling;
5322 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
5323     if ($self->{open_elements}->[$_]->[1] eq 'table') {
5324     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
5325 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
5326     $foster_parent_element = $parent;
5327 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
5328 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
5329     } else {
5330 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
5331 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
5332     }
5333     last OE;
5334     }
5335     } # OE
5336 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
5337 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
5338     unless defined $foster_parent_element;
5339     if (defined $prev_sibling and
5340     $prev_sibling->node_type == 3) {
5341     $prev_sibling->manakai_append_text ($token->{data});
5342     } else {
5343     $foster_parent_element->insert_before
5344     ($self->{document}->create_text_node ($token->{data}),
5345     $next_sibling);
5346     }
5347     } else {
5348 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
5349 wakaba 1.1 }
5350    
5351     $token = $self->_get_next_token;
5352     redo B;
5353     } elsif ($token->{type} eq 'comment') {
5354     ## Copied from 'in table'
5355     my $comment = $self->{document}->create_comment ($token->{data});
5356 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
5357 wakaba 1.1 $token = $self->_get_next_token;
5358     redo B;
5359     } elsif ($token->{type} eq 'start tag') {
5360     if ({
5361     tr => 1,
5362     th => 1, td => 1,
5363     }->{$token->{tag_name}}) {
5364 wakaba 1.3 unless ($token->{tag_name} eq 'tr') {
5365     $self->{parse_error}-> (type => 'missing start tag:tr');
5366     }
5367    
5368 wakaba 1.1 ## Clear back to table body context
5369     while (not {
5370     tbody => 1, tfoot => 1, thead => 1, html => 1,
5371 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5372     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5373     pop @{$self->{open_elements}};
5374 wakaba 1.1 }
5375    
5376 wakaba 1.3 $self->{insertion_mode} = 'in row';
5377 wakaba 1.1 if ($token->{tag_name} eq 'tr') {
5378    
5379     {
5380     my $el;
5381    
5382     $el = $self->{document}->create_element_ns
5383     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
5384    
5385     for my $attr_name (keys %{ $token->{attributes}}) {
5386     $el->set_attribute_ns (undef, [undef, $attr_name],
5387     $token->{attributes} ->{$attr_name}->{value});
5388     }
5389    
5390 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
5391     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
5392 wakaba 1.1 }
5393    
5394     $token = $self->_get_next_token;
5395     } else {
5396    
5397     {
5398     my $el;
5399    
5400     $el = $self->{document}->create_element_ns
5401     (q<http://www.w3.org/1999/xhtml>, [undef, 'tr']);
5402    
5403 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
5404     push @{$self->{open_elements}}, [$el, 'tr'];
5405 wakaba 1.1 }
5406    
5407     ## reprocess
5408     }
5409     redo B;
5410     } elsif ({
5411     caption => 1, col => 1, colgroup => 1,
5412     tbody => 1, tfoot => 1, thead => 1,
5413     }->{$token->{tag_name}}) {
5414     ## have an element in table scope
5415     my $i;
5416 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5417     my $node = $self->{open_elements}->[$_];
5418 wakaba 1.1 if ({
5419     tbody => 1, thead => 1, tfoot => 1,
5420     }->{$node->[1]}) {
5421     $i = $_;
5422     last INSCOPE;
5423     } elsif ({
5424     table => 1, html => 1,
5425     }->{$node->[1]}) {
5426     last INSCOPE;
5427     }
5428     } # INSCOPE
5429     unless (defined $i) {
5430 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5431 wakaba 1.1 ## Ignore the token
5432     $token = $self->_get_next_token;
5433     redo B;
5434     }
5435    
5436     ## Clear back to table body context
5437     while (not {
5438     tbody => 1, tfoot => 1, thead => 1, html => 1,
5439 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5440     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5441     pop @{$self->{open_elements}};
5442 wakaba 1.1 }
5443    
5444     ## As if <{current node}>
5445     ## have an element in table scope
5446     ## true by definition
5447    
5448     ## Clear back to table body context
5449     ## nop by definition
5450    
5451 wakaba 1.3 pop @{$self->{open_elements}};
5452     $self->{insertion_mode} = 'in table';
5453 wakaba 1.1 ## reprocess
5454     redo B;
5455     } elsif ($token->{tag_name} eq 'table') {
5456     ## NOTE: This is a code clone of "table in table"
5457 wakaba 1.3 $self->{parse_error}-> (type => 'not closed:table');
5458 wakaba 1.1
5459     ## As if </table>
5460     ## have a table element in table scope
5461     my $i;
5462 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5463     my $node = $self->{open_elements}->[$_];
5464 wakaba 1.1 if ($node->[1] eq 'table') {
5465     $i = $_;
5466     last INSCOPE;
5467     } elsif ({
5468     table => 1, html => 1,
5469     }->{$node->[1]}) {
5470     last INSCOPE;
5471     }
5472     } # INSCOPE
5473     unless (defined $i) {
5474 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:table');
5475 wakaba 1.1 ## Ignore tokens </table><table>
5476     $token = $self->_get_next_token;
5477     redo B;
5478     }
5479    
5480     ## generate implied end tags
5481     if ({
5482     dd => 1, dt => 1, li => 1, p => 1,
5483     td => 1, th => 1, tr => 1,
5484 wakaba 1.31 tbody => 1, tfoot=> 1, thead => 1,
5485 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5486 wakaba 1.1 unshift @{$self->{token}}, $token; # <table>
5487     $token = {type => 'end tag', tag_name => 'table'};
5488     unshift @{$self->{token}}, $token;
5489     $token = {type => 'end tag',
5490 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
5491 wakaba 1.1 redo B;
5492     }
5493    
5494 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
5495     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5496 wakaba 1.1 }
5497    
5498 wakaba 1.3 splice @{$self->{open_elements}}, $i;
5499 wakaba 1.1
5500 wakaba 1.3 $self->_reset_insertion_mode;
5501 wakaba 1.1
5502     ## reprocess
5503     redo B;
5504     } else {
5505     #
5506     }
5507     } elsif ($token->{type} eq 'end tag') {
5508     if ({
5509     tbody => 1, tfoot => 1, thead => 1,
5510     }->{$token->{tag_name}}) {
5511     ## have an element in table scope
5512     my $i;
5513 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5514     my $node = $self->{open_elements}->[$_];
5515 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
5516     $i = $_;
5517     last INSCOPE;
5518     } elsif ({
5519     table => 1, html => 1,
5520     }->{$node->[1]}) {
5521     last INSCOPE;
5522     }
5523     } # INSCOPE
5524     unless (defined $i) {
5525 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5526 wakaba 1.1 ## Ignore the token
5527     $token = $self->_get_next_token;
5528     redo B;
5529     }
5530    
5531     ## Clear back to table body context
5532     while (not {
5533     tbody => 1, tfoot => 1, thead => 1, html => 1,
5534 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5535     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5536     pop @{$self->{open_elements}};
5537 wakaba 1.1 }
5538    
5539 wakaba 1.3 pop @{$self->{open_elements}};
5540     $self->{insertion_mode} = 'in table';
5541 wakaba 1.1 $token = $self->_get_next_token;
5542     redo B;
5543     } elsif ($token->{tag_name} eq 'table') {
5544     ## have an element in table scope
5545     my $i;
5546 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5547     my $node = $self->{open_elements}->[$_];
5548 wakaba 1.1 if ({
5549     tbody => 1, thead => 1, tfoot => 1,
5550     }->{$node->[1]}) {
5551     $i = $_;
5552     last INSCOPE;
5553     } elsif ({
5554     table => 1, html => 1,
5555     }->{$node->[1]}) {
5556     last INSCOPE;
5557     }
5558     } # INSCOPE
5559     unless (defined $i) {
5560 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5561 wakaba 1.1 ## Ignore the token
5562     $token = $self->_get_next_token;
5563     redo B;
5564     }
5565    
5566     ## Clear back to table body context
5567     while (not {
5568     tbody => 1, tfoot => 1, thead => 1, html => 1,
5569 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5570     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5571     pop @{$self->{open_elements}};
5572 wakaba 1.1 }
5573    
5574     ## As if <{current node}>
5575     ## have an element in table scope
5576     ## true by definition
5577    
5578     ## Clear back to table body context
5579     ## nop by definition
5580    
5581 wakaba 1.3 pop @{$self->{open_elements}};
5582     $self->{insertion_mode} = 'in table';
5583 wakaba 1.1 ## reprocess
5584     redo B;
5585     } elsif ({
5586     body => 1, caption => 1, col => 1, colgroup => 1,
5587     html => 1, td => 1, th => 1, tr => 1,
5588     }->{$token->{tag_name}}) {
5589 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5590 wakaba 1.1 ## Ignore the token
5591     $token = $self->_get_next_token;
5592     redo B;
5593     } else {
5594     #
5595     }
5596     } else {
5597     #
5598     }
5599    
5600     ## As if in table
5601 wakaba 1.3 $self->{parse_error}-> (type => 'in table:'.$token->{tag_name});
5602 wakaba 1.1 $in_body->($insert_to_foster);
5603     redo B;
5604 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in row') {
5605 wakaba 1.1 if ($token->{type} eq 'character') {
5606     ## NOTE: This is a "character in table" code clone.
5607     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5608 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5609 wakaba 1.1
5610     unless (length $token->{data}) {
5611     $token = $self->_get_next_token;
5612     redo B;
5613     }
5614     }
5615    
5616 wakaba 1.3 $self->{parse_error}-> (type => 'in table:#character');
5617    
5618 wakaba 1.1 ## As if in body, but insert into foster parent element
5619     ## ISSUE: Spec says that "whenever a node would be inserted
5620     ## into the current node" while characters might not
5621     ## result in a new Text node.
5622     $reconstruct_active_formatting_elements->($insert_to_foster);
5623    
5624     if ({
5625     table => 1, tbody => 1, tfoot => 1,
5626     thead => 1, tr => 1,
5627 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5628 wakaba 1.1 # MUST
5629     my $foster_parent_element;
5630     my $next_sibling;
5631     my $prev_sibling;
5632 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
5633     if ($self->{open_elements}->[$_]->[1] eq 'table') {
5634     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
5635 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
5636     $foster_parent_element = $parent;
5637 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
5638 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
5639     } else {
5640 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
5641 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
5642     }
5643     last OE;
5644     }
5645     } # OE
5646 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
5647 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
5648     unless defined $foster_parent_element;
5649     if (defined $prev_sibling and
5650     $prev_sibling->node_type == 3) {
5651     $prev_sibling->manakai_append_text ($token->{data});
5652     } else {
5653     $foster_parent_element->insert_before
5654     ($self->{document}->create_text_node ($token->{data}),
5655     $next_sibling);
5656     }
5657     } else {
5658 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
5659 wakaba 1.1 }
5660    
5661     $token = $self->_get_next_token;
5662     redo B;
5663     } elsif ($token->{type} eq 'comment') {
5664     ## Copied from 'in table'
5665     my $comment = $self->{document}->create_comment ($token->{data});
5666 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
5667 wakaba 1.1 $token = $self->_get_next_token;
5668     redo B;
5669     } elsif ($token->{type} eq 'start tag') {
5670     if ($token->{tag_name} eq 'th' or
5671     $token->{tag_name} eq 'td') {
5672     ## Clear back to table row context
5673     while (not {
5674     tr => 1, html => 1,
5675 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5676     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5677     pop @{$self->{open_elements}};
5678 wakaba 1.1 }
5679    
5680    
5681     {
5682     my $el;
5683    
5684     $el = $self->{document}->create_element_ns
5685     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
5686    
5687     for my $attr_name (keys %{ $token->{attributes}}) {
5688     $el->set_attribute_ns (undef, [undef, $attr_name],
5689     $token->{attributes} ->{$attr_name}->{value});
5690     }
5691    
5692 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
5693     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
5694 wakaba 1.1 }
5695    
5696 wakaba 1.3 $self->{insertion_mode} = 'in cell';
5697 wakaba 1.1
5698     push @$active_formatting_elements, ['#marker', ''];
5699    
5700     $token = $self->_get_next_token;
5701     redo B;
5702     } elsif ({
5703     caption => 1, col => 1, colgroup => 1,
5704     tbody => 1, tfoot => 1, thead => 1, tr => 1,
5705     }->{$token->{tag_name}}) {
5706     ## As if </tr>
5707     ## have an element in table scope
5708     my $i;
5709 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5710     my $node = $self->{open_elements}->[$_];
5711 wakaba 1.1 if ($node->[1] eq 'tr') {
5712     $i = $_;
5713     last INSCOPE;
5714     } elsif ({
5715     table => 1, html => 1,
5716     }->{$node->[1]}) {
5717     last INSCOPE;
5718     }
5719     } # INSCOPE
5720     unless (defined $i) {
5721 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5722 wakaba 1.1 ## Ignore the token
5723     $token = $self->_get_next_token;
5724     redo B;
5725     }
5726    
5727     ## Clear back to table row context
5728     while (not {
5729     tr => 1, html => 1,
5730 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5731     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5732     pop @{$self->{open_elements}};
5733 wakaba 1.1 }
5734    
5735 wakaba 1.3 pop @{$self->{open_elements}}; # tr
5736     $self->{insertion_mode} = 'in table body';
5737 wakaba 1.1 ## reprocess
5738     redo B;
5739     } elsif ($token->{tag_name} eq 'table') {
5740     ## NOTE: This is a code clone of "table in table"
5741 wakaba 1.3 $self->{parse_error}-> (type => 'not closed:table');
5742 wakaba 1.1
5743     ## As if </table>
5744     ## have a table element in table scope
5745     my $i;
5746 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5747     my $node = $self->{open_elements}->[$_];
5748 wakaba 1.1 if ($node->[1] eq 'table') {
5749     $i = $_;
5750     last INSCOPE;
5751     } elsif ({
5752     table => 1, html => 1,
5753     }->{$node->[1]}) {
5754     last INSCOPE;
5755     }
5756     } # INSCOPE
5757     unless (defined $i) {
5758 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:table');
5759 wakaba 1.1 ## Ignore tokens </table><table>
5760     $token = $self->_get_next_token;
5761     redo B;
5762     }
5763    
5764     ## generate implied end tags
5765     if ({
5766     dd => 1, dt => 1, li => 1, p => 1,
5767     td => 1, th => 1, tr => 1,
5768 wakaba 1.31 tbody => 1, tfoot=> 1, thead => 1,
5769 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5770 wakaba 1.1 unshift @{$self->{token}}, $token; # <table>
5771     $token = {type => 'end tag', tag_name => 'table'};
5772     unshift @{$self->{token}}, $token;
5773     $token = {type => 'end tag',
5774 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
5775 wakaba 1.1 redo B;
5776     }
5777    
5778 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
5779     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5780 wakaba 1.1 }
5781    
5782 wakaba 1.3 splice @{$self->{open_elements}}, $i;
5783 wakaba 1.1
5784 wakaba 1.3 $self->_reset_insertion_mode;
5785 wakaba 1.1
5786     ## reprocess
5787     redo B;
5788     } else {
5789     #
5790     }
5791     } elsif ($token->{type} eq 'end tag') {
5792     if ($token->{tag_name} eq 'tr') {
5793     ## have an element in table scope
5794     my $i;
5795 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5796     my $node = $self->{open_elements}->[$_];
5797 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
5798     $i = $_;
5799     last INSCOPE;
5800     } elsif ({
5801     table => 1, html => 1,
5802     }->{$node->[1]}) {
5803     last INSCOPE;
5804     }
5805     } # INSCOPE
5806     unless (defined $i) {
5807 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5808 wakaba 1.1 ## Ignore the token
5809     $token = $self->_get_next_token;
5810     redo B;
5811     }
5812    
5813     ## Clear back to table row context
5814     while (not {
5815     tr => 1, html => 1,
5816 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5817     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5818     pop @{$self->{open_elements}};
5819 wakaba 1.1 }
5820    
5821 wakaba 1.3 pop @{$self->{open_elements}}; # tr
5822     $self->{insertion_mode} = 'in table body';
5823 wakaba 1.1 $token = $self->_get_next_token;
5824     redo B;
5825     } elsif ($token->{tag_name} eq 'table') {
5826     ## As if </tr>
5827     ## have an element in table scope
5828     my $i;
5829 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5830     my $node = $self->{open_elements}->[$_];
5831 wakaba 1.1 if ($node->[1] eq 'tr') {
5832     $i = $_;
5833     last INSCOPE;
5834     } elsif ({
5835     table => 1, html => 1,
5836     }->{$node->[1]}) {
5837     last INSCOPE;
5838     }
5839     } # INSCOPE
5840     unless (defined $i) {
5841 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5842 wakaba 1.1 ## Ignore the token
5843     $token = $self->_get_next_token;
5844     redo B;
5845     }
5846    
5847     ## Clear back to table row context
5848     while (not {
5849     tr => 1, html => 1,
5850 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5851     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5852     pop @{$self->{open_elements}};
5853 wakaba 1.1 }
5854    
5855 wakaba 1.3 pop @{$self->{open_elements}}; # tr
5856     $self->{insertion_mode} = 'in table body';
5857 wakaba 1.1 ## reprocess
5858     redo B;
5859     } elsif ({
5860     tbody => 1, tfoot => 1, thead => 1,
5861     }->{$token->{tag_name}}) {
5862     ## have an element in table scope
5863     my $i;
5864 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5865     my $node = $self->{open_elements}->[$_];
5866 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
5867     $i = $_;
5868     last INSCOPE;
5869     } elsif ({
5870     table => 1, html => 1,
5871     }->{$node->[1]}) {
5872     last INSCOPE;
5873     }
5874     } # INSCOPE
5875     unless (defined $i) {
5876 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5877 wakaba 1.1 ## Ignore the token
5878     $token = $self->_get_next_token;
5879     redo B;
5880     }
5881    
5882     ## As if </tr>
5883     ## have an element in table scope
5884     $i = undef; # reuse the $i declared for the table-section scan above; reset it before scanning for |tr|
5885 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5886     my $node = $self->{open_elements}->[$_];
5887 wakaba 1.1 if ($node->[1] eq 'tr') {
5888     $i = $_;
5889     last INSCOPE;
5890     } elsif ({
5891     table => 1, html => 1,
5892     }->{$node->[1]}) {
5893     last INSCOPE;
5894     }
5895     } # INSCOPE
5896     unless (defined $i) {
5897 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:tr');
5898 wakaba 1.1 ## Ignore the token
5899     $token = $self->_get_next_token;
5900     redo B;
5901     }
5902    
5903     ## Clear back to table row context
5904     while (not {
5905     tr => 1, html => 1,
5906 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5907     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5908     pop @{$self->{open_elements}};
5909 wakaba 1.1 }
5910    
5911 wakaba 1.3 pop @{$self->{open_elements}}; # tr
5912     $self->{insertion_mode} = 'in table body';
5913 wakaba 1.1 ## reprocess
5914     redo B;
5915     } elsif ({
5916     body => 1, caption => 1, col => 1,
5917     colgroup => 1, html => 1, td => 1, th => 1,
5918     }->{$token->{tag_name}}) {
5919 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5920 wakaba 1.1 ## Ignore the token
5921     $token = $self->_get_next_token;
5922     redo B;
5923     } else {
5924     #
5925     }
5926     } else {
5927     #
5928     }
5929    
5930     ## As if in table
5931 wakaba 1.3 $self->{parse_error}-> (type => 'in table:'.$token->{tag_name});
5932 wakaba 1.1 $in_body->($insert_to_foster);
5933     redo B;
5934 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in cell') {
5935 wakaba 1.1 if ($token->{type} eq 'character') {
5936     ## NOTE: This is a code clone of "character in body".
5937     $reconstruct_active_formatting_elements->($insert_to_current);
5938    
5939 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
5940 wakaba 1.1
5941     $token = $self->_get_next_token;
5942     redo B;
5943     } elsif ($token->{type} eq 'comment') {
5944     ## NOTE: This is a code clone of "comment in body".
5945     my $comment = $self->{document}->create_comment ($token->{data});
5946 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
5947 wakaba 1.1 $token = $self->_get_next_token;
5948     redo B;
5949     } elsif ($token->{type} eq 'start tag') {
5950     if ({
5951     caption => 1, col => 1, colgroup => 1,
5952     tbody => 1, td => 1, tfoot => 1, th => 1,
5953     thead => 1, tr => 1,
5954     }->{$token->{tag_name}}) {
5955     ## have an element in table scope
5956     my $tn;
5957 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5958     my $node = $self->{open_elements}->[$_];
5959 wakaba 1.1 if ($node->[1] eq 'td' or $node->[1] eq 'th') {
5960     $tn = $node->[1];
5961     last INSCOPE;
5962     } elsif ({
5963     table => 1, html => 1,
5964     }->{$node->[1]}) {
5965     last INSCOPE;
5966     }
5967     } # INSCOPE
5968     unless (defined $tn) {
5969 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5970 wakaba 1.1 ## Ignore the token
5971     $token = $self->_get_next_token;
5972     redo B;
5973     }
5974    
5975     ## Close the cell
5976     unshift @{$self->{token}}, $token; # <?>
5977     $token = {type => 'end tag', tag_name => $tn};
5978     redo B;
5979     } else {
5980     #
5981     }
5982     } elsif ($token->{type} eq 'end tag') {
5983     if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
5984     ## have an element in table scope
5985     my $i;
5986 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5987     my $node = $self->{open_elements}->[$_];
5988 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
5989     $i = $_;
5990     last INSCOPE;
5991     } elsif ({
5992     table => 1, html => 1,
5993     }->{$node->[1]}) {
5994     last INSCOPE;
5995     }
5996     } # INSCOPE
5997     unless (defined $i) {
5998 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5999 wakaba 1.1 ## Ignore the token
6000     $token = $self->_get_next_token;
6001     redo B;
6002     }
6003    
6004     ## generate implied end tags
6005     if ({
6006     dd => 1, dt => 1, li => 1, p => 1,
6007     td => ($token->{tag_name} eq 'th'),
6008     th => ($token->{tag_name} eq 'td'),
6009     tr => 1,
6010 wakaba 1.31 tbody => 1, tfoot=> 1, thead => 1,
6011 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
6012 wakaba 1.1 unshift @{$self->{token}}, $token;
6013     $token = {type => 'end tag',
6014 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
6015 wakaba 1.1 redo B;
6016     }
6017    
6018 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
6019     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
6020 wakaba 1.1 }
6021    
6022 wakaba 1.3 splice @{$self->{open_elements}}, $i;
6023 wakaba 1.1
6024     $clear_up_to_marker->();
6025    
6026 wakaba 1.3 $self->{insertion_mode} = 'in row';
6027 wakaba 1.1
6028     $token = $self->_get_next_token;
6029     redo B;
6030     } elsif ({
6031     body => 1, caption => 1, col => 1,
6032     colgroup => 1, html => 1,
6033     }->{$token->{tag_name}}) {
6034 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
6035 wakaba 1.1 ## Ignore the token
6036     $token = $self->_get_next_token;
6037     redo B;
6038     } elsif ({
6039     table => 1, tbody => 1, tfoot => 1,
6040     thead => 1, tr => 1,
6041     }->{$token->{tag_name}}) {
6042     ## have an element in table scope
6043     my $i;
6044     my $tn;
6045 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6046     my $node = $self->{open_elements}->[$_];
6047 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
6048     $i = $_;
6049     last INSCOPE;
6050     } elsif ($node->[1] eq 'td' or $node->[1] eq 'th') {
6051     $tn = $node->[1];
6052     ## NOTE: There is exactly one |td| or |th| element
6053     ## in scope in the stack of open elements by definition.
6054     } elsif ({
6055     table => 1, html => 1,
6056     }->{$node->[1]}) {
6057     last INSCOPE;
6058     }
6059     } # INSCOPE
6060     unless (defined $i) {
6061 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
6062 wakaba 1.1 ## Ignore the token
6063     $token = $self->_get_next_token;
6064     redo B;
6065     }
6066    
6067     ## Close the cell
6068     unshift @{$self->{token}}, $token; # </?>
6069     $token = {type => 'end tag', tag_name => $tn};
6070     redo B;
6071     } else {
6072     #
6073     }
6074     } else {
6075     #
6076     }
6077    
6078     $in_body->($insert_to_current);
6079     redo B;
6080 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in select') {
6081 wakaba 1.1 if ($token->{type} eq 'character') {
6082 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
6083 wakaba 1.1 $token = $self->_get_next_token;
6084     redo B;
6085     } elsif ($token->{type} eq 'comment') {
6086     my $comment = $self->{document}->create_comment ($token->{data});
6087 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
6088 wakaba 1.1 $token = $self->_get_next_token;
6089     redo B;
6090     } elsif ($token->{type} eq 'start tag') {
6091     if ($token->{tag_name} eq 'option') {
6092 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
6093 wakaba 1.1 ## As if </option>
6094 wakaba 1.3 pop @{$self->{open_elements}};
6095 wakaba 1.1 }
6096    
6097    
6098     {
6099     my $el;
6100    
6101     $el = $self->{document}->create_element_ns
6102     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
6103    
6104     for my $attr_name (keys %{ $token->{attributes}}) {
6105     $el->set_attribute_ns (undef, [undef, $attr_name],
6106     $token->{attributes} ->{$attr_name}->{value});
6107     }
6108    
6109 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
6110     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
6111 wakaba 1.1 }
6112    
6113     $token = $self->_get_next_token;
6114     redo B;
6115     } elsif ($token->{tag_name} eq 'optgroup') {
6116 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
6117 wakaba 1.1 ## As if </option>
6118 wakaba 1.3 pop @{$self->{open_elements}};
6119 wakaba 1.1 }
6120    
6121 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
6122 wakaba 1.1 ## As if </optgroup>
6123 wakaba 1.3 pop @{$self->{open_elements}};
6124 wakaba 1.1 }
6125    
6126    
6127     {
6128     my $el;
6129    
6130     $el = $self->{document}->create_element_ns
6131     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
6132    
6133     for my $attr_name (keys %{ $token->{attributes}}) {
6134     $el->set_attribute_ns (undef, [undef, $attr_name],
6135     $token->{attributes} ->{$attr_name}->{value});
6136     }
6137    
6138 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
6139     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
6140 wakaba 1.1 }
6141    
6142     $token = $self->_get_next_token;
6143     redo B;
6144     } elsif ($token->{tag_name} eq 'select') {
6145 wakaba 1.3 $self->{parse_error}-> (type => 'not closed:select');
6146 wakaba 1.1 ## As if </select> instead
6147     ## have an element in table scope
6148     my $i;
6149 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6150     my $node = $self->{open_elements}->[$_];
6151 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
6152     $i = $_;
6153     last INSCOPE;
6154     } elsif ({
6155     table => 1, html => 1,
6156     }->{$node->[1]}) {
6157     last INSCOPE;
6158     }
6159     } # INSCOPE
6160     unless (defined $i) {
6161 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:select');
6162 wakaba 1.1 ## Ignore the token
6163     $token = $self->_get_next_token;
6164     redo B;
6165     }
6166    
6167 wakaba 1.3 splice @{$self->{open_elements}}, $i;
6168 wakaba 1.1
6169 wakaba 1.3 $self->_reset_insertion_mode;
6170 wakaba 1.1
6171     $token = $self->_get_next_token;
6172     redo B;
6173     } else {
6174     #
6175     }
6176     } elsif ($token->{type} eq 'end tag') {
6177     if ($token->{tag_name} eq 'optgroup') {
6178 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option' and
6179     $self->{open_elements}->[-2]->[1] eq 'optgroup') {
6180 wakaba 1.1 ## As if </option>
6181 wakaba 1.3 splice @{$self->{open_elements}}, -2;
6182     } elsif ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
6183     pop @{$self->{open_elements}};
6184 wakaba 1.1 } else {
6185 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
6186 wakaba 1.1 ## Ignore the token
6187     }
6188     $token = $self->_get_next_token;
6189     redo B;
6190     } elsif ($token->{tag_name} eq 'option') {
6191 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
6192     pop @{$self->{open_elements}};
6193 wakaba 1.1 } else {
6194 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
6195 wakaba 1.1 ## Ignore the token
6196     }
6197     $token = $self->_get_next_token;
6198     redo B;
6199     } elsif ($token->{tag_name} eq 'select') {
6200     ## have an element in table scope
6201     my $i;
6202 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6203     my $node = $self->{open_elements}->[$_];
6204 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
6205     $i = $_;
6206     last INSCOPE;
6207     } elsif ({
6208     table => 1, html => 1,
6209     }->{$node->[1]}) {
6210     last INSCOPE;
6211     }
6212     } # INSCOPE
6213     unless (defined $i) {
6214 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
6215 wakaba 1.1 ## Ignore the token
6216     $token = $self->_get_next_token;
6217     redo B;
6218     }
6219    
6220 wakaba 1.3 splice @{$self->{open_elements}}, $i;
6221 wakaba 1.1
6222 wakaba 1.3 $self->_reset_insertion_mode;
6223 wakaba 1.1
6224     $token = $self->_get_next_token;
6225     redo B;
6226     } elsif ({
6227     caption => 1, table => 1, tbody => 1,
6228     tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
6229     }->{$token->{tag_name}}) {
6230 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
6231 wakaba 1.1
6232     ## have an element in table scope
6233     my $i;
6234 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6235     my $node = $self->{open_elements}->[$_];
6236 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
6237     $i = $_;
6238     last INSCOPE;
6239     } elsif ({
6240     table => 1, html => 1,
6241     }->{$node->[1]}) {
6242     last INSCOPE;
6243     }
6244     } # INSCOPE
6245     unless (defined $i) {
6246     ## Ignore the token
6247     $token = $self->_get_next_token;
6248     redo B;
6249     }
6250    
6251     ## As if </select>
6252     ## have an element in table scope
6253     undef $i;
6254 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6255     my $node = $self->{open_elements}->[$_];
6256 wakaba 1.1 if ($node->[1] eq 'select') {
6257     $i = $_;
6258     last INSCOPE;
6259     } elsif ({
6260     table => 1, html => 1,
6261     }->{$node->[1]}) {
6262     last INSCOPE;
6263     }
6264     } # INSCOPE
6265     unless (defined $i) {
6266 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:select');
6267 wakaba 1.1 ## Ignore the </select> token
6268     $token = $self->_get_next_token; ## TODO: ok?
6269     redo B;
6270     }
6271    
6272 wakaba 1.3 splice @{$self->{open_elements}}, $i;
6273 wakaba 1.1
6274 wakaba 1.3 $self->_reset_insertion_mode;
6275 wakaba 1.1
6276     ## reprocess
6277     redo B;
6278     } else {
6279     #
6280     }
6281     } else {
6282     #
6283     }
6284    
6285 wakaba 1.3 $self->{parse_error}-> (type => 'in select:'.$token->{tag_name});
6286 wakaba 1.1 ## Ignore the token
6287     $token = $self->_get_next_token;
6288     redo B;
6289 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after body') {
6290 wakaba 1.1 if ($token->{type} eq 'character') {
6291     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
6292     ## As if in body
6293     $reconstruct_active_formatting_elements->($insert_to_current);
6294    
6295 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
6296 wakaba 1.1
6297     unless (length $token->{data}) {
6298     $token = $self->_get_next_token;
6299     redo B;
6300     }
6301     }
6302    
6303     #
6304 wakaba 1.3 $self->{parse_error}-> (type => 'after body:#'.$token->{type});
6305 wakaba 1.1 } elsif ($token->{type} eq 'comment') {
6306     my $comment = $self->{document}->create_comment ($token->{data});
6307 wakaba 1.3 $self->{open_elements}->[0]->[0]->append_child ($comment);
6308 wakaba 1.1 $token = $self->_get_next_token;
6309     redo B;
6310 wakaba 1.3 } elsif ($token->{type} eq 'start tag') {
6311     $self->{parse_error}-> (type => 'after body:'.$token->{tag_name});
6312     #
6313 wakaba 1.1 } elsif ($token->{type} eq 'end tag') {
6314     if ($token->{tag_name} eq 'html') {
6315 wakaba 1.3 if (defined $self->{inner_html_node}) {
6316     $self->{parse_error}-> (type => 'unmatched end tag:html');
6317     ## Ignore the token
6318     $token = $self->_get_next_token;
6319     redo B;
6320     } else {
6321     $phase = 'trailing end';
6322     $token = $self->_get_next_token;
6323     redo B;
6324     }
6325 wakaba 1.1 } else {
6326 wakaba 1.3 $self->{parse_error}-> (type => 'after body:/'.$token->{tag_name});
6327 wakaba 1.1 }
6328     } else {
6329 wakaba 1.3 $self->{parse_error}-> (type => 'after body:#'.$token->{type});
6330 wakaba 1.1 }
6331    
6332 wakaba 1.3 $self->{insertion_mode} = 'in body';
6333 wakaba 1.1 ## reprocess
6334     redo B;
6335 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in frameset') {
6336 wakaba 1.1 if ($token->{type} eq 'character') {
6337     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
6338 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
6339 wakaba 1.1
6340     unless (length $token->{data}) {
6341     $token = $self->_get_next_token;
6342     redo B;
6343     }
6344     }
6345    
6346     #
6347     } elsif ($token->{type} eq 'comment') {
6348     my $comment = $self->{document}->create_comment ($token->{data});
6349 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
6350 wakaba 1.1 $token = $self->_get_next_token;
6351     redo B;
6352     } elsif ($token->{type} eq 'start tag') {
6353     if ($token->{tag_name} eq 'frameset') {
6354    
6355     {
6356     my $el;
6357    
6358     $el = $self->{document}->create_element_ns
6359     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
6360    
6361     for my $attr_name (keys %{ $token->{attributes}}) {
6362     $el->set_attribute_ns (undef, [undef, $attr_name],
6363     $token->{attributes} ->{$attr_name}->{value});
6364     }
6365    
6366 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
6367     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
6368 wakaba 1.1 }
6369    
6370     $token = $self->_get_next_token;
6371     redo B;
6372     } elsif ($token->{tag_name} eq 'frame') {
6373    
6374     {
6375     my $el;
6376    
6377     $el = $self->{document}->create_element_ns
6378     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
6379    
6380     for my $attr_name (keys %{ $token->{attributes}}) {
6381     $el->set_attribute_ns (undef, [undef, $attr_name],
6382     $token->{attributes} ->{$attr_name}->{value});
6383     }
6384    
6385 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
6386     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
6387 wakaba 1.1 }
6388    
6389 wakaba 1.3 pop @{$self->{open_elements}};
6390 wakaba 1.1 $token = $self->_get_next_token;
6391     redo B;
6392     } elsif ($token->{tag_name} eq 'noframes') {
6393     $in_body->($insert_to_current);
6394     redo B;
6395     } else {
6396     #
6397     }
6398     } elsif ($token->{type} eq 'end tag') {
6399     if ($token->{tag_name} eq 'frameset') {
6400 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html' and
6401     @{$self->{open_elements}} == 1) {
6402     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
6403 wakaba 1.1 ## Ignore the token
6404     $token = $self->_get_next_token;
6405     } else {
6406 wakaba 1.3 pop @{$self->{open_elements}};
6407 wakaba 1.1 $token = $self->_get_next_token;
6408     }
6409    
6410     ## if not inner_html and
6411 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'frameset') {
6412     $self->{insertion_mode} = 'after frameset';
6413 wakaba 1.1 }
6414     redo B;
6415     } else {
6416     #
6417     }
6418     } else {
6419     #
6420     }
6421    
6422 wakaba 1.3 if (defined $token->{tag_name}) {
6423 wakaba 1.30 $self->{parse_error}-> (type => 'in frameset:'.($token->{type} eq 'end tag' ? '/' : '').$token->{tag_name});
6424 wakaba 1.3 } else {
6425     $self->{parse_error}-> (type => 'in frameset:#'.$token->{type});
6426     }
6427 wakaba 1.1 ## Ignore the token
6428     $token = $self->_get_next_token;
6429     redo B;
6430 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after frameset') {
6431 wakaba 1.1 if ($token->{type} eq 'character') {
6432     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
6433 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
6434 wakaba 1.1
6435     unless (length $token->{data}) {
6436     $token = $self->_get_next_token;
6437     redo B;
6438     }
6439     }
6440    
6441     #
6442     } elsif ($token->{type} eq 'comment') {
6443     my $comment = $self->{document}->create_comment ($token->{data});
6444 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
6445 wakaba 1.1 $token = $self->_get_next_token;
6446     redo B;
6447     } elsif ($token->{type} eq 'start tag') {
6448     if ($token->{tag_name} eq 'noframes') {
6449     $in_body->($insert_to_current);
6450     redo B;
6451     } else {
6452     #
6453     }
6454     } elsif ($token->{type} eq 'end tag') {
6455     if ($token->{tag_name} eq 'html') {
6456     $phase = 'trailing end';
6457     $token = $self->_get_next_token;
6458     redo B;
6459     } else {
6460     #
6461     }
6462     } else {
6463     #
6464     }
6465    
6466 wakaba 1.3 if (defined $token->{tag_name}) {
6467 wakaba 1.30 $self->{parse_error}-> (type => 'after frameset:'.($token->{tag_name} eq 'end tag' ? '/' : '').$token->{tag_name});
6468 wakaba 1.3 } else {
6469     $self->{parse_error}-> (type => 'after frameset:#'.$token->{type});
6470     }
6471 wakaba 1.1 ## Ignore the token
6472     $token = $self->_get_next_token;
6473     redo B;
6474    
6475     ## ISSUE: An issue in spec there
6476     } else {
6477 wakaba 1.3 die "$0: $self->{insertion_mode}: Unknown insertion mode";
6478 wakaba 1.1 }
6479     }
6480     } elsif ($phase eq 'trailing end') {
6481     ## states in the main stage is preserved yet # MUST
6482    
6483     if ($token->{type} eq 'DOCTYPE') {
6484 wakaba 1.3 $self->{parse_error}-> (type => 'after html:#DOCTYPE');
6485 wakaba 1.1 ## Ignore the token
6486     $token = $self->_get_next_token;
6487     redo B;
6488     } elsif ($token->{type} eq 'comment') {
6489     my $comment = $self->{document}->create_comment ($token->{data});
6490     $self->{document}->append_child ($comment);
6491     $token = $self->_get_next_token;
6492     redo B;
6493     } elsif ($token->{type} eq 'character') {
6494     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
6495     my $data = $1;
6496     ## As if in the main phase.
6497     ## NOTE: The insertion mode in the main phase
6498     ## just before the phase has been changed to the trailing
6499     ## end phase is either "after body" or "after frameset".
6500     $reconstruct_active_formatting_elements->($insert_to_current)
6501     if $phase eq 'main';
6502    
6503 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($data);
6504 wakaba 1.1
6505     unless (length $token->{data}) {
6506     $token = $self->_get_next_token;
6507     redo B;
6508     }
6509     }
6510    
6511 wakaba 1.3 $self->{parse_error}-> (type => 'after html:#character');
6512 wakaba 1.1 $phase = 'main';
6513     ## reprocess
6514     redo B;
6515     } elsif ($token->{type} eq 'start tag' or
6516     $token->{type} eq 'end tag') {
6517 wakaba 1.30 $self->{parse_error}-> (type => 'after html:'.($token->{type} eq 'end tag' ? '/' : '').$token->{tag_name});
6518 wakaba 1.1 $phase = 'main';
6519     ## reprocess
6520     redo B;
6521     } elsif ($token->{type} eq 'end-of-file') {
6522     ## Stop parsing
6523     last B;
6524     } else {
6525     die "$0: $token->{type}: Unknown token";
6526     }
6527     }
6528     } # B
6529    
6530     ## Stop parsing # MUST
6531    
6532     ## TODO: script stuffs
6533 wakaba 1.3 } # _tree_construct_main
6534    
## Implements the |innerHTML| setter: parses a string as HTML and makes
## the result the new content of |$node|.
##
##   $class->set_inner_html ($node, $string, $onerror);
##
## $node    - A Document (node type 9) or an Element (node type 1).
## $string  - The markup to parse.  It is accessed through a reference
##            to the caller's argument, so it is not copied.
## $onerror - Optional code reference.  In the element case it is
##            called for each parse error with name/value pairs that
##            include |type|, |line| and |column|; when it is omitted
##            the errors are reported with |warn|.  In the document
##            case it is handed to |parse_string| unchanged.
##
## Dies if |$node| is neither a document nor an element.
sub set_inner_html ($$$) {
  my $class = shift;
  my $node = shift;
  my $input = \$_[0];
  my $onerror = $_[1];

  my $node_type = $node->node_type;
  unless ($node_type == 9 or $node_type == 1) {
    die "$0: |set_inner_html| is not defined for node of type $node_type";
  }

  if ($node_type == 9) {
    # MUST

    ## Step 1 # MUST
    ## TODO: If the document has an active parser, ...
    ## ISSUE: There is an issue in the spec.

    ## Step 2 # MUST
    ## Take a snapshot first; the list is modified while we iterate.
    my @old_children = @{$node->child_nodes};
    $node->remove_child ($_) for @old_children;

    ## Step 3, 4, 5 # MUST
    return $class->parse_string ($$input => $node, $onerror);
  }

  ## From here on |$node| is an element.
  ## TODO: If non-html element

  ## NOTE: Most of this code is copied from |parse_string|

  ## Step 1 # MUST
  ## The fragment is parsed into a scratch document created by the
  ## implementation of the element's own document.
  my $this_doc = $node->owner_document;
  my $doc = $this_doc->implementation->create_document;
  $doc->manakai_is_html (1);
  my $p = $class->new;
  $p->{document} = $doc;

  ## Step 9 # MUST
  ## Input stream: |$pos| is the offset of the next unread character,
  ## |$line| and |$column| locate the character most recently read.
  my $pos = 0;
  my $line = 1;
  my $column = 0;
  $p->{set_next_input_character} = sub {
    my $self = shift;

    ## Keep a fixed-size history of the previously read characters.
    pop @{$self->{prev_input_character}};
    unshift @{$self->{prev_input_character}}, $self->{next_input_character};

    ## -1 marks the end of the input.
    if ($pos >= length $$input) {
      $self->{next_input_character} = -1;
      return;
    }

    $self->{next_input_character} = ord substr $$input, $pos++, 1;
    $column++;

    if ($self->{next_input_character} == 0x000A) { # LF
      $line++;
      $column = 0;
    } elsif ($self->{next_input_character} == 0x000D) { # CR
      ## CR and CR LF are both reported as a single LF.
      $pos++ if substr ($$input, $pos, 1) eq "\x0A";
      $self->{next_input_character} = 0x000A; # LF # MUST
      $line++;
      $column = 0;
    } elsif ($self->{next_input_character} > 0x10FFFF) {
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    } elsif ($self->{next_input_character} == 0x0000) { # NULL
      $self->{parse_error}-> (type => 'NULL');
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    }
  };
  $p->{prev_input_character} = [-1, -1, -1];
  $p->{next_input_character} = -1;

  ## Error reporting: append the current position to whatever the
  ## parser reports, then hand it to the caller's handler (or |warn|).
  my $report = $onerror || sub {
    my %info = @_;
    warn "Parse error ($info{type}) at line $info{line} column $info{column}\n";
  };
  $p->{parse_error} = sub {
    $report->(@_, line => $line, column => $column);
  };

  $p->_initialize_tokenizer;
  $p->_initialize_tree_constructor;

  ## Step 2
  ## The initial content model flag depends on the context element.
  my $node_ln = $node->local_name;
  my %content_model_of = (
    title     => 'RCDATA',
    textarea  => 'RCDATA',
    style     => 'CDATA',
    script    => 'CDATA',
    xmp       => 'CDATA',
    iframe    => 'CDATA',
    noembed   => 'CDATA',
    noframes  => 'CDATA',
    noscript  => 'CDATA',
    plaintext => 'PLAINTEXT',
  );
  $p->{content_model_flag} = $content_model_of{$node_ln} || 'PCDATA';
  ## ISSUE: What is "the name of the element"? local name?

  $p->{inner_html_node} = [$node, $node_ln];

  ## Step 4
  my $root = $doc->create_element_ns
      ('http://www.w3.org/1999/xhtml', [undef, 'html']);

  ## Step 5 # MUST
  $doc->append_child ($root);

  ## Step 6 # MUST
  push @{$p->{open_elements}}, [$root, 'html'];

  $p->{head_element} = undef;

  ## Step 7 # MUST
  $p->_reset_insertion_mode;

  ## Step 8 # MUST
  ## The form element pointer is the nearest XHTML |form| element
  ## found by walking up from |$node| (inclusive).
  for (my $anc = $node; defined $anc; $anc = $anc->parent_node) {
    next unless $anc->node_type == 1;
    my $nsuri = $anc->namespace_uri;
    if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml' and
        $anc->local_name eq 'form') { ## TODO: case?
      $p->{form_element} = $anc;
      last;
    }
  }

  ## Step 3 # MUST
  ## Step 10 # MUST
  ## Prime the shared |$token| and run the tree construction stage.
  $token = $p->_get_next_token;
  $p->_tree_construction_main;

  ## Step 11 # MUST
  my @old_children = @{$node->child_nodes};
  $node->remove_child ($_) for @old_children;
  ## ISSUE: mutation events? read-only?

  ## Step 12 # MUST
  ## Move the parsed nodes from the scratch root into |$node|.
  my @parsed_children = @{$root->child_nodes};
  for my $new_child (@parsed_children) {
    $this_doc->adopt_node ($new_child);
    $node->append_child ($new_child);
  }
  ## ISSUE: mutation events?

  return $p->_terminate_tree_constructor;
} # set_inner_html
6688    
6689     } # tree construction stage
6690 wakaba 1.1
## Implements the |innerHTML| getter: serializes the children of
## |$node| as an HTML fragment.
##
##   my $string_ref = $class->get_inner_html ($node, $on_error);
##
## $node     - The node whose children are serialized.
## $on_error - Optional code reference.  It is called with the
##             offending node for every child whose node type this
##             serializer does not handle.
##
## Returns a reference to the serialized string.
sub get_inner_html ($$$) {
  my (undef, $node, $on_error) = @_;

  ## Elements whose text content is emitted literally (not escaped).
  ## The same set is used for the ancestor scan and for descendants so
  ## that the two checks cannot drift apart.
  my $literal_text_element = {
    style => 1, script => 1, xmp => 1, iframe => 1,
    noembed => 1, noframes => 1, noscript => 1,
    plaintext => 1,
  };

  ## Step 1
  my $s = '';

  ## Text directly under |$node| is literal when |$node| itself or one
  ## of its ancestors is an XHTML literal-text element.
  my $in_cdata;
  my $parent = $node;
  while (defined $parent) {
    if ($parent->node_type == 1) {
      ## The namespace may be undefined for elements in no namespace.
      my $nsuri = $parent->namespace_uri;
      if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml' and
          $literal_text_element->{$parent->local_name}) { ## TODO: case thingy
        $in_cdata = 1;
        last;
      }
    }
    $parent = $parent->parent_node;
  }

  ## Step 2
  ## |@node| is a work queue in document order.  Besides node objects
  ## it holds plain strings: end tags to be appended verbatim, and the
  ## marker 'cdata-out' which ends a literal-text section.
  my @node = @{$node->child_nodes};
  C: while (@node) {
    my $child = shift @node;
    unless (ref $child) {
      if ($child eq 'cdata-out') {
        $in_cdata = 0;
      } else {
        $s .= $child; # end tag
      }
      next C;
    }

    my $nt = $child->node_type;
    if ($nt == 1) { # Element
      my $tag_name = $child->tag_name; ## TODO: manakai_tag_name
      $s .= '<' . $tag_name;
      ## NOTE: Non-HTML case:
      ## <http://permalink.gmane.org/gmane.org.w3c.whatwg.discuss/11191>

      my @attrs = @{$child->attributes}; # sort order MUST be stable
      for my $attr (@attrs) { # order is implementation dependent
        my $attr_name = $attr->name; ## TODO: manakai_name
        $s .= ' ' . $attr_name . '="';
        my $attr_value = $attr->value;
        ## escape
        $attr_value =~ s/&/&amp;/g;
        $attr_value =~ s/</&lt;/g;
        $attr_value =~ s/>/&gt;/g;
        $attr_value =~ s/"/&quot;/g;
        $s .= $attr_value . '"';
      }
      $s .= '>';

      ## These elements get neither content nor an end tag.
      next C if {
        area => 1, base => 1, basefont => 1, bgsound => 1,
        br => 1, col => 1, embed => 1, frame => 1, hr => 1,
        img => 1, input => 1, link => 1, meta => 1, param => 1,
        spacer => 1, wbr => 1,
      }->{$tag_name};

      $s .= "\x0A" if $tag_name eq 'pre' or $tag_name eq 'textarea';

      ## Entering a literal-text element: schedule the marker that
      ## switches escaping back on after its end tag.
      if (not $in_cdata and $literal_text_element->{$tag_name}) {
        unshift @node, 'cdata-out';
        $in_cdata = 1;
      }

      unshift @node, @{$child->child_nodes}, '</' . $tag_name . '>';
    } elsif ($nt == 3 or $nt == 4) {
      if ($in_cdata) {
        $s .= $child->data;
      } else {
        my $value = $child->data;
        $value =~ s/&/&amp;/g;
        $value =~ s/</&lt;/g;
        $value =~ s/>/&gt;/g;
        $value =~ s/"/&quot;/g;
        $s .= $value;
      }
    } elsif ($nt == 8) {
      $s .= '<!--' . $child->data . '-->';
    } elsif ($nt == 10) {
      $s .= '<!DOCTYPE ' . $child->name . '>';
    } elsif ($nt == 5) { # entrefs
      ## The children replace the reference in place.  They must go to
      ## the FRONT of the queue; appending them would emit them after
      ## the following siblings and after the parent's end tag.
      unshift @node, @{$child->child_nodes};
    } else {
      $on_error->($child) if defined $on_error;
    }
    ## ISSUE: This code does not support PIs.
  } # C

  ## Step 3
  return \$s;
} # get_inner_html
6790    
6791     1;
6792 wakaba 1.32 # $Date: 2007/06/30 14:13:19 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24