/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.27 - (hide annotations) (download)
Sun Jun 24 14:24:21 2007 UTC (17 years, 4 months ago) by wakaba
Branch: MAIN
Changes since 1.26: +6 -6 lines
++ whatpm/t/ChangeLog	24 Jun 2007 14:19:51 -0000
	* content-model-1.dat: Tests for |footer|
	content model are added.

	* content-model-2.dat: Tests for |ping|
	and |tabindex| attributes are added.  Tests for |datetime|
	attribute of |ins| and |del| elements are added.

	* content-model-4.dat: New test data.

	* ContentChecker.t: |content-model-4.dat| is added.

2007-06-24  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	24 Jun 2007 14:20:06 -0000
	* URIChecker.pm (check_iri_reference): A |decode| method name was
	incorrect.

	* ContentChecker.pm: Support for the |footer| element.
	Check URI syntax for space-separated URI attributes.
	Support for the |tabindex| attribute.  Support
	for |datetime| attribute.

2007-06-24  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.2 package Whatpm::HTML;
2 wakaba 1.1 use strict;
3 wakaba 1.27 our $VERSION=do{my @r=(q$Revision: 1.26 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.1
5 wakaba 1.18 ## ISSUE:
6     ## var doc = implementation.createDocument (null, null, null);
7     ## doc.write ('');
8     ## alert (doc.compatMode);
9 wakaba 1.1
10     my $permitted_slash_tag_name = {
11     base => 1,
12     link => 1,
13     meta => 1,
14     hr => 1,
15     br => 1,
16     img=> 1,
17     embed => 1,
18     param => 1,
19     area => 1,
20     col => 1,
21     input => 1,
22     };
23    
24 wakaba 1.4 my $c1_entity_char = {
25 wakaba 1.9 0x80 => 0x20AC,
26     0x81 => 0xFFFD,
27     0x82 => 0x201A,
28     0x83 => 0x0192,
29     0x84 => 0x201E,
30     0x85 => 0x2026,
31     0x86 => 0x2020,
32     0x87 => 0x2021,
33     0x88 => 0x02C6,
34     0x89 => 0x2030,
35     0x8A => 0x0160,
36     0x8B => 0x2039,
37     0x8C => 0x0152,
38     0x8D => 0xFFFD,
39     0x8E => 0x017D,
40     0x8F => 0xFFFD,
41     0x90 => 0xFFFD,
42     0x91 => 0x2018,
43     0x92 => 0x2019,
44     0x93 => 0x201C,
45     0x94 => 0x201D,
46     0x95 => 0x2022,
47     0x96 => 0x2013,
48     0x97 => 0x2014,
49     0x98 => 0x02DC,
50     0x99 => 0x2122,
51     0x9A => 0x0161,
52     0x9B => 0x203A,
53     0x9C => 0x0153,
54     0x9D => 0xFFFD,
55     0x9E => 0x017E,
56     0x9F => 0x0178,
57 wakaba 1.4 }; # $c1_entity_char
58 wakaba 1.1
59     my $special_category = {
60     address => 1, area => 1, base => 1, basefont => 1, bgsound => 1,
61     blockquote => 1, body => 1, br => 1, center => 1, col => 1, colgroup => 1,
62     dd => 1, dir => 1, div => 1, dl => 1, dt => 1, embed => 1, fieldset => 1,
63     form => 1, frame => 1, frameset => 1, h1 => 1, h2 => 1, h3 => 1,
64     h4 => 1, h5 => 1, h6 => 1, head => 1, hr => 1, iframe => 1, image => 1,
65     img => 1, input => 1, isindex => 1, li => 1, link => 1, listing => 1,
66     menu => 1, meta => 1, noembed => 1, noframes => 1, noscript => 1,
67     ol => 1, optgroup => 1, option => 1, p => 1, param => 1, plaintext => 1,
68     pre => 1, script => 1, select => 1, spacer => 1, style => 1, tbody => 1,
69     textarea => 1, tfoot => 1, thead => 1, title => 1, tr => 1, ul => 1, wbr => 1,
70     };
71     my $scoping_category = {
72     button => 1, caption => 1, html => 1, marquee => 1, object => 1,
73     table => 1, td => 1, th => 1,
74     };
75     my $formatting_category = {
76     a => 1, b => 1, big => 1, em => 1, font => 1, i => 1, nobr => 1,
77     s => 1, small => 1, strile => 1, strong => 1, tt => 1, u => 1,
78     };
79     # $phrasing_category: all other elements
80    
81     sub parse_string ($$$;$) {
82     my $self = shift->new;
83     my $s = \$_[0];
84     $self->{document} = $_[1];
85    
86 wakaba 1.3 ## NOTE: |set_inner_html| copies most of this method's code
87    
88 wakaba 1.1 my $i = 0;
89 wakaba 1.3 my $line = 1;
90     my $column = 0;
91 wakaba 1.1 $self->{set_next_input_character} = sub {
92     my $self = shift;
93 wakaba 1.13
94     pop @{$self->{prev_input_character}};
95     unshift @{$self->{prev_input_character}}, $self->{next_input_character};
96    
97 wakaba 1.1 $self->{next_input_character} = -1 and return if $i >= length $$s;
98     $self->{next_input_character} = ord substr $$s, $i++, 1;
99 wakaba 1.3 $column++;
100 wakaba 1.1
101 wakaba 1.4 if ($self->{next_input_character} == 0x000A) { # LF
102     $line++;
103     $column = 0;
104     } elsif ($self->{next_input_character} == 0x000D) { # CR
105 wakaba 1.15 $i++ if substr ($$s, $i, 1) eq "\x0A";
106 wakaba 1.1 $self->{next_input_character} = 0x000A; # LF # MUST
107 wakaba 1.3 $line++;
108 wakaba 1.4 $column = 0;
109 wakaba 1.1 } elsif ($self->{next_input_character} > 0x10FFFF) {
110     $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
111     } elsif ($self->{next_input_character} == 0x0000) { # NULL
112 wakaba 1.8 $self->{parse_error}-> (type => 'NULL');
113 wakaba 1.1 $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
114     }
115     };
116 wakaba 1.13 $self->{prev_input_character} = [-1, -1, -1];
117     $self->{next_input_character} = -1;
118 wakaba 1.1
119 wakaba 1.3 my $onerror = $_[2] || sub {
120     my (%opt) = @_;
121     warn "Parse error ($opt{type}) at line $opt{line} column $opt{column}\n";
122     };
123     $self->{parse_error} = sub {
124     $onerror->(@_, line => $line, column => $column);
125 wakaba 1.1 };
126    
127     $self->_initialize_tokenizer;
128     $self->_initialize_tree_constructor;
129     $self->_construct_tree;
130     $self->_terminate_tree_constructor;
131    
132     return $self->{document};
133     } # parse_string
134    
135     sub new ($) {
136     my $class = shift;
137     my $self = bless {}, $class;
138     $self->{set_next_input_character} = sub {
139     $self->{next_input_character} = -1;
140     };
141     $self->{parse_error} = sub {
142     #
143     };
144     return $self;
145     } # new
146    
147     ## Implementations MUST act as if state machine in the spec
148    
149     sub _initialize_tokenizer ($) {
150     my $self = shift;
151     $self->{state} = 'data'; # MUST
152     $self->{content_model_flag} = 'PCDATA'; # be
153     undef $self->{current_token}; # start tag, end tag, comment, or DOCTYPE
154     undef $self->{current_attribute};
155     undef $self->{last_emitted_start_tag_name};
156     undef $self->{last_attribute_value_state};
157     $self->{char} = [];
158     # $self->{next_input_character}
159    
160     if (@{$self->{char}}) {
161     $self->{next_input_character} = shift @{$self->{char}};
162     } else {
163     $self->{set_next_input_character}->($self);
164     }
165    
166     $self->{token} = [];
167 wakaba 1.18 # $self->{escape}
168 wakaba 1.1 } # _initialize_tokenizer
169    
170     ## A token has:
171     ## ->{type} eq 'DOCTYPE', 'start tag', 'end tag', 'comment',
172     ## 'character', or 'end-of-file'
173 wakaba 1.18 ## ->{name} (DOCTYPE, start tag (tag name), end tag (tag name))
174     ## ->{public_identifier} (DOCTYPE)
175     ## ->{system_identifier} (DOCTYPE)
176     ## ->{correct} == 1 or 0 (DOCTYPE)
177 wakaba 1.1 ## ->{attributes} isa HASH (start tag, end tag)
178     ## ->{data} (comment, character)
179    
180     ## Emitted token MUST immediately be handled by the tree construction state.
181    
182     ## Before each step, UA MAY check to see if either one of the scripts in
183     ## "list of scripts that will execute as soon as possible" or the first
184     ## script in the "list of scripts that will execute asynchronously",
185     ## has completed loading. If one has, then it MUST be executed
186     ## and removed from the list.
187    
188     sub _get_next_token ($) {
189     my $self = shift;
190     if (@{$self->{token}}) {
191     return shift @{$self->{token}};
192     }
193    
194     A: {
195     if ($self->{state} eq 'data') {
196     if ($self->{next_input_character} == 0x0026) { # &
197     if ($self->{content_model_flag} eq 'PCDATA' or
198     $self->{content_model_flag} eq 'RCDATA') {
199     $self->{state} = 'entity data';
200    
201     if (@{$self->{char}}) {
202     $self->{next_input_character} = shift @{$self->{char}};
203     } else {
204     $self->{set_next_input_character}->($self);
205     }
206    
207     redo A;
208     } else {
209     #
210     }
211 wakaba 1.13 } elsif ($self->{next_input_character} == 0x002D) { # -
212     if ($self->{content_model_flag} eq 'RCDATA' or
213     $self->{content_model_flag} eq 'CDATA') {
214     unless ($self->{escape}) {
215     if ($self->{prev_input_character}->[0] == 0x002D and # -
216     $self->{prev_input_character}->[1] == 0x0021 and # !
217     $self->{prev_input_character}->[2] == 0x003C) { # <
218     $self->{escape} = 1;
219     }
220     }
221     }
222    
223     #
224 wakaba 1.1 } elsif ($self->{next_input_character} == 0x003C) { # <
225 wakaba 1.13 if ($self->{content_model_flag} eq 'PCDATA' or
226     (($self->{content_model_flag} eq 'CDATA' or
227     $self->{content_model_flag} eq 'RCDATA') and
228     not $self->{escape})) {
229 wakaba 1.1 $self->{state} = 'tag open';
230    
231     if (@{$self->{char}}) {
232     $self->{next_input_character} = shift @{$self->{char}};
233     } else {
234     $self->{set_next_input_character}->($self);
235     }
236    
237     redo A;
238     } else {
239     #
240     }
241 wakaba 1.13 } elsif ($self->{next_input_character} == 0x003E) { # >
242     if ($self->{escape} and
243     ($self->{content_model_flag} eq 'RCDATA' or
244     $self->{content_model_flag} eq 'CDATA')) {
245     if ($self->{prev_input_character}->[0] == 0x002D and # -
246     $self->{prev_input_character}->[1] == 0x002D) { # -
247     delete $self->{escape};
248     }
249     }
250    
251     #
252 wakaba 1.1 } elsif ($self->{next_input_character} == -1) {
253     return ({type => 'end-of-file'});
254     last A; ## TODO: ok?
255     }
256     # Anything else
257     my $token = {type => 'character',
258     data => chr $self->{next_input_character}};
259     ## Stay in the data state
260    
261     if (@{$self->{char}}) {
262     $self->{next_input_character} = shift @{$self->{char}};
263     } else {
264     $self->{set_next_input_character}->($self);
265     }
266    
267    
268     return ($token);
269    
270     redo A;
271     } elsif ($self->{state} eq 'entity data') {
272     ## (cannot happen in CDATA state)
273    
274 wakaba 1.26 my $token = $self->_tokenize_attempt_to_consume_an_entity (0);
275 wakaba 1.1
276     $self->{state} = 'data';
277     # next-input-character is already done
278    
279     unless (defined $token) {
280     return ({type => 'character', data => '&'});
281     } else {
282     return ($token);
283     }
284    
285     redo A;
286     } elsif ($self->{state} eq 'tag open') {
287     if ($self->{content_model_flag} eq 'RCDATA' or
288     $self->{content_model_flag} eq 'CDATA') {
289     if ($self->{next_input_character} == 0x002F) { # /
290    
291     if (@{$self->{char}}) {
292     $self->{next_input_character} = shift @{$self->{char}};
293     } else {
294     $self->{set_next_input_character}->($self);
295     }
296    
297     $self->{state} = 'close tag open';
298     redo A;
299     } else {
300     ## reconsume
301     $self->{state} = 'data';
302    
303     return ({type => 'character', data => '<'});
304    
305     redo A;
306     }
307     } elsif ($self->{content_model_flag} eq 'PCDATA') {
308     if ($self->{next_input_character} == 0x0021) { # !
309     $self->{state} = 'markup declaration open';
310    
311     if (@{$self->{char}}) {
312     $self->{next_input_character} = shift @{$self->{char}};
313     } else {
314     $self->{set_next_input_character}->($self);
315     }
316    
317     redo A;
318     } elsif ($self->{next_input_character} == 0x002F) { # /
319     $self->{state} = 'close tag open';
320    
321     if (@{$self->{char}}) {
322     $self->{next_input_character} = shift @{$self->{char}};
323     } else {
324     $self->{set_next_input_character}->($self);
325     }
326    
327     redo A;
328     } elsif (0x0041 <= $self->{next_input_character} and
329     $self->{next_input_character} <= 0x005A) { # A..Z
330     $self->{current_token}
331     = {type => 'start tag',
332     tag_name => chr ($self->{next_input_character} + 0x0020)};
333     $self->{state} = 'tag name';
334    
335     if (@{$self->{char}}) {
336     $self->{next_input_character} = shift @{$self->{char}};
337     } else {
338     $self->{set_next_input_character}->($self);
339     }
340    
341     redo A;
342     } elsif (0x0061 <= $self->{next_input_character} and
343     $self->{next_input_character} <= 0x007A) { # a..z
344     $self->{current_token} = {type => 'start tag',
345     tag_name => chr ($self->{next_input_character})};
346     $self->{state} = 'tag name';
347    
348     if (@{$self->{char}}) {
349     $self->{next_input_character} = shift @{$self->{char}};
350     } else {
351     $self->{set_next_input_character}->($self);
352     }
353    
354     redo A;
355     } elsif ($self->{next_input_character} == 0x003E) { # >
356 wakaba 1.3 $self->{parse_error}-> (type => 'empty start tag');
357 wakaba 1.1 $self->{state} = 'data';
358    
359     if (@{$self->{char}}) {
360     $self->{next_input_character} = shift @{$self->{char}};
361     } else {
362     $self->{set_next_input_character}->($self);
363     }
364    
365    
366     return ({type => 'character', data => '<>'});
367    
368     redo A;
369     } elsif ($self->{next_input_character} == 0x003F) { # ?
370 wakaba 1.3 $self->{parse_error}-> (type => 'pio');
371 wakaba 1.1 $self->{state} = 'bogus comment';
372     ## $self->{next_input_character} is intentionally left as is
373     redo A;
374     } else {
375 wakaba 1.3 $self->{parse_error}-> (type => 'bare stago');
376 wakaba 1.1 $self->{state} = 'data';
377     ## reconsume
378    
379     return ({type => 'character', data => '<'});
380    
381     redo A;
382     }
383     } else {
384     die "$0: $self->{content_model_flag}: Unknown content model flag";
385     }
386     } elsif ($self->{state} eq 'close tag open') {
387     if ($self->{content_model_flag} eq 'RCDATA' or
388     $self->{content_model_flag} eq 'CDATA') {
389 wakaba 1.23 if (defined $self->{last_emitted_start_tag_name}) {
390     my @next_char;
391     TAGNAME: for (my $i = 0; $i < length $self->{last_emitted_start_tag_name}; $i++) {
392     push @next_char, $self->{next_input_character};
393     my $c = ord substr ($self->{last_emitted_start_tag_name}, $i, 1);
394     my $C = 0x0061 <= $c && $c <= 0x007A ? $c - 0x0020 : $c;
395     if ($self->{next_input_character} == $c or $self->{next_input_character} == $C) {
396    
397 wakaba 1.1 if (@{$self->{char}}) {
398     $self->{next_input_character} = shift @{$self->{char}};
399     } else {
400     $self->{set_next_input_character}->($self);
401     }
402    
403 wakaba 1.23 next TAGNAME;
404     } else {
405     $self->{next_input_character} = shift @next_char; # reconsume
406     unshift @{$self->{char}}, (@next_char);
407     $self->{state} = 'data';
408    
409     return ({type => 'character', data => '</'});
410    
411     redo A;
412     }
413     }
414     push @next_char, $self->{next_input_character};
415    
416     unless ($self->{next_input_character} == 0x0009 or # HT
417     $self->{next_input_character} == 0x000A or # LF
418     $self->{next_input_character} == 0x000B or # VT
419     $self->{next_input_character} == 0x000C or # FF
420     $self->{next_input_character} == 0x0020 or # SP
421     $self->{next_input_character} == 0x003E or # >
422     $self->{next_input_character} == 0x002F or # /
423     $self->{next_input_character} == -1) {
424 wakaba 1.1 $self->{next_input_character} = shift @next_char; # reconsume
425     unshift @{$self->{char}}, (@next_char);
426     $self->{state} = 'data';
427     return ({type => 'character', data => '</'});
428     redo A;
429 wakaba 1.23 } else {
430     $self->{next_input_character} = shift @next_char;
431     unshift @{$self->{char}}, (@next_char);
432     # and consume...
433 wakaba 1.1 }
434 wakaba 1.23 } else {
435     ## No start tag token has ever been emitted
436     # next-input-character is already done
437 wakaba 1.1 $self->{state} = 'data';
438     return ({type => 'character', data => '</'});
439     redo A;
440     }
441     }
442    
443     if (0x0041 <= $self->{next_input_character} and
444     $self->{next_input_character} <= 0x005A) { # A..Z
445     $self->{current_token} = {type => 'end tag',
446     tag_name => chr ($self->{next_input_character} + 0x0020)};
447     $self->{state} = 'tag name';
448    
449     if (@{$self->{char}}) {
450     $self->{next_input_character} = shift @{$self->{char}};
451     } else {
452     $self->{set_next_input_character}->($self);
453     }
454    
455     redo A;
456     } elsif (0x0061 <= $self->{next_input_character} and
457     $self->{next_input_character} <= 0x007A) { # a..z
458     $self->{current_token} = {type => 'end tag',
459     tag_name => chr ($self->{next_input_character})};
460     $self->{state} = 'tag name';
461    
462     if (@{$self->{char}}) {
463     $self->{next_input_character} = shift @{$self->{char}};
464     } else {
465     $self->{set_next_input_character}->($self);
466     }
467    
468     redo A;
469     } elsif ($self->{next_input_character} == 0x003E) { # >
470 wakaba 1.3 $self->{parse_error}-> (type => 'empty end tag');
471 wakaba 1.1 $self->{state} = 'data';
472    
473     if (@{$self->{char}}) {
474     $self->{next_input_character} = shift @{$self->{char}};
475     } else {
476     $self->{set_next_input_character}->($self);
477     }
478    
479     redo A;
480     } elsif ($self->{next_input_character} == -1) {
481 wakaba 1.3 $self->{parse_error}-> (type => 'bare etago');
482 wakaba 1.1 $self->{state} = 'data';
483     # reconsume
484    
485     return ({type => 'character', data => '</'});
486    
487     redo A;
488     } else {
489 wakaba 1.3 $self->{parse_error}-> (type => 'bogus end tag');
490 wakaba 1.1 $self->{state} = 'bogus comment';
491     ## $self->{next_input_character} is intentionally left as is
492     redo A;
493     }
494     } elsif ($self->{state} eq 'tag name') {
495     if ($self->{next_input_character} == 0x0009 or # HT
496     $self->{next_input_character} == 0x000A or # LF
497     $self->{next_input_character} == 0x000B or # VT
498     $self->{next_input_character} == 0x000C or # FF
499     $self->{next_input_character} == 0x0020) { # SP
500     $self->{state} = 'before attribute name';
501    
502     if (@{$self->{char}}) {
503     $self->{next_input_character} = shift @{$self->{char}};
504     } else {
505     $self->{set_next_input_character}->($self);
506     }
507    
508     redo A;
509     } elsif ($self->{next_input_character} == 0x003E) { # >
510     if ($self->{current_token}->{type} eq 'start tag') {
511     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
512     } elsif ($self->{current_token}->{type} eq 'end tag') {
513     $self->{content_model_flag} = 'PCDATA'; # MUST
514     if ($self->{current_token}->{attributes}) {
515 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
516 wakaba 1.1 }
517     } else {
518     die "$0: $self->{current_token}->{type}: Unknown token type";
519     }
520     $self->{state} = 'data';
521    
522     if (@{$self->{char}}) {
523     $self->{next_input_character} = shift @{$self->{char}};
524     } else {
525     $self->{set_next_input_character}->($self);
526     }
527    
528    
529     return ($self->{current_token}); # start tag or end tag
530    
531     redo A;
532     } elsif (0x0041 <= $self->{next_input_character} and
533     $self->{next_input_character} <= 0x005A) { # A..Z
534     $self->{current_token}->{tag_name} .= chr ($self->{next_input_character} + 0x0020);
535     # start tag or end tag
536     ## Stay in this state
537    
538     if (@{$self->{char}}) {
539     $self->{next_input_character} = shift @{$self->{char}};
540     } else {
541     $self->{set_next_input_character}->($self);
542     }
543    
544     redo A;
545 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
546 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed tag');
547 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
548     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
549     } elsif ($self->{current_token}->{type} eq 'end tag') {
550     $self->{content_model_flag} = 'PCDATA'; # MUST
551     if ($self->{current_token}->{attributes}) {
552 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
553 wakaba 1.1 }
554     } else {
555     die "$0: $self->{current_token}->{type}: Unknown token type";
556     }
557     $self->{state} = 'data';
558     # reconsume
559    
560     return ($self->{current_token}); # start tag or end tag
561    
562     redo A;
563     } elsif ($self->{next_input_character} == 0x002F) { # /
564    
565     if (@{$self->{char}}) {
566     $self->{next_input_character} = shift @{$self->{char}};
567     } else {
568     $self->{set_next_input_character}->($self);
569     }
570    
571     if ($self->{next_input_character} == 0x003E and # >
572     $self->{current_token}->{type} eq 'start tag' and
573     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
574     # permitted slash
575     #
576     } else {
577 wakaba 1.3 $self->{parse_error}-> (type => 'nestc');
578 wakaba 1.1 }
579     $self->{state} = 'before attribute name';
580     # next-input-character is already done
581     redo A;
582     } else {
583     $self->{current_token}->{tag_name} .= chr $self->{next_input_character};
584     # start tag or end tag
585     ## Stay in the state
586    
587     if (@{$self->{char}}) {
588     $self->{next_input_character} = shift @{$self->{char}};
589     } else {
590     $self->{set_next_input_character}->($self);
591     }
592    
593     redo A;
594     }
595     } elsif ($self->{state} eq 'before attribute name') {
596     if ($self->{next_input_character} == 0x0009 or # HT
597     $self->{next_input_character} == 0x000A or # LF
598     $self->{next_input_character} == 0x000B or # VT
599     $self->{next_input_character} == 0x000C or # FF
600     $self->{next_input_character} == 0x0020) { # SP
601     ## Stay in the state
602    
603     if (@{$self->{char}}) {
604     $self->{next_input_character} = shift @{$self->{char}};
605     } else {
606     $self->{set_next_input_character}->($self);
607     }
608    
609     redo A;
610     } elsif ($self->{next_input_character} == 0x003E) { # >
611     if ($self->{current_token}->{type} eq 'start tag') {
612     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
613     } elsif ($self->{current_token}->{type} eq 'end tag') {
614     $self->{content_model_flag} = 'PCDATA'; # MUST
615     if ($self->{current_token}->{attributes}) {
616 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
617 wakaba 1.1 }
618     } else {
619     die "$0: $self->{current_token}->{type}: Unknown token type";
620     }
621     $self->{state} = 'data';
622    
623     if (@{$self->{char}}) {
624     $self->{next_input_character} = shift @{$self->{char}};
625     } else {
626     $self->{set_next_input_character}->($self);
627     }
628    
629    
630     return ($self->{current_token}); # start tag or end tag
631    
632     redo A;
633     } elsif (0x0041 <= $self->{next_input_character} and
634     $self->{next_input_character} <= 0x005A) { # A..Z
635     $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
636     value => ''};
637     $self->{state} = 'attribute name';
638    
639     if (@{$self->{char}}) {
640     $self->{next_input_character} = shift @{$self->{char}};
641     } else {
642     $self->{set_next_input_character}->($self);
643     }
644    
645     redo A;
646     } elsif ($self->{next_input_character} == 0x002F) { # /
647    
648     if (@{$self->{char}}) {
649     $self->{next_input_character} = shift @{$self->{char}};
650     } else {
651     $self->{set_next_input_character}->($self);
652     }
653    
654     if ($self->{next_input_character} == 0x003E and # >
655     $self->{current_token}->{type} eq 'start tag' and
656     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
657     # permitted slash
658     #
659     } else {
660 wakaba 1.3 $self->{parse_error}-> (type => 'nestc');
661 wakaba 1.1 }
662     ## Stay in the state
663     # next-input-character is already done
664     redo A;
665 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
666 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed tag');
667 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
668     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
669     } elsif ($self->{current_token}->{type} eq 'end tag') {
670     $self->{content_model_flag} = 'PCDATA'; # MUST
671     if ($self->{current_token}->{attributes}) {
672 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
673 wakaba 1.1 }
674     } else {
675     die "$0: $self->{current_token}->{type}: Unknown token type";
676     }
677     $self->{state} = 'data';
678     # reconsume
679    
680     return ($self->{current_token}); # start tag or end tag
681    
682     redo A;
683     } else {
684     $self->{current_attribute} = {name => chr ($self->{next_input_character}),
685     value => ''};
686     $self->{state} = 'attribute name';
687    
688     if (@{$self->{char}}) {
689     $self->{next_input_character} = shift @{$self->{char}};
690     } else {
691     $self->{set_next_input_character}->($self);
692     }
693    
694     redo A;
695     }
696     } elsif ($self->{state} eq 'attribute name') {
697     my $before_leave = sub {
698     if (exists $self->{current_token}->{attributes} # start tag or end tag
699     ->{$self->{current_attribute}->{name}}) { # MUST
700 wakaba 1.3 $self->{parse_error}-> (type => 'dupulicate attribute');
701 wakaba 1.1 ## Discard $self->{current_attribute} # MUST
702     } else {
703     $self->{current_token}->{attributes}->{$self->{current_attribute}->{name}}
704     = $self->{current_attribute};
705     }
706     }; # $before_leave
707    
708     if ($self->{next_input_character} == 0x0009 or # HT
709     $self->{next_input_character} == 0x000A or # LF
710     $self->{next_input_character} == 0x000B or # VT
711     $self->{next_input_character} == 0x000C or # FF
712     $self->{next_input_character} == 0x0020) { # SP
713     $before_leave->();
714     $self->{state} = 'after attribute name';
715    
716     if (@{$self->{char}}) {
717     $self->{next_input_character} = shift @{$self->{char}};
718     } else {
719     $self->{set_next_input_character}->($self);
720     }
721    
722     redo A;
723     } elsif ($self->{next_input_character} == 0x003D) { # =
724     $before_leave->();
725     $self->{state} = 'before attribute value';
726    
727     if (@{$self->{char}}) {
728     $self->{next_input_character} = shift @{$self->{char}};
729     } else {
730     $self->{set_next_input_character}->($self);
731     }
732    
733     redo A;
734     } elsif ($self->{next_input_character} == 0x003E) { # >
735     $before_leave->();
736     if ($self->{current_token}->{type} eq 'start tag') {
737     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
738     } elsif ($self->{current_token}->{type} eq 'end tag') {
739     $self->{content_model_flag} = 'PCDATA'; # MUST
740     if ($self->{current_token}->{attributes}) {
741 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
742 wakaba 1.1 }
743     } else {
744     die "$0: $self->{current_token}->{type}: Unknown token type";
745     }
746     $self->{state} = 'data';
747    
748     if (@{$self->{char}}) {
749     $self->{next_input_character} = shift @{$self->{char}};
750     } else {
751     $self->{set_next_input_character}->($self);
752     }
753    
754    
755     return ($self->{current_token}); # start tag or end tag
756    
757     redo A;
758     } elsif (0x0041 <= $self->{next_input_character} and
759     $self->{next_input_character} <= 0x005A) { # A..Z
760     $self->{current_attribute}->{name} .= chr ($self->{next_input_character} + 0x0020);
761     ## Stay in the state
762    
763     if (@{$self->{char}}) {
764     $self->{next_input_character} = shift @{$self->{char}};
765     } else {
766     $self->{set_next_input_character}->($self);
767     }
768    
769     redo A;
770     } elsif ($self->{next_input_character} == 0x002F) { # /
771     $before_leave->();
772    
773     if (@{$self->{char}}) {
774     $self->{next_input_character} = shift @{$self->{char}};
775     } else {
776     $self->{set_next_input_character}->($self);
777     }
778    
779     if ($self->{next_input_character} == 0x003E and # >
780     $self->{current_token}->{type} eq 'start tag' and
781     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
782     # permitted slash
783     #
784     } else {
785 wakaba 1.3 $self->{parse_error}-> (type => 'nestc');
786 wakaba 1.1 }
787     $self->{state} = 'before attribute name';
788     # next-input-character is already done
789     redo A;
790 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
791 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed tag');
792 wakaba 1.1 $before_leave->();
793     if ($self->{current_token}->{type} eq 'start tag') {
794     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
795     } elsif ($self->{current_token}->{type} eq 'end tag') {
796     $self->{content_model_flag} = 'PCDATA'; # MUST
797     if ($self->{current_token}->{attributes}) {
798 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
799 wakaba 1.1 }
800     } else {
801     die "$0: $self->{current_token}->{type}: Unknown token type";
802     }
803     $self->{state} = 'data';
804     # reconsume
805    
806     return ($self->{current_token}); # start tag or end tag
807    
808     redo A;
809     } else {
810     $self->{current_attribute}->{name} .= chr ($self->{next_input_character});
811     ## Stay in the state
812    
813     if (@{$self->{char}}) {
814     $self->{next_input_character} = shift @{$self->{char}};
815     } else {
816     $self->{set_next_input_character}->($self);
817     }
818    
819     redo A;
820     }
821     } elsif ($self->{state} eq 'after attribute name') {
822     if ($self->{next_input_character} == 0x0009 or # HT
823     $self->{next_input_character} == 0x000A or # LF
824     $self->{next_input_character} == 0x000B or # VT
825     $self->{next_input_character} == 0x000C or # FF
826     $self->{next_input_character} == 0x0020) { # SP
827     ## Stay in the state
828    
829     if (@{$self->{char}}) {
830     $self->{next_input_character} = shift @{$self->{char}};
831     } else {
832     $self->{set_next_input_character}->($self);
833     }
834    
835     redo A;
836     } elsif ($self->{next_input_character} == 0x003D) { # =
837     $self->{state} = 'before attribute value';
838    
839     if (@{$self->{char}}) {
840     $self->{next_input_character} = shift @{$self->{char}};
841     } else {
842     $self->{set_next_input_character}->($self);
843     }
844    
845     redo A;
846     } elsif ($self->{next_input_character} == 0x003E) { # >
847     if ($self->{current_token}->{type} eq 'start tag') {
848     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
849     } elsif ($self->{current_token}->{type} eq 'end tag') {
850     $self->{content_model_flag} = 'PCDATA'; # MUST
851     if ($self->{current_token}->{attributes}) {
852 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
853 wakaba 1.1 }
854     } else {
855     die "$0: $self->{current_token}->{type}: Unknown token type";
856     }
857     $self->{state} = 'data';
858    
859     if (@{$self->{char}}) {
860     $self->{next_input_character} = shift @{$self->{char}};
861     } else {
862     $self->{set_next_input_character}->($self);
863     }
864    
865    
866     return ($self->{current_token}); # start tag or end tag
867    
868     redo A;
869     } elsif (0x0041 <= $self->{next_input_character} and
870     $self->{next_input_character} <= 0x005A) { # A..Z
871     $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
872     value => ''};
873     $self->{state} = 'attribute name';
874    
875     if (@{$self->{char}}) {
876     $self->{next_input_character} = shift @{$self->{char}};
877     } else {
878     $self->{set_next_input_character}->($self);
879     }
880    
881     redo A;
882     } elsif ($self->{next_input_character} == 0x002F) { # /
883    
884     if (@{$self->{char}}) {
885     $self->{next_input_character} = shift @{$self->{char}};
886     } else {
887     $self->{set_next_input_character}->($self);
888     }
889    
890     if ($self->{next_input_character} == 0x003E and # >
891     $self->{current_token}->{type} eq 'start tag' and
892     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
893     # permitted slash
894     #
895     } else {
896 wakaba 1.3 $self->{parse_error}-> (type => 'nestc');
897 wakaba 1.1 }
898     $self->{state} = 'before attribute name';
899     # next-input-character is already done
900     redo A;
901 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
902 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed tag');
903 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
904     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
905     } elsif ($self->{current_token}->{type} eq 'end tag') {
906     $self->{content_model_flag} = 'PCDATA'; # MUST
907     if ($self->{current_token}->{attributes}) {
908 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
909 wakaba 1.1 }
910     } else {
911     die "$0: $self->{current_token}->{type}: Unknown token type";
912     }
913     $self->{state} = 'data';
914     # reconsume
915    
916     return ($self->{current_token}); # start tag or end tag
917    
918     redo A;
919     } else {
920     $self->{current_attribute} = {name => chr ($self->{next_input_character}),
921     value => ''};
922     $self->{state} = 'attribute name';
923    
924     if (@{$self->{char}}) {
925     $self->{next_input_character} = shift @{$self->{char}};
926     } else {
927     $self->{set_next_input_character}->($self);
928     }
929    
930     redo A;
931     }
932     } elsif ($self->{state} eq 'before attribute value') {
933     if ($self->{next_input_character} == 0x0009 or # HT
934     $self->{next_input_character} == 0x000A or # LF
935     $self->{next_input_character} == 0x000B or # VT
936     $self->{next_input_character} == 0x000C or # FF
937     $self->{next_input_character} == 0x0020) { # SP
938     ## Stay in the state
939    
940     if (@{$self->{char}}) {
941     $self->{next_input_character} = shift @{$self->{char}};
942     } else {
943     $self->{set_next_input_character}->($self);
944     }
945    
946     redo A;
947     } elsif ($self->{next_input_character} == 0x0022) { # "
948     $self->{state} = 'attribute value (double-quoted)';
949    
950     if (@{$self->{char}}) {
951     $self->{next_input_character} = shift @{$self->{char}};
952     } else {
953     $self->{set_next_input_character}->($self);
954     }
955    
956     redo A;
957     } elsif ($self->{next_input_character} == 0x0026) { # &
958     $self->{state} = 'attribute value (unquoted)';
959     ## reconsume
960     redo A;
961     } elsif ($self->{next_input_character} == 0x0027) { # '
962     $self->{state} = 'attribute value (single-quoted)';
963    
964     if (@{$self->{char}}) {
965     $self->{next_input_character} = shift @{$self->{char}};
966     } else {
967     $self->{set_next_input_character}->($self);
968     }
969    
970     redo A;
971     } elsif ($self->{next_input_character} == 0x003E) { # >
972     if ($self->{current_token}->{type} eq 'start tag') {
973     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
974     } elsif ($self->{current_token}->{type} eq 'end tag') {
975     $self->{content_model_flag} = 'PCDATA'; # MUST
976     if ($self->{current_token}->{attributes}) {
977 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
978 wakaba 1.1 }
979     } else {
980     die "$0: $self->{current_token}->{type}: Unknown token type";
981     }
982     $self->{state} = 'data';
983    
984     if (@{$self->{char}}) {
985     $self->{next_input_character} = shift @{$self->{char}};
986     } else {
987     $self->{set_next_input_character}->($self);
988     }
989    
990    
991     return ($self->{current_token}); # start tag or end tag
992    
993     redo A;
994 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
995 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed tag');
996 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
997     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
998     } elsif ($self->{current_token}->{type} eq 'end tag') {
999     $self->{content_model_flag} = 'PCDATA'; # MUST
1000     if ($self->{current_token}->{attributes}) {
1001 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
1002 wakaba 1.1 }
1003     } else {
1004     die "$0: $self->{current_token}->{type}: Unknown token type";
1005     }
1006     $self->{state} = 'data';
1007     ## reconsume
1008    
1009     return ($self->{current_token}); # start tag or end tag
1010    
1011     redo A;
1012     } else {
1013     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1014     $self->{state} = 'attribute value (unquoted)';
1015    
1016     if (@{$self->{char}}) {
1017     $self->{next_input_character} = shift @{$self->{char}};
1018     } else {
1019     $self->{set_next_input_character}->($self);
1020     }
1021    
1022     redo A;
1023     }
1024     } elsif ($self->{state} eq 'attribute value (double-quoted)') {
1025     if ($self->{next_input_character} == 0x0022) { # "
1026     $self->{state} = 'before attribute name';
1027    
1028     if (@{$self->{char}}) {
1029     $self->{next_input_character} = shift @{$self->{char}};
1030     } else {
1031     $self->{set_next_input_character}->($self);
1032     }
1033    
1034     redo A;
1035     } elsif ($self->{next_input_character} == 0x0026) { # &
1036     $self->{last_attribute_value_state} = 'attribute value (double-quoted)';
1037     $self->{state} = 'entity in attribute value';
1038    
1039     if (@{$self->{char}}) {
1040     $self->{next_input_character} = shift @{$self->{char}};
1041     } else {
1042     $self->{set_next_input_character}->($self);
1043     }
1044    
1045     redo A;
1046     } elsif ($self->{next_input_character} == -1) {
1047 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed attribute value');
1048 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1049     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1050     } elsif ($self->{current_token}->{type} eq 'end tag') {
1051     $self->{content_model_flag} = 'PCDATA'; # MUST
1052     if ($self->{current_token}->{attributes}) {
1053 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
1054 wakaba 1.1 }
1055     } else {
1056     die "$0: $self->{current_token}->{type}: Unknown token type";
1057     }
1058     $self->{state} = 'data';
1059     ## reconsume
1060    
1061     return ($self->{current_token}); # start tag or end tag
1062    
1063     redo A;
1064     } else {
1065     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1066     ## Stay in the state
1067    
1068     if (@{$self->{char}}) {
1069     $self->{next_input_character} = shift @{$self->{char}};
1070     } else {
1071     $self->{set_next_input_character}->($self);
1072     }
1073    
1074     redo A;
1075     }
1076     } elsif ($self->{state} eq 'attribute value (single-quoted)') {
1077     if ($self->{next_input_character} == 0x0027) { # '
1078     $self->{state} = 'before attribute name';
1079    
1080     if (@{$self->{char}}) {
1081     $self->{next_input_character} = shift @{$self->{char}};
1082     } else {
1083     $self->{set_next_input_character}->($self);
1084     }
1085    
1086     redo A;
1087     } elsif ($self->{next_input_character} == 0x0026) { # &
1088     $self->{last_attribute_value_state} = 'attribute value (single-quoted)';
1089     $self->{state} = 'entity in attribute value';
1090    
1091     if (@{$self->{char}}) {
1092     $self->{next_input_character} = shift @{$self->{char}};
1093     } else {
1094     $self->{set_next_input_character}->($self);
1095     }
1096    
1097     redo A;
1098     } elsif ($self->{next_input_character} == -1) {
1099 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed attribute value');
1100 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1101     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1102     } elsif ($self->{current_token}->{type} eq 'end tag') {
1103     $self->{content_model_flag} = 'PCDATA'; # MUST
1104     if ($self->{current_token}->{attributes}) {
1105 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
1106 wakaba 1.1 }
1107     } else {
1108     die "$0: $self->{current_token}->{type}: Unknown token type";
1109     }
1110     $self->{state} = 'data';
1111     ## reconsume
1112    
1113     return ($self->{current_token}); # start tag or end tag
1114    
1115     redo A;
1116     } else {
1117     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1118     ## Stay in the state
1119    
1120     if (@{$self->{char}}) {
1121     $self->{next_input_character} = shift @{$self->{char}};
1122     } else {
1123     $self->{set_next_input_character}->($self);
1124     }
1125    
1126     redo A;
1127     }
1128     } elsif ($self->{state} eq 'attribute value (unquoted)') {
1129     if ($self->{next_input_character} == 0x0009 or # HT
1130     $self->{next_input_character} == 0x000A or # LF
1131     $self->{next_input_character} == 0x000B or # HT
1132     $self->{next_input_character} == 0x000C or # FF
1133     $self->{next_input_character} == 0x0020) { # SP
1134     $self->{state} = 'before attribute name';
1135    
1136     if (@{$self->{char}}) {
1137     $self->{next_input_character} = shift @{$self->{char}};
1138     } else {
1139     $self->{set_next_input_character}->($self);
1140     }
1141    
1142     redo A;
1143     } elsif ($self->{next_input_character} == 0x0026) { # &
1144     $self->{last_attribute_value_state} = 'attribute value (unquoted)';
1145     $self->{state} = 'entity in attribute value';
1146    
1147     if (@{$self->{char}}) {
1148     $self->{next_input_character} = shift @{$self->{char}};
1149     } else {
1150     $self->{set_next_input_character}->($self);
1151     }
1152    
1153     redo A;
1154     } elsif ($self->{next_input_character} == 0x003E) { # >
1155     if ($self->{current_token}->{type} eq 'start tag') {
1156     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1157     } elsif ($self->{current_token}->{type} eq 'end tag') {
1158     $self->{content_model_flag} = 'PCDATA'; # MUST
1159     if ($self->{current_token}->{attributes}) {
1160 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
1161 wakaba 1.1 }
1162     } else {
1163     die "$0: $self->{current_token}->{type}: Unknown token type";
1164     }
1165     $self->{state} = 'data';
1166    
1167     if (@{$self->{char}}) {
1168     $self->{next_input_character} = shift @{$self->{char}};
1169     } else {
1170     $self->{set_next_input_character}->($self);
1171     }
1172    
1173    
1174     return ($self->{current_token}); # start tag or end tag
1175    
1176     redo A;
1177 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
1178 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed tag');
1179 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1180     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1181     } elsif ($self->{current_token}->{type} eq 'end tag') {
1182     $self->{content_model_flag} = 'PCDATA'; # MUST
1183     if ($self->{current_token}->{attributes}) {
1184 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
1185 wakaba 1.1 }
1186     } else {
1187     die "$0: $self->{current_token}->{type}: Unknown token type";
1188     }
1189     $self->{state} = 'data';
1190     ## reconsume
1191    
1192     return ($self->{current_token}); # start tag or end tag
1193    
1194     redo A;
1195     } else {
1196     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1197     ## Stay in the state
1198    
1199     if (@{$self->{char}}) {
1200     $self->{next_input_character} = shift @{$self->{char}};
1201     } else {
1202     $self->{set_next_input_character}->($self);
1203     }
1204    
1205     redo A;
1206     }
1207     } elsif ($self->{state} eq 'entity in attribute value') {
1208 wakaba 1.26 my $token = $self->_tokenize_attempt_to_consume_an_entity (1);
1209 wakaba 1.1
1210     unless (defined $token) {
1211     $self->{current_attribute}->{value} .= '&';
1212     } else {
1213     $self->{current_attribute}->{value} .= $token->{data};
1214     ## ISSUE: spec says "append the returned character token to the current attribute's value"
1215     }
1216    
1217     $self->{state} = $self->{last_attribute_value_state};
1218     # next-input-character is already done
1219     redo A;
1220     } elsif ($self->{state} eq 'bogus comment') {
1221     ## (only happen if PCDATA state)
1222    
1223     my $token = {type => 'comment', data => ''};
1224    
1225     BC: {
1226     if ($self->{next_input_character} == 0x003E) { # >
1227     $self->{state} = 'data';
1228    
1229     if (@{$self->{char}}) {
1230     $self->{next_input_character} = shift @{$self->{char}};
1231     } else {
1232     $self->{set_next_input_character}->($self);
1233     }
1234    
1235    
1236     return ($token);
1237    
1238     redo A;
1239     } elsif ($self->{next_input_character} == -1) {
1240     $self->{state} = 'data';
1241     ## reconsume
1242    
1243     return ($token);
1244    
1245     redo A;
1246     } else {
1247     $token->{data} .= chr ($self->{next_input_character});
1248    
1249     if (@{$self->{char}}) {
1250     $self->{next_input_character} = shift @{$self->{char}};
1251     } else {
1252     $self->{set_next_input_character}->($self);
1253     }
1254    
1255     redo BC;
1256     }
1257     } # BC
1258     } elsif ($self->{state} eq 'markup declaration open') {
1259     ## (only happen if PCDATA state)
1260    
1261     my @next_char;
1262     push @next_char, $self->{next_input_character};
1263    
1264     if ($self->{next_input_character} == 0x002D) { # -
1265    
1266     if (@{$self->{char}}) {
1267     $self->{next_input_character} = shift @{$self->{char}};
1268     } else {
1269     $self->{set_next_input_character}->($self);
1270     }
1271    
1272     push @next_char, $self->{next_input_character};
1273     if ($self->{next_input_character} == 0x002D) { # -
1274     $self->{current_token} = {type => 'comment', data => ''};
1275 wakaba 1.23 $self->{state} = 'comment start';
1276 wakaba 1.1
1277     if (@{$self->{char}}) {
1278     $self->{next_input_character} = shift @{$self->{char}};
1279     } else {
1280     $self->{set_next_input_character}->($self);
1281     }
1282    
1283     redo A;
1284     }
1285     } elsif ($self->{next_input_character} == 0x0044 or # D
1286     $self->{next_input_character} == 0x0064) { # d
1287    
1288     if (@{$self->{char}}) {
1289     $self->{next_input_character} = shift @{$self->{char}};
1290     } else {
1291     $self->{set_next_input_character}->($self);
1292     }
1293    
1294     push @next_char, $self->{next_input_character};
1295     if ($self->{next_input_character} == 0x004F or # O
1296     $self->{next_input_character} == 0x006F) { # o
1297    
1298     if (@{$self->{char}}) {
1299     $self->{next_input_character} = shift @{$self->{char}};
1300     } else {
1301     $self->{set_next_input_character}->($self);
1302     }
1303    
1304     push @next_char, $self->{next_input_character};
1305     if ($self->{next_input_character} == 0x0043 or # C
1306     $self->{next_input_character} == 0x0063) { # c
1307    
1308     if (@{$self->{char}}) {
1309     $self->{next_input_character} = shift @{$self->{char}};
1310     } else {
1311     $self->{set_next_input_character}->($self);
1312     }
1313    
1314     push @next_char, $self->{next_input_character};
1315     if ($self->{next_input_character} == 0x0054 or # T
1316     $self->{next_input_character} == 0x0074) { # t
1317    
1318     if (@{$self->{char}}) {
1319     $self->{next_input_character} = shift @{$self->{char}};
1320     } else {
1321     $self->{set_next_input_character}->($self);
1322     }
1323    
1324     push @next_char, $self->{next_input_character};
1325     if ($self->{next_input_character} == 0x0059 or # Y
1326     $self->{next_input_character} == 0x0079) { # y
1327    
1328     if (@{$self->{char}}) {
1329     $self->{next_input_character} = shift @{$self->{char}};
1330     } else {
1331     $self->{set_next_input_character}->($self);
1332     }
1333    
1334     push @next_char, $self->{next_input_character};
1335     if ($self->{next_input_character} == 0x0050 or # P
1336     $self->{next_input_character} == 0x0070) { # p
1337    
1338     if (@{$self->{char}}) {
1339     $self->{next_input_character} = shift @{$self->{char}};
1340     } else {
1341     $self->{set_next_input_character}->($self);
1342     }
1343    
1344     push @next_char, $self->{next_input_character};
1345     if ($self->{next_input_character} == 0x0045 or # E
1346     $self->{next_input_character} == 0x0065) { # e
1347     ## ISSUE: What a stupid code this is!
1348     $self->{state} = 'DOCTYPE';
1349    
1350     if (@{$self->{char}}) {
1351     $self->{next_input_character} = shift @{$self->{char}};
1352     } else {
1353     $self->{set_next_input_character}->($self);
1354     }
1355    
1356     redo A;
1357     }
1358     }
1359     }
1360     }
1361     }
1362     }
1363     }
1364    
1365 wakaba 1.3 $self->{parse_error}-> (type => 'bogus comment open');
1366 wakaba 1.1 $self->{next_input_character} = shift @next_char;
1367     unshift @{$self->{char}}, (@next_char);
1368     $self->{state} = 'bogus comment';
1369     redo A;
1370    
1371     ## ISSUE: typos in spec: chacacters, is is a parse error
1372     ## ISSUE: spec is somewhat unclear on "is the first character that will be in the comment"; what is "that will be in the comment" is what the algorithm defines, isn't it?
1373 wakaba 1.23 } elsif ($self->{state} eq 'comment start') {
1374     if ($self->{next_input_character} == 0x002D) { # -
1375     $self->{state} = 'comment start dash';
1376    
1377     if (@{$self->{char}}) {
1378     $self->{next_input_character} = shift @{$self->{char}};
1379     } else {
1380     $self->{set_next_input_character}->($self);
1381     }
1382    
1383     redo A;
1384     } elsif ($self->{next_input_character} == 0x003E) { # >
1385     $self->{parse_error}-> (type => 'bogus comment');
1386     $self->{state} = 'data';
1387    
1388     if (@{$self->{char}}) {
1389     $self->{next_input_character} = shift @{$self->{char}};
1390     } else {
1391     $self->{set_next_input_character}->($self);
1392     }
1393    
1394    
1395     return ($self->{current_token}); # comment
1396    
1397     redo A;
1398     } elsif ($self->{next_input_character} == -1) {
1399     $self->{parse_error}-> (type => 'unclosed comment');
1400     $self->{state} = 'data';
1401     ## reconsume
1402    
1403     return ($self->{current_token}); # comment
1404    
1405     redo A;
1406     } else {
1407     $self->{current_token}->{data} # comment
1408     .= chr ($self->{next_input_character});
1409     $self->{state} = 'comment';
1410    
1411     if (@{$self->{char}}) {
1412     $self->{next_input_character} = shift @{$self->{char}};
1413     } else {
1414     $self->{set_next_input_character}->($self);
1415     }
1416    
1417     redo A;
1418     }
1419     } elsif ($self->{state} eq 'comment start dash') {
1420     if ($self->{next_input_character} == 0x002D) { # -
1421     $self->{state} = 'comment end';
1422    
1423     if (@{$self->{char}}) {
1424     $self->{next_input_character} = shift @{$self->{char}};
1425     } else {
1426     $self->{set_next_input_character}->($self);
1427     }
1428    
1429     redo A;
1430     } elsif ($self->{next_input_character} == 0x003E) { # >
1431     $self->{parse_error}-> (type => 'bogus comment');
1432     $self->{state} = 'data';
1433    
1434     if (@{$self->{char}}) {
1435     $self->{next_input_character} = shift @{$self->{char}};
1436     } else {
1437     $self->{set_next_input_character}->($self);
1438     }
1439    
1440    
1441     return ($self->{current_token}); # comment
1442    
1443     redo A;
1444     } elsif ($self->{next_input_character} == -1) {
1445     $self->{parse_error}-> (type => 'unclosed comment');
1446     $self->{state} = 'data';
1447     ## reconsume
1448    
1449     return ($self->{current_token}); # comment
1450    
1451     redo A;
1452     } else {
1453     $self->{current_token}->{data} # comment
1454     .= chr ($self->{next_input_character});
1455     $self->{state} = 'comment';
1456    
1457     if (@{$self->{char}}) {
1458     $self->{next_input_character} = shift @{$self->{char}};
1459     } else {
1460     $self->{set_next_input_character}->($self);
1461     }
1462    
1463     redo A;
1464     }
1465 wakaba 1.1 } elsif ($self->{state} eq 'comment') {
1466     if ($self->{next_input_character} == 0x002D) { # -
1467 wakaba 1.23 $self->{state} = 'comment end dash';
1468 wakaba 1.1
1469     if (@{$self->{char}}) {
1470     $self->{next_input_character} = shift @{$self->{char}};
1471     } else {
1472     $self->{set_next_input_character}->($self);
1473     }
1474    
1475     redo A;
1476     } elsif ($self->{next_input_character} == -1) {
1477 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed comment');
1478 wakaba 1.1 $self->{state} = 'data';
1479     ## reconsume
1480    
1481     return ($self->{current_token}); # comment
1482    
1483     redo A;
1484     } else {
1485     $self->{current_token}->{data} .= chr ($self->{next_input_character}); # comment
1486     ## Stay in the state
1487    
1488     if (@{$self->{char}}) {
1489     $self->{next_input_character} = shift @{$self->{char}};
1490     } else {
1491     $self->{set_next_input_character}->($self);
1492     }
1493    
1494     redo A;
1495     }
1496 wakaba 1.23 } elsif ($self->{state} eq 'comment end dash') {
1497 wakaba 1.1 if ($self->{next_input_character} == 0x002D) { # -
1498     $self->{state} = 'comment end';
1499    
1500     if (@{$self->{char}}) {
1501     $self->{next_input_character} = shift @{$self->{char}};
1502     } else {
1503     $self->{set_next_input_character}->($self);
1504     }
1505    
1506     redo A;
1507     } elsif ($self->{next_input_character} == -1) {
1508 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed comment');
1509 wakaba 1.1 $self->{state} = 'data';
1510     ## reconsume
1511    
1512     return ($self->{current_token}); # comment
1513    
1514     redo A;
1515     } else {
1516     $self->{current_token}->{data} .= '-' . chr ($self->{next_input_character}); # comment
1517     $self->{state} = 'comment';
1518    
1519     if (@{$self->{char}}) {
1520     $self->{next_input_character} = shift @{$self->{char}};
1521     } else {
1522     $self->{set_next_input_character}->($self);
1523     }
1524    
1525     redo A;
1526     }
1527     } elsif ($self->{state} eq 'comment end') {
1528     if ($self->{next_input_character} == 0x003E) { # >
1529     $self->{state} = 'data';
1530    
1531     if (@{$self->{char}}) {
1532     $self->{next_input_character} = shift @{$self->{char}};
1533     } else {
1534     $self->{set_next_input_character}->($self);
1535     }
1536    
1537    
1538     return ($self->{current_token}); # comment
1539    
1540     redo A;
1541     } elsif ($self->{next_input_character} == 0x002D) { # -
1542 wakaba 1.3 $self->{parse_error}-> (type => 'dash in comment');
1543 wakaba 1.1 $self->{current_token}->{data} .= '-'; # comment
1544     ## Stay in the state
1545    
1546     if (@{$self->{char}}) {
1547     $self->{next_input_character} = shift @{$self->{char}};
1548     } else {
1549     $self->{set_next_input_character}->($self);
1550     }
1551    
1552     redo A;
1553     } elsif ($self->{next_input_character} == -1) {
1554 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed comment');
1555 wakaba 1.1 $self->{state} = 'data';
1556     ## reconsume
1557    
1558     return ($self->{current_token}); # comment
1559    
1560     redo A;
1561     } else {
1562 wakaba 1.3 $self->{parse_error}-> (type => 'dash in comment');
1563 wakaba 1.1 $self->{current_token}->{data} .= '--' . chr ($self->{next_input_character}); # comment
1564     $self->{state} = 'comment';
1565    
1566     if (@{$self->{char}}) {
1567     $self->{next_input_character} = shift @{$self->{char}};
1568     } else {
1569     $self->{set_next_input_character}->($self);
1570     }
1571    
1572     redo A;
1573     }
1574     } elsif ($self->{state} eq 'DOCTYPE') {
1575     if ($self->{next_input_character} == 0x0009 or # HT
1576     $self->{next_input_character} == 0x000A or # LF
1577     $self->{next_input_character} == 0x000B or # VT
1578     $self->{next_input_character} == 0x000C or # FF
1579     $self->{next_input_character} == 0x0020) { # SP
1580     $self->{state} = 'before DOCTYPE name';
1581    
1582     if (@{$self->{char}}) {
1583     $self->{next_input_character} = shift @{$self->{char}};
1584     } else {
1585     $self->{set_next_input_character}->($self);
1586     }
1587    
1588     redo A;
1589     } else {
1590 wakaba 1.3 $self->{parse_error}-> (type => 'no space before DOCTYPE name');
1591 wakaba 1.1 $self->{state} = 'before DOCTYPE name';
1592     ## reconsume
1593     redo A;
1594     }
1595     } elsif ($self->{state} eq 'before DOCTYPE name') {
1596     if ($self->{next_input_character} == 0x0009 or # HT
1597     $self->{next_input_character} == 0x000A or # LF
1598     $self->{next_input_character} == 0x000B or # VT
1599     $self->{next_input_character} == 0x000C or # FF
1600     $self->{next_input_character} == 0x0020) { # SP
1601     ## Stay in the state
1602    
1603     if (@{$self->{char}}) {
1604     $self->{next_input_character} = shift @{$self->{char}};
1605     } else {
1606     $self->{set_next_input_character}->($self);
1607     }
1608    
1609     redo A;
1610 wakaba 1.18 } elsif ($self->{next_input_character} == 0x003E) { # >
1611     $self->{parse_error}-> (type => 'no DOCTYPE name');
1612     $self->{state} = 'data';
1613    
1614     if (@{$self->{char}}) {
1615     $self->{next_input_character} = shift @{$self->{char}};
1616     } else {
1617     $self->{set_next_input_character}->($self);
1618     }
1619    
1620    
1621     return ({type => 'DOCTYPE'}); # incorrect
1622    
1623     redo A;
1624     } elsif ($self->{next_input_character} == -1) {
1625     $self->{parse_error}-> (type => 'no DOCTYPE name');
1626     $self->{state} = 'data';
1627     ## reconsume
1628    
1629     return ({type => 'DOCTYPE'}); # incorrect
1630    
1631     redo A;
1632     } else {
1633     $self->{current_token}
1634     = {type => 'DOCTYPE',
1635     name => chr ($self->{next_input_character}),
1636     correct => 1};
1637 wakaba 1.4 ## ISSUE: "Set the token's name name to the" in the spec
1638 wakaba 1.1 $self->{state} = 'DOCTYPE name';
1639    
1640     if (@{$self->{char}}) {
1641     $self->{next_input_character} = shift @{$self->{char}};
1642     } else {
1643     $self->{set_next_input_character}->($self);
1644     }
1645    
1646     redo A;
1647 wakaba 1.18 }
1648     } elsif ($self->{state} eq 'DOCTYPE name') {
1649     ## ISSUE: Redundant "First," in the spec.
1650     if ($self->{next_input_character} == 0x0009 or # HT
1651     $self->{next_input_character} == 0x000A or # LF
1652     $self->{next_input_character} == 0x000B or # VT
1653     $self->{next_input_character} == 0x000C or # FF
1654     $self->{next_input_character} == 0x0020) { # SP
1655     $self->{state} = 'after DOCTYPE name';
1656    
1657     if (@{$self->{char}}) {
1658     $self->{next_input_character} = shift @{$self->{char}};
1659     } else {
1660     $self->{set_next_input_character}->($self);
1661     }
1662    
1663     redo A;
1664 wakaba 1.1 } elsif ($self->{next_input_character} == 0x003E) { # >
1665     $self->{state} = 'data';
1666    
1667     if (@{$self->{char}}) {
1668     $self->{next_input_character} = shift @{$self->{char}};
1669     } else {
1670     $self->{set_next_input_character}->($self);
1671     }
1672    
1673    
1674 wakaba 1.18 return ($self->{current_token}); # DOCTYPE
1675 wakaba 1.1
1676     redo A;
1677 wakaba 1.18 } elsif ($self->{next_input_character} == -1) {
1678     $self->{parse_error}-> (type => 'unclosed DOCTYPE');
1679 wakaba 1.1 $self->{state} = 'data';
1680     ## reconsume
1681    
1682 wakaba 1.18 delete $self->{current_token}->{correct};
1683     return ($self->{current_token}); # DOCTYPE
1684 wakaba 1.1
1685     redo A;
1686     } else {
1687 wakaba 1.18 $self->{current_token}->{name}
1688     .= chr ($self->{next_input_character}); # DOCTYPE
1689     ## Stay in the state
1690 wakaba 1.1
1691     if (@{$self->{char}}) {
1692     $self->{next_input_character} = shift @{$self->{char}};
1693     } else {
1694     $self->{set_next_input_character}->($self);
1695     }
1696    
1697     redo A;
1698     }
1699 wakaba 1.18 } elsif ($self->{state} eq 'after DOCTYPE name') {
1700 wakaba 1.1 if ($self->{next_input_character} == 0x0009 or # HT
1701     $self->{next_input_character} == 0x000A or # LF
1702     $self->{next_input_character} == 0x000B or # VT
1703     $self->{next_input_character} == 0x000C or # FF
1704     $self->{next_input_character} == 0x0020) { # SP
1705 wakaba 1.18 ## Stay in the state
1706 wakaba 1.1
1707     if (@{$self->{char}}) {
1708     $self->{next_input_character} = shift @{$self->{char}};
1709     } else {
1710     $self->{set_next_input_character}->($self);
1711     }
1712    
1713     redo A;
1714     } elsif ($self->{next_input_character} == 0x003E) { # >
1715     $self->{state} = 'data';
1716    
1717     if (@{$self->{char}}) {
1718     $self->{next_input_character} = shift @{$self->{char}};
1719     } else {
1720     $self->{set_next_input_character}->($self);
1721     }
1722    
1723    
1724     return ($self->{current_token}); # DOCTYPE
1725    
1726     redo A;
1727 wakaba 1.18 } elsif ($self->{next_input_character} == -1) {
1728     $self->{parse_error}-> (type => 'unclosed DOCTYPE');
1729     $self->{state} = 'data';
1730     ## reconsume
1731    
1732     delete $self->{current_token}->{correct};
1733     return ($self->{current_token}); # DOCTYPE
1734    
1735     redo A;
1736     } elsif ($self->{next_input_character} == 0x0050 or # P
1737     $self->{next_input_character} == 0x0070) { # p
1738    
1739     if (@{$self->{char}}) {
1740     $self->{next_input_character} = shift @{$self->{char}};
1741     } else {
1742     $self->{set_next_input_character}->($self);
1743     }
1744    
1745     if ($self->{next_input_character} == 0x0055 or # U
1746     $self->{next_input_character} == 0x0075) { # u
1747    
1748     if (@{$self->{char}}) {
1749     $self->{next_input_character} = shift @{$self->{char}};
1750     } else {
1751     $self->{set_next_input_character}->($self);
1752     }
1753    
1754     if ($self->{next_input_character} == 0x0042 or # B
1755     $self->{next_input_character} == 0x0062) { # b
1756    
1757     if (@{$self->{char}}) {
1758     $self->{next_input_character} = shift @{$self->{char}};
1759     } else {
1760     $self->{set_next_input_character}->($self);
1761     }
1762    
1763     if ($self->{next_input_character} == 0x004C or # L
1764     $self->{next_input_character} == 0x006C) { # l
1765    
1766     if (@{$self->{char}}) {
1767     $self->{next_input_character} = shift @{$self->{char}};
1768     } else {
1769     $self->{set_next_input_character}->($self);
1770     }
1771    
1772     if ($self->{next_input_character} == 0x0049 or # I
1773     $self->{next_input_character} == 0x0069) { # i
1774    
1775     if (@{$self->{char}}) {
1776     $self->{next_input_character} = shift @{$self->{char}};
1777     } else {
1778     $self->{set_next_input_character}->($self);
1779     }
1780    
1781     if ($self->{next_input_character} == 0x0043 or # C
1782     $self->{next_input_character} == 0x0063) { # c
1783     $self->{state} = 'before DOCTYPE public identifier';
1784    
1785     if (@{$self->{char}}) {
1786     $self->{next_input_character} = shift @{$self->{char}};
1787     } else {
1788     $self->{set_next_input_character}->($self);
1789     }
1790    
1791     redo A;
1792     }
1793     }
1794     }
1795     }
1796     }
1797    
1798     #
1799     } elsif ($self->{next_input_character} == 0x0053 or # S
1800     $self->{next_input_character} == 0x0073) { # s
1801    
1802     if (@{$self->{char}}) {
1803     $self->{next_input_character} = shift @{$self->{char}};
1804     } else {
1805     $self->{set_next_input_character}->($self);
1806     }
1807    
1808     if ($self->{next_input_character} == 0x0059 or # Y
1809     $self->{next_input_character} == 0x0079) { # y
1810    
1811     if (@{$self->{char}}) {
1812     $self->{next_input_character} = shift @{$self->{char}};
1813     } else {
1814     $self->{set_next_input_character}->($self);
1815     }
1816    
1817     if ($self->{next_input_character} == 0x0053 or # S
1818     $self->{next_input_character} == 0x0073) { # s
1819    
1820     if (@{$self->{char}}) {
1821     $self->{next_input_character} = shift @{$self->{char}};
1822     } else {
1823     $self->{set_next_input_character}->($self);
1824     }
1825    
1826     if ($self->{next_input_character} == 0x0054 or # T
1827     $self->{next_input_character} == 0x0074) { # t
1828    
1829     if (@{$self->{char}}) {
1830     $self->{next_input_character} = shift @{$self->{char}};
1831     } else {
1832     $self->{set_next_input_character}->($self);
1833     }
1834    
1835     if ($self->{next_input_character} == 0x0045 or # E
1836     $self->{next_input_character} == 0x0065) { # e
1837    
1838     if (@{$self->{char}}) {
1839     $self->{next_input_character} = shift @{$self->{char}};
1840     } else {
1841     $self->{set_next_input_character}->($self);
1842     }
1843    
1844     if ($self->{next_input_character} == 0x004D or # M
1845     $self->{next_input_character} == 0x006D) { # m
1846     $self->{state} = 'before DOCTYPE system identifier';
1847    
1848     if (@{$self->{char}}) {
1849     $self->{next_input_character} = shift @{$self->{char}};
1850     } else {
1851     $self->{set_next_input_character}->($self);
1852     }
1853    
1854     redo A;
1855     }
1856     }
1857     }
1858     }
1859     }
1860    
1861     #
1862     } else {
1863    
1864     if (@{$self->{char}}) {
1865     $self->{next_input_character} = shift @{$self->{char}};
1866     } else {
1867     $self->{set_next_input_character}->($self);
1868     }
1869    
1870     #
1871     }
1872    
1873     $self->{parse_error}-> (type => 'string after DOCTYPE name');
1874     $self->{state} = 'bogus DOCTYPE';
1875     # next-input-character is already done
1876     redo A;
1877     } elsif ($self->{state} eq 'before DOCTYPE public identifier') {
1878     if ({
1879     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
1880     #0x000D => 1, # HT, LF, VT, FF, SP, CR
1881     }->{$self->{next_input_character}}) {
1882 wakaba 1.1 ## Stay in the state
1883    
1884     if (@{$self->{char}}) {
1885     $self->{next_input_character} = shift @{$self->{char}};
1886     } else {
1887     $self->{set_next_input_character}->($self);
1888     }
1889    
1890     redo A;
1891 wakaba 1.18 } elsif ($self->{next_input_character} eq 0x0022) { # "
1892     $self->{current_token}->{public_identifier} = ''; # DOCTYPE
1893     $self->{state} = 'DOCTYPE public identifier (double-quoted)';
1894    
1895     if (@{$self->{char}}) {
1896     $self->{next_input_character} = shift @{$self->{char}};
1897     } else {
1898     $self->{set_next_input_character}->($self);
1899     }
1900    
1901     redo A;
1902     } elsif ($self->{next_input_character} eq 0x0027) { # '
1903     $self->{current_token}->{public_identifier} = ''; # DOCTYPE
1904     $self->{state} = 'DOCTYPE public identifier (single-quoted)';
1905    
1906     if (@{$self->{char}}) {
1907     $self->{next_input_character} = shift @{$self->{char}};
1908     } else {
1909     $self->{set_next_input_character}->($self);
1910     }
1911    
1912     redo A;
1913     } elsif ($self->{next_input_character} eq 0x003E) { # >
1914     $self->{parse_error}-> (type => 'no PUBLIC literal');
1915    
1916     $self->{state} = 'data';
1917    
1918     if (@{$self->{char}}) {
1919     $self->{next_input_character} = shift @{$self->{char}};
1920     } else {
1921     $self->{set_next_input_character}->($self);
1922     }
1923    
1924    
1925     delete $self->{current_token}->{correct};
1926     return ($self->{current_token}); # DOCTYPE
1927    
1928     redo A;
1929 wakaba 1.1 } elsif ($self->{next_input_character} == -1) {
1930 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed DOCTYPE');
1931 wakaba 1.18
1932 wakaba 1.1 $self->{state} = 'data';
1933     ## reconsume
1934    
1935 wakaba 1.18 delete $self->{current_token}->{correct};
1936     return ($self->{current_token}); # DOCTYPE
1937 wakaba 1.1
1938     redo A;
1939     } else {
1940 wakaba 1.18 $self->{parse_error}-> (type => 'string after PUBLIC');
1941     $self->{state} = 'bogus DOCTYPE';
1942    
1943     if (@{$self->{char}}) {
1944     $self->{next_input_character} = shift @{$self->{char}};
1945     } else {
1946     $self->{set_next_input_character}->($self);
1947     }
1948    
1949     redo A;
1950     }
1951     } elsif ($self->{state} eq 'DOCTYPE public identifier (double-quoted)') {
1952     if ($self->{next_input_character} == 0x0022) { # "
1953     $self->{state} = 'after DOCTYPE public identifier';
1954    
1955     if (@{$self->{char}}) {
1956     $self->{next_input_character} = shift @{$self->{char}};
1957     } else {
1958     $self->{set_next_input_character}->($self);
1959     }
1960    
1961     redo A;
1962     } elsif ($self->{next_input_character} == -1) {
1963     $self->{parse_error}-> (type => 'unclosed PUBLIC literal');
1964    
1965     $self->{state} = 'data';
1966     ## reconsume
1967    
1968     delete $self->{current_token}->{correct};
1969     return ($self->{current_token}); # DOCTYPE
1970    
1971     redo A;
1972     } else {
1973     $self->{current_token}->{public_identifier} # DOCTYPE
1974     .= chr $self->{next_input_character};
1975     ## Stay in the state
1976    
1977     if (@{$self->{char}}) {
1978     $self->{next_input_character} = shift @{$self->{char}};
1979     } else {
1980     $self->{set_next_input_character}->($self);
1981     }
1982    
1983     redo A;
1984     }
1985     } elsif ($self->{state} eq 'DOCTYPE public identifier (single-quoted)') {
1986     if ($self->{next_input_character} == 0x0027) { # '
1987     $self->{state} = 'after DOCTYPE public identifier';
1988    
1989     if (@{$self->{char}}) {
1990     $self->{next_input_character} = shift @{$self->{char}};
1991     } else {
1992     $self->{set_next_input_character}->($self);
1993     }
1994    
1995     redo A;
1996     } elsif ($self->{next_input_character} == -1) {
1997     $self->{parse_error}-> (type => 'unclosed PUBLIC literal');
1998    
1999     $self->{state} = 'data';
2000     ## reconsume
2001    
2002     delete $self->{current_token}->{correct};
2003     return ($self->{current_token}); # DOCTYPE
2004    
2005     redo A;
2006     } else {
2007     $self->{current_token}->{public_identifier} # DOCTYPE
2008     .= chr $self->{next_input_character};
2009     ## Stay in the state
2010    
2011     if (@{$self->{char}}) {
2012     $self->{next_input_character} = shift @{$self->{char}};
2013     } else {
2014     $self->{set_next_input_character}->($self);
2015     }
2016    
2017     redo A;
2018     }
2019     } elsif ($self->{state} eq 'after DOCTYPE public identifier') {
2020     if ({
2021     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2022     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2023     }->{$self->{next_input_character}}) {
2024 wakaba 1.1 ## Stay in the state
2025    
2026     if (@{$self->{char}}) {
2027     $self->{next_input_character} = shift @{$self->{char}};
2028     } else {
2029     $self->{set_next_input_character}->($self);
2030     }
2031    
2032     redo A;
2033 wakaba 1.18 } elsif ($self->{next_input_character} == 0x0022) { # "
2034     $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2035     $self->{state} = 'DOCTYPE system identifier (double-quoted)';
2036    
2037     if (@{$self->{char}}) {
2038     $self->{next_input_character} = shift @{$self->{char}};
2039     } else {
2040     $self->{set_next_input_character}->($self);
2041     }
2042    
2043     redo A;
2044     } elsif ($self->{next_input_character} == 0x0027) { # '
2045     $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2046     $self->{state} = 'DOCTYPE system identifier (single-quoted)';
2047    
2048     if (@{$self->{char}}) {
2049     $self->{next_input_character} = shift @{$self->{char}};
2050     } else {
2051     $self->{set_next_input_character}->($self);
2052     }
2053    
2054     redo A;
2055     } elsif ($self->{next_input_character} == 0x003E) { # >
2056     $self->{state} = 'data';
2057    
2058     if (@{$self->{char}}) {
2059     $self->{next_input_character} = shift @{$self->{char}};
2060     } else {
2061     $self->{set_next_input_character}->($self);
2062     }
2063    
2064    
2065     return ($self->{current_token}); # DOCTYPE
2066    
2067     redo A;
2068     } elsif ($self->{next_input_character} == -1) {
2069     $self->{parse_error}-> (type => 'unclosed DOCTYPE');
2070    
2071     $self->{state} = 'data';
2072 wakaba 1.26 ## reconsume
2073 wakaba 1.18
2074     delete $self->{current_token}->{correct};
2075     return ($self->{current_token}); # DOCTYPE
2076    
2077     redo A;
2078     } else {
2079     $self->{parse_error}-> (type => 'string after PUBLIC literal');
2080     $self->{state} = 'bogus DOCTYPE';
2081    
2082     if (@{$self->{char}}) {
2083     $self->{next_input_character} = shift @{$self->{char}};
2084     } else {
2085     $self->{set_next_input_character}->($self);
2086     }
2087    
2088     redo A;
2089 wakaba 1.1 }
2090 wakaba 1.18 } elsif ($self->{state} eq 'before DOCTYPE system identifier') {
2091     if ({
2092     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2093     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2094     }->{$self->{next_input_character}}) {
2095 wakaba 1.1 ## Stay in the state
2096    
2097     if (@{$self->{char}}) {
2098     $self->{next_input_character} = shift @{$self->{char}};
2099     } else {
2100     $self->{set_next_input_character}->($self);
2101     }
2102    
2103     redo A;
2104 wakaba 1.18 } elsif ($self->{next_input_character} == 0x0022) { # "
2105     $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2106     $self->{state} = 'DOCTYPE system identifier (double-quoted)';
2107    
2108     if (@{$self->{char}}) {
2109     $self->{next_input_character} = shift @{$self->{char}};
2110     } else {
2111     $self->{set_next_input_character}->($self);
2112     }
2113    
2114     redo A;
2115     } elsif ($self->{next_input_character} == 0x0027) { # '
2116     $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2117     $self->{state} = 'DOCTYPE system identifier (single-quoted)';
2118    
2119     if (@{$self->{char}}) {
2120     $self->{next_input_character} = shift @{$self->{char}};
2121     } else {
2122     $self->{set_next_input_character}->($self);
2123     }
2124    
2125     redo A;
2126 wakaba 1.1 } elsif ($self->{next_input_character} == 0x003E) { # >
2127 wakaba 1.18 $self->{parse_error}-> (type => 'no SYSTEM literal');
2128 wakaba 1.1 $self->{state} = 'data';
2129    
2130     if (@{$self->{char}}) {
2131     $self->{next_input_character} = shift @{$self->{char}};
2132     } else {
2133     $self->{set_next_input_character}->($self);
2134     }
2135    
2136    
2137 wakaba 1.18 delete $self->{current_token}->{correct};
2138 wakaba 1.1 return ($self->{current_token}); # DOCTYPE
2139    
2140     redo A;
2141     } elsif ($self->{next_input_character} == -1) {
2142 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed DOCTYPE');
2143 wakaba 1.18
2144     $self->{state} = 'data';
2145 wakaba 1.26 ## reconsume
2146 wakaba 1.18
2147     delete $self->{current_token}->{correct};
2148     return ($self->{current_token}); # DOCTYPE
2149    
2150     redo A;
2151     } else {
2152     $self->{parse_error}-> (type => 'string after PUBLIC literal');
2153     $self->{state} = 'bogus DOCTYPE';
2154    
2155     if (@{$self->{char}}) {
2156     $self->{next_input_character} = shift @{$self->{char}};
2157     } else {
2158     $self->{set_next_input_character}->($self);
2159     }
2160    
2161     redo A;
2162     }
2163     } elsif ($self->{state} eq 'DOCTYPE system identifier (double-quoted)') {
2164     if ($self->{next_input_character} == 0x0022) { # "
2165     $self->{state} = 'after DOCTYPE system identifier';
2166    
2167     if (@{$self->{char}}) {
2168     $self->{next_input_character} = shift @{$self->{char}};
2169     } else {
2170     $self->{set_next_input_character}->($self);
2171     }
2172    
2173     redo A;
2174     } elsif ($self->{next_input_character} == -1) {
2175     $self->{parse_error}-> (type => 'unclosed SYSTEM literal');
2176    
2177 wakaba 1.1 $self->{state} = 'data';
2178     ## reconsume
2179    
2180 wakaba 1.18 delete $self->{current_token}->{correct};
2181 wakaba 1.1 return ($self->{current_token}); # DOCTYPE
2182    
2183     redo A;
2184     } else {
2185 wakaba 1.18 $self->{current_token}->{system_identifier} # DOCTYPE
2186     .= chr $self->{next_input_character};
2187     ## Stay in the state
2188    
2189     if (@{$self->{char}}) {
2190     $self->{next_input_character} = shift @{$self->{char}};
2191     } else {
2192     $self->{set_next_input_character}->($self);
2193     }
2194    
2195     redo A;
2196     }
2197     } elsif ($self->{state} eq 'DOCTYPE system identifier (single-quoted)') {
2198     if ($self->{next_input_character} == 0x0027) { # '
2199     $self->{state} = 'after DOCTYPE system identifier';
2200    
2201     if (@{$self->{char}}) {
2202     $self->{next_input_character} = shift @{$self->{char}};
2203     } else {
2204     $self->{set_next_input_character}->($self);
2205     }
2206    
2207     redo A;
2208     } elsif ($self->{next_input_character} == -1) {
2209     $self->{parse_error}-> (type => 'unclosed SYSTEM literal');
2210    
2211     $self->{state} = 'data';
2212     ## reconsume
2213    
2214     delete $self->{current_token}->{correct};
2215     return ($self->{current_token}); # DOCTYPE
2216    
2217     redo A;
2218     } else {
2219     $self->{current_token}->{system_identifier} # DOCTYPE
2220     .= chr $self->{next_input_character};
2221     ## Stay in the state
2222    
2223     if (@{$self->{char}}) {
2224     $self->{next_input_character} = shift @{$self->{char}};
2225     } else {
2226     $self->{set_next_input_character}->($self);
2227     }
2228    
2229     redo A;
2230     }
2231     } elsif ($self->{state} eq 'after DOCTYPE system identifier') {
2232     if ({
2233     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2234     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2235     }->{$self->{next_input_character}}) {
2236     ## Stay in the state
2237    
2238     if (@{$self->{char}}) {
2239     $self->{next_input_character} = shift @{$self->{char}};
2240     } else {
2241     $self->{set_next_input_character}->($self);
2242     }
2243    
2244     redo A;
2245     } elsif ($self->{next_input_character} == 0x003E) { # >
2246     $self->{state} = 'data';
2247    
2248     if (@{$self->{char}}) {
2249     $self->{next_input_character} = shift @{$self->{char}};
2250     } else {
2251     $self->{set_next_input_character}->($self);
2252     }
2253    
2254    
2255     return ($self->{current_token}); # DOCTYPE
2256    
2257     redo A;
2258     } elsif ($self->{next_input_character} == -1) {
2259     $self->{parse_error}-> (type => 'unclosed DOCTYPE');
2260    
2261     $self->{state} = 'data';
2262 wakaba 1.26 ## reconsume
2263 wakaba 1.18
2264     delete $self->{current_token}->{correct};
2265     return ($self->{current_token}); # DOCTYPE
2266    
2267     redo A;
2268     } else {
2269     $self->{parse_error}-> (type => 'string after SYSTEM literal');
2270 wakaba 1.1 $self->{state} = 'bogus DOCTYPE';
2271    
2272     if (@{$self->{char}}) {
2273     $self->{next_input_character} = shift @{$self->{char}};
2274     } else {
2275     $self->{set_next_input_character}->($self);
2276     }
2277    
2278     redo A;
2279     }
2280     } elsif ($self->{state} eq 'bogus DOCTYPE') {
2281     if ($self->{next_input_character} == 0x003E) { # >
2282     $self->{state} = 'data';
2283    
2284     if (@{$self->{char}}) {
2285     $self->{next_input_character} = shift @{$self->{char}};
2286     } else {
2287     $self->{set_next_input_character}->($self);
2288     }
2289    
2290    
2291 wakaba 1.18 delete $self->{current_token}->{correct};
2292 wakaba 1.1 return ($self->{current_token}); # DOCTYPE
2293    
2294     redo A;
2295     } elsif ($self->{next_input_character} == -1) {
2296 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed DOCTYPE');
2297 wakaba 1.1 $self->{state} = 'data';
2298     ## reconsume
2299    
2300 wakaba 1.18 delete $self->{current_token}->{correct};
2301 wakaba 1.1 return ($self->{current_token}); # DOCTYPE
2302    
2303     redo A;
2304     } else {
2305     ## Stay in the state
2306    
2307     if (@{$self->{char}}) {
2308     $self->{next_input_character} = shift @{$self->{char}};
2309     } else {
2310     $self->{set_next_input_character}->($self);
2311     }
2312    
2313     redo A;
2314     }
2315     } else {
2316     die "$0: $self->{state}: Unknown state";
2317     }
2318     } # A
2319    
2320     die "$0: _get_next_token: unexpected case";
2321     } # _get_next_token
2322    
2323 wakaba 1.26 sub _tokenize_attempt_to_consume_an_entity ($$) {
2324     my ($self, $in_attr) = @_;
2325 wakaba 1.20
2326     if ({
2327     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, # HT, LF, VT, FF,
2328     0x0020 => 1, 0x003C => 1, 0x0026 => 1, -1 => 1, # SP, <, & # 0x000D # CR
2329     }->{$self->{next_input_character}}) {
2330     ## Don't consume
2331     ## No error
2332     return undef;
2333     } elsif ($self->{next_input_character} == 0x0023) { # #
2334 wakaba 1.1
2335     if (@{$self->{char}}) {
2336     $self->{next_input_character} = shift @{$self->{char}};
2337     } else {
2338     $self->{set_next_input_character}->($self);
2339     }
2340    
2341     if ($self->{next_input_character} == 0x0078 or # x
2342     $self->{next_input_character} == 0x0058) { # X
2343 wakaba 1.26 my $code;
2344 wakaba 1.1 X: {
2345     my $x_char = $self->{next_input_character};
2346    
2347     if (@{$self->{char}}) {
2348     $self->{next_input_character} = shift @{$self->{char}};
2349     } else {
2350     $self->{set_next_input_character}->($self);
2351     }
2352    
2353     if (0x0030 <= $self->{next_input_character} and
2354     $self->{next_input_character} <= 0x0039) { # 0..9
2355 wakaba 1.26 $code ||= 0;
2356     $code *= 0x10;
2357     $code += $self->{next_input_character} - 0x0030;
2358 wakaba 1.1 redo X;
2359     } elsif (0x0061 <= $self->{next_input_character} and
2360     $self->{next_input_character} <= 0x0066) { # a..f
2361 wakaba 1.26 $code ||= 0;
2362     $code *= 0x10;
2363     $code += $self->{next_input_character} - 0x0060 + 9;
2364 wakaba 1.1 redo X;
2365     } elsif (0x0041 <= $self->{next_input_character} and
2366     $self->{next_input_character} <= 0x0046) { # A..F
2367 wakaba 1.26 $code ||= 0;
2368     $code *= 0x10;
2369     $code += $self->{next_input_character} - 0x0040 + 9;
2370 wakaba 1.1 redo X;
2371 wakaba 1.26 } elsif (not defined $code) { # no hexadecimal digit
2372 wakaba 1.3 $self->{parse_error}-> (type => 'bare hcro');
2373 wakaba 1.1 $self->{next_input_character} = 0x0023; # #
2374     unshift @{$self->{char}}, ($x_char);
2375     return undef;
2376     } elsif ($self->{next_input_character} == 0x003B) { # ;
2377    
2378     if (@{$self->{char}}) {
2379     $self->{next_input_character} = shift @{$self->{char}};
2380     } else {
2381     $self->{set_next_input_character}->($self);
2382     }
2383    
2384     } else {
2385 wakaba 1.3 $self->{parse_error}-> (type => 'no refc');
2386 wakaba 1.1 }
2387    
2388 wakaba 1.26 if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) {
2389     $self->{parse_error}-> (type => sprintf 'invalid character reference:U+%04X', $code);
2390     $code = 0xFFFD;
2391     } elsif ($code > 0x10FFFF) {
2392     $self->{parse_error}-> (type => sprintf 'invalid character reference:U-%08X', $code);
2393     $code = 0xFFFD;
2394     } elsif ($code == 0x000D) {
2395     $self->{parse_error}-> (type => 'CR character reference');
2396     $code = 0x000A;
2397     } elsif (0x80 <= $code and $code <= 0x9F) {
2398     $self->{parse_error}-> (type => sprintf 'c1 entity:U+%04X', $code);
2399     $code = $c1_entity_char->{$code};
2400 wakaba 1.1 }
2401    
2402 wakaba 1.26 return {type => 'character', data => chr $code};
2403 wakaba 1.1 } # X
2404     } elsif (0x0030 <= $self->{next_input_character} and
2405     $self->{next_input_character} <= 0x0039) { # 0..9
2406     my $code = $self->{next_input_character} - 0x0030;
2407    
2408     if (@{$self->{char}}) {
2409     $self->{next_input_character} = shift @{$self->{char}};
2410     } else {
2411     $self->{set_next_input_character}->($self);
2412     }
2413    
2414    
2415     while (0x0030 <= $self->{next_input_character} and
2416     $self->{next_input_character} <= 0x0039) { # 0..9
2417     $code *= 10;
2418     $code += $self->{next_input_character} - 0x0030;
2419    
2420    
2421     if (@{$self->{char}}) {
2422     $self->{next_input_character} = shift @{$self->{char}};
2423     } else {
2424     $self->{set_next_input_character}->($self);
2425     }
2426    
2427     }
2428    
2429     if ($self->{next_input_character} == 0x003B) { # ;
2430    
2431     if (@{$self->{char}}) {
2432     $self->{next_input_character} = shift @{$self->{char}};
2433     } else {
2434     $self->{set_next_input_character}->($self);
2435     }
2436    
2437     } else {
2438 wakaba 1.3 $self->{parse_error}-> (type => 'no refc');
2439 wakaba 1.1 }
2440    
2441 wakaba 1.26 if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) {
2442     $self->{parse_error}-> (type => sprintf 'invalid character reference:U+%04X', $code);
2443     $code = 0xFFFD;
2444     } elsif ($code > 0x10FFFF) {
2445     $self->{parse_error}-> (type => sprintf 'invalid character reference:U-%08X', $code);
2446     $code = 0xFFFD;
2447     } elsif ($code == 0x000D) {
2448     $self->{parse_error}-> (type => 'CR character reference');
2449     $code = 0x000A;
2450 wakaba 1.4 } elsif (0x80 <= $code and $code <= 0x9F) {
2451 wakaba 1.8 $self->{parse_error}-> (type => sprintf 'c1 entity:U+%04X', $code);
2452 wakaba 1.4 $code = $c1_entity_char->{$code};
2453 wakaba 1.1 }
2454    
2455     return {type => 'character', data => chr $code};
2456     } else {
2457 wakaba 1.3 $self->{parse_error}-> (type => 'bare nero');
2458 wakaba 1.1 unshift @{$self->{char}}, ($self->{next_input_character});
2459     $self->{next_input_character} = 0x0023; # #
2460     return undef;
2461     }
2462     } elsif ((0x0041 <= $self->{next_input_character} and
2463     $self->{next_input_character} <= 0x005A) or
2464     (0x0061 <= $self->{next_input_character} and
2465     $self->{next_input_character} <= 0x007A)) {
2466     my $entity_name = chr $self->{next_input_character};
2467    
2468     if (@{$self->{char}}) {
2469     $self->{next_input_character} = shift @{$self->{char}};
2470     } else {
2471     $self->{set_next_input_character}->($self);
2472     }
2473    
2474    
2475     my $value = $entity_name;
2476     my $match;
2477 wakaba 1.16 require Whatpm::_NamedEntityList;
2478     our $EntityChar;
2479 wakaba 1.1
2480     while (length $entity_name < 10 and
2481     ## NOTE: Some number greater than the maximum length of entity name
2482 wakaba 1.16 ((0x0041 <= $self->{next_input_character} and # a
2483     $self->{next_input_character} <= 0x005A) or # x
2484     (0x0061 <= $self->{next_input_character} and # a
2485     $self->{next_input_character} <= 0x007A) or # z
2486     (0x0030 <= $self->{next_input_character} and # 0
2487     $self->{next_input_character} <= 0x0039) or # 9
2488     $self->{next_input_character} == 0x003B)) { # ;
2489 wakaba 1.1 $entity_name .= chr $self->{next_input_character};
2490 wakaba 1.16 if (defined $EntityChar->{$entity_name}) {
2491     if ($self->{next_input_character} == 0x003B) { # ;
2492 wakaba 1.26 $value = $EntityChar->{$entity_name};
2493 wakaba 1.16 $match = 1;
2494    
2495     if (@{$self->{char}}) {
2496     $self->{next_input_character} = shift @{$self->{char}};
2497     } else {
2498     $self->{set_next_input_character}->($self);
2499     }
2500    
2501     last;
2502 wakaba 1.26 } elsif (not $in_attr) {
2503     $value = $EntityChar->{$entity_name};
2504     $match = -1;
2505 wakaba 1.16 } else {
2506 wakaba 1.26 $value .= chr $self->{next_input_character};
2507 wakaba 1.16 }
2508 wakaba 1.1 } else {
2509     $value .= chr $self->{next_input_character};
2510     }
2511    
2512     if (@{$self->{char}}) {
2513     $self->{next_input_character} = shift @{$self->{char}};
2514     } else {
2515     $self->{set_next_input_character}->($self);
2516     }
2517    
2518     }
2519    
2520 wakaba 1.16 if ($match > 0) {
2521     return {type => 'character', data => $value};
2522     } elsif ($match < 0) {
2523     $self->{parse_error}-> (type => 'refc');
2524 wakaba 1.1 return {type => 'character', data => $value};
2525     } else {
2526 wakaba 1.3 $self->{parse_error}-> (type => 'bare ero');
2527 wakaba 1.1 ## NOTE: No characters are consumed in the spec.
2528 wakaba 1.26 return {type => 'character', data => '&'.$value};
2529 wakaba 1.1 }
2530     } else {
2531     ## no characters are consumed
2532 wakaba 1.3 $self->{parse_error}-> (type => 'bare ero');
2533 wakaba 1.1 return undef;
2534     }
2535     } # _tokenize_attempt_to_consume_an_entity
2536    
2537     sub _initialize_tree_constructor ($) {
2538     my $self = shift;
2539     ## NOTE: $self->{document} MUST be specified before this method is called
2540     $self->{document}->strict_error_checking (0);
2541     ## TODO: Turn mutation events off # MUST
2542     ## TODO: Turn loose Document option (manakai extension) on
2543 wakaba 1.18 $self->{document}->manakai_is_html (1); # MUST
2544 wakaba 1.1 } # _initialize_tree_constructor
2545    
2546     sub _terminate_tree_constructor ($) {
2547     my $self = shift;
2548     $self->{document}->strict_error_checking (1);
2549     ## TODO: Turn mutation events on
2550     } # _terminate_tree_constructor
2551    
2552     ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
2553    
2554 wakaba 1.3 { # tree construction stage
2555     my $token;
2556    
2557 wakaba 1.1 sub _construct_tree ($) {
2558     my ($self) = @_;
2559    
2560     ## When an interactive UA render the $self->{document} available
2561     ## to the user, or when it begin accepting user input, are
2562     ## not defined.
2563    
2564     ## Append a character: collect it and all subsequent consecutive
2565     ## characters and insert one Text node whose data is concatenation
2566     ## of all those characters. # MUST
2567    
2568     $token = $self->_get_next_token;
2569    
2570 wakaba 1.3 $self->{insertion_mode} = 'before head';
2571     undef $self->{form_element};
2572     undef $self->{head_element};
2573     $self->{open_elements} = [];
2574     undef $self->{inner_html_node};
2575    
2576     $self->_tree_construction_initial; # MUST
2577     $self->_tree_construction_root_element;
2578     $self->_tree_construction_main;
2579     } # _construct_tree
2580    
2581     sub _tree_construction_initial ($) {
2582     my $self = shift;
2583 wakaba 1.18 INITIAL: {
2584     if ($token->{type} eq 'DOCTYPE') {
2585     ## NOTE: Conformance checkers MAY, instead of reporting "not HTML5"
2586     ## error, switch to a conformance checking mode for another
2587     ## language.
2588     my $doctype_name = $token->{name};
2589     $doctype_name = '' unless defined $doctype_name;
2590     $doctype_name =~ tr/a-z/A-Z/;
2591     if (not defined $token->{name} or # <!DOCTYPE>
2592     defined $token->{public_identifier} or
2593     defined $token->{system_identifier}) {
2594     $self->{parse_error}-> (type => 'not HTML5');
2595     } elsif ($doctype_name ne 'HTML') {
2596     ## ISSUE: ASCII case-insensitive? (in fact it does not matter)
2597     $self->{parse_error}-> (type => 'not HTML5');
2598     }
2599    
2600     my $doctype = $self->{document}->create_document_type_definition
2601     ($token->{name}); ## ISSUE: If name is missing (e.g. <!DOCTYPE>)?
2602     $doctype->public_id ($token->{public_identifier})
2603     if defined $token->{public_identifier};
2604     $doctype->system_id ($token->{system_identifier})
2605     if defined $token->{system_identifier};
2606     ## NOTE: Other DocumentType attributes are null or empty lists.
2607     ## ISSUE: internalSubset = null??
2608     $self->{document}->append_child ($doctype);
2609    
2610     if (not $token->{correct} or $doctype_name ne 'HTML') {
2611     $self->{document}->manakai_compat_mode ('quirks');
2612     } elsif (defined $token->{public_identifier}) {
2613     my $pubid = $token->{public_identifier};
2614     $pubid =~ tr/a-z/A-z/;
2615     if ({
2616     "+//SILMARIL//DTD HTML PRO V0R11 19970101//EN" => 1,
2617     "-//ADVASOFT LTD//DTD HTML 3.0 ASWEDIT + EXTENSIONS//EN" => 1,
2618     "-//AS//DTD HTML 3.0 ASWEDIT + EXTENSIONS//EN" => 1,
2619     "-//IETF//DTD HTML 2.0 LEVEL 1//EN" => 1,
2620     "-//IETF//DTD HTML 2.0 LEVEL 2//EN" => 1,
2621     "-//IETF//DTD HTML 2.0 STRICT LEVEL 1//EN" => 1,
2622     "-//IETF//DTD HTML 2.0 STRICT LEVEL 2//EN" => 1,
2623     "-//IETF//DTD HTML 2.0 STRICT//EN" => 1,
2624     "-//IETF//DTD HTML 2.0//EN" => 1,
2625     "-//IETF//DTD HTML 2.1E//EN" => 1,
2626     "-//IETF//DTD HTML 3.0//EN" => 1,
2627     "-//IETF//DTD HTML 3.0//EN//" => 1,
2628     "-//IETF//DTD HTML 3.2 FINAL//EN" => 1,
2629     "-//IETF//DTD HTML 3.2//EN" => 1,
2630     "-//IETF//DTD HTML 3//EN" => 1,
2631     "-//IETF//DTD HTML LEVEL 0//EN" => 1,
2632     "-//IETF//DTD HTML LEVEL 0//EN//2.0" => 1,
2633     "-//IETF//DTD HTML LEVEL 1//EN" => 1,
2634     "-//IETF//DTD HTML LEVEL 1//EN//2.0" => 1,
2635     "-//IETF//DTD HTML LEVEL 2//EN" => 1,
2636     "-//IETF//DTD HTML LEVEL 2//EN//2.0" => 1,
2637     "-//IETF//DTD HTML LEVEL 3//EN" => 1,
2638     "-//IETF//DTD HTML LEVEL 3//EN//3.0" => 1,
2639     "-//IETF//DTD HTML STRICT LEVEL 0//EN" => 1,
2640     "-//IETF//DTD HTML STRICT LEVEL 0//EN//2.0" => 1,
2641     "-//IETF//DTD HTML STRICT LEVEL 1//EN" => 1,
2642     "-//IETF//DTD HTML STRICT LEVEL 1//EN//2.0" => 1,
2643     "-//IETF//DTD HTML STRICT LEVEL 2//EN" => 1,
2644     "-//IETF//DTD HTML STRICT LEVEL 2//EN//2.0" => 1,
2645     "-//IETF//DTD HTML STRICT LEVEL 3//EN" => 1,
2646     "-//IETF//DTD HTML STRICT LEVEL 3//EN//3.0" => 1,
2647     "-//IETF//DTD HTML STRICT//EN" => 1,
2648     "-//IETF//DTD HTML STRICT//EN//2.0" => 1,
2649     "-//IETF//DTD HTML STRICT//EN//3.0" => 1,
2650     "-//IETF//DTD HTML//EN" => 1,
2651     "-//IETF//DTD HTML//EN//2.0" => 1,
2652     "-//IETF//DTD HTML//EN//3.0" => 1,
2653     "-//METRIUS//DTD METRIUS PRESENTATIONAL//EN" => 1,
2654     "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML STRICT//EN" => 1,
2655     "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML//EN" => 1,
2656     "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 TABLES//EN" => 1,
2657     "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML STRICT//EN" => 1,
2658     "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML//EN" => 1,
2659     "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 TABLES//EN" => 1,
2660     "-//NETSCAPE COMM. CORP.//DTD HTML//EN" => 1,
2661     "-//NETSCAPE COMM. CORP.//DTD STRICT HTML//EN" => 1,
2662     "-//O'REILLY AND ASSOCIATES//DTD HTML 2.0//EN" => 1,
2663     "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED 1.0//EN" => 1,
2664     "-//SPYGLASS//DTD HTML 2.0 EXTENDED//EN" => 1,
2665     "-//SQ//DTD HTML 2.0 HOTMETAL + EXTENSIONS//EN" => 1,
2666     "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA HTML//EN" => 1,
2667     "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA STRICT HTML//EN" => 1,
2668     "-//W3C//DTD HTML 3 1995-03-24//EN" => 1,
2669     "-//W3C//DTD HTML 3.2 DRAFT//EN" => 1,
2670     "-//W3C//DTD HTML 3.2 FINAL//EN" => 1,
2671     "-//W3C//DTD HTML 3.2//EN" => 1,
2672     "-//W3C//DTD HTML 3.2S DRAFT//EN" => 1,
2673     "-//W3C//DTD HTML 4.0 FRAMESET//EN" => 1,
2674     "-//W3C//DTD HTML 4.0 TRANSITIONAL//EN" => 1,
2675     "-//W3C//DTD HTML EXPERIMETNAL 19960712//EN" => 1,
2676     "-//W3C//DTD HTML EXPERIMENTAL 970421//EN" => 1,
2677     "-//W3C//DTD W3 HTML//EN" => 1,
2678     "-//W3O//DTD W3 HTML 3.0//EN" => 1,
2679     "-//W3O//DTD W3 HTML 3.0//EN//" => 1,
2680     "-//W3O//DTD W3 HTML STRICT 3.0//EN//" => 1,
2681     "-//WEBTECHS//DTD MOZILLA HTML 2.0//EN" => 1,
2682     "-//WEBTECHS//DTD MOZILLA HTML//EN" => 1,
2683     "-/W3C/DTD HTML 4.0 TRANSITIONAL/EN" => 1,
2684     "HTML" => 1,
2685     }->{$pubid}) {
2686     $self->{document}->manakai_compat_mode ('quirks');
2687     } elsif ($pubid eq "-//W3C//DTD HTML 4.01 FRAMESET//EN" or
2688     $pubid eq "-//W3C//DTD HTML 4.01 TRANSITIONAL//EN") {
2689     if (defined $token->{system_identifier}) {
2690     $self->{document}->manakai_compat_mode ('quirks');
2691     } else {
2692     $self->{document}->manakai_compat_mode ('limited quirks');
2693 wakaba 1.3 }
2694 wakaba 1.18 } elsif ($pubid eq "-//W3C//DTD XHTML 1.0 Frameset//EN" or
2695     $pubid eq "-//W3C//DTD XHTML 1.0 Transitional//EN") {
2696     $self->{document}->manakai_compat_mode ('limited quirks');
2697     }
2698     }
2699     if (defined $token->{system_identifier}) {
2700     my $sysid = $token->{system_identifier};
2701     $sysid =~ tr/A-Z/a-z/;
2702     if ($sysid eq "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
2703     $self->{document}->manakai_compat_mode ('quirks');
2704     }
2705     }
2706    
2707     ## Go to the root element phase.
2708     $token = $self->_get_next_token;
2709     return;
2710     } elsif ({
2711     'start tag' => 1,
2712     'end tag' => 1,
2713     'end-of-file' => 1,
2714     }->{$token->{type}}) {
2715     $self->{parse_error}-> (type => 'no DOCTYPE');
2716     $self->{document}->manakai_compat_mode ('quirks');
2717     ## Go to the root element phase
2718     ## reprocess
2719     return;
2720     } elsif ($token->{type} eq 'character') {
2721     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D
2722     ## Ignore the token
2723 wakaba 1.26
2724 wakaba 1.18 unless (length $token->{data}) {
2725     ## Stay in the phase
2726     $token = $self->_get_next_token;
2727     redo INITIAL;
2728 wakaba 1.3 }
2729     }
2730 wakaba 1.18
2731     $self->{parse_error}-> (type => 'no DOCTYPE');
2732     $self->{document}->manakai_compat_mode ('quirks');
2733     ## Go to the root element phase
2734     ## reprocess
2735     return;
2736     } elsif ($token->{type} eq 'comment') {
2737     my $comment = $self->{document}->create_comment ($token->{data});
2738     $self->{document}->append_child ($comment);
2739    
2740     ## Stay in the phase.
2741     $token = $self->_get_next_token;
2742     redo INITIAL;
2743     } else {
2744     die "$0: $token->{type}: Unknown token";
2745     }
2746     } # INITIAL
2747 wakaba 1.3 } # _tree_construction_initial
2748    
2749     sub _tree_construction_root_element ($) {
2750     my $self = shift;
2751    
2752     B: {
2753     if ($token->{type} eq 'DOCTYPE') {
2754     $self->{parse_error}-> (type => 'in html:#DOCTYPE');
2755     ## Ignore the token
2756     ## Stay in the phase
2757     $token = $self->_get_next_token;
2758     redo B;
2759     } elsif ($token->{type} eq 'comment') {
2760     my $comment = $self->{document}->create_comment ($token->{data});
2761     $self->{document}->append_child ($comment);
2762     ## Stay in the phase
2763     $token = $self->_get_next_token;
2764     redo B;
2765     } elsif ($token->{type} eq 'character') {
2766 wakaba 1.26 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D
2767     ## Ignore the token.
2768    
2769 wakaba 1.3 unless (length $token->{data}) {
2770     ## Stay in the phase
2771     $token = $self->_get_next_token;
2772     redo B;
2773     }
2774     }
2775     #
2776     } elsif ({
2777     'start tag' => 1,
2778     'end tag' => 1,
2779     'end-of-file' => 1,
2780     }->{$token->{type}}) {
2781     ## ISSUE: There is an issue in the spec
2782     #
2783     } else {
2784     die "$0: $token->{type}: Unknown token";
2785     }
2786     my $root_element;
2787     $root_element = $self->{document}->create_element_ns
2788     (q<http://www.w3.org/1999/xhtml>, [undef, 'html']);
2789    
2790     $self->{document}->append_child ($root_element);
2791     push @{$self->{open_elements}}, [$root_element, 'html'];
2792     #$phase = 'main';
2793     ## reprocess
2794     #redo B;
2795     return;
2796     } # B
2797     } # _tree_construction_root_element
2798    
2799     sub _reset_insertion_mode ($) {
2800     my $self = shift;
2801    
2802     ## Step 1
2803     my $last;
2804    
2805     ## Step 2
2806     my $i = -1;
2807     my $node = $self->{open_elements}->[$i];
2808    
2809     ## Step 3
2810     S3: {
2811     $last = 1 if $self->{open_elements}->[0]->[0] eq $node->[0];
2812     if (defined $self->{inner_html_node}) {
2813     if ($self->{inner_html_node}->[1] eq 'td' or
2814     $self->{inner_html_node}->[1] eq 'th') {
2815     #
2816     } else {
2817     $node = $self->{inner_html_node};
2818     }
2819     }
2820    
2821     ## Step 4..13
2822     my $new_mode = {
2823     select => 'in select',
2824     td => 'in cell',
2825     th => 'in cell',
2826     tr => 'in row',
2827     tbody => 'in table body',
2828     thead => 'in table head',
2829     tfoot => 'in table foot',
2830     caption => 'in caption',
2831     colgroup => 'in column group',
2832     table => 'in table',
2833     head => 'in body', # not in head!
2834     body => 'in body',
2835     frameset => 'in frameset',
2836     }->{$node->[1]};
2837     $self->{insertion_mode} = $new_mode and return if defined $new_mode;
2838    
2839     ## Step 14
2840     if ($node->[1] eq 'html') {
2841     unless (defined $self->{head_element}) {
2842     $self->{insertion_mode} = 'before head';
2843     } else {
2844     $self->{insertion_mode} = 'after head';
2845     }
2846     return;
2847     }
2848    
2849     ## Step 15
2850     $self->{insertion_mode} = 'in body' and return if $last;
2851    
2852     ## Step 16
2853     $i--;
2854     $node = $self->{open_elements}->[$i];
2855    
2856     ## Step 17
2857     redo S3;
2858     } # S3
2859     } # _reset_insertion_mode
2860    
2861     sub _tree_construction_main ($) {
2862     my $self = shift;
2863    
2864     my $phase = 'main';
2865 wakaba 1.1
2866     my $active_formatting_elements = [];
2867    
2868     my $reconstruct_active_formatting_elements = sub { # MUST
2869     my $insert = shift;
2870    
2871     ## Step 1
2872     return unless @$active_formatting_elements;
2873    
2874     ## Step 3
2875     my $i = -1;
2876     my $entry = $active_formatting_elements->[$i];
2877    
2878     ## Step 2
2879     return if $entry->[0] eq '#marker';
2880 wakaba 1.3 for (@{$self->{open_elements}}) {
2881 wakaba 1.1 if ($entry->[0] eq $_->[0]) {
2882     return;
2883     }
2884     }
2885    
2886     S4: {
2887     ## Step 4
2888     last S4 if $active_formatting_elements->[0]->[0] eq $entry->[0];
2889    
2890     ## Step 5
2891     $i--;
2892     $entry = $active_formatting_elements->[$i];
2893    
2894     ## Step 6
2895     if ($entry->[0] eq '#marker') {
2896     #
2897     } else {
2898     my $in_open_elements;
2899 wakaba 1.3 OE: for (@{$self->{open_elements}}) {
2900 wakaba 1.1 if ($entry->[0] eq $_->[0]) {
2901     $in_open_elements = 1;
2902     last OE;
2903     }
2904     }
2905     if ($in_open_elements) {
2906     #
2907     } else {
2908     redo S4;
2909     }
2910     }
2911    
2912     ## Step 7
2913     $i++;
2914     $entry = $active_formatting_elements->[$i];
2915     } # S4
2916    
2917     S7: {
2918     ## Step 8
2919     my $clone = [$entry->[0]->clone_node (0), $entry->[1]];
2920    
2921     ## Step 9
2922     $insert->($clone->[0]);
2923 wakaba 1.3 push @{$self->{open_elements}}, $clone;
2924 wakaba 1.1
2925     ## Step 10
2926 wakaba 1.3 $active_formatting_elements->[$i] = $self->{open_elements}->[-1];
2927 wakaba 1.1
2928     ## Step 11
2929     unless ($clone->[0] eq $active_formatting_elements->[-1]->[0]) {
2930     ## Step 7'
2931     $i++;
2932     $entry = $active_formatting_elements->[$i];
2933    
2934     redo S7;
2935     }
2936     } # S7
2937     }; # $reconstruct_active_formatting_elements
2938    
2939     my $clear_up_to_marker = sub {
2940     for (reverse 0..$#$active_formatting_elements) {
2941     if ($active_formatting_elements->[$_]->[0] eq '#marker') {
2942     splice @$active_formatting_elements, $_;
2943     return;
2944     }
2945     }
2946     }; # $clear_up_to_marker
2947    
2948 wakaba 1.25 my $parse_rcdata = sub ($$) {
2949     my ($content_model_flag, $insert) = @_;
2950    
2951     ## Step 1
2952     my $start_tag_name = $token->{tag_name};
2953     my $el;
2954    
2955     $el = $self->{document}->create_element_ns
2956     (q<http://www.w3.org/1999/xhtml>, [undef, $start_tag_name]);
2957 wakaba 1.1
2958 wakaba 1.6 for my $attr_name (keys %{ $token->{attributes}}) {
2959 wakaba 1.25 $el->set_attribute_ns (undef, [undef, $attr_name],
2960 wakaba 1.6 $token->{attributes} ->{$attr_name}->{value});
2961     }
2962    
2963 wakaba 1.25
2964     ## Step 2
2965     $insert->($el); # /context node/->append_child ($el)
2966    
2967     ## Step 3
2968     $self->{content_model_flag} = $content_model_flag; # CDATA or RCDATA
2969 wakaba 1.13 delete $self->{escape}; # MUST
2970 wakaba 1.25
2971     ## Step 4
2972 wakaba 1.1 my $text = '';
2973     $token = $self->_get_next_token;
2974 wakaba 1.25 while ($token->{type} eq 'character') { # or until stop tokenizing
2975 wakaba 1.1 $text .= $token->{data};
2976     $token = $self->_get_next_token;
2977 wakaba 1.25 }
2978    
2979     ## Step 5
2980 wakaba 1.1 if (length $text) {
2981 wakaba 1.25 my $text = $self->{document}->create_text_node ($text);
2982     $el->append_child ($text);
2983 wakaba 1.1 }
2984 wakaba 1.25
2985     ## Step 6
2986 wakaba 1.1 $self->{content_model_flag} = 'PCDATA';
2987 wakaba 1.25
2988     ## Step 7
2989     if ($token->{type} eq 'end tag' and $token->{tag_name} eq $start_tag_name) {
2990 wakaba 1.1 ## Ignore the token
2991     } else {
2992 wakaba 1.25 $self->{parse_error}-> (type => 'in '.$content_model_flag.':#'.$token->{type});
2993 wakaba 1.1 }
2994     $token = $self->_get_next_token;
2995 wakaba 1.25 }; # $parse_rcdata
2996 wakaba 1.1
2997 wakaba 1.25 my $script_start_tag = sub ($) {
2998     my $insert = $_[0];
2999 wakaba 1.1 my $script_el;
3000    
3001     $script_el = $self->{document}->create_element_ns
3002     (q<http://www.w3.org/1999/xhtml>, [undef, 'script']);
3003    
3004     for my $attr_name (keys %{ $token->{attributes}}) {
3005     $script_el->set_attribute_ns (undef, [undef, $attr_name],
3006     $token->{attributes} ->{$attr_name}->{value});
3007     }
3008    
3009     ## TODO: mark as "parser-inserted"
3010    
3011     $self->{content_model_flag} = 'CDATA';
3012 wakaba 1.13 delete $self->{escape}; # MUST
3013 wakaba 1.1
3014     my $text = '';
3015     $token = $self->_get_next_token;
3016     while ($token->{type} eq 'character') {
3017     $text .= $token->{data};
3018     $token = $self->_get_next_token;
3019     } # stop if non-character token or tokenizer stops tokenising
3020     if (length $text) {
3021     $script_el->manakai_append_text ($text);
3022     }
3023    
3024     $self->{content_model_flag} = 'PCDATA';
3025    
3026     if ($token->{type} eq 'end tag' and
3027     $token->{tag_name} eq 'script') {
3028     ## Ignore the token
3029     } else {
3030 wakaba 1.3 $self->{parse_error}-> (type => 'in CDATA:#'.$token->{type});
3031 wakaba 1.1 ## ISSUE: And ignore?
3032     ## TODO: mark as "already executed"
3033     }
3034    
3035 wakaba 1.3 if (defined $self->{inner_html_node}) {
3036     ## TODO: mark as "already executed"
3037     } else {
3038 wakaba 1.1 ## TODO: $old_insertion_point = current insertion point
3039     ## TODO: insertion point = just before the next input character
3040 wakaba 1.25
3041     $insert->($script_el);
3042 wakaba 1.1
3043     ## TODO: insertion point = $old_insertion_point (might be "undefined")
3044    
3045     ## TODO: if there is a script that will execute as soon as the parser resume, then...
3046     }
3047    
3048     $token = $self->_get_next_token;
3049     }; # $script_start_tag
3050    
3051     my $formatting_end_tag = sub {
3052     my $tag_name = shift;
3053    
3054     FET: {
3055     ## Step 1
3056     my $formatting_element;
3057     my $formatting_element_i_in_active;
3058     AFE: for (reverse 0..$#$active_formatting_elements) {
3059     if ($active_formatting_elements->[$_]->[1] eq $tag_name) {
3060     $formatting_element = $active_formatting_elements->[$_];
3061     $formatting_element_i_in_active = $_;
3062     last AFE;
3063     } elsif ($active_formatting_elements->[$_]->[0] eq '#marker') {
3064     last AFE;
3065     }
3066     } # AFE
3067     unless (defined $formatting_element) {
3068 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$tag_name);
3069 wakaba 1.1 ## Ignore the token
3070     $token = $self->_get_next_token;
3071     return;
3072     }
3073     ## has an element in scope
3074     my $in_scope = 1;
3075     my $formatting_element_i_in_open;
3076 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3077     my $node = $self->{open_elements}->[$_];
3078 wakaba 1.1 if ($node->[0] eq $formatting_element->[0]) {
3079     if ($in_scope) {
3080     $formatting_element_i_in_open = $_;
3081     last INSCOPE;
3082     } else { # in open elements but not in scope
3083 wakaba 1.4 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
3084 wakaba 1.1 ## Ignore the token
3085     $token = $self->_get_next_token;
3086     return;
3087     }
3088     } elsif ({
3089     table => 1, caption => 1, td => 1, th => 1,
3090     button => 1, marquee => 1, object => 1, html => 1,
3091     }->{$node->[1]}) {
3092     $in_scope = 0;
3093     }
3094     } # INSCOPE
3095     unless (defined $formatting_element_i_in_open) {
3096 wakaba 1.4 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
3097 wakaba 1.1 pop @$active_formatting_elements; # $formatting_element
3098     $token = $self->_get_next_token; ## TODO: ok?
3099     return;
3100     }
3101 wakaba 1.3 if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
3102 wakaba 1.4 $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3103 wakaba 1.1 }
3104    
3105     ## Step 2
3106     my $furthest_block;
3107     my $furthest_block_i_in_open;
3108 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3109     my $node = $self->{open_elements}->[$_];
3110 wakaba 1.1 if (not $formatting_category->{$node->[1]} and
3111     #not $phrasing_category->{$node->[1]} and
3112     ($special_category->{$node->[1]} or
3113     $scoping_category->{$node->[1]})) {
3114     $furthest_block = $node;
3115     $furthest_block_i_in_open = $_;
3116     } elsif ($node->[0] eq $formatting_element->[0]) {
3117     last OE;
3118     }
3119     } # OE
3120    
3121     ## Step 3
3122     unless (defined $furthest_block) { # MUST
3123 wakaba 1.3 splice @{$self->{open_elements}}, $formatting_element_i_in_open;
3124 wakaba 1.1 splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
3125     $token = $self->_get_next_token;
3126     return;
3127     }
3128    
3129     ## Step 4
3130 wakaba 1.3 my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1];
3131 wakaba 1.1
3132     ## Step 5
3133     my $furthest_block_parent = $furthest_block->[0]->parent_node;
3134     if (defined $furthest_block_parent) {
3135     $furthest_block_parent->remove_child ($furthest_block->[0]);
3136     }
3137    
3138     ## Step 6
3139     my $bookmark_prev_el
3140     = $active_formatting_elements->[$formatting_element_i_in_active - 1]
3141     ->[0];
3142    
3143     ## Step 7
3144     my $node = $furthest_block;
3145     my $node_i_in_open = $furthest_block_i_in_open;
3146     my $last_node = $furthest_block;
3147     S7: {
3148     ## Step 1
3149     $node_i_in_open--;
3150 wakaba 1.3 $node = $self->{open_elements}->[$node_i_in_open];
3151 wakaba 1.1
3152     ## Step 2
3153     my $node_i_in_active;
3154     S7S2: {
3155     for (reverse 0..$#$active_formatting_elements) {
3156     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
3157     $node_i_in_active = $_;
3158     last S7S2;
3159     }
3160     }
3161 wakaba 1.3 splice @{$self->{open_elements}}, $node_i_in_open, 1;
3162 wakaba 1.1 redo S7;
3163     } # S7S2
3164    
3165     ## Step 3
3166     last S7 if $node->[0] eq $formatting_element->[0];
3167    
3168     ## Step 4
3169     if ($last_node->[0] eq $furthest_block->[0]) {
3170     $bookmark_prev_el = $node->[0];
3171     }
3172    
3173     ## Step 5
3174     if ($node->[0]->has_child_nodes ()) {
3175     my $clone = [$node->[0]->clone_node (0), $node->[1]];
3176     $active_formatting_elements->[$node_i_in_active] = $clone;
3177 wakaba 1.3 $self->{open_elements}->[$node_i_in_open] = $clone;
3178 wakaba 1.1 $node = $clone;
3179     }
3180    
3181     ## Step 6
3182     $node->[0]->append_child ($last_node->[0]);
3183    
3184     ## Step 7
3185     $last_node = $node;
3186    
3187     ## Step 8
3188     redo S7;
3189     } # S7
3190    
3191     ## Step 8
3192     $common_ancestor_node->[0]->append_child ($last_node->[0]);
3193    
3194     ## Step 9
3195     my $clone = [$formatting_element->[0]->clone_node (0),
3196     $formatting_element->[1]];
3197    
3198     ## Step 10
3199     my @cn = @{$furthest_block->[0]->child_nodes};
3200     $clone->[0]->append_child ($_) for @cn;
3201    
3202     ## Step 11
3203     $furthest_block->[0]->append_child ($clone->[0]);
3204    
3205     ## Step 12
3206     my $i;
3207     AFE: for (reverse 0..$#$active_formatting_elements) {
3208     if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
3209     splice @$active_formatting_elements, $_, 1;
3210     $i-- and last AFE if defined $i;
3211     } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
3212     $i = $_;
3213     }
3214     } # AFE
3215     splice @$active_formatting_elements, $i + 1, 0, $clone;
3216    
3217     ## Step 13
3218     undef $i;
3219 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3220     if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
3221     splice @{$self->{open_elements}}, $_, 1;
3222 wakaba 1.1 $i-- and last OE if defined $i;
3223 wakaba 1.3 } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
3224 wakaba 1.1 $i = $_;
3225     }
3226     } # OE
3227 wakaba 1.3 splice @{$self->{open_elements}}, $i + 1, 1, $clone;
3228 wakaba 1.1
3229     ## Step 14
3230     redo FET;
3231     } # FET
3232     }; # $formatting_end_tag
3233    
3234     my $insert_to_current = sub {
3235 wakaba 1.25 $self->{open_elements}->[-1]->[0]->append_child ($_[0]);
3236 wakaba 1.1 }; # $insert_to_current
3237    
3238     my $insert_to_foster = sub {
3239     my $child = shift;
3240     if ({
3241     table => 1, tbody => 1, tfoot => 1,
3242     thead => 1, tr => 1,
3243 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3244 wakaba 1.1 # MUST
3245     my $foster_parent_element;
3246     my $next_sibling;
3247 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3248     if ($self->{open_elements}->[$_]->[1] eq 'table') {
3249     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
3250 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
3251     $foster_parent_element = $parent;
3252 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
3253 wakaba 1.1 } else {
3254     $foster_parent_element
3255 wakaba 1.3 = $self->{open_elements}->[$_ - 1]->[0];
3256 wakaba 1.1 }
3257     last OE;
3258     }
3259     } # OE
3260 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0]
3261 wakaba 1.1 unless defined $foster_parent_element;
3262     $foster_parent_element->insert_before
3263     ($child, $next_sibling);
3264     } else {
3265 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($child);
3266 wakaba 1.1 }
3267     }; # $insert_to_foster
3268    
3269     my $in_body = sub {
3270     my $insert = shift;
3271     if ($token->{type} eq 'start tag') {
3272     if ($token->{tag_name} eq 'script') {
3273 wakaba 1.25 ## NOTE: This is an "as if in head" code clone
3274     $script_start_tag->($insert);
3275 wakaba 1.1 return;
3276     } elsif ($token->{tag_name} eq 'style') {
3277 wakaba 1.25 ## NOTE: This is an "as if in head" code clone
3278     $parse_rcdata->('CDATA', $insert);
3279 wakaba 1.1 return;
3280     } elsif ({
3281     base => 1, link => 1, meta => 1,
3282     }->{$token->{tag_name}}) {
3283 wakaba 1.25 ## NOTE: This is an "as if in head" code clone, only "-t" differs
3284 wakaba 1.1
3285 wakaba 1.25 {
3286     my $el;
3287    
3288 wakaba 1.1 $el = $self->{document}->create_element_ns
3289     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3290    
3291 wakaba 1.25 for my $attr_name (keys %{ $token->{attributes}}) {
3292 wakaba 1.1 $el->set_attribute_ns (undef, [undef, $attr_name],
3293 wakaba 1.25 $token->{attributes} ->{$attr_name}->{value});
3294 wakaba 1.1 }
3295    
3296 wakaba 1.25 $insert->($el);
3297     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3298     }
3299    
3300     pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
3301 wakaba 1.1 $token = $self->_get_next_token;
3302 wakaba 1.26 ## TODO: Extracting |charset| from |meta|.
3303 wakaba 1.1 return;
3304     } elsif ($token->{tag_name} eq 'title') {
3305 wakaba 1.3 $self->{parse_error}-> (type => 'in body:title');
3306 wakaba 1.25 ## NOTE: This is an "as if in head" code clone
3307     $parse_rcdata->('RCDATA', $insert);
3308 wakaba 1.1 return;
3309     } elsif ($token->{tag_name} eq 'body') {
3310 wakaba 1.3 $self->{parse_error}-> (type => 'in body:body');
3311 wakaba 1.1
3312 wakaba 1.3 if (@{$self->{open_elements}} == 1 or
3313     $self->{open_elements}->[1]->[1] ne 'body') {
3314 wakaba 1.1 ## Ignore the token
3315     } else {
3316 wakaba 1.3 my $body_el = $self->{open_elements}->[1]->[0];
3317 wakaba 1.1 for my $attr_name (keys %{$token->{attributes}}) {
3318     unless ($body_el->has_attribute_ns (undef, $attr_name)) {
3319     $body_el->set_attribute_ns
3320     (undef, [undef, $attr_name],
3321     $token->{attributes}->{$attr_name}->{value});
3322     }
3323     }
3324     }
3325     $token = $self->_get_next_token;
3326     return;
3327     } elsif ({
3328     address => 1, blockquote => 1, center => 1, dir => 1,
3329     div => 1, dl => 1, fieldset => 1, listing => 1,
3330     menu => 1, ol => 1, p => 1, ul => 1,
3331     pre => 1,
3332     }->{$token->{tag_name}}) {
3333     ## has a p element in scope
3334 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
3335 wakaba 1.1 if ($_->[1] eq 'p') {
3336     unshift @{$self->{token}}, $token;
3337     $token = {type => 'end tag', tag_name => 'p'};
3338     return;
3339     } elsif ({
3340     table => 1, caption => 1, td => 1, th => 1,
3341     button => 1, marquee => 1, object => 1, html => 1,
3342     }->{$_->[1]}) {
3343     last INSCOPE;
3344     }
3345     } # INSCOPE
3346    
3347    
3348     {
3349     my $el;
3350    
3351     $el = $self->{document}->create_element_ns
3352     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3353    
3354     for my $attr_name (keys %{ $token->{attributes}}) {
3355     $el->set_attribute_ns (undef, [undef, $attr_name],
3356     $token->{attributes} ->{$attr_name}->{value});
3357     }
3358    
3359     $insert->($el);
3360 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3361 wakaba 1.1 }
3362    
3363     if ($token->{tag_name} eq 'pre') {
3364     $token = $self->_get_next_token;
3365     if ($token->{type} eq 'character') {
3366     $token->{data} =~ s/^\x0A//;
3367     unless (length $token->{data}) {
3368     $token = $self->_get_next_token;
3369     }
3370     }
3371     } else {
3372     $token = $self->_get_next_token;
3373     }
3374     return;
3375     } elsif ($token->{tag_name} eq 'form') {
3376 wakaba 1.3 if (defined $self->{form_element}) {
3377     $self->{parse_error}-> (type => 'in form:form');
3378 wakaba 1.1 ## Ignore the token
3379 wakaba 1.7 $token = $self->_get_next_token;
3380     return;
3381 wakaba 1.1 } else {
3382     ## has a p element in scope
3383 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
3384 wakaba 1.1 if ($_->[1] eq 'p') {
3385     unshift @{$self->{token}}, $token;
3386     $token = {type => 'end tag', tag_name => 'p'};
3387     return;
3388     } elsif ({
3389     table => 1, caption => 1, td => 1, th => 1,
3390     button => 1, marquee => 1, object => 1, html => 1,
3391     }->{$_->[1]}) {
3392     last INSCOPE;
3393     }
3394     } # INSCOPE
3395    
3396    
3397     {
3398     my $el;
3399    
3400     $el = $self->{document}->create_element_ns
3401     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3402    
3403     for my $attr_name (keys %{ $token->{attributes}}) {
3404     $el->set_attribute_ns (undef, [undef, $attr_name],
3405     $token->{attributes} ->{$attr_name}->{value});
3406     }
3407    
3408     $insert->($el);
3409 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3410 wakaba 1.1 }
3411    
3412 wakaba 1.3 $self->{form_element} = $self->{open_elements}->[-1]->[0];
3413 wakaba 1.1 $token = $self->_get_next_token;
3414     return;
3415     }
3416     } elsif ($token->{tag_name} eq 'li') {
3417     ## has a p element in scope
3418 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
3419 wakaba 1.1 if ($_->[1] eq 'p') {
3420     unshift @{$self->{token}}, $token;
3421     $token = {type => 'end tag', tag_name => 'p'};
3422     return;
3423     } elsif ({
3424     table => 1, caption => 1, td => 1, th => 1,
3425     button => 1, marquee => 1, object => 1, html => 1,
3426     }->{$_->[1]}) {
3427     last INSCOPE;
3428     }
3429     } # INSCOPE
3430    
3431     ## Step 1
3432     my $i = -1;
3433 wakaba 1.3 my $node = $self->{open_elements}->[$i];
3434 wakaba 1.1 LI: {
3435     ## Step 2
3436     if ($node->[1] eq 'li') {
3437 wakaba 1.8 if ($i != -1) {
3438     $self->{parse_error}-> (type => 'end tag missing:'.
3439     $self->{open_elements}->[-1]->[1]);
3440     ## TODO: test
3441     }
3442 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3443 wakaba 1.1 last LI;
3444     }
3445    
3446     ## Step 3
3447     if (not $formatting_category->{$node->[1]} and
3448     #not $phrasing_category->{$node->[1]} and
3449     ($special_category->{$node->[1]} or
3450     $scoping_category->{$node->[1]}) and
3451     $node->[1] ne 'address' and $node->[1] ne 'div') {
3452     last LI;
3453     }
3454    
3455     ## Step 4
3456     $i--;
3457 wakaba 1.3 $node = $self->{open_elements}->[$i];
3458 wakaba 1.1 redo LI;
3459     } # LI
3460    
3461    
3462     {
3463     my $el;
3464    
3465     $el = $self->{document}->create_element_ns
3466     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3467    
3468     for my $attr_name (keys %{ $token->{attributes}}) {
3469     $el->set_attribute_ns (undef, [undef, $attr_name],
3470     $token->{attributes} ->{$attr_name}->{value});
3471     }
3472    
3473     $insert->($el);
3474 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3475 wakaba 1.1 }
3476    
3477     $token = $self->_get_next_token;
3478     return;
3479     } elsif ($token->{tag_name} eq 'dd' or $token->{tag_name} eq 'dt') {
3480     ## has a p element in scope
3481 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
3482 wakaba 1.1 if ($_->[1] eq 'p') {
3483     unshift @{$self->{token}}, $token;
3484     $token = {type => 'end tag', tag_name => 'p'};
3485     return;
3486     } elsif ({
3487     table => 1, caption => 1, td => 1, th => 1,
3488     button => 1, marquee => 1, object => 1, html => 1,
3489     }->{$_->[1]}) {
3490     last INSCOPE;
3491     }
3492     } # INSCOPE
3493    
3494     ## Step 1
3495     my $i = -1;
3496 wakaba 1.3 my $node = $self->{open_elements}->[$i];
3497 wakaba 1.1 LI: {
3498     ## Step 2
3499     if ($node->[1] eq 'dt' or $node->[1] eq 'dd') {
3500 wakaba 1.8 if ($i != -1) {
3501     $self->{parse_error}-> (type => 'end tag missing:'.
3502     $self->{open_elements}->[-1]->[1]);
3503     ## TODO: test
3504     }
3505 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3506 wakaba 1.1 last LI;
3507     }
3508    
3509     ## Step 3
3510     if (not $formatting_category->{$node->[1]} and
3511     #not $phrasing_category->{$node->[1]} and
3512     ($special_category->{$node->[1]} or
3513     $scoping_category->{$node->[1]}) and
3514     $node->[1] ne 'address' and $node->[1] ne 'div') {
3515     last LI;
3516     }
3517    
3518     ## Step 4
3519     $i--;
3520 wakaba 1.3 $node = $self->{open_elements}->[$i];
3521 wakaba 1.1 redo LI;
3522     } # LI
3523    
3524    
3525     {
3526     my $el;
3527    
3528     $el = $self->{document}->create_element_ns
3529     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3530    
3531     for my $attr_name (keys %{ $token->{attributes}}) {
3532     $el->set_attribute_ns (undef, [undef, $attr_name],
3533     $token->{attributes} ->{$attr_name}->{value});
3534     }
3535    
3536     $insert->($el);
3537 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3538 wakaba 1.1 }
3539    
3540     $token = $self->_get_next_token;
3541     return;
3542     } elsif ($token->{tag_name} eq 'plaintext') {
3543     ## has a p element in scope
3544 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
3545 wakaba 1.1 if ($_->[1] eq 'p') {
3546     unshift @{$self->{token}}, $token;
3547     $token = {type => 'end tag', tag_name => 'p'};
3548     return;
3549     } elsif ({
3550     table => 1, caption => 1, td => 1, th => 1,
3551     button => 1, marquee => 1, object => 1, html => 1,
3552     }->{$_->[1]}) {
3553     last INSCOPE;
3554     }
3555     } # INSCOPE
3556    
3557    
3558     {
3559     my $el;
3560    
3561     $el = $self->{document}->create_element_ns
3562     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3563    
3564     for my $attr_name (keys %{ $token->{attributes}}) {
3565     $el->set_attribute_ns (undef, [undef, $attr_name],
3566     $token->{attributes} ->{$attr_name}->{value});
3567     }
3568    
3569     $insert->($el);
3570 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3571 wakaba 1.1 }
3572    
3573    
3574     $self->{content_model_flag} = 'PLAINTEXT';
3575    
3576     $token = $self->_get_next_token;
3577     return;
3578     } elsif ({
3579     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
3580     }->{$token->{tag_name}}) {
3581     ## has a p element in scope
3582 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3583     my $node = $self->{open_elements}->[$_];
3584 wakaba 1.1 if ($node->[1] eq 'p') {
3585     unshift @{$self->{token}}, $token;
3586     $token = {type => 'end tag', tag_name => 'p'};
3587     return;
3588     } elsif ({
3589     table => 1, caption => 1, td => 1, th => 1,
3590     button => 1, marquee => 1, object => 1, html => 1,
3591     }->{$node->[1]}) {
3592     last INSCOPE;
3593     }
3594     } # INSCOPE
3595    
3596 wakaba 1.23 ## NOTE: See <http://html5.org/tools/web-apps-tracker?from=925&to=926>
3597 wakaba 1.1 ## has an element in scope
3598 wakaba 1.23 #my $i;
3599     #INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3600     # my $node = $self->{open_elements}->[$_];
3601     # if ({
3602     # h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
3603     # }->{$node->[1]}) {
3604     # $i = $_;
3605     # last INSCOPE;
3606     # } elsif ({
3607     # table => 1, caption => 1, td => 1, th => 1,
3608     # button => 1, marquee => 1, object => 1, html => 1,
3609     # }->{$node->[1]}) {
3610     # last INSCOPE;
3611     # }
3612     #} # INSCOPE
3613     #
3614     #if (defined $i) {
3615     # !!! parse-error (type => 'in hn:hn');
3616     # splice @{$self->{open_elements}}, $i;
3617     #}
3618 wakaba 1.1
3619    
3620     {
3621     my $el;
3622    
3623     $el = $self->{document}->create_element_ns
3624     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3625    
3626     for my $attr_name (keys %{ $token->{attributes}}) {
3627     $el->set_attribute_ns (undef, [undef, $attr_name],
3628     $token->{attributes} ->{$attr_name}->{value});
3629     }
3630    
3631     $insert->($el);
3632 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3633 wakaba 1.1 }
3634    
3635    
3636     $token = $self->_get_next_token;
3637     return;
3638     } elsif ($token->{tag_name} eq 'a') {
3639     AFE: for my $i (reverse 0..$#$active_formatting_elements) {
3640     my $node = $active_formatting_elements->[$i];
3641     if ($node->[1] eq 'a') {
3642 wakaba 1.3 $self->{parse_error}-> (type => 'in a:a');
3643 wakaba 1.1
3644     unshift @{$self->{token}}, $token;
3645     $token = {type => 'end tag', tag_name => 'a'};
3646     $formatting_end_tag->($token->{tag_name});
3647    
3648     AFE2: for (reverse 0..$#$active_formatting_elements) {
3649     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
3650     splice @$active_formatting_elements, $_, 1;
3651     last AFE2;
3652     }
3653     } # AFE2
3654 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3655     if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
3656     splice @{$self->{open_elements}}, $_, 1;
3657 wakaba 1.1 last OE;
3658     }
3659     } # OE
3660     last AFE;
3661     } elsif ($node->[0] eq '#marker') {
3662     last AFE;
3663     }
3664     } # AFE
3665    
3666     $reconstruct_active_formatting_elements->($insert_to_current);
3667    
3668    
3669     {
3670     my $el;
3671    
3672     $el = $self->{document}->create_element_ns
3673     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3674    
3675     for my $attr_name (keys %{ $token->{attributes}}) {
3676     $el->set_attribute_ns (undef, [undef, $attr_name],
3677     $token->{attributes} ->{$attr_name}->{value});
3678     }
3679    
3680     $insert->($el);
3681 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3682 wakaba 1.1 }
3683    
3684 wakaba 1.3 push @$active_formatting_elements, $self->{open_elements}->[-1];
3685 wakaba 1.1
3686     $token = $self->_get_next_token;
3687     return;
3688     } elsif ({
3689     b => 1, big => 1, em => 1, font => 1, i => 1,
3690 wakaba 1.19 s => 1, small => 1, strile => 1,
3691 wakaba 1.1 strong => 1, tt => 1, u => 1,
3692     }->{$token->{tag_name}}) {
3693     $reconstruct_active_formatting_elements->($insert_to_current);
3694    
3695    
3696     {
3697     my $el;
3698    
3699     $el = $self->{document}->create_element_ns
3700     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3701    
3702     for my $attr_name (keys %{ $token->{attributes}}) {
3703     $el->set_attribute_ns (undef, [undef, $attr_name],
3704     $token->{attributes} ->{$attr_name}->{value});
3705     }
3706    
3707     $insert->($el);
3708 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3709 wakaba 1.1 }
3710    
3711 wakaba 1.3 push @$active_formatting_elements, $self->{open_elements}->[-1];
3712 wakaba 1.1
3713     $token = $self->_get_next_token;
3714     return;
3715 wakaba 1.19 } elsif ($token->{tag_name} eq 'nobr') {
3716     $reconstruct_active_formatting_elements->($insert_to_current);
3717    
3718     ## has a |nobr| element in scope
3719     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3720     my $node = $self->{open_elements}->[$_];
3721     if ($node->[1] eq 'nobr') {
3722     unshift @{$self->{token}}, $token;
3723     $token = {type => 'end tag', tag_name => 'nobr'};
3724     return;
3725     } elsif ({
3726     table => 1, caption => 1, td => 1, th => 1,
3727     button => 1, marquee => 1, object => 1, html => 1,
3728     }->{$node->[1]}) {
3729     last INSCOPE;
3730     }
3731     } # INSCOPE
3732    
3733    
3734     {
3735     my $el;
3736    
3737     $el = $self->{document}->create_element_ns
3738     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3739    
3740     for my $attr_name (keys %{ $token->{attributes}}) {
3741     $el->set_attribute_ns (undef, [undef, $attr_name],
3742     $token->{attributes} ->{$attr_name}->{value});
3743     }
3744    
3745     $insert->($el);
3746     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3747     }
3748    
3749     push @$active_formatting_elements, $self->{open_elements}->[-1];
3750    
3751     $token = $self->_get_next_token;
3752     return;
3753 wakaba 1.1 } elsif ($token->{tag_name} eq 'button') {
3754     ## has a button element in scope
3755 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3756     my $node = $self->{open_elements}->[$_];
3757 wakaba 1.1 if ($node->[1] eq 'button') {
3758 wakaba 1.3 $self->{parse_error}-> (type => 'in button:button');
3759 wakaba 1.1 unshift @{$self->{token}}, $token;
3760     $token = {type => 'end tag', tag_name => 'button'};
3761     return;
3762     } elsif ({
3763     table => 1, caption => 1, td => 1, th => 1,
3764     button => 1, marquee => 1, object => 1, html => 1,
3765     }->{$node->[1]}) {
3766     last INSCOPE;
3767     }
3768     } # INSCOPE
3769    
3770     $reconstruct_active_formatting_elements->($insert_to_current);
3771    
3772    
3773     {
3774     my $el;
3775    
3776     $el = $self->{document}->create_element_ns
3777     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3778    
3779     for my $attr_name (keys %{ $token->{attributes}}) {
3780     $el->set_attribute_ns (undef, [undef, $attr_name],
3781     $token->{attributes} ->{$attr_name}->{value});
3782     }
3783    
3784     $insert->($el);
3785 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3786 wakaba 1.1 }
3787    
3788     push @$active_formatting_elements, ['#marker', ''];
3789    
3790     $token = $self->_get_next_token;
3791     return;
3792     } elsif ($token->{tag_name} eq 'marquee' or
3793     $token->{tag_name} eq 'object') {
3794     $reconstruct_active_formatting_elements->($insert_to_current);
3795    
3796    
3797     {
3798     my $el;
3799    
3800     $el = $self->{document}->create_element_ns
3801     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3802    
3803     for my $attr_name (keys %{ $token->{attributes}}) {
3804     $el->set_attribute_ns (undef, [undef, $attr_name],
3805     $token->{attributes} ->{$attr_name}->{value});
3806     }
3807    
3808     $insert->($el);
3809 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3810 wakaba 1.1 }
3811    
3812     push @$active_formatting_elements, ['#marker', ''];
3813    
3814     $token = $self->_get_next_token;
3815     return;
3816     } elsif ($token->{tag_name} eq 'xmp') {
3817     $reconstruct_active_formatting_elements->($insert_to_current);
3818 wakaba 1.25 $parse_rcdata->('CDATA', $insert);
3819 wakaba 1.1 return;
3820     } elsif ($token->{tag_name} eq 'table') {
3821     ## has a p element in scope
3822 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
3823 wakaba 1.1 if ($_->[1] eq 'p') {
3824     unshift @{$self->{token}}, $token;
3825     $token = {type => 'end tag', tag_name => 'p'};
3826     return;
3827     } elsif ({
3828     table => 1, caption => 1, td => 1, th => 1,
3829     button => 1, marquee => 1, object => 1, html => 1,
3830     }->{$_->[1]}) {
3831     last INSCOPE;
3832     }
3833     } # INSCOPE
3834    
3835    
3836     {
3837     my $el;
3838    
3839     $el = $self->{document}->create_element_ns
3840     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3841    
3842     for my $attr_name (keys %{ $token->{attributes}}) {
3843     $el->set_attribute_ns (undef, [undef, $attr_name],
3844     $token->{attributes} ->{$attr_name}->{value});
3845     }
3846    
3847     $insert->($el);
3848 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3849 wakaba 1.1 }
3850    
3851    
3852 wakaba 1.3 $self->{insertion_mode} = 'in table';
3853 wakaba 1.1
3854     $token = $self->_get_next_token;
3855     return;
3856     } elsif ({
3857     area => 1, basefont => 1, bgsound => 1, br => 1,
3858     embed => 1, img => 1, param => 1, spacer => 1, wbr => 1,
3859     image => 1,
3860     }->{$token->{tag_name}}) {
3861     if ($token->{tag_name} eq 'image') {
3862 wakaba 1.3 $self->{parse_error}-> (type => 'image');
3863 wakaba 1.1 $token->{tag_name} = 'img';
3864     }
3865    
3866     $reconstruct_active_formatting_elements->($insert_to_current);
3867    
3868    
3869     {
3870     my $el;
3871    
3872     $el = $self->{document}->create_element_ns
3873     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3874    
3875     for my $attr_name (keys %{ $token->{attributes}}) {
3876     $el->set_attribute_ns (undef, [undef, $attr_name],
3877     $token->{attributes} ->{$attr_name}->{value});
3878     }
3879    
3880     $insert->($el);
3881 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3882 wakaba 1.1 }
3883    
3884 wakaba 1.3 pop @{$self->{open_elements}};
3885 wakaba 1.1
3886     $token = $self->_get_next_token;
3887     return;
3888     } elsif ($token->{tag_name} eq 'hr') {
3889     ## has a p element in scope
3890 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
3891 wakaba 1.1 if ($_->[1] eq 'p') {
3892     unshift @{$self->{token}}, $token;
3893     $token = {type => 'end tag', tag_name => 'p'};
3894     return;
3895     } elsif ({
3896     table => 1, caption => 1, td => 1, th => 1,
3897     button => 1, marquee => 1, object => 1, html => 1,
3898     }->{$_->[1]}) {
3899     last INSCOPE;
3900     }
3901     } # INSCOPE
3902    
3903    
3904     {
3905     my $el;
3906    
3907     $el = $self->{document}->create_element_ns
3908     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3909    
3910     for my $attr_name (keys %{ $token->{attributes}}) {
3911     $el->set_attribute_ns (undef, [undef, $attr_name],
3912     $token->{attributes} ->{$attr_name}->{value});
3913     }
3914    
3915     $insert->($el);
3916 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3917 wakaba 1.1 }
3918    
3919 wakaba 1.3 pop @{$self->{open_elements}};
3920 wakaba 1.1
3921     $token = $self->_get_next_token;
3922     return;
3923     } elsif ($token->{tag_name} eq 'input') {
3924     $reconstruct_active_formatting_elements->($insert_to_current);
3925    
3926    
3927     {
3928     my $el;
3929    
3930     $el = $self->{document}->create_element_ns
3931     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3932    
3933     for my $attr_name (keys %{ $token->{attributes}}) {
3934     $el->set_attribute_ns (undef, [undef, $attr_name],
3935     $token->{attributes} ->{$attr_name}->{value});
3936     }
3937    
3938     $insert->($el);
3939 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3940 wakaba 1.1 }
3941    
3942 wakaba 1.3 ## TODO: associate with $self->{form_element} if defined
3943     pop @{$self->{open_elements}};
3944 wakaba 1.1
3945     $token = $self->_get_next_token;
3946     return;
3947     } elsif ($token->{tag_name} eq 'isindex') {
3948 wakaba 1.3 $self->{parse_error}-> (type => 'isindex');
3949 wakaba 1.1
3950 wakaba 1.3 if (defined $self->{form_element}) {
3951 wakaba 1.1 ## Ignore the token
3952     $token = $self->_get_next_token;
3953     return;
3954     } else {
3955     my $at = $token->{attributes};
3956 wakaba 1.22 my $form_attrs;
3957     $form_attrs->{action} = $at->{action} if $at->{action};
3958     my $prompt_attr = $at->{prompt};
3959 wakaba 1.1 $at->{name} = {name => 'name', value => 'isindex'};
3960 wakaba 1.22 delete $at->{action};
3961     delete $at->{prompt};
3962 wakaba 1.1 my @tokens = (
3963 wakaba 1.22 {type => 'start tag', tag_name => 'form',
3964     attributes => $form_attrs},
3965 wakaba 1.1 {type => 'start tag', tag_name => 'hr'},
3966     {type => 'start tag', tag_name => 'p'},
3967     {type => 'start tag', tag_name => 'label'},
3968 wakaba 1.22 );
3969     if ($prompt_attr) {
3970     push @tokens, {type => 'character', data => $prompt_attr->{value}};
3971     } else {
3972     push @tokens, {type => 'character',
3973     data => 'This is a searchable index. Insert your search keywords here: '}; # SHOULD
3974     ## TODO: make this configurable
3975     }
3976     push @tokens,
3977 wakaba 1.1 {type => 'start tag', tag_name => 'input', attributes => $at},
3978     #{type => 'character', data => ''}, # SHOULD
3979     {type => 'end tag', tag_name => 'label'},
3980     {type => 'end tag', tag_name => 'p'},
3981     {type => 'start tag', tag_name => 'hr'},
3982 wakaba 1.22 {type => 'end tag', tag_name => 'form'};
3983 wakaba 1.1 $token = shift @tokens;
3984     unshift @{$self->{token}}, (@tokens);
3985     return;
3986     }
3987 wakaba 1.25 } elsif ($token->{tag_name} eq 'textarea') {
3988 wakaba 1.1 my $tag_name = $token->{tag_name};
3989     my $el;
3990    
3991     $el = $self->{document}->create_element_ns
3992     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3993    
3994     for my $attr_name (keys %{ $token->{attributes}}) {
3995     $el->set_attribute_ns (undef, [undef, $attr_name],
3996     $token->{attributes} ->{$attr_name}->{value});
3997     }
3998    
3999    
4000 wakaba 1.25 ## TODO: $self->{form_element} if defined
4001     $self->{content_model_flag} = 'RCDATA';
4002 wakaba 1.13 delete $self->{escape}; # MUST
4003 wakaba 1.1
4004     $insert->($el);
4005    
4006     my $text = '';
4007 wakaba 1.25 $token = $self->_get_next_token;
4008     if ($token->{type} eq 'character') {
4009     $token->{data} =~ s/^\x0A//;
4010     unless (length $token->{data}) {
4011     $token = $self->_get_next_token;
4012 wakaba 1.8 }
4013     }
4014 wakaba 1.1 while ($token->{type} eq 'character') {
4015     $text .= $token->{data};
4016     $token = $self->_get_next_token;
4017     }
4018     if (length $text) {
4019     $el->manakai_append_text ($text);
4020     }
4021    
4022     $self->{content_model_flag} = 'PCDATA';
4023    
4024     if ($token->{type} eq 'end tag' and
4025     $token->{tag_name} eq $tag_name) {
4026     ## Ignore the token
4027     } else {
4028 wakaba 1.25 $self->{parse_error}-> (type => 'in RCDATA:#'.$token->{type});
4029 wakaba 1.1 }
4030     $token = $self->_get_next_token;
4031     return;
4032 wakaba 1.25 } elsif ({
4033     iframe => 1,
4034     noembed => 1,
4035     noframes => 1,
4036     noscript => 0, ## TODO: 1 if scripting is enabled
4037     }->{$token->{tag_name}}) {
4038     $parse_rcdata->('CDATA', $insert);
4039     return;
4040 wakaba 1.1 } elsif ($token->{tag_name} eq 'select') {
4041     $reconstruct_active_formatting_elements->($insert_to_current);
4042    
4043    
4044     {
4045     my $el;
4046    
4047     $el = $self->{document}->create_element_ns
4048     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
4049    
4050     for my $attr_name (keys %{ $token->{attributes}}) {
4051     $el->set_attribute_ns (undef, [undef, $attr_name],
4052     $token->{attributes} ->{$attr_name}->{value});
4053     }
4054    
4055     $insert->($el);
4056 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
4057 wakaba 1.1 }
4058    
4059    
4060 wakaba 1.3 $self->{insertion_mode} = 'in select';
4061 wakaba 1.1 $token = $self->_get_next_token;
4062     return;
4063     } elsif ({
4064     caption => 1, col => 1, colgroup => 1, frame => 1,
4065     frameset => 1, head => 1, option => 1, optgroup => 1,
4066     tbody => 1, td => 1, tfoot => 1, th => 1,
4067     thead => 1, tr => 1,
4068     }->{$token->{tag_name}}) {
4069 wakaba 1.3 $self->{parse_error}-> (type => 'in body:'.$token->{tag_name});
4070 wakaba 1.1 ## Ignore the token
4071     $token = $self->_get_next_token;
4072     return;
4073    
4074     ## ISSUE: An issue on HTML5 new elements in the spec.
4075     } else {
4076     $reconstruct_active_formatting_elements->($insert_to_current);
4077    
4078    
4079     {
4080     my $el;
4081    
4082     $el = $self->{document}->create_element_ns
4083     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
4084    
4085     for my $attr_name (keys %{ $token->{attributes}}) {
4086     $el->set_attribute_ns (undef, [undef, $attr_name],
4087     $token->{attributes} ->{$attr_name}->{value});
4088     }
4089    
4090     $insert->($el);
4091 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
4092 wakaba 1.1 }
4093    
4094    
4095     $token = $self->_get_next_token;
4096     return;
4097     }
4098     } elsif ($token->{type} eq 'end tag') {
4099     if ($token->{tag_name} eq 'body') {
4100 wakaba 1.20 if (@{$self->{open_elements}} > 1 and
4101     $self->{open_elements}->[1]->[1] eq 'body') {
4102     for (@{$self->{open_elements}}) {
4103     unless ({
4104     dd => 1, dt => 1, li => 1, p => 1, td => 1,
4105     th => 1, tr => 1, body => 1, html => 1,
4106     }->{$_->[1]}) {
4107     $self->{parse_error}-> (type => 'not closed:'.$_->[1]);
4108     }
4109 wakaba 1.1 }
4110 wakaba 1.20
4111 wakaba 1.3 $self->{insertion_mode} = 'after body';
4112 wakaba 1.1 $token = $self->_get_next_token;
4113     return;
4114     } else {
4115 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4116 wakaba 1.1 ## Ignore the token
4117     $token = $self->_get_next_token;
4118     return;
4119     }
4120     } elsif ($token->{tag_name} eq 'html') {
4121 wakaba 1.3 if (@{$self->{open_elements}} > 1 and $self->{open_elements}->[1]->[1] eq 'body') {
4122 wakaba 1.1 ## ISSUE: There is an issue in the spec.
4123 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'body') {
4124     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[1]->[1]);
4125 wakaba 1.1 }
4126 wakaba 1.3 $self->{insertion_mode} = 'after body';
4127 wakaba 1.1 ## reprocess
4128     return;
4129     } else {
4130 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4131 wakaba 1.1 ## Ignore the token
4132     $token = $self->_get_next_token;
4133     return;
4134     }
4135     } elsif ({
4136     address => 1, blockquote => 1, center => 1, dir => 1,
4137     div => 1, dl => 1, fieldset => 1, listing => 1,
4138     menu => 1, ol => 1, pre => 1, ul => 1,
4139     p => 1,
4140     dd => 1, dt => 1, li => 1,
4141     button => 1, marquee => 1, object => 1,
4142     }->{$token->{tag_name}}) {
4143     ## has an element in scope
4144     my $i;
4145 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4146     my $node = $self->{open_elements}->[$_];
4147 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4148     ## generate implied end tags
4149     if ({
4150     dd => ($token->{tag_name} ne 'dd'),
4151     dt => ($token->{tag_name} ne 'dt'),
4152     li => ($token->{tag_name} ne 'li'),
4153     p => ($token->{tag_name} ne 'p'),
4154     td => 1, th => 1, tr => 1,
4155 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4156 wakaba 1.1 unshift @{$self->{token}}, $token;
4157     $token = {type => 'end tag',
4158 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4159 wakaba 1.1 return;
4160     }
4161     $i = $_;
4162     last INSCOPE unless $token->{tag_name} eq 'p';
4163     } elsif ({
4164     table => 1, caption => 1, td => 1, th => 1,
4165     button => 1, marquee => 1, object => 1, html => 1,
4166     }->{$node->[1]}) {
4167     last INSCOPE;
4168     }
4169     } # INSCOPE
4170    
4171 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
4172     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4173 wakaba 1.1 }
4174    
4175 wakaba 1.3 splice @{$self->{open_elements}}, $i if defined $i;
4176 wakaba 1.1 $clear_up_to_marker->()
4177     if {
4178     button => 1, marquee => 1, object => 1,
4179     }->{$token->{tag_name}};
4180 wakaba 1.12 $token = $self->_get_next_token;
4181     return;
4182     } elsif ($token->{tag_name} eq 'form') {
4183     ## has an element in scope
4184     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4185     my $node = $self->{open_elements}->[$_];
4186     if ($node->[1] eq $token->{tag_name}) {
4187     ## generate implied end tags
4188     if ({
4189     dd => 1, dt => 1, li => 1, p => 1,
4190     td => 1, th => 1, tr => 1,
4191     }->{$self->{open_elements}->[-1]->[1]}) {
4192     unshift @{$self->{token}}, $token;
4193     $token = {type => 'end tag',
4194     tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4195     return;
4196     }
4197     last INSCOPE;
4198     } elsif ({
4199     table => 1, caption => 1, td => 1, th => 1,
4200     button => 1, marquee => 1, object => 1, html => 1,
4201     }->{$node->[1]}) {
4202     last INSCOPE;
4203     }
4204     } # INSCOPE
4205    
4206     if ($self->{open_elements}->[-1]->[1] eq $token->{tag_name}) {
4207     pop @{$self->{open_elements}};
4208     } else {
4209     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4210     }
4211    
4212     undef $self->{form_element};
4213 wakaba 1.1 $token = $self->_get_next_token;
4214     return;
4215     } elsif ({
4216     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
4217     }->{$token->{tag_name}}) {
4218     ## has an element in scope
4219     my $i;
4220 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4221     my $node = $self->{open_elements}->[$_];
4222 wakaba 1.1 if ({
4223     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
4224     }->{$node->[1]}) {
4225     ## generate implied end tags
4226     if ({
4227     dd => 1, dt => 1, li => 1, p => 1,
4228     td => 1, th => 1, tr => 1,
4229 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4230 wakaba 1.1 unshift @{$self->{token}}, $token;
4231     $token = {type => 'end tag',
4232 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4233 wakaba 1.1 return;
4234     }
4235     $i = $_;
4236     last INSCOPE;
4237     } elsif ({
4238     table => 1, caption => 1, td => 1, th => 1,
4239     button => 1, marquee => 1, object => 1, html => 1,
4240     }->{$node->[1]}) {
4241     last INSCOPE;
4242     }
4243     } # INSCOPE
4244    
4245 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
4246     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4247 wakaba 1.1 }
4248    
4249 wakaba 1.3 splice @{$self->{open_elements}}, $i if defined $i;
4250 wakaba 1.1 $token = $self->_get_next_token;
4251     return;
4252     } elsif ({
4253     a => 1,
4254     b => 1, big => 1, em => 1, font => 1, i => 1,
4255     nobr => 1, s => 1, small => 1, strile => 1,
4256     strong => 1, tt => 1, u => 1,
4257     }->{$token->{tag_name}}) {
4258     $formatting_end_tag->($token->{tag_name});
4259 wakaba 1.8 ## TODO: <http://html5.org/tools/web-apps-tracker?from=883&to=884>
4260 wakaba 1.1 return;
4261     } elsif ({
4262     caption => 1, col => 1, colgroup => 1, frame => 1,
4263     frameset => 1, head => 1, option => 1, optgroup => 1,
4264     tbody => 1, td => 1, tfoot => 1, th => 1,
4265     thead => 1, tr => 1,
4266     area => 1, basefont => 1, bgsound => 1, br => 1,
4267     embed => 1, hr => 1, iframe => 1, image => 1,
4268 wakaba 1.5 img => 1, input => 1, isindex => 1, noembed => 1,
4269 wakaba 1.1 noframes => 1, param => 1, select => 1, spacer => 1,
4270     table => 1, textarea => 1, wbr => 1,
4271     noscript => 0, ## TODO: if scripting is enabled
4272     }->{$token->{tag_name}}) {
4273 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4274 wakaba 1.1 ## Ignore the token
4275     $token = $self->_get_next_token;
4276     return;
4277    
4278     ## ISSUE: Issue on HTML5 new elements in spec
4279    
4280     } else {
4281     ## Step 1
4282     my $node_i = -1;
4283 wakaba 1.3 my $node = $self->{open_elements}->[$node_i];
4284 wakaba 1.1
4285     ## Step 2
4286     S2: {
4287     if ($node->[1] eq $token->{tag_name}) {
4288     ## Step 1
4289     ## generate implied end tags
4290     if ({
4291     dd => 1, dt => 1, li => 1, p => 1,
4292     td => 1, th => 1, tr => 1,
4293 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4294 wakaba 1.1 unshift @{$self->{token}}, $token;
4295     $token = {type => 'end tag',
4296 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4297 wakaba 1.1 return;
4298     }
4299    
4300     ## Step 2
4301 wakaba 1.3 if ($token->{tag_name} ne $self->{open_elements}->[-1]->[1]) {
4302     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4303 wakaba 1.1 }
4304    
4305     ## Step 3
4306 wakaba 1.3 splice @{$self->{open_elements}}, $node_i;
4307    
4308     $token = $self->_get_next_token;
4309 wakaba 1.1 last S2;
4310     } else {
4311     ## Step 3
4312     if (not $formatting_category->{$node->[1]} and
4313     #not $phrasing_category->{$node->[1]} and
4314     ($special_category->{$node->[1]} or
4315     $scoping_category->{$node->[1]})) {
4316 wakaba 1.25 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4317 wakaba 1.1 ## Ignore the token
4318     $token = $self->_get_next_token;
4319     last S2;
4320     }
4321     }
4322    
4323     ## Step 4
4324     $node_i--;
4325 wakaba 1.3 $node = $self->{open_elements}->[$node_i];
4326 wakaba 1.1
4327     ## Step 5;
4328     redo S2;
4329     } # S2
4330 wakaba 1.3 return;
4331 wakaba 1.1 }
4332     }
4333     }; # $in_body
4334    
4335     B: {
4336 wakaba 1.3 if ($phase eq 'main') {
4337 wakaba 1.1 if ($token->{type} eq 'DOCTYPE') {
4338 wakaba 1.3 $self->{parse_error}-> (type => 'in html:#DOCTYPE');
4339 wakaba 1.1 ## Ignore the token
4340     ## Stay in the phase
4341     $token = $self->_get_next_token;
4342     redo B;
4343     } elsif ($token->{type} eq 'start tag' and
4344     $token->{tag_name} eq 'html') {
4345     ## TODO: unless it is the first start tag token, parse-error
4346 wakaba 1.3 my $top_el = $self->{open_elements}->[0]->[0];
4347 wakaba 1.1 for my $attr_name (keys %{$token->{attributes}}) {
4348     unless ($top_el->has_attribute_ns (undef, $attr_name)) {
4349     $top_el->set_attribute_ns
4350     (undef, [undef, $attr_name],
4351     $token->{attributes}->{$attr_name}->{value});
4352     }
4353     }
4354     $token = $self->_get_next_token;
4355     redo B;
4356     } elsif ($token->{type} eq 'end-of-file') {
4357     ## Generate implied end tags
4358     if ({
4359     dd => 1, dt => 1, li => 1, p => 1, td => 1, th => 1, tr => 1,
4360 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4361 wakaba 1.1 unshift @{$self->{token}}, $token;
4362 wakaba 1.3 $token = {type => 'end tag', tag_name => $self->{open_elements}->[-1]->[1]};
4363 wakaba 1.1 redo B;
4364     }
4365    
4366 wakaba 1.3 if (@{$self->{open_elements}} > 2 or
4367     (@{$self->{open_elements}} == 2 and $self->{open_elements}->[1]->[1] ne 'body')) {
4368     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4369     } elsif (defined $self->{inner_html_node} and
4370     @{$self->{open_elements}} > 1 and
4371     $self->{open_elements}->[1]->[1] ne 'body') {
4372     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4373 wakaba 1.1 }
4374    
4375     ## Stop parsing
4376     last B;
4377    
4378     ## ISSUE: There is an issue in the spec.
4379     } else {
4380 wakaba 1.3 if ($self->{insertion_mode} eq 'before head') {
4381 wakaba 1.1 if ($token->{type} eq 'character') {
4382     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4383 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4384 wakaba 1.1 unless (length $token->{data}) {
4385     $token = $self->_get_next_token;
4386     redo B;
4387     }
4388     }
4389     ## As if <head>
4390    
4391 wakaba 1.3 $self->{head_element} = $self->{document}->create_element_ns
4392 wakaba 1.1 (q<http://www.w3.org/1999/xhtml>, [undef, 'head']);
4393    
4394 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4395     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
4396     $self->{insertion_mode} = 'in head';
4397 wakaba 1.1 ## reprocess
4398     redo B;
4399     } elsif ($token->{type} eq 'comment') {
4400     my $comment = $self->{document}->create_comment ($token->{data});
4401 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4402 wakaba 1.1 $token = $self->_get_next_token;
4403     redo B;
4404     } elsif ($token->{type} eq 'start tag') {
4405     my $attr = $token->{tag_name} eq 'head' ? $token->{attributes} : {};
4406    
4407 wakaba 1.3 $self->{head_element} = $self->{document}->create_element_ns
4408 wakaba 1.1 (q<http://www.w3.org/1999/xhtml>, [undef, 'head']);
4409    
4410     for my $attr_name (keys %{ $attr}) {
4411 wakaba 1.3 $self->{head_element}->set_attribute_ns (undef, [undef, $attr_name],
4412 wakaba 1.1 $attr ->{$attr_name}->{value});
4413     }
4414    
4415 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4416     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
4417     $self->{insertion_mode} = 'in head';
4418 wakaba 1.1 if ($token->{tag_name} eq 'head') {
4419     $token = $self->_get_next_token;
4420     #} elsif ({
4421     # base => 1, link => 1, meta => 1,
4422     # script => 1, style => 1, title => 1,
4423     # }->{$token->{tag_name}}) {
4424     # ## reprocess
4425     } else {
4426     ## reprocess
4427     }
4428     redo B;
4429     } elsif ($token->{type} eq 'end tag') {
4430 wakaba 1.21 if ({head => 1, body => 1, html => 1}->{$token->{tag_name}}) {
4431 wakaba 1.1 ## As if <head>
4432    
4433 wakaba 1.3 $self->{head_element} = $self->{document}->create_element_ns
4434 wakaba 1.1 (q<http://www.w3.org/1999/xhtml>, [undef, 'head']);
4435    
4436 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4437     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
4438     $self->{insertion_mode} = 'in head';
4439 wakaba 1.1 ## reprocess
4440     redo B;
4441     } else {
4442 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4443 wakaba 1.21 ## Ignore the token ## ISSUE: An issue in the spec.
4444 wakaba 1.1 $token = $self->_get_next_token;
4445     redo B;
4446     }
4447     } else {
4448     die "$0: $token->{type}: Unknown type";
4449     }
4450 wakaba 1.25 } elsif ($self->{insertion_mode} eq 'in head' or
4451     $self->{insertion_mode} eq 'in head noscript' or
4452     $self->{insertion_mode} eq 'after head') {
4453 wakaba 1.1 if ($token->{type} eq 'character') {
4454     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4455 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4456 wakaba 1.1 unless (length $token->{data}) {
4457     $token = $self->_get_next_token;
4458     redo B;
4459     }
4460     }
4461    
4462     #
4463     } elsif ($token->{type} eq 'comment') {
4464     my $comment = $self->{document}->create_comment ($token->{data});
4465 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4466 wakaba 1.1 $token = $self->_get_next_token;
4467     redo B;
4468     } elsif ($token->{type} eq 'start tag') {
4469 wakaba 1.25 if ({base => ($self->{insertion_mode} eq 'in head' or
4470     $self->{insertion_mode} eq 'after head'),
4471     link => 1, meta => 1}->{$token->{tag_name}}) {
4472     ## NOTE: There is a "as if in head" code clone.
4473     if ($self->{insertion_mode} eq 'after head') {
4474     $self->{parse_error}-> (type => 'after head:'.$token->{tag_name});
4475     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
4476     }
4477 wakaba 1.1
4478 wakaba 1.25 {
4479     my $el;
4480    
4481     $el = $self->{document}->create_element_ns
4482     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
4483 wakaba 1.1
4484 wakaba 1.25 for my $attr_name (keys %{ $token->{attributes}}) {
4485     $el->set_attribute_ns (undef, [undef, $attr_name],
4486     $token->{attributes} ->{$attr_name}->{value});
4487 wakaba 1.1 }
4488    
4489 wakaba 1.25 $self->{open_elements}->[-1]->[0]->append_child ($el);
4490     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
4491     }
4492    
4493     pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
4494 wakaba 1.26 ## TODO: Extracting |charset| from |meta|.
4495 wakaba 1.25 pop @{$self->{open_elements}}
4496     if $self->{insertion_mode} eq 'after head';
4497 wakaba 1.1 $token = $self->_get_next_token;
4498 wakaba 1.25 redo B;
4499     } elsif ($token->{tag_name} eq 'title' and
4500     $self->{insertion_mode} eq 'in head') {
4501     ## NOTE: There is a "as if in head" code clone.
4502     if ($self->{insertion_mode} eq 'after head') {
4503     $self->{parse_error}-> (type => 'after head:'.$token->{tag_name});
4504     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
4505     }
4506     $parse_rcdata->('RCDATA', $insert_to_current);
4507     pop @{$self->{open_elements}}
4508     if $self->{insertion_mode} eq 'after head';
4509 wakaba 1.1 redo B;
4510     } elsif ($token->{tag_name} eq 'style') {
4511 wakaba 1.25 ## NOTE: Or (scripting is enabled and tag_name eq 'noscript' and
4512     ## insertion mode 'in head')
4513     ## NOTE: There is a "as if in head" code clone.
4514     if ($self->{insertion_mode} eq 'after head') {
4515     $self->{parse_error}-> (type => 'after head:'.$token->{tag_name});
4516     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
4517     }
4518     $parse_rcdata->('CDATA', $insert_to_current);
4519     pop @{$self->{open_elements}}
4520     if $self->{insertion_mode} eq 'after head';
4521     redo B;
4522     } elsif ($token->{tag_name} eq 'noscript') {
4523     if ($self->{insertion_mode} eq 'in head') {
4524     ## NOTE: and scripting is disalbed
4525    
4526     {
4527     my $el;
4528    
4529 wakaba 1.1 $el = $self->{document}->create_element_ns
4530     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
4531    
4532 wakaba 1.25 for my $attr_name (keys %{ $token->{attributes}}) {
4533 wakaba 1.1 $el->set_attribute_ns (undef, [undef, $attr_name],
4534 wakaba 1.25 $token->{attributes} ->{$attr_name}->{value});
4535 wakaba 1.1 }
4536    
4537 wakaba 1.25 $self->{open_elements}->[-1]->[0]->append_child ($el);
4538     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
4539     }
4540    
4541     $self->{insertion_mode} = 'in head noscript';
4542     $token = $self->_get_next_token;
4543     redo B;
4544     } elsif ($self->{insertion_mode} eq 'in head noscript') {
4545     $self->{parse_error}-> (type => 'noscript in noscript');
4546     ## Ignore the token
4547     redo B;
4548 wakaba 1.24 } else {
4549 wakaba 1.25 #
4550 wakaba 1.24 }
4551 wakaba 1.25 } elsif ($token->{tag_name} eq 'head' and
4552     $self->{insertion_mode} ne 'after head') {
4553     $self->{parse_error}-> (type => 'in head:head'); # or in head noscript
4554 wakaba 1.1 ## Ignore the token
4555     $token = $self->_get_next_token;
4556     redo B;
4557 wakaba 1.25 } elsif ($self->{insertion_mode} ne 'in head noscript' and
4558     $token->{tag_name} eq 'script') {
4559     if ($self->{insertion_mode} eq 'after head') {
4560     $self->{parse_error}-> (type => 'after head:'.$token->{tag_name});
4561     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
4562     }
4563     ## NOTE: There is a "as if in head" code clone.
4564     $script_start_tag->($insert_to_current);
4565     pop @{$self->{open_elements}}
4566     if $self->{insertion_mode} eq 'after head';
4567 wakaba 1.1 redo B;
4568 wakaba 1.25 } elsif ($self->{insertion_mode} eq 'after head' and
4569     $token->{tag_name} eq 'body') {
4570 wakaba 1.1
4571     {
4572     my $el;
4573    
4574     $el = $self->{document}->create_element_ns
4575     (q<http://www.w3.org/1999/xhtml>, [undef, 'body']);
4576    
4577     for my $attr_name (keys %{ $token->{attributes}}) {
4578     $el->set_attribute_ns (undef, [undef, $attr_name],
4579     $token->{attributes} ->{$attr_name}->{value});
4580     }
4581    
4582 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
4583     push @{$self->{open_elements}}, [$el, 'body'];
4584 wakaba 1.1 }
4585    
4586 wakaba 1.3 $self->{insertion_mode} = 'in body';
4587 wakaba 1.1 $token = $self->_get_next_token;
4588     redo B;
4589 wakaba 1.25 } elsif ($self->{insertion_mode} eq 'after head' and
4590     $token->{tag_name} eq 'frameset') {
4591 wakaba 1.1
4592     {
4593     my $el;
4594    
4595     $el = $self->{document}->create_element_ns
4596     (q<http://www.w3.org/1999/xhtml>, [undef, 'frameset']);
4597    
4598     for my $attr_name (keys %{ $token->{attributes}}) {
4599     $el->set_attribute_ns (undef, [undef, $attr_name],
4600     $token->{attributes} ->{$attr_name}->{value});
4601     }
4602    
4603 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
4604     push @{$self->{open_elements}}, [$el, 'frameset'];
4605 wakaba 1.1 }
4606    
4607 wakaba 1.3 $self->{insertion_mode} = 'in frameset';
4608 wakaba 1.1 $token = $self->_get_next_token;
4609     redo B;
4610 wakaba 1.25 } else {
4611     #
4612     }
4613     } elsif ($token->{type} eq 'end tag') {
4614     if ($self->{insertion_mode} eq 'in head' and
4615     $token->{tag_name} eq 'head') {
4616     pop @{$self->{open_elements}};
4617     $self->{insertion_mode} = 'after head';
4618     $token = $self->_get_next_token;
4619     redo B;
4620     } elsif ($self->{insertion_mode} eq 'in head noscript' and
4621     $token->{tag_name} eq 'noscript') {
4622     pop @{$self->{open_elements}};
4623 wakaba 1.3 $self->{insertion_mode} = 'in head';
4624 wakaba 1.25 $token = $self->_get_next_token;
4625     redo B;
4626     } elsif ($self->{insertion_mode} eq 'in head' and
4627     ($token->{tag_name} eq 'body' or
4628     $token->{tag_name} eq 'html')) {
4629     #
4630     } elsif ($self->{insertion_mode} ne 'after head') {
4631     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4632     ## Ignore the token
4633     $token = $self->_get_next_token;
4634 wakaba 1.1 redo B;
4635     } else {
4636 wakaba 1.25 #
4637     }
4638 wakaba 1.1 } else {
4639     #
4640     }
4641 wakaba 1.25
4642     ## As if </head> or </noscript> or <body>
4643     if ($self->{insertion_mode} eq 'in head') {
4644     pop @{$self->{open_elements}};
4645     $self->{insertion_mode} = 'after head';
4646     } elsif ($self->{insertion_mode} eq 'in head noscript') {
4647     pop @{$self->{open_elements}};
4648     $self->{parse_error}-> (type => 'in noscript:'.(defined $token->{tag_name} ? ($token->{type} eq 'end tag' ? '/' : '') . $token->{tag_name} : '#' . $token->{type}));
4649     $self->{insertion_mode} = 'in head';
4650     } else { # 'after head'
4651    
4652 wakaba 1.1 {
4653     my $el;
4654    
4655     $el = $self->{document}->create_element_ns
4656     (q<http://www.w3.org/1999/xhtml>, [undef, 'body']);
4657    
4658 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
4659     push @{$self->{open_elements}}, [$el, 'body'];
4660 wakaba 1.1 }
4661    
4662 wakaba 1.25 $self->{insertion_mode} = 'in body';
4663     }
4664 wakaba 1.1 ## reprocess
4665     redo B;
4666 wakaba 1.25
4667     ## ISSUE: An issue in the spec.
4668 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in body') {
4669 wakaba 1.1 if ($token->{type} eq 'character') {
4670     ## NOTE: There is a code clone of "character in body".
4671     $reconstruct_active_formatting_elements->($insert_to_current);
4672    
4673 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4674 wakaba 1.1
4675     $token = $self->_get_next_token;
4676     redo B;
4677     } elsif ($token->{type} eq 'comment') {
4678     ## NOTE: There is a code clone of "comment in body".
4679     my $comment = $self->{document}->create_comment ($token->{data});
4680 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4681 wakaba 1.1 $token = $self->_get_next_token;
4682     redo B;
4683     } else {
4684     $in_body->($insert_to_current);
4685     redo B;
4686     }
4687 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in table') {
4688 wakaba 1.1 if ($token->{type} eq 'character') {
4689     ## NOTE: There are "character in table" code clones.
4690     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4691 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4692 wakaba 1.1
4693     unless (length $token->{data}) {
4694     $token = $self->_get_next_token;
4695     redo B;
4696     }
4697     }
4698    
4699 wakaba 1.3 $self->{parse_error}-> (type => 'in table:#character');
4700    
4701 wakaba 1.1 ## As if in body, but insert into foster parent element
4702     ## ISSUE: Spec says that "whenever a node would be inserted
4703     ## into the current node" while characters might not be
4704     ## result in a new Text node.
4705     $reconstruct_active_formatting_elements->($insert_to_foster);
4706    
4707     if ({
4708     table => 1, tbody => 1, tfoot => 1,
4709     thead => 1, tr => 1,
4710 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4711 wakaba 1.1 # MUST
4712     my $foster_parent_element;
4713     my $next_sibling;
4714     my $prev_sibling;
4715 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
4716     if ($self->{open_elements}->[$_]->[1] eq 'table') {
4717     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
4718 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
4719     $foster_parent_element = $parent;
4720 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
4721 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
4722     } else {
4723 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
4724 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
4725     }
4726     last OE;
4727     }
4728     } # OE
4729 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
4730 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
4731     unless defined $foster_parent_element;
4732     if (defined $prev_sibling and
4733     $prev_sibling->node_type == 3) {
4734     $prev_sibling->manakai_append_text ($token->{data});
4735     } else {
4736     $foster_parent_element->insert_before
4737     ($self->{document}->create_text_node ($token->{data}),
4738     $next_sibling);
4739     }
4740     } else {
4741 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4742 wakaba 1.1 }
4743    
4744     $token = $self->_get_next_token;
4745     redo B;
4746     } elsif ($token->{type} eq 'comment') {
4747     my $comment = $self->{document}->create_comment ($token->{data});
4748 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4749 wakaba 1.1 $token = $self->_get_next_token;
4750     redo B;
4751     } elsif ($token->{type} eq 'start tag') {
4752     if ({
4753     caption => 1,
4754     colgroup => 1,
4755     tbody => 1, tfoot => 1, thead => 1,
4756     }->{$token->{tag_name}}) {
4757     ## Clear back to table context
4758 wakaba 1.3 while ($self->{open_elements}->[-1]->[1] ne 'table' and
4759     $self->{open_elements}->[-1]->[1] ne 'html') {
4760     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4761     pop @{$self->{open_elements}};
4762 wakaba 1.1 }
4763    
4764     push @$active_formatting_elements, ['#marker', '']
4765     if $token->{tag_name} eq 'caption';
4766    
4767    
4768     {
4769     my $el;
4770    
4771     $el = $self->{document}->create_element_ns
4772     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
4773    
4774     for my $attr_name (keys %{ $token->{attributes}}) {
4775     $el->set_attribute_ns (undef, [undef, $attr_name],
4776     $token->{attributes} ->{$attr_name}->{value});
4777     }
4778    
4779 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
4780     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
4781 wakaba 1.1 }
4782    
4783 wakaba 1.3 $self->{insertion_mode} = {
4784 wakaba 1.1 caption => 'in caption',
4785     colgroup => 'in column group',
4786     tbody => 'in table body',
4787     tfoot => 'in table body',
4788     thead => 'in table body',
4789     }->{$token->{tag_name}};
4790     $token = $self->_get_next_token;
4791     redo B;
4792     } elsif ({
4793     col => 1,
4794     td => 1, th => 1, tr => 1,
4795     }->{$token->{tag_name}}) {
4796     ## Clear back to table context
4797 wakaba 1.3 while ($self->{open_elements}->[-1]->[1] ne 'table' and
4798     $self->{open_elements}->[-1]->[1] ne 'html') {
4799     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4800     pop @{$self->{open_elements}};
4801 wakaba 1.1 }
4802    
4803    
4804     {
4805     my $el;
4806    
4807     $el = $self->{document}->create_element_ns
4808     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name} eq 'col' ? 'colgroup' : 'tbody']);
4809    
4810 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
4811     push @{$self->{open_elements}}, [$el, $token->{tag_name} eq 'col' ? 'colgroup' : 'tbody'];
4812 wakaba 1.1 }
4813    
4814 wakaba 1.3 $self->{insertion_mode} = $token->{tag_name} eq 'col'
4815 wakaba 1.1 ? 'in column group' : 'in table body';
4816     ## reprocess
4817     redo B;
4818     } elsif ($token->{tag_name} eq 'table') {
4819     ## NOTE: There are code clones for this "table in table"
4820 wakaba 1.3 $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4821 wakaba 1.1
4822     ## As if </table>
4823     ## have a table element in table scope
4824     my $i;
4825 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4826     my $node = $self->{open_elements}->[$_];
4827 wakaba 1.1 if ($node->[1] eq 'table') {
4828     $i = $_;
4829     last INSCOPE;
4830     } elsif ({
4831     table => 1, html => 1,
4832     }->{$node->[1]}) {
4833     last INSCOPE;
4834     }
4835     } # INSCOPE
4836     unless (defined $i) {
4837 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:table');
4838 wakaba 1.1 ## Ignore tokens </table><table>
4839     $token = $self->_get_next_token;
4840     redo B;
4841     }
4842    
4843     ## generate implied end tags
4844     if ({
4845     dd => 1, dt => 1, li => 1, p => 1,
4846     td => 1, th => 1, tr => 1,
4847 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4848 wakaba 1.1 unshift @{$self->{token}}, $token; # <table>
4849     $token = {type => 'end tag', tag_name => 'table'};
4850     unshift @{$self->{token}}, $token;
4851     $token = {type => 'end tag',
4852 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4853 wakaba 1.1 redo B;
4854     }
4855    
4856 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
4857     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4858 wakaba 1.1 }
4859    
4860 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4861 wakaba 1.1
4862 wakaba 1.3 $self->_reset_insertion_mode;
4863 wakaba 1.1
4864     ## reprocess
4865     redo B;
4866     } else {
4867     #
4868     }
4869     } elsif ($token->{type} eq 'end tag') {
4870     if ($token->{tag_name} eq 'table') {
4871     ## have a table element in table scope
4872     my $i;
4873 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4874     my $node = $self->{open_elements}->[$_];
4875 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4876     $i = $_;
4877     last INSCOPE;
4878     } elsif ({
4879     table => 1, html => 1,
4880     }->{$node->[1]}) {
4881     last INSCOPE;
4882     }
4883     } # INSCOPE
4884     unless (defined $i) {
4885 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4886 wakaba 1.1 ## Ignore the token
4887     $token = $self->_get_next_token;
4888     redo B;
4889     }
4890    
4891     ## generate implied end tags
4892     if ({
4893     dd => 1, dt => 1, li => 1, p => 1,
4894     td => 1, th => 1, tr => 1,
4895 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4896 wakaba 1.1 unshift @{$self->{token}}, $token;
4897     $token = {type => 'end tag',
4898 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4899 wakaba 1.1 redo B;
4900     }
4901    
4902 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
4903     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4904 wakaba 1.1 }
4905    
4906 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4907 wakaba 1.1
4908 wakaba 1.3 $self->_reset_insertion_mode;
4909 wakaba 1.1
4910     $token = $self->_get_next_token;
4911     redo B;
4912     } elsif ({
4913     body => 1, caption => 1, col => 1, colgroup => 1,
4914     html => 1, tbody => 1, td => 1, tfoot => 1, th => 1,
4915     thead => 1, tr => 1,
4916     }->{$token->{tag_name}}) {
4917 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4918 wakaba 1.1 ## Ignore the token
4919     $token = $self->_get_next_token;
4920     redo B;
4921     } else {
4922     #
4923     }
4924     } else {
4925     #
4926     }
4927    
4928 wakaba 1.3 $self->{parse_error}-> (type => 'in table:'.$token->{tag_name});
4929 wakaba 1.1 $in_body->($insert_to_foster);
4930     redo B;
4931 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in caption') {
4932 wakaba 1.1 if ($token->{type} eq 'character') {
4933     ## NOTE: This is a code clone of "character in body".
4934     $reconstruct_active_formatting_elements->($insert_to_current);
4935    
4936 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4937 wakaba 1.1
4938     $token = $self->_get_next_token;
4939     redo B;
4940     } elsif ($token->{type} eq 'comment') {
4941     ## NOTE: This is a code clone of "comment in body".
4942     my $comment = $self->{document}->create_comment ($token->{data});
4943 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4944 wakaba 1.1 $token = $self->_get_next_token;
4945     redo B;
4946     } elsif ($token->{type} eq 'start tag') {
4947     if ({
4948     caption => 1, col => 1, colgroup => 1, tbody => 1,
4949     td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
4950     }->{$token->{tag_name}}) {
4951 wakaba 1.3 $self->{parse_error}-> (type => 'not closed:caption');
4952 wakaba 1.1
4953     ## As if </caption>
4954     ## have a table element in table scope
4955     my $i;
4956 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4957     my $node = $self->{open_elements}->[$_];
4958 wakaba 1.1 if ($node->[1] eq 'caption') {
4959     $i = $_;
4960     last INSCOPE;
4961     } elsif ({
4962     table => 1, html => 1,
4963     }->{$node->[1]}) {
4964     last INSCOPE;
4965     }
4966     } # INSCOPE
4967     unless (defined $i) {
4968 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:caption');
4969 wakaba 1.1 ## Ignore the token
4970     $token = $self->_get_next_token;
4971     redo B;
4972     }
4973    
4974     ## generate implied end tags
4975     if ({
4976     dd => 1, dt => 1, li => 1, p => 1,
4977     td => 1, th => 1, tr => 1,
4978 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4979 wakaba 1.1 unshift @{$self->{token}}, $token; # <?>
4980     $token = {type => 'end tag', tag_name => 'caption'};
4981     unshift @{$self->{token}}, $token;
4982     $token = {type => 'end tag',
4983 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4984 wakaba 1.1 redo B;
4985     }
4986    
4987 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
4988     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4989 wakaba 1.1 }
4990    
4991 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4992 wakaba 1.1
4993     $clear_up_to_marker->();
4994    
4995 wakaba 1.3 $self->{insertion_mode} = 'in table';
4996 wakaba 1.1
4997     ## reprocess
4998     redo B;
4999     } else {
5000     #
5001     }
5002     } elsif ($token->{type} eq 'end tag') {
5003     if ($token->{tag_name} eq 'caption') {
5004     ## have a table element in table scope
5005     my $i;
5006 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5007     my $node = $self->{open_elements}->[$_];
5008 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
5009     $i = $_;
5010     last INSCOPE;
5011     } elsif ({
5012     table => 1, html => 1,
5013     }->{$node->[1]}) {
5014     last INSCOPE;
5015     }
5016     } # INSCOPE
5017     unless (defined $i) {
5018 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5019 wakaba 1.1 ## Ignore the token
5020     $token = $self->_get_next_token;
5021     redo B;
5022     }
5023    
5024     ## generate implied end tags
5025     if ({
5026     dd => 1, dt => 1, li => 1, p => 1,
5027     td => 1, th => 1, tr => 1,
5028 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5029 wakaba 1.1 unshift @{$self->{token}}, $token;
5030     $token = {type => 'end tag',
5031 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
5032 wakaba 1.1 redo B;
5033     }
5034    
5035 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
5036     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5037 wakaba 1.1 }
5038    
5039 wakaba 1.3 splice @{$self->{open_elements}}, $i;
5040 wakaba 1.1
5041     $clear_up_to_marker->();
5042    
5043 wakaba 1.3 $self->{insertion_mode} = 'in table';
5044 wakaba 1.1
5045     $token = $self->_get_next_token;
5046     redo B;
5047     } elsif ($token->{tag_name} eq 'table') {
5048 wakaba 1.3 $self->{parse_error}-> (type => 'not closed:caption');
5049 wakaba 1.1
5050     ## As if </caption>
5051     ## have a table element in table scope
5052     my $i;
5053 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5054     my $node = $self->{open_elements}->[$_];
5055 wakaba 1.1 if ($node->[1] eq 'caption') {
5056     $i = $_;
5057     last INSCOPE;
5058     } elsif ({
5059     table => 1, html => 1,
5060     }->{$node->[1]}) {
5061     last INSCOPE;
5062     }
5063     } # INSCOPE
5064     unless (defined $i) {
5065 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:caption');
5066 wakaba 1.1 ## Ignore the token
5067     $token = $self->_get_next_token;
5068     redo B;
5069     }
5070    
5071     ## generate implied end tags
5072     if ({
5073     dd => 1, dt => 1, li => 1, p => 1,
5074     td => 1, th => 1, tr => 1,
5075 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5076 wakaba 1.1 unshift @{$self->{token}}, $token; # </table>
5077     $token = {type => 'end tag', tag_name => 'caption'};
5078     unshift @{$self->{token}}, $token;
5079     $token = {type => 'end tag',
5080 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
5081 wakaba 1.1 redo B;
5082     }
5083    
5084 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
5085     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5086 wakaba 1.1 }
5087    
5088 wakaba 1.3 splice @{$self->{open_elements}}, $i;
5089 wakaba 1.1
5090     $clear_up_to_marker->();
5091    
5092 wakaba 1.3 $self->{insertion_mode} = 'in table';
5093 wakaba 1.1
5094     ## reprocess
5095     redo B;
5096     } elsif ({
5097     body => 1, col => 1, colgroup => 1,
5098     html => 1, tbody => 1, td => 1, tfoot => 1,
5099     th => 1, thead => 1, tr => 1,
5100     }->{$token->{tag_name}}) {
5101 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5102 wakaba 1.1 ## Ignore the token
5103     redo B;
5104     } else {
5105     #
5106     }
5107     } else {
5108     #
5109     }
5110    
5111     $in_body->($insert_to_current);
5112     redo B;
5113 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in column group') {
5114 wakaba 1.1 if ($token->{type} eq 'character') {
5115     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5116 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5117 wakaba 1.1 unless (length $token->{data}) {
5118     $token = $self->_get_next_token;
5119     redo B;
5120     }
5121     }
5122    
5123     #
5124     } elsif ($token->{type} eq 'comment') {
5125     my $comment = $self->{document}->create_comment ($token->{data});
5126 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
5127 wakaba 1.1 $token = $self->_get_next_token;
5128     redo B;
5129     } elsif ($token->{type} eq 'start tag') {
5130     if ($token->{tag_name} eq 'col') {
5131    
5132     {
5133     my $el;
5134    
5135     $el = $self->{document}->create_element_ns
5136     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
5137    
5138     for my $attr_name (keys %{ $token->{attributes}}) {
5139     $el->set_attribute_ns (undef, [undef, $attr_name],
5140     $token->{attributes} ->{$attr_name}->{value});
5141     }
5142    
5143 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
5144     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
5145 wakaba 1.1 }
5146    
5147 wakaba 1.3 pop @{$self->{open_elements}};
5148 wakaba 1.1 $token = $self->_get_next_token;
5149     redo B;
5150     } else {
5151     #
5152     }
5153     } elsif ($token->{type} eq 'end tag') {
5154     if ($token->{tag_name} eq 'colgroup') {
5155 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html') {
5156     $self->{parse_error}-> (type => 'unmatched end tag:colgroup');
5157 wakaba 1.1 ## Ignore the token
5158     $token = $self->_get_next_token;
5159     redo B;
5160     } else {
5161 wakaba 1.3 pop @{$self->{open_elements}}; # colgroup
5162     $self->{insertion_mode} = 'in table';
5163 wakaba 1.1 $token = $self->_get_next_token;
5164     redo B;
5165     }
5166     } elsif ($token->{tag_name} eq 'col') {
5167 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:col');
5168 wakaba 1.1 ## Ignore the token
5169     $token = $self->_get_next_token;
5170     redo B;
5171     } else {
5172     #
5173     }
5174     } else {
5175     #
5176     }
5177    
5178     ## As if </colgroup>
5179 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html') {
5180     $self->{parse_error}-> (type => 'unmatched end tag:colgroup');
5181 wakaba 1.1 ## Ignore the token
5182     $token = $self->_get_next_token;
5183     redo B;
5184     } else {
5185 wakaba 1.3 pop @{$self->{open_elements}}; # colgroup
5186     $self->{insertion_mode} = 'in table';
5187 wakaba 1.1 ## reprocess
5188     redo B;
5189     }
5190 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in table body') {
5191 wakaba 1.1 if ($token->{type} eq 'character') {
5192     ## NOTE: This is a "character in table" code clone.
5193     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5194 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5195 wakaba 1.1
5196     unless (length $token->{data}) {
5197     $token = $self->_get_next_token;
5198     redo B;
5199     }
5200     }
5201    
5202 wakaba 1.3 $self->{parse_error}-> (type => 'in table:#character');
5203    
5204 wakaba 1.1 ## As if in body, but insert into foster parent element
5205     ## ISSUE: Spec says that "whenever a node would be inserted
5206     ## into the current node" while characters might not be
5207     ## result in a new Text node.
5208     $reconstruct_active_formatting_elements->($insert_to_foster);
5209    
5210     if ({
5211     table => 1, tbody => 1, tfoot => 1,
5212     thead => 1, tr => 1,
5213 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5214 wakaba 1.1 # MUST
5215     my $foster_parent_element;
5216     my $next_sibling;
5217     my $prev_sibling;
5218 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
5219     if ($self->{open_elements}->[$_]->[1] eq 'table') {
5220     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
5221 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
5222     $foster_parent_element = $parent;
5223 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
5224 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
5225     } else {
5226 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
5227 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
5228     }
5229     last OE;
5230     }
5231     } # OE
5232 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
5233 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
5234     unless defined $foster_parent_element;
5235     if (defined $prev_sibling and
5236     $prev_sibling->node_type == 3) {
5237     $prev_sibling->manakai_append_text ($token->{data});
5238     } else {
5239     $foster_parent_element->insert_before
5240     ($self->{document}->create_text_node ($token->{data}),
5241     $next_sibling);
5242     }
5243     } else {
5244 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
5245 wakaba 1.1 }
5246    
5247     $token = $self->_get_next_token;
5248     redo B;
5249     } elsif ($token->{type} eq 'comment') {
5250     ## Copied from 'in table'
5251     my $comment = $self->{document}->create_comment ($token->{data});
5252 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
5253 wakaba 1.1 $token = $self->_get_next_token;
5254     redo B;
5255     } elsif ($token->{type} eq 'start tag') {
5256     if ({
5257     tr => 1,
5258     th => 1, td => 1,
5259     }->{$token->{tag_name}}) {
5260 wakaba 1.3 unless ($token->{tag_name} eq 'tr') {
5261     $self->{parse_error}-> (type => 'missing start tag:tr');
5262     }
5263    
5264 wakaba 1.1 ## Clear back to table body context
5265     while (not {
5266     tbody => 1, tfoot => 1, thead => 1, html => 1,
5267 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5268     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5269     pop @{$self->{open_elements}};
5270 wakaba 1.1 }
5271    
5272 wakaba 1.3 $self->{insertion_mode} = 'in row';
5273 wakaba 1.1 if ($token->{tag_name} eq 'tr') {
5274    
5275     {
5276     my $el;
5277    
5278     $el = $self->{document}->create_element_ns
5279     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
5280    
5281     for my $attr_name (keys %{ $token->{attributes}}) {
5282     $el->set_attribute_ns (undef, [undef, $attr_name],
5283     $token->{attributes} ->{$attr_name}->{value});
5284     }
5285    
5286 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
5287     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
5288 wakaba 1.1 }
5289    
5290     $token = $self->_get_next_token;
5291     } else {
5292    
5293     {
5294     my $el;
5295    
5296     $el = $self->{document}->create_element_ns
5297     (q<http://www.w3.org/1999/xhtml>, [undef, 'tr']);
5298    
5299 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
5300     push @{$self->{open_elements}}, [$el, 'tr'];
5301 wakaba 1.1 }
5302    
5303     ## reprocess
5304     }
5305     redo B;
5306     } elsif ({
5307     caption => 1, col => 1, colgroup => 1,
5308     tbody => 1, tfoot => 1, thead => 1,
5309     }->{$token->{tag_name}}) {
5310     ## have an element in table scope
5311     my $i;
5312 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5313     my $node = $self->{open_elements}->[$_];
5314 wakaba 1.1 if ({
5315     tbody => 1, thead => 1, tfoot => 1,
5316     }->{$node->[1]}) {
5317     $i = $_;
5318     last INSCOPE;
5319     } elsif ({
5320     table => 1, html => 1,
5321     }->{$node->[1]}) {
5322     last INSCOPE;
5323     }
5324     } # INSCOPE
5325     unless (defined $i) {
5326 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5327 wakaba 1.1 ## Ignore the token
5328     $token = $self->_get_next_token;
5329     redo B;
5330     }
5331    
5332     ## Clear back to table body context
5333     while (not {
5334     tbody => 1, tfoot => 1, thead => 1, html => 1,
5335 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5336     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5337     pop @{$self->{open_elements}};
5338 wakaba 1.1 }
5339    
5340     ## As if <{current node}>
5341     ## have an element in table scope
5342     ## true by definition
5343    
5344     ## Clear back to table body context
5345     ## nop by definition
5346    
5347 wakaba 1.3 pop @{$self->{open_elements}};
5348     $self->{insertion_mode} = 'in table';
5349 wakaba 1.1 ## reprocess
5350     redo B;
5351     } elsif ($token->{tag_name} eq 'table') {
5352     ## NOTE: This is a code clone of "table in table"
5353 wakaba 1.3 $self->{parse_error}-> (type => 'not closed:table');
5354 wakaba 1.1
5355     ## As if </table>
5356     ## have a table element in table scope
5357     my $i;
5358 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5359     my $node = $self->{open_elements}->[$_];
5360 wakaba 1.1 if ($node->[1] eq 'table') {
5361     $i = $_;
5362     last INSCOPE;
5363     } elsif ({
5364     table => 1, html => 1,
5365     }->{$node->[1]}) {
5366     last INSCOPE;
5367     }
5368     } # INSCOPE
5369     unless (defined $i) {
5370 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:table');
5371 wakaba 1.1 ## Ignore tokens </table><table>
5372     $token = $self->_get_next_token;
5373     redo B;
5374     }
5375    
5376     ## generate implied end tags
5377     if ({
5378     dd => 1, dt => 1, li => 1, p => 1,
5379     td => 1, th => 1, tr => 1,
5380 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5381 wakaba 1.1 unshift @{$self->{token}}, $token; # <table>
5382     $token = {type => 'end tag', tag_name => 'table'};
5383     unshift @{$self->{token}}, $token;
5384     $token = {type => 'end tag',
5385 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
5386 wakaba 1.1 redo B;
5387     }
5388    
5389 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
5390     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5391 wakaba 1.1 }
5392    
5393 wakaba 1.3 splice @{$self->{open_elements}}, $i;
5394 wakaba 1.1
5395 wakaba 1.3 $self->_reset_insertion_mode;
5396 wakaba 1.1
5397     ## reprocess
5398     redo B;
5399     } else {
5400     #
5401     }
5402     } elsif ($token->{type} eq 'end tag') {
5403     if ({
5404     tbody => 1, tfoot => 1, thead => 1,
5405     }->{$token->{tag_name}}) {
5406     ## have an element in table scope
5407     my $i;
5408 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5409     my $node = $self->{open_elements}->[$_];
5410 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
5411     $i = $_;
5412     last INSCOPE;
5413     } elsif ({
5414     table => 1, html => 1,
5415     }->{$node->[1]}) {
5416     last INSCOPE;
5417     }
5418     } # INSCOPE
5419     unless (defined $i) {
5420 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5421 wakaba 1.1 ## Ignore the token
5422     $token = $self->_get_next_token;
5423     redo B;
5424     }
5425    
5426     ## Clear back to table body context
5427     while (not {
5428     tbody => 1, tfoot => 1, thead => 1, html => 1,
5429 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5430     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5431     pop @{$self->{open_elements}};
5432 wakaba 1.1 }
5433    
5434 wakaba 1.3 pop @{$self->{open_elements}};
5435     $self->{insertion_mode} = 'in table';
5436 wakaba 1.1 $token = $self->_get_next_token;
5437     redo B;
5438     } elsif ($token->{tag_name} eq 'table') {
5439     ## have an element in table scope
5440     my $i;
5441 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5442     my $node = $self->{open_elements}->[$_];
5443 wakaba 1.1 if ({
5444     tbody => 1, thead => 1, tfoot => 1,
5445     }->{$node->[1]}) {
5446     $i = $_;
5447     last INSCOPE;
5448     } elsif ({
5449     table => 1, html => 1,
5450     }->{$node->[1]}) {
5451     last INSCOPE;
5452     }
5453     } # INSCOPE
5454     unless (defined $i) {
5455 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5456 wakaba 1.1 ## Ignore the token
5457     $token = $self->_get_next_token;
5458     redo B;
5459     }
5460    
5461     ## Clear back to table body context
5462     while (not {
5463     tbody => 1, tfoot => 1, thead => 1, html => 1,
5464 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5465     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5466     pop @{$self->{open_elements}};
5467 wakaba 1.1 }
5468    
5469     ## As if <{current node}>
5470     ## have an element in table scope
5471     ## true by definition
5472    
5473     ## Clear back to table body context
5474     ## nop by definition
5475    
5476 wakaba 1.3 pop @{$self->{open_elements}};
5477     $self->{insertion_mode} = 'in table';
5478 wakaba 1.1 ## reprocess
5479     redo B;
5480     } elsif ({
5481     body => 1, caption => 1, col => 1, colgroup => 1,
5482     html => 1, td => 1, th => 1, tr => 1,
5483     }->{$token->{tag_name}}) {
5484 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5485 wakaba 1.1 ## Ignore the token
5486     $token = $self->_get_next_token;
5487     redo B;
5488     } else {
5489     #
5490     }
5491     } else {
5492     #
5493     }
5494    
5495     ## As if in table
5496 wakaba 1.3 $self->{parse_error}-> (type => 'in table:'.$token->{tag_name});
5497 wakaba 1.1 $in_body->($insert_to_foster);
5498     redo B;
5499 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in row') {
5500 wakaba 1.1 if ($token->{type} eq 'character') {
5501     ## NOTE: This is a "character in table" code clone.
5502     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5503 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5504 wakaba 1.1
5505     unless (length $token->{data}) {
5506     $token = $self->_get_next_token;
5507     redo B;
5508     }
5509     }
5510    
5511 wakaba 1.3 $self->{parse_error}-> (type => 'in table:#character');
5512    
5513 wakaba 1.1 ## As if in body, but insert into foster parent element
5514     ## ISSUE: Spec says that "whenever a node would be inserted
5515     ## into the current node" while characters might not be
5516     ## result in a new Text node.
5517     $reconstruct_active_formatting_elements->($insert_to_foster);
5518    
5519     if ({
5520     table => 1, tbody => 1, tfoot => 1,
5521     thead => 1, tr => 1,
5522 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5523 wakaba 1.1 # MUST
5524     my $foster_parent_element;
5525     my $next_sibling;
5526     my $prev_sibling;
5527 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
5528     if ($self->{open_elements}->[$_]->[1] eq 'table') {
5529     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
5530 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
5531     $foster_parent_element = $parent;
5532 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
5533 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
5534     } else {
5535 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
5536 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
5537     }
5538     last OE;
5539     }
5540     } # OE
5541 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
5542 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
5543     unless defined $foster_parent_element;
5544     if (defined $prev_sibling and
5545     $prev_sibling->node_type == 3) {
5546     $prev_sibling->manakai_append_text ($token->{data});
5547     } else {
5548     $foster_parent_element->insert_before
5549     ($self->{document}->create_text_node ($token->{data}),
5550     $next_sibling);
5551     }
5552     } else {
5553 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
5554 wakaba 1.1 }
5555    
5556     $token = $self->_get_next_token;
5557     redo B;
5558     } elsif ($token->{type} eq 'comment') {
5559     ## Copied from 'in table'
5560     my $comment = $self->{document}->create_comment ($token->{data});
5561 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
5562 wakaba 1.1 $token = $self->_get_next_token;
5563     redo B;
5564     } elsif ($token->{type} eq 'start tag') {
5565     if ($token->{tag_name} eq 'th' or
5566     $token->{tag_name} eq 'td') {
5567     ## Clear back to table row context
5568     while (not {
5569     tr => 1, html => 1,
5570 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5571     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5572     pop @{$self->{open_elements}};
5573 wakaba 1.1 }
5574    
5575    
5576     {
5577     my $el;
5578    
5579     $el = $self->{document}->create_element_ns
5580     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
5581    
5582     for my $attr_name (keys %{ $token->{attributes}}) {
5583     $el->set_attribute_ns (undef, [undef, $attr_name],
5584     $token->{attributes} ->{$attr_name}->{value});
5585     }
5586    
5587 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
5588     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
5589 wakaba 1.1 }
5590    
5591 wakaba 1.3 $self->{insertion_mode} = 'in cell';
5592 wakaba 1.1
5593     push @$active_formatting_elements, ['#marker', ''];
5594    
5595     $token = $self->_get_next_token;
5596     redo B;
5597     } elsif ({
5598     caption => 1, col => 1, colgroup => 1,
5599     tbody => 1, tfoot => 1, thead => 1, tr => 1,
5600     }->{$token->{tag_name}}) {
5601     ## As if </tr>
5602     ## have an element in table scope
5603     my $i;
5604 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5605     my $node = $self->{open_elements}->[$_];
5606 wakaba 1.1 if ($node->[1] eq 'tr') {
5607     $i = $_;
5608     last INSCOPE;
5609     } elsif ({
5610     table => 1, html => 1,
5611     }->{$node->[1]}) {
5612     last INSCOPE;
5613     }
5614     } # INSCOPE
5615     unless (defined $i) {
5616 wakaba 1.3 $self->{parse_error}-> (type => 'unmacthed end tag:'.$token->{tag_name});
5617 wakaba 1.1 ## Ignore the token
5618     $token = $self->_get_next_token;
5619     redo B;
5620     }
5621    
5622     ## Clear back to table row context
5623     while (not {
5624     tr => 1, html => 1,
5625 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5626     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5627     pop @{$self->{open_elements}};
5628 wakaba 1.1 }
5629    
5630 wakaba 1.3 pop @{$self->{open_elements}}; # tr
5631     $self->{insertion_mode} = 'in table body';
5632 wakaba 1.1 ## reprocess
5633     redo B;
5634     } elsif ($token->{tag_name} eq 'table') {
5635     ## NOTE: This is a code clone of "table in table"
5636 wakaba 1.3 $self->{parse_error}-> (type => 'not closed:table');
5637 wakaba 1.1
5638     ## As if </table>
5639     ## have a table element in table scope
5640     my $i;
5641 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5642     my $node = $self->{open_elements}->[$_];
5643 wakaba 1.1 if ($node->[1] eq 'table') {
5644     $i = $_;
5645     last INSCOPE;
5646     } elsif ({
5647     table => 1, html => 1,
5648     }->{$node->[1]}) {
5649     last INSCOPE;
5650     }
5651     } # INSCOPE
5652     unless (defined $i) {
5653 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:table');
5654 wakaba 1.1 ## Ignore tokens </table><table>
5655     $token = $self->_get_next_token;
5656     redo B;
5657     }
5658    
5659     ## generate implied end tags
5660     if ({
5661     dd => 1, dt => 1, li => 1, p => 1,
5662     td => 1, th => 1, tr => 1,
5663 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5664 wakaba 1.1 unshift @{$self->{token}}, $token; # <table>
5665     $token = {type => 'end tag', tag_name => 'table'};
5666     unshift @{$self->{token}}, $token;
5667     $token = {type => 'end tag',
5668 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
5669 wakaba 1.1 redo B;
5670     }
5671    
5672 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
5673     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5674 wakaba 1.1 }
5675    
5676 wakaba 1.3 splice @{$self->{open_elements}}, $i;
5677 wakaba 1.1
5678 wakaba 1.3 $self->_reset_insertion_mode;
5679 wakaba 1.1
5680     ## reprocess
5681     redo B;
5682     } else {
5683     #
5684     }
5685     } elsif ($token->{type} eq 'end tag') {
5686     if ($token->{tag_name} eq 'tr') {
5687     ## have an element in table scope
5688     my $i;
5689 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5690     my $node = $self->{open_elements}->[$_];
5691 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
5692     $i = $_;
5693     last INSCOPE;
5694     } elsif ({
5695     table => 1, html => 1,
5696     }->{$node->[1]}) {
5697     last INSCOPE;
5698     }
5699     } # INSCOPE
5700     unless (defined $i) {
5701 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5702 wakaba 1.1 ## Ignore the token
5703     $token = $self->_get_next_token;
5704     redo B;
5705     }
5706    
5707     ## Clear back to table row context
5708     while (not {
5709     tr => 1, html => 1,
5710 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5711     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5712     pop @{$self->{open_elements}};
5713 wakaba 1.1 }
5714    
5715 wakaba 1.3 pop @{$self->{open_elements}}; # tr
5716     $self->{insertion_mode} = 'in table body';
5717 wakaba 1.1 $token = $self->_get_next_token;
5718     redo B;
5719     } elsif ($token->{tag_name} eq 'table') {
5720     ## As if </tr>
5721     ## have an element in table scope
5722     my $i;
5723 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5724     my $node = $self->{open_elements}->[$_];
5725 wakaba 1.1 if ($node->[1] eq 'tr') {
5726     $i = $_;
5727     last INSCOPE;
5728     } elsif ({
5729     table => 1, html => 1,
5730     }->{$node->[1]}) {
5731     last INSCOPE;
5732     }
5733     } # INSCOPE
5734     unless (defined $i) {
5735 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{type});
5736 wakaba 1.1 ## Ignore the token
5737     $token = $self->_get_next_token;
5738     redo B;
5739     }
5740    
5741     ## Clear back to table row context
5742     while (not {
5743     tr => 1, html => 1,
5744 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5745     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5746     pop @{$self->{open_elements}};
5747 wakaba 1.1 }
5748    
5749 wakaba 1.3 pop @{$self->{open_elements}}; # tr
5750     $self->{insertion_mode} = 'in table body';
5751 wakaba 1.1 ## reprocess
5752     redo B;
5753     } elsif ({
5754     tbody => 1, tfoot => 1, thead => 1,
5755     }->{$token->{tag_name}}) {
5756     ## have an element in table scope
5757     my $i;
5758 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5759     my $node = $self->{open_elements}->[$_];
5760 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
5761     $i = $_;
5762     last INSCOPE;
5763     } elsif ({
5764     table => 1, html => 1,
5765     }->{$node->[1]}) {
5766     last INSCOPE;
5767     }
5768     } # INSCOPE
5769     unless (defined $i) {
5770 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5771 wakaba 1.1 ## Ignore the token
5772     $token = $self->_get_next_token;
5773     redo B;
5774     }
5775    
5776     ## As if </tr>
5777     ## have an element in table scope
5778     my $i;
5779 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5780     my $node = $self->{open_elements}->[$_];
5781 wakaba 1.1 if ($node->[1] eq 'tr') {
5782     $i = $_;
5783     last INSCOPE;
5784     } elsif ({
5785     table => 1, html => 1,
5786     }->{$node->[1]}) {
5787     last INSCOPE;
5788     }
5789     } # INSCOPE
5790     unless (defined $i) {
5791 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:tr');
5792 wakaba 1.1 ## Ignore the token
5793     $token = $self->_get_next_token;
5794     redo B;
5795     }
5796    
5797     ## Clear back to table row context
5798     while (not {
5799     tr => 1, html => 1,
5800 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5801     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5802     pop @{$self->{open_elements}};
5803 wakaba 1.1 }
5804    
5805 wakaba 1.3 pop @{$self->{open_elements}}; # tr
5806     $self->{insertion_mode} = 'in table body';
5807 wakaba 1.1 ## reprocess
5808     redo B;
5809     } elsif ({
5810     body => 1, caption => 1, col => 1,
5811     colgroup => 1, html => 1, td => 1, th => 1,
5812     }->{$token->{tag_name}}) {
5813 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5814 wakaba 1.1 ## Ignore the token
5815     $token = $self->_get_next_token;
5816     redo B;
5817     } else {
5818     #
5819     }
5820     } else {
5821     #
5822     }
5823    
5824     ## As if in table
5825 wakaba 1.3 $self->{parse_error}-> (type => 'in table:'.$token->{tag_name});
5826 wakaba 1.1 $in_body->($insert_to_foster);
5827     redo B;
5828 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in cell') {
5829 wakaba 1.1 if ($token->{type} eq 'character') {
5830     ## NOTE: This is a code clone of "character in body".
5831     $reconstruct_active_formatting_elements->($insert_to_current);
5832    
5833 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
5834 wakaba 1.1
5835     $token = $self->_get_next_token;
5836     redo B;
5837     } elsif ($token->{type} eq 'comment') {
5838     ## NOTE: This is a code clone of "comment in body".
5839     my $comment = $self->{document}->create_comment ($token->{data});
5840 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
5841 wakaba 1.1 $token = $self->_get_next_token;
5842     redo B;
5843     } elsif ($token->{type} eq 'start tag') {
5844     if ({
5845     caption => 1, col => 1, colgroup => 1,
5846     tbody => 1, td => 1, tfoot => 1, th => 1,
5847     thead => 1, tr => 1,
5848     }->{$token->{tag_name}}) {
5849     ## have an element in table scope
5850     my $tn;
5851 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5852     my $node = $self->{open_elements}->[$_];
5853 wakaba 1.1 if ($node->[1] eq 'td' or $node->[1] eq 'th') {
5854     $tn = $node->[1];
5855     last INSCOPE;
5856     } elsif ({
5857     table => 1, html => 1,
5858     }->{$node->[1]}) {
5859     last INSCOPE;
5860     }
5861     } # INSCOPE
5862     unless (defined $tn) {
5863 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5864 wakaba 1.1 ## Ignore the token
5865     $token = $self->_get_next_token;
5866     redo B;
5867     }
5868    
5869     ## Close the cell
5870     unshift @{$self->{token}}, $token; # <?>
5871     $token = {type => 'end tag', tag_name => $tn};
5872     redo B;
5873     } else {
5874     #
5875     }
5876     } elsif ($token->{type} eq 'end tag') {
5877     if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
5878     ## have an element in table scope
5879     my $i;
5880 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5881     my $node = $self->{open_elements}->[$_];
5882 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
5883     $i = $_;
5884     last INSCOPE;
5885     } elsif ({
5886     table => 1, html => 1,
5887     }->{$node->[1]}) {
5888     last INSCOPE;
5889     }
5890     } # INSCOPE
5891     unless (defined $i) {
5892 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5893 wakaba 1.1 ## Ignore the token
5894     $token = $self->_get_next_token;
5895     redo B;
5896     }
5897    
5898     ## generate implied end tags
5899     if ({
5900     dd => 1, dt => 1, li => 1, p => 1,
5901     td => ($token->{tag_name} eq 'th'),
5902     th => ($token->{tag_name} eq 'td'),
5903     tr => 1,
5904 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5905 wakaba 1.1 unshift @{$self->{token}}, $token;
5906     $token = {type => 'end tag',
5907 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
5908 wakaba 1.1 redo B;
5909     }
5910    
5911 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
5912     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5913 wakaba 1.1 }
5914    
5915 wakaba 1.3 splice @{$self->{open_elements}}, $i;
5916 wakaba 1.1
5917     $clear_up_to_marker->();
5918    
5919 wakaba 1.3 $self->{insertion_mode} = 'in row';
5920 wakaba 1.1
5921     $token = $self->_get_next_token;
5922     redo B;
5923     } elsif ({
5924     body => 1, caption => 1, col => 1,
5925     colgroup => 1, html => 1,
5926     }->{$token->{tag_name}}) {
5927 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5928 wakaba 1.1 ## Ignore the token
5929     $token = $self->_get_next_token;
5930     redo B;
5931     } elsif ({
5932     table => 1, tbody => 1, tfoot => 1,
5933     thead => 1, tr => 1,
5934     }->{$token->{tag_name}}) {
5935     ## have an element in table scope
5936     my $i;
5937     my $tn;
5938 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5939     my $node = $self->{open_elements}->[$_];
5940 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
5941     $i = $_;
5942     last INSCOPE;
5943     } elsif ($node->[1] eq 'td' or $node->[1] eq 'th') {
5944     $tn = $node->[1];
5945     ## NOTE: There is exactly one |td| or |th| element
5946     ## in scope in the stack of open elements by definition.
5947     } elsif ({
5948     table => 1, html => 1,
5949     }->{$node->[1]}) {
5950     last INSCOPE;
5951     }
5952     } # INSCOPE
5953     unless (defined $i) {
5954 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5955 wakaba 1.1 ## Ignore the token
5956     $token = $self->_get_next_token;
5957     redo B;
5958     }
5959    
5960     ## Close the cell
5961     unshift @{$self->{token}}, $token; # </?>
5962     $token = {type => 'end tag', tag_name => $tn};
5963     redo B;
5964     } else {
5965     #
5966     }
5967     } else {
5968     #
5969     }
5970    
5971     $in_body->($insert_to_current);
5972     redo B;
5973 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in select') {
5974 wakaba 1.1 if ($token->{type} eq 'character') {
5975 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
5976 wakaba 1.1 $token = $self->_get_next_token;
5977     redo B;
5978     } elsif ($token->{type} eq 'comment') {
5979     my $comment = $self->{document}->create_comment ($token->{data});
5980 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
5981 wakaba 1.1 $token = $self->_get_next_token;
5982     redo B;
5983     } elsif ($token->{type} eq 'start tag') {
5984     if ($token->{tag_name} eq 'option') {
5985 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
5986 wakaba 1.1 ## As if </option>
5987 wakaba 1.3 pop @{$self->{open_elements}};
5988 wakaba 1.1 }
5989    
5990    
5991     {
5992     my $el;
5993    
5994     $el = $self->{document}->create_element_ns
5995     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
5996    
5997     for my $attr_name (keys %{ $token->{attributes}}) {
5998     $el->set_attribute_ns (undef, [undef, $attr_name],
5999     $token->{attributes} ->{$attr_name}->{value});
6000     }
6001    
6002 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
6003     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
6004 wakaba 1.1 }
6005    
6006     $token = $self->_get_next_token;
6007     redo B;
6008     } elsif ($token->{tag_name} eq 'optgroup') {
6009 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
6010 wakaba 1.1 ## As if </option>
6011 wakaba 1.3 pop @{$self->{open_elements}};
6012 wakaba 1.1 }
6013    
6014 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
6015 wakaba 1.1 ## As if </optgroup>
6016 wakaba 1.3 pop @{$self->{open_elements}};
6017 wakaba 1.1 }
6018    
6019    
6020     {
6021     my $el;
6022    
6023     $el = $self->{document}->create_element_ns
6024     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
6025    
6026     for my $attr_name (keys %{ $token->{attributes}}) {
6027     $el->set_attribute_ns (undef, [undef, $attr_name],
6028     $token->{attributes} ->{$attr_name}->{value});
6029     }
6030    
6031 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
6032     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
6033 wakaba 1.1 }
6034    
6035     $token = $self->_get_next_token;
6036     redo B;
6037     } elsif ($token->{tag_name} eq 'select') {
6038 wakaba 1.3 $self->{parse_error}-> (type => 'not closed:select');
6039 wakaba 1.1 ## As if </select> instead
6040     ## have an element in table scope
6041     my $i;
6042 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6043     my $node = $self->{open_elements}->[$_];
6044 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
6045     $i = $_;
6046     last INSCOPE;
6047     } elsif ({
6048     table => 1, html => 1,
6049     }->{$node->[1]}) {
6050     last INSCOPE;
6051     }
6052     } # INSCOPE
6053     unless (defined $i) {
6054 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:select');
6055 wakaba 1.1 ## Ignore the token
6056     $token = $self->_get_next_token;
6057     redo B;
6058     }
6059    
6060 wakaba 1.3 splice @{$self->{open_elements}}, $i;
6061 wakaba 1.1
6062 wakaba 1.3 $self->_reset_insertion_mode;
6063 wakaba 1.1
6064     $token = $self->_get_next_token;
6065     redo B;
6066     } else {
6067     #
6068     }
6069     } elsif ($token->{type} eq 'end tag') {
6070     if ($token->{tag_name} eq 'optgroup') {
6071 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option' and
6072     $self->{open_elements}->[-2]->[1] eq 'optgroup') {
6073 wakaba 1.1 ## As if </option>
6074 wakaba 1.3 splice @{$self->{open_elements}}, -2;
6075     } elsif ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
6076     pop @{$self->{open_elements}};
6077 wakaba 1.1 } else {
6078 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
6079 wakaba 1.1 ## Ignore the token
6080     }
6081     $token = $self->_get_next_token;
6082     redo B;
6083     } elsif ($token->{tag_name} eq 'option') {
6084 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
6085     pop @{$self->{open_elements}};
6086 wakaba 1.1 } else {
6087 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
6088 wakaba 1.1 ## Ignore the token
6089     }
6090     $token = $self->_get_next_token;
6091     redo B;
6092     } elsif ($token->{tag_name} eq 'select') {
6093     ## have an element in table scope
6094     my $i;
6095 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6096     my $node = $self->{open_elements}->[$_];
6097 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
6098     $i = $_;
6099     last INSCOPE;
6100     } elsif ({
6101     table => 1, html => 1,
6102     }->{$node->[1]}) {
6103     last INSCOPE;
6104     }
6105     } # INSCOPE
6106     unless (defined $i) {
6107 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
6108 wakaba 1.1 ## Ignore the token
6109     $token = $self->_get_next_token;
6110     redo B;
6111     }
6112    
6113 wakaba 1.3 splice @{$self->{open_elements}}, $i;
6114 wakaba 1.1
6115 wakaba 1.3 $self->_reset_insertion_mode;
6116 wakaba 1.1
6117     $token = $self->_get_next_token;
6118     redo B;
6119     } elsif ({
6120     caption => 1, table => 1, tbody => 1,
6121     tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
6122     }->{$token->{tag_name}}) {
6123 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
6124 wakaba 1.1
6125     ## have an element in table scope
6126     my $i;
6127 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6128     my $node = $self->{open_elements}->[$_];
6129 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
6130     $i = $_;
6131     last INSCOPE;
6132     } elsif ({
6133     table => 1, html => 1,
6134     }->{$node->[1]}) {
6135     last INSCOPE;
6136     }
6137     } # INSCOPE
6138     unless (defined $i) {
6139     ## Ignore the token
6140     $token = $self->_get_next_token;
6141     redo B;
6142     }
6143    
6144     ## As if </select>
6145     ## have an element in table scope
6146     undef $i;
6147 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6148     my $node = $self->{open_elements}->[$_];
6149 wakaba 1.1 if ($node->[1] eq 'select') {
6150     $i = $_;
6151     last INSCOPE;
6152     } elsif ({
6153     table => 1, html => 1,
6154     }->{$node->[1]}) {
6155     last INSCOPE;
6156     }
6157     } # INSCOPE
6158     unless (defined $i) {
6159 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:select');
6160 wakaba 1.1 ## Ignore the </select> token
6161     $token = $self->_get_next_token; ## TODO: ok?
6162     redo B;
6163     }
6164    
6165 wakaba 1.3 splice @{$self->{open_elements}}, $i;
6166 wakaba 1.1
6167 wakaba 1.3 $self->_reset_insertion_mode;
6168 wakaba 1.1
6169     ## reprocess
6170     redo B;
6171     } else {
6172     #
6173     }
6174     } else {
6175     #
6176     }
6177    
6178 wakaba 1.3 $self->{parse_error}-> (type => 'in select:'.$token->{tag_name});
6179 wakaba 1.1 ## Ignore the token
6180     $token = $self->_get_next_token;
6181     redo B;
6182 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after body') {
6183 wakaba 1.1 if ($token->{type} eq 'character') {
6184     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
6185     ## As if in body
6186     $reconstruct_active_formatting_elements->($insert_to_current);
6187    
6188 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
6189 wakaba 1.1
6190     unless (length $token->{data}) {
6191     $token = $self->_get_next_token;
6192     redo B;
6193     }
6194     }
6195    
6196     #
6197 wakaba 1.3 $self->{parse_error}-> (type => 'after body:#'.$token->{type});
6198 wakaba 1.1 } elsif ($token->{type} eq 'comment') {
6199     my $comment = $self->{document}->create_comment ($token->{data});
6200 wakaba 1.3 $self->{open_elements}->[0]->[0]->append_child ($comment);
6201 wakaba 1.1 $token = $self->_get_next_token;
6202     redo B;
6203 wakaba 1.3 } elsif ($token->{type} eq 'start tag') {
6204     $self->{parse_error}-> (type => 'after body:'.$token->{tag_name});
6205     #
6206 wakaba 1.1 } elsif ($token->{type} eq 'end tag') {
6207     if ($token->{tag_name} eq 'html') {
6208 wakaba 1.3 if (defined $self->{inner_html_node}) {
6209     $self->{parse_error}-> (type => 'unmatched end tag:html');
6210     ## Ignore the token
6211     $token = $self->_get_next_token;
6212     redo B;
6213     } else {
6214     $phase = 'trailing end';
6215     $token = $self->_get_next_token;
6216     redo B;
6217     }
6218 wakaba 1.1 } else {
6219 wakaba 1.3 $self->{parse_error}-> (type => 'after body:/'.$token->{tag_name});
6220 wakaba 1.1 }
6221     } else {
6222 wakaba 1.3 $self->{parse_error}-> (type => 'after body:#'.$token->{type});
6223 wakaba 1.1 }
6224    
6225 wakaba 1.3 $self->{insertion_mode} = 'in body';
6226 wakaba 1.1 ## reprocess
6227     redo B;
6228 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in frameset') {
6229 wakaba 1.1 if ($token->{type} eq 'character') {
6230     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
6231 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
6232 wakaba 1.1
6233     unless (length $token->{data}) {
6234     $token = $self->_get_next_token;
6235     redo B;
6236     }
6237     }
6238    
6239     #
6240     } elsif ($token->{type} eq 'comment') {
6241     my $comment = $self->{document}->create_comment ($token->{data});
6242 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
6243 wakaba 1.1 $token = $self->_get_next_token;
6244     redo B;
6245     } elsif ($token->{type} eq 'start tag') {
6246     if ($token->{tag_name} eq 'frameset') {
6247    
6248     {
6249     my $el;
6250    
6251     $el = $self->{document}->create_element_ns
6252     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
6253    
6254     for my $attr_name (keys %{ $token->{attributes}}) {
6255     $el->set_attribute_ns (undef, [undef, $attr_name],
6256     $token->{attributes} ->{$attr_name}->{value});
6257     }
6258    
6259 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
6260     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
6261 wakaba 1.1 }
6262    
6263     $token = $self->_get_next_token;
6264     redo B;
6265     } elsif ($token->{tag_name} eq 'frame') {
6266    
6267     {
6268     my $el;
6269    
6270     $el = $self->{document}->create_element_ns
6271     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
6272    
6273     for my $attr_name (keys %{ $token->{attributes}}) {
6274     $el->set_attribute_ns (undef, [undef, $attr_name],
6275     $token->{attributes} ->{$attr_name}->{value});
6276     }
6277    
6278 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
6279     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
6280 wakaba 1.1 }
6281    
6282 wakaba 1.3 pop @{$self->{open_elements}};
6283 wakaba 1.1 $token = $self->_get_next_token;
6284     redo B;
6285     } elsif ($token->{tag_name} eq 'noframes') {
6286     $in_body->($insert_to_current);
6287     redo B;
6288     } else {
6289     #
6290     }
6291     } elsif ($token->{type} eq 'end tag') {
6292     if ($token->{tag_name} eq 'frameset') {
6293 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html' and
6294     @{$self->{open_elements}} == 1) {
6295     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
6296 wakaba 1.1 ## Ignore the token
6297     $token = $self->_get_next_token;
6298     } else {
6299 wakaba 1.3 pop @{$self->{open_elements}};
6300 wakaba 1.1 $token = $self->_get_next_token;
6301     }
6302    
6303     ## if not inner_html and
6304 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'frameset') {
6305     $self->{insertion_mode} = 'after frameset';
6306 wakaba 1.1 }
6307     redo B;
6308     } else {
6309     #
6310     }
6311     } else {
6312     #
6313     }
6314    
6315 wakaba 1.3 if (defined $token->{tag_name}) {
6316     $self->{parse_error}-> (type => 'in frameset:'.$token->{tag_name});
6317     } else {
6318     $self->{parse_error}-> (type => 'in frameset:#'.$token->{type});
6319     }
6320 wakaba 1.1 ## Ignore the token
6321     $token = $self->_get_next_token;
6322     redo B;
6323 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after frameset') {
6324 wakaba 1.1 if ($token->{type} eq 'character') {
6325     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
6326 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
6327 wakaba 1.1
6328     unless (length $token->{data}) {
6329     $token = $self->_get_next_token;
6330     redo B;
6331     }
6332     }
6333    
6334     #
6335     } elsif ($token->{type} eq 'comment') {
6336     my $comment = $self->{document}->create_comment ($token->{data});
6337 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
6338 wakaba 1.1 $token = $self->_get_next_token;
6339     redo B;
6340     } elsif ($token->{type} eq 'start tag') {
6341     if ($token->{tag_name} eq 'noframes') {
6342     $in_body->($insert_to_current);
6343     redo B;
6344     } else {
6345     #
6346     }
6347     } elsif ($token->{type} eq 'end tag') {
6348     if ($token->{tag_name} eq 'html') {
6349     $phase = 'trailing end';
6350     $token = $self->_get_next_token;
6351     redo B;
6352     } else {
6353     #
6354     }
6355     } else {
6356     #
6357     }
6358    
6359 wakaba 1.3 if (defined $token->{tag_name}) {
6360     $self->{parse_error}-> (type => 'after frameset:'.$token->{tag_name});
6361     } else {
6362     $self->{parse_error}-> (type => 'after frameset:#'.$token->{type});
6363     }
6364 wakaba 1.1 ## Ignore the token
6365     $token = $self->_get_next_token;
6366     redo B;
6367    
6368     ## ISSUE: An issue in spec there
6369     } else {
6370 wakaba 1.3 die "$0: $self->{insertion_mode}: Unknown insertion mode";
6371 wakaba 1.1 }
6372     }
6373     } elsif ($phase eq 'trailing end') {
6374     ## states in the main stage is preserved yet # MUST
6375    
6376     if ($token->{type} eq 'DOCTYPE') {
6377 wakaba 1.3 $self->{parse_error}-> (type => 'after html:#DOCTYPE');
6378 wakaba 1.1 ## Ignore the token
6379     $token = $self->_get_next_token;
6380     redo B;
6381     } elsif ($token->{type} eq 'comment') {
6382     my $comment = $self->{document}->create_comment ($token->{data});
6383     $self->{document}->append_child ($comment);
6384     $token = $self->_get_next_token;
6385     redo B;
6386     } elsif ($token->{type} eq 'character') {
6387     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
6388     my $data = $1;
6389     ## As if in the main phase.
6390     ## NOTE: The insertion mode in the main phase
6391     ## just before the phase has been changed to the trailing
6392     ## end phase is either "after body" or "after frameset".
6393     $reconstruct_active_formatting_elements->($insert_to_current)
6394     if $phase eq 'main';
6395    
6396 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($data);
6397 wakaba 1.1
6398     unless (length $token->{data}) {
6399     $token = $self->_get_next_token;
6400     redo B;
6401     }
6402     }
6403    
6404 wakaba 1.3 $self->{parse_error}-> (type => 'after html:#character');
6405 wakaba 1.1 $phase = 'main';
6406     ## reprocess
6407     redo B;
6408     } elsif ($token->{type} eq 'start tag' or
6409     $token->{type} eq 'end tag') {
6410 wakaba 1.3 $self->{parse_error}-> (type => 'after html:'.$token->{tag_name});
6411 wakaba 1.1 $phase = 'main';
6412     ## reprocess
6413     redo B;
6414     } elsif ($token->{type} eq 'end-of-file') {
6415     ## Stop parsing
6416     last B;
6417     } else {
6418     die "$0: $token->{type}: Unknown token";
6419     }
6420     }
6421     } # B
6422    
6423     ## Stop parsing # MUST
6424    
6425     ## TODO: script stuffs
6426 wakaba 1.3 } # _tree_construct_main
6427    
6428     sub set_inner_html ($$$) {
6429     my $class = shift;
6430     my $node = shift;
6431     my $s = \$_[0];
6432     my $onerror = $_[1];
6433    
6434     my $nt = $node->node_type;
6435     if ($nt == 9) {
6436     # MUST
6437    
6438     ## Step 1 # MUST
6439     ## TODO: If the document has an active parser, ...
6440     ## ISSUE: There is an issue in the spec.
6441    
6442     ## Step 2 # MUST
6443     my @cn = @{$node->child_nodes};
6444     for (@cn) {
6445     $node->remove_child ($_);
6446     }
6447    
6448     ## Step 3, 4, 5 # MUST
6449     $class->parse_string ($$s => $node, $onerror);
6450     } elsif ($nt == 1) {
6451     ## TODO: If non-html element
6452    
6453     ## NOTE: Most of this code is copied from |parse_string|
6454    
6455     ## Step 1 # MUST
6456 wakaba 1.14 my $this_doc = $node->owner_document;
6457     my $doc = $this_doc->implementation->create_document;
6458 wakaba 1.18 $doc->manakai_is_html (1);
6459 wakaba 1.3 my $p = $class->new;
6460     $p->{document} = $doc;
6461    
6462     ## Step 9 # MUST
6463     my $i = 0;
6464     my $line = 1;
6465     my $column = 0;
6466     $p->{set_next_input_character} = sub {
6467     my $self = shift;
6468 wakaba 1.14
6469     pop @{$self->{prev_input_character}};
6470     unshift @{$self->{prev_input_character}}, $self->{next_input_character};
6471    
6472 wakaba 1.3 $self->{next_input_character} = -1 and return if $i >= length $$s;
6473     $self->{next_input_character} = ord substr $$s, $i++, 1;
6474     $column++;
6475 wakaba 1.4
6476     if ($self->{next_input_character} == 0x000A) { # LF
6477     $line++;
6478     $column = 0;
6479     } elsif ($self->{next_input_character} == 0x000D) { # CR
6480 wakaba 1.15 $i++ if substr ($$s, $i, 1) eq "\x0A";
6481 wakaba 1.3 $self->{next_input_character} = 0x000A; # LF # MUST
6482     $line++;
6483 wakaba 1.4 $column = 0;
6484 wakaba 1.3 } elsif ($self->{next_input_character} > 0x10FFFF) {
6485     $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
6486     } elsif ($self->{next_input_character} == 0x0000) { # NULL
6487 wakaba 1.14 $self->{parse_error}-> (type => 'NULL');
6488 wakaba 1.3 $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
6489     }
6490     };
6491 wakaba 1.14 $p->{prev_input_character} = [-1, -1, -1];
6492     $p->{next_input_character} = -1;
6493 wakaba 1.3
6494     my $ponerror = $onerror || sub {
6495     my (%opt) = @_;
6496     warn "Parse error ($opt{type}) at line $opt{line} column $opt{column}\n";
6497     };
6498     $p->{parse_error} = sub {
6499     $ponerror->(@_, line => $line, column => $column);
6500     };
6501    
6502     $p->_initialize_tokenizer;
6503     $p->_initialize_tree_constructor;
6504    
6505     ## Step 2
6506     my $node_ln = $node->local_name;
6507     $p->{content_model_flag} = {
6508     title => 'RCDATA',
6509     textarea => 'RCDATA',
6510     style => 'CDATA',
6511     script => 'CDATA',
6512     xmp => 'CDATA',
6513     iframe => 'CDATA',
6514     noembed => 'CDATA',
6515     noframes => 'CDATA',
6516     noscript => 'CDATA',
6517     plaintext => 'PLAINTEXT',
6518     }->{$node_ln} || 'PCDATA';
6519     ## ISSUE: What is "the name of the element"? local name?
6520    
6521     $p->{inner_html_node} = [$node, $node_ln];
6522    
6523     ## Step 4
6524     my $root = $doc->create_element_ns
6525     ('http://www.w3.org/1999/xhtml', [undef, 'html']);
6526    
6527     ## Step 5 # MUST
6528     $doc->append_child ($root);
6529    
6530     ## Step 6 # MUST
6531     push @{$p->{open_elements}}, [$root, 'html'];
6532    
6533     undef $p->{head_element};
6534    
6535     ## Step 7 # MUST
6536     $p->_reset_insertion_mode;
6537    
6538     ## Step 8 # MUST
6539     my $anode = $node;
6540     AN: while (defined $anode) {
6541     if ($anode->node_type == 1) {
6542     my $nsuri = $anode->namespace_uri;
6543     if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml') {
6544     if ($anode->local_name eq 'form') { ## TODO: case?
6545     $p->{form_element} = $anode;
6546     last AN;
6547     }
6548     }
6549     }
6550     $anode = $anode->parent_node;
6551     } # AN
6552    
6553     ## Step 3 # MUST
6554     ## Step 10 # MUST
6555     {
6556     my $self = $p;
6557     $token = $self->_get_next_token;
6558     }
6559     $p->_tree_construction_main;
6560    
6561     ## Step 11 # MUST
6562     my @cn = @{$node->child_nodes};
6563     for (@cn) {
6564     $node->remove_child ($_);
6565     }
6566     ## ISSUE: mutation events? read-only?
6567    
6568     ## Step 12 # MUST
6569     @cn = @{$root->child_nodes};
6570     for (@cn) {
6571 wakaba 1.14 $this_doc->adopt_node ($_);
6572 wakaba 1.3 $node->append_child ($_);
6573     }
6574 wakaba 1.14 ## ISSUE: mutation events?
6575 wakaba 1.3
6576     $p->_terminate_tree_constructor;
6577     } else {
6578     die "$0: |set_inner_html| is not defined for node of type $nt";
6579     }
6580     } # set_inner_html
6581    
6582     } # tree construction stage
6583 wakaba 1.1
6584     sub get_inner_html ($$$) {
6585 wakaba 1.3 my (undef, $node, $on_error) = @_;
6586 wakaba 1.1
6587     ## Step 1
6588     my $s = '';
6589    
6590     my $in_cdata;
6591     my $parent = $node;
6592     while (defined $parent) {
6593     if ($parent->node_type == 1 and
6594     $parent->namespace_uri eq 'http://www.w3.org/1999/xhtml' and
6595     {
6596     style => 1, script => 1, xmp => 1, iframe => 1,
6597     noembed => 1, noframes => 1, noscript => 1,
6598     }->{$parent->local_name}) { ## TODO: case thingy
6599     $in_cdata = 1;
6600     }
6601     $parent = $parent->parent_node;
6602     }
6603    
6604     ## Step 2
6605     my @node = @{$node->child_nodes};
6606     C: while (@node) {
6607     my $child = shift @node;
6608     unless (ref $child) {
6609     if ($child eq 'cdata-out') {
6610     $in_cdata = 0;
6611     } else {
6612     $s .= $child; # end tag
6613     }
6614     next C;
6615     }
6616    
6617     my $nt = $child->node_type;
6618     if ($nt == 1) { # Element
6619 wakaba 1.27 my $tag_name = $child->tag_name; ## TODO: manakai_tag_name
6620 wakaba 1.1 $s .= '<' . $tag_name;
6621 wakaba 1.27 ## NOTE: Non-HTML case:
6622     ## <http://permalink.gmane.org/gmane.org.w3c.whatwg.discuss/11191>
6623 wakaba 1.1
6624     my @attrs = @{$child->attributes}; # sort order MUST be stable
6625     for my $attr (@attrs) { # order is implementation dependent
6626 wakaba 1.27 my $attr_name = $attr->name; ## TODO: manakai_name
6627 wakaba 1.1 $s .= ' ' . $attr_name . '="';
6628     my $attr_value = $attr->value;
6629     ## escape
6630     $attr_value =~ s/&/&amp;/g;
6631     $attr_value =~ s/</&lt;/g;
6632     $attr_value =~ s/>/&gt;/g;
6633     $attr_value =~ s/"/&quot;/g;
6634     $s .= $attr_value . '"';
6635     }
6636     $s .= '>';
6637    
6638     next C if {
6639     area => 1, base => 1, basefont => 1, bgsound => 1,
6640     br => 1, col => 1, embed => 1, frame => 1, hr => 1,
6641     img => 1, input => 1, link => 1, meta => 1, param => 1,
6642     spacer => 1, wbr => 1,
6643     }->{$tag_name};
6644    
6645 wakaba 1.23 $s .= "\x0A" if $tag_name eq 'pre' or $tag_name eq 'textarea';
6646    
6647 wakaba 1.1 if (not $in_cdata and {
6648     style => 1, script => 1, xmp => 1, iframe => 1,
6649     noembed => 1, noframes => 1, noscript => 1,
6650 wakaba 1.26 plaintext => 1,
6651 wakaba 1.1 }->{$tag_name}) {
6652     unshift @node, 'cdata-out';
6653     $in_cdata = 1;
6654     }
6655    
6656     unshift @node, @{$child->child_nodes}, '</' . $tag_name . '>';
6657     } elsif ($nt == 3 or $nt == 4) {
6658     if ($in_cdata) {
6659     $s .= $child->data;
6660     } else {
6661     my $value = $child->data;
6662     $value =~ s/&/&amp;/g;
6663     $value =~ s/</&lt;/g;
6664     $value =~ s/>/&gt;/g;
6665     $value =~ s/"/&quot;/g;
6666     $s .= $value;
6667     }
6668     } elsif ($nt == 8) {
6669     $s .= '<!--' . $child->data . '-->';
6670     } elsif ($nt == 10) {
6671     $s .= '<!DOCTYPE ' . $child->name . '>';
6672     } elsif ($nt == 5) { # entrefs
6673     push @node, @{$child->child_nodes};
6674     } else {
6675     $on_error->($child) if defined $on_error;
6676     }
6677     ## ISSUE: This code does not support PIs.
6678     } # C
6679    
6680     ## Step 3
6681     return \$s;
6682     } # get_inner_html
6683    
6684     1;
6685 wakaba 1.27 # $Date: 2007/06/24 06:20:37 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24