/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.22 - (hide annotations) (download)
Sat Jun 23 14:55:45 2007 UTC (17 years, 4 months ago) by wakaba
Branch: MAIN
Changes since 1.21: +19 -8 lines
++ whatpm/t/ChangeLog	23 Jun 2007 14:55:20 -0000
	* tree-test-1.dat: Tests for |<isindex>| are added.

2007-06-23  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	23 Jun 2007 14:55:42 -0000
	* HTML.pm.src: HTML5 revision 920 (<isindex>).

2007-06-23  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.2 package Whatpm::HTML;
2 wakaba 1.1 use strict;
3 wakaba 1.22 our $VERSION=do{my @r=(q$Revision: 1.21 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.1
5 wakaba 1.18 ## ISSUE:
6     ## var doc = implementation.createDocument (null, null, null);
7     ## doc.write ('');
8     ## alert (doc.compatMode);
9 wakaba 1.1
10     my $permitted_slash_tag_name = {
11     base => 1,
12     link => 1,
13     meta => 1,
14     hr => 1,
15     br => 1,
16     img=> 1,
17     embed => 1,
18     param => 1,
19     area => 1,
20     col => 1,
21     input => 1,
22     };
23    
24 wakaba 1.4 my $c1_entity_char = {
25 wakaba 1.9 0x80 => 0x20AC,
26     0x81 => 0xFFFD,
27     0x82 => 0x201A,
28     0x83 => 0x0192,
29     0x84 => 0x201E,
30     0x85 => 0x2026,
31     0x86 => 0x2020,
32     0x87 => 0x2021,
33     0x88 => 0x02C6,
34     0x89 => 0x2030,
35     0x8A => 0x0160,
36     0x8B => 0x2039,
37     0x8C => 0x0152,
38     0x8D => 0xFFFD,
39     0x8E => 0x017D,
40     0x8F => 0xFFFD,
41     0x90 => 0xFFFD,
42     0x91 => 0x2018,
43     0x92 => 0x2019,
44     0x93 => 0x201C,
45     0x94 => 0x201D,
46     0x95 => 0x2022,
47     0x96 => 0x2013,
48     0x97 => 0x2014,
49     0x98 => 0x02DC,
50     0x99 => 0x2122,
51     0x9A => 0x0161,
52     0x9B => 0x203A,
53     0x9C => 0x0153,
54     0x9D => 0xFFFD,
55     0x9E => 0x017E,
56     0x9F => 0x0178,
57 wakaba 1.4 }; # $c1_entity_char
58 wakaba 1.1
59     my $special_category = {
60     address => 1, area => 1, base => 1, basefont => 1, bgsound => 1,
61     blockquote => 1, body => 1, br => 1, center => 1, col => 1, colgroup => 1,
62     dd => 1, dir => 1, div => 1, dl => 1, dt => 1, embed => 1, fieldset => 1,
63     form => 1, frame => 1, frameset => 1, h1 => 1, h2 => 1, h3 => 1,
64     h4 => 1, h5 => 1, h6 => 1, head => 1, hr => 1, iframe => 1, image => 1,
65     img => 1, input => 1, isindex => 1, li => 1, link => 1, listing => 1,
66     menu => 1, meta => 1, noembed => 1, noframes => 1, noscript => 1,
67     ol => 1, optgroup => 1, option => 1, p => 1, param => 1, plaintext => 1,
68     pre => 1, script => 1, select => 1, spacer => 1, style => 1, tbody => 1,
69     textarea => 1, tfoot => 1, thead => 1, title => 1, tr => 1, ul => 1, wbr => 1,
70     };
71     my $scoping_category = {
72     button => 1, caption => 1, html => 1, marquee => 1, object => 1,
73     table => 1, td => 1, th => 1,
74     };
75     my $formatting_category = {
76     a => 1, b => 1, big => 1, em => 1, font => 1, i => 1, nobr => 1,
77     s => 1, small => 1, strile => 1, strong => 1, tt => 1, u => 1,
78     };
79     # $phrasing_category: all other elements
80    
81     sub parse_string ($$$;$) {
82     my $self = shift->new;
83     my $s = \$_[0];
84     $self->{document} = $_[1];
85    
86 wakaba 1.3 ## NOTE: |set_inner_html| copies most of this method's code
87    
88 wakaba 1.1 my $i = 0;
89 wakaba 1.3 my $line = 1;
90     my $column = 0;
91 wakaba 1.1 $self->{set_next_input_character} = sub {
92     my $self = shift;
93 wakaba 1.13
94     pop @{$self->{prev_input_character}};
95     unshift @{$self->{prev_input_character}}, $self->{next_input_character};
96    
97 wakaba 1.1 $self->{next_input_character} = -1 and return if $i >= length $$s;
98     $self->{next_input_character} = ord substr $$s, $i++, 1;
99 wakaba 1.3 $column++;
100 wakaba 1.1
101 wakaba 1.4 if ($self->{next_input_character} == 0x000A) { # LF
102     $line++;
103     $column = 0;
104     } elsif ($self->{next_input_character} == 0x000D) { # CR
105 wakaba 1.15 $i++ if substr ($$s, $i, 1) eq "\x0A";
106 wakaba 1.1 $self->{next_input_character} = 0x000A; # LF # MUST
107 wakaba 1.3 $line++;
108 wakaba 1.4 $column = 0;
109 wakaba 1.1 } elsif ($self->{next_input_character} > 0x10FFFF) {
110     $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
111     } elsif ($self->{next_input_character} == 0x0000) { # NULL
112 wakaba 1.8 $self->{parse_error}-> (type => 'NULL');
113 wakaba 1.1 $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
114     }
115     };
116 wakaba 1.13 $self->{prev_input_character} = [-1, -1, -1];
117     $self->{next_input_character} = -1;
118 wakaba 1.1
119 wakaba 1.3 my $onerror = $_[2] || sub {
120     my (%opt) = @_;
121     warn "Parse error ($opt{type}) at line $opt{line} column $opt{column}\n";
122     };
123     $self->{parse_error} = sub {
124     $onerror->(@_, line => $line, column => $column);
125 wakaba 1.1 };
126    
127     $self->_initialize_tokenizer;
128     $self->_initialize_tree_constructor;
129     $self->_construct_tree;
130     $self->_terminate_tree_constructor;
131    
132     return $self->{document};
133     } # parse_string
134    
135     sub new ($) {
136     my $class = shift;
137     my $self = bless {}, $class;
138     $self->{set_next_input_character} = sub {
139     $self->{next_input_character} = -1;
140     };
141     $self->{parse_error} = sub {
142     #
143     };
144     return $self;
145     } # new
146    
147     ## Implementations MUST act as if state machine in the spec
148    
149     sub _initialize_tokenizer ($) {
150     my $self = shift;
151     $self->{state} = 'data'; # MUST
152     $self->{content_model_flag} = 'PCDATA'; # be
153     undef $self->{current_token}; # start tag, end tag, comment, or DOCTYPE
154     undef $self->{current_attribute};
155     undef $self->{last_emitted_start_tag_name};
156     undef $self->{last_attribute_value_state};
157     $self->{char} = [];
158     # $self->{next_input_character}
159    
160     if (@{$self->{char}}) {
161     $self->{next_input_character} = shift @{$self->{char}};
162     } else {
163     $self->{set_next_input_character}->($self);
164     }
165    
166     $self->{token} = [];
167 wakaba 1.18 # $self->{escape}
168 wakaba 1.1 } # _initialize_tokenizer
169    
170     ## A token has:
171     ## ->{type} eq 'DOCTYPE', 'start tag', 'end tag', 'comment',
172     ## 'character', or 'end-of-file'
173 wakaba 1.18 ## ->{name} (DOCTYPE, start tag (tag name), end tag (tag name))
174     ## ->{public_identifier} (DOCTYPE)
175     ## ->{system_identifier} (DOCTYPE)
176     ## ->{correct} == 1 or 0 (DOCTYPE)
177 wakaba 1.1 ## ->{attributes} isa HASH (start tag, end tag)
178     ## ->{data} (comment, character)
179    
180     ## Emitted token MUST immediately be handled by the tree construction state.
181    
182     ## Before each step, UA MAY check to see if either one of the scripts in
183     ## "list of scripts that will execute as soon as possible" or the first
184     ## script in the "list of scripts that will execute asynchronously",
185     ## has completed loading. If one has, then it MUST be executed
186     ## and removed from the list.
187    
188     sub _get_next_token ($) {
189     my $self = shift;
190     if (@{$self->{token}}) {
191     return shift @{$self->{token}};
192     }
193    
194     A: {
195     if ($self->{state} eq 'data') {
196     if ($self->{next_input_character} == 0x0026) { # &
197     if ($self->{content_model_flag} eq 'PCDATA' or
198     $self->{content_model_flag} eq 'RCDATA') {
199     $self->{state} = 'entity data';
200    
201     if (@{$self->{char}}) {
202     $self->{next_input_character} = shift @{$self->{char}};
203     } else {
204     $self->{set_next_input_character}->($self);
205     }
206    
207     redo A;
208     } else {
209     #
210     }
211 wakaba 1.13 } elsif ($self->{next_input_character} == 0x002D) { # -
212     if ($self->{content_model_flag} eq 'RCDATA' or
213     $self->{content_model_flag} eq 'CDATA') {
214     unless ($self->{escape}) {
215     if ($self->{prev_input_character}->[0] == 0x002D and # -
216     $self->{prev_input_character}->[1] == 0x0021 and # !
217     $self->{prev_input_character}->[2] == 0x003C) { # <
218     $self->{escape} = 1;
219     }
220     }
221     }
222    
223     #
224 wakaba 1.1 } elsif ($self->{next_input_character} == 0x003C) { # <
225 wakaba 1.13 if ($self->{content_model_flag} eq 'PCDATA' or
226     (($self->{content_model_flag} eq 'CDATA' or
227     $self->{content_model_flag} eq 'RCDATA') and
228     not $self->{escape})) {
229 wakaba 1.1 $self->{state} = 'tag open';
230    
231     if (@{$self->{char}}) {
232     $self->{next_input_character} = shift @{$self->{char}};
233     } else {
234     $self->{set_next_input_character}->($self);
235     }
236    
237     redo A;
238     } else {
239     #
240     }
241 wakaba 1.13 } elsif ($self->{next_input_character} == 0x003E) { # >
242     if ($self->{escape} and
243     ($self->{content_model_flag} eq 'RCDATA' or
244     $self->{content_model_flag} eq 'CDATA')) {
245     if ($self->{prev_input_character}->[0] == 0x002D and # -
246     $self->{prev_input_character}->[1] == 0x002D) { # -
247     delete $self->{escape};
248     }
249     }
250    
251     #
252 wakaba 1.1 } elsif ($self->{next_input_character} == -1) {
253     return ({type => 'end-of-file'});
254     last A; ## TODO: ok?
255     }
256     # Anything else
257     my $token = {type => 'character',
258     data => chr $self->{next_input_character}};
259     ## Stay in the data state
260    
261     if (@{$self->{char}}) {
262     $self->{next_input_character} = shift @{$self->{char}};
263     } else {
264     $self->{set_next_input_character}->($self);
265     }
266    
267    
268     return ($token);
269    
270     redo A;
271     } elsif ($self->{state} eq 'entity data') {
272     ## (cannot happen in CDATA state)
273    
274     my $token = $self->_tokenize_attempt_to_consume_an_entity;
275    
276     $self->{state} = 'data';
277     # next-input-character is already done
278    
279     unless (defined $token) {
280     return ({type => 'character', data => '&'});
281     } else {
282     return ($token);
283     }
284    
285     redo A;
286     } elsif ($self->{state} eq 'tag open') {
287     if ($self->{content_model_flag} eq 'RCDATA' or
288     $self->{content_model_flag} eq 'CDATA') {
289     if ($self->{next_input_character} == 0x002F) { # /
290    
291     if (@{$self->{char}}) {
292     $self->{next_input_character} = shift @{$self->{char}};
293     } else {
294     $self->{set_next_input_character}->($self);
295     }
296    
297     $self->{state} = 'close tag open';
298     redo A;
299     } else {
300     ## reconsume
301     $self->{state} = 'data';
302    
303     return ({type => 'character', data => '<'});
304    
305     redo A;
306     }
307     } elsif ($self->{content_model_flag} eq 'PCDATA') {
308     if ($self->{next_input_character} == 0x0021) { # !
309     $self->{state} = 'markup declaration open';
310    
311     if (@{$self->{char}}) {
312     $self->{next_input_character} = shift @{$self->{char}};
313     } else {
314     $self->{set_next_input_character}->($self);
315     }
316    
317     redo A;
318     } elsif ($self->{next_input_character} == 0x002F) { # /
319     $self->{state} = 'close tag open';
320    
321     if (@{$self->{char}}) {
322     $self->{next_input_character} = shift @{$self->{char}};
323     } else {
324     $self->{set_next_input_character}->($self);
325     }
326    
327     redo A;
328     } elsif (0x0041 <= $self->{next_input_character} and
329     $self->{next_input_character} <= 0x005A) { # A..Z
330     $self->{current_token}
331     = {type => 'start tag',
332     tag_name => chr ($self->{next_input_character} + 0x0020)};
333     $self->{state} = 'tag name';
334    
335     if (@{$self->{char}}) {
336     $self->{next_input_character} = shift @{$self->{char}};
337     } else {
338     $self->{set_next_input_character}->($self);
339     }
340    
341     redo A;
342     } elsif (0x0061 <= $self->{next_input_character} and
343     $self->{next_input_character} <= 0x007A) { # a..z
344     $self->{current_token} = {type => 'start tag',
345     tag_name => chr ($self->{next_input_character})};
346     $self->{state} = 'tag name';
347    
348     if (@{$self->{char}}) {
349     $self->{next_input_character} = shift @{$self->{char}};
350     } else {
351     $self->{set_next_input_character}->($self);
352     }
353    
354     redo A;
355     } elsif ($self->{next_input_character} == 0x003E) { # >
356 wakaba 1.3 $self->{parse_error}-> (type => 'empty start tag');
357 wakaba 1.1 $self->{state} = 'data';
358    
359     if (@{$self->{char}}) {
360     $self->{next_input_character} = shift @{$self->{char}};
361     } else {
362     $self->{set_next_input_character}->($self);
363     }
364    
365    
366     return ({type => 'character', data => '<>'});
367    
368     redo A;
369     } elsif ($self->{next_input_character} == 0x003F) { # ?
370 wakaba 1.3 $self->{parse_error}-> (type => 'pio');
371 wakaba 1.1 $self->{state} = 'bogus comment';
372     ## $self->{next_input_character} is intentionally left as is
373     redo A;
374     } else {
375 wakaba 1.3 $self->{parse_error}-> (type => 'bare stago');
376 wakaba 1.1 $self->{state} = 'data';
377     ## reconsume
378    
379     return ({type => 'character', data => '<'});
380    
381     redo A;
382     }
383     } else {
384     die "$0: $self->{content_model_flag}: Unknown content model flag";
385     }
386     } elsif ($self->{state} eq 'close tag open') {
387     if ($self->{content_model_flag} eq 'RCDATA' or
388     $self->{content_model_flag} eq 'CDATA') {
389     my @next_char;
390     TAGNAME: for (my $i = 0; $i < length $self->{last_emitted_start_tag_name}; $i++) {
391     push @next_char, $self->{next_input_character};
392     my $c = ord substr ($self->{last_emitted_start_tag_name}, $i, 1);
393     my $C = 0x0061 <= $c && $c <= 0x007A ? $c - 0x0020 : $c;
394     if ($self->{next_input_character} == $c or $self->{next_input_character} == $C) {
395    
396     if (@{$self->{char}}) {
397     $self->{next_input_character} = shift @{$self->{char}};
398     } else {
399     $self->{set_next_input_character}->($self);
400     }
401    
402     next TAGNAME;
403     } else {
404     $self->{next_input_character} = shift @next_char; # reconsume
405     unshift @{$self->{char}}, (@next_char);
406     $self->{state} = 'data';
407    
408     return ({type => 'character', data => '</'});
409    
410     redo A;
411     }
412     }
413     push @next_char, $self->{next_input_character};
414    
415     unless ($self->{next_input_character} == 0x0009 or # HT
416     $self->{next_input_character} == 0x000A or # LF
417     $self->{next_input_character} == 0x000B or # VT
418     $self->{next_input_character} == 0x000C or # FF
419     $self->{next_input_character} == 0x0020 or # SP
420     $self->{next_input_character} == 0x003E or # >
421     $self->{next_input_character} == 0x002F or # /
422     $self->{next_input_character} == -1) {
423     $self->{next_input_character} = shift @next_char; # reconsume
424     unshift @{$self->{char}}, (@next_char);
425     $self->{state} = 'data';
426    
427     return ({type => 'character', data => '</'});
428    
429     redo A;
430     } else {
431     $self->{next_input_character} = shift @next_char;
432     unshift @{$self->{char}}, (@next_char);
433     # and consume...
434     }
435     }
436    
437     if (0x0041 <= $self->{next_input_character} and
438     $self->{next_input_character} <= 0x005A) { # A..Z
439     $self->{current_token} = {type => 'end tag',
440     tag_name => chr ($self->{next_input_character} + 0x0020)};
441     $self->{state} = 'tag name';
442    
443     if (@{$self->{char}}) {
444     $self->{next_input_character} = shift @{$self->{char}};
445     } else {
446     $self->{set_next_input_character}->($self);
447     }
448    
449     redo A;
450     } elsif (0x0061 <= $self->{next_input_character} and
451     $self->{next_input_character} <= 0x007A) { # a..z
452     $self->{current_token} = {type => 'end tag',
453     tag_name => chr ($self->{next_input_character})};
454     $self->{state} = 'tag name';
455    
456     if (@{$self->{char}}) {
457     $self->{next_input_character} = shift @{$self->{char}};
458     } else {
459     $self->{set_next_input_character}->($self);
460     }
461    
462     redo A;
463     } elsif ($self->{next_input_character} == 0x003E) { # >
464 wakaba 1.3 $self->{parse_error}-> (type => 'empty end tag');
465 wakaba 1.1 $self->{state} = 'data';
466    
467     if (@{$self->{char}}) {
468     $self->{next_input_character} = shift @{$self->{char}};
469     } else {
470     $self->{set_next_input_character}->($self);
471     }
472    
473     redo A;
474     } elsif ($self->{next_input_character} == -1) {
475 wakaba 1.3 $self->{parse_error}-> (type => 'bare etago');
476 wakaba 1.1 $self->{state} = 'data';
477     # reconsume
478    
479     return ({type => 'character', data => '</'});
480    
481     redo A;
482     } else {
483 wakaba 1.3 $self->{parse_error}-> (type => 'bogus end tag');
484 wakaba 1.1 $self->{state} = 'bogus comment';
485     ## $self->{next_input_character} is intentionally left as is
486     redo A;
487     }
488     } elsif ($self->{state} eq 'tag name') {
489     if ($self->{next_input_character} == 0x0009 or # HT
490     $self->{next_input_character} == 0x000A or # LF
491     $self->{next_input_character} == 0x000B or # VT
492     $self->{next_input_character} == 0x000C or # FF
493     $self->{next_input_character} == 0x0020) { # SP
494     $self->{state} = 'before attribute name';
495    
496     if (@{$self->{char}}) {
497     $self->{next_input_character} = shift @{$self->{char}};
498     } else {
499     $self->{set_next_input_character}->($self);
500     }
501    
502     redo A;
503     } elsif ($self->{next_input_character} == 0x003E) { # >
504     if ($self->{current_token}->{type} eq 'start tag') {
505     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
506     } elsif ($self->{current_token}->{type} eq 'end tag') {
507     $self->{content_model_flag} = 'PCDATA'; # MUST
508     if ($self->{current_token}->{attributes}) {
509 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
510 wakaba 1.1 }
511     } else {
512     die "$0: $self->{current_token}->{type}: Unknown token type";
513     }
514     $self->{state} = 'data';
515    
516     if (@{$self->{char}}) {
517     $self->{next_input_character} = shift @{$self->{char}};
518     } else {
519     $self->{set_next_input_character}->($self);
520     }
521    
522    
523     return ($self->{current_token}); # start tag or end tag
524     undef $self->{current_token};
525    
526     redo A;
527     } elsif (0x0041 <= $self->{next_input_character} and
528     $self->{next_input_character} <= 0x005A) { # A..Z
529     $self->{current_token}->{tag_name} .= chr ($self->{next_input_character} + 0x0020);
530     # start tag or end tag
531     ## Stay in this state
532    
533     if (@{$self->{char}}) {
534     $self->{next_input_character} = shift @{$self->{char}};
535     } else {
536     $self->{set_next_input_character}->($self);
537     }
538    
539     redo A;
540 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
541 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed tag');
542 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
543     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
544     } elsif ($self->{current_token}->{type} eq 'end tag') {
545     $self->{content_model_flag} = 'PCDATA'; # MUST
546     if ($self->{current_token}->{attributes}) {
547 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
548 wakaba 1.1 }
549     } else {
550     die "$0: $self->{current_token}->{type}: Unknown token type";
551     }
552     $self->{state} = 'data';
553     # reconsume
554    
555     return ($self->{current_token}); # start tag or end tag
556     undef $self->{current_token};
557    
558     redo A;
559     } elsif ($self->{next_input_character} == 0x002F) { # /
560    
561     if (@{$self->{char}}) {
562     $self->{next_input_character} = shift @{$self->{char}};
563     } else {
564     $self->{set_next_input_character}->($self);
565     }
566    
567     if ($self->{next_input_character} == 0x003E and # >
568     $self->{current_token}->{type} eq 'start tag' and
569     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
570     # permitted slash
571     #
572     } else {
573 wakaba 1.3 $self->{parse_error}-> (type => 'nestc');
574 wakaba 1.1 }
575     $self->{state} = 'before attribute name';
576     # next-input-character is already done
577     redo A;
578     } else {
579     $self->{current_token}->{tag_name} .= chr $self->{next_input_character};
580     # start tag or end tag
581     ## Stay in the state
582    
583     if (@{$self->{char}}) {
584     $self->{next_input_character} = shift @{$self->{char}};
585     } else {
586     $self->{set_next_input_character}->($self);
587     }
588    
589     redo A;
590     }
591     } elsif ($self->{state} eq 'before attribute name') {
592     if ($self->{next_input_character} == 0x0009 or # HT
593     $self->{next_input_character} == 0x000A or # LF
594     $self->{next_input_character} == 0x000B or # VT
595     $self->{next_input_character} == 0x000C or # FF
596     $self->{next_input_character} == 0x0020) { # SP
597     ## Stay in the state
598    
599     if (@{$self->{char}}) {
600     $self->{next_input_character} = shift @{$self->{char}};
601     } else {
602     $self->{set_next_input_character}->($self);
603     }
604    
605     redo A;
606     } elsif ($self->{next_input_character} == 0x003E) { # >
607     if ($self->{current_token}->{type} eq 'start tag') {
608     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
609     } elsif ($self->{current_token}->{type} eq 'end tag') {
610     $self->{content_model_flag} = 'PCDATA'; # MUST
611     if ($self->{current_token}->{attributes}) {
612 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
613 wakaba 1.1 }
614     } else {
615     die "$0: $self->{current_token}->{type}: Unknown token type";
616     }
617     $self->{state} = 'data';
618    
619     if (@{$self->{char}}) {
620     $self->{next_input_character} = shift @{$self->{char}};
621     } else {
622     $self->{set_next_input_character}->($self);
623     }
624    
625    
626     return ($self->{current_token}); # start tag or end tag
627     undef $self->{current_token};
628    
629     redo A;
630     } elsif (0x0041 <= $self->{next_input_character} and
631     $self->{next_input_character} <= 0x005A) { # A..Z
632     $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
633     value => ''};
634     $self->{state} = 'attribute name';
635    
636     if (@{$self->{char}}) {
637     $self->{next_input_character} = shift @{$self->{char}};
638     } else {
639     $self->{set_next_input_character}->($self);
640     }
641    
642     redo A;
643     } elsif ($self->{next_input_character} == 0x002F) { # /
644    
645     if (@{$self->{char}}) {
646     $self->{next_input_character} = shift @{$self->{char}};
647     } else {
648     $self->{set_next_input_character}->($self);
649     }
650    
651     if ($self->{next_input_character} == 0x003E and # >
652     $self->{current_token}->{type} eq 'start tag' and
653     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
654     # permitted slash
655     #
656     } else {
657 wakaba 1.3 $self->{parse_error}-> (type => 'nestc');
658 wakaba 1.1 }
659     ## Stay in the state
660     # next-input-character is already done
661     redo A;
662 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
663 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed tag');
664 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
665     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
666     } elsif ($self->{current_token}->{type} eq 'end tag') {
667     $self->{content_model_flag} = 'PCDATA'; # MUST
668     if ($self->{current_token}->{attributes}) {
669 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
670 wakaba 1.1 }
671     } else {
672     die "$0: $self->{current_token}->{type}: Unknown token type";
673     }
674     $self->{state} = 'data';
675     # reconsume
676    
677     return ($self->{current_token}); # start tag or end tag
678     undef $self->{current_token};
679    
680     redo A;
681     } else {
682     $self->{current_attribute} = {name => chr ($self->{next_input_character}),
683     value => ''};
684     $self->{state} = 'attribute name';
685    
686     if (@{$self->{char}}) {
687     $self->{next_input_character} = shift @{$self->{char}};
688     } else {
689     $self->{set_next_input_character}->($self);
690     }
691    
692     redo A;
693     }
694     } elsif ($self->{state} eq 'attribute name') {
695     my $before_leave = sub {
696     if (exists $self->{current_token}->{attributes} # start tag or end tag
697     ->{$self->{current_attribute}->{name}}) { # MUST
698 wakaba 1.3 $self->{parse_error}-> (type => 'dupulicate attribute');
699 wakaba 1.1 ## Discard $self->{current_attribute} # MUST
700     } else {
701     $self->{current_token}->{attributes}->{$self->{current_attribute}->{name}}
702     = $self->{current_attribute};
703     }
704     }; # $before_leave
705    
706     if ($self->{next_input_character} == 0x0009 or # HT
707     $self->{next_input_character} == 0x000A or # LF
708     $self->{next_input_character} == 0x000B or # VT
709     $self->{next_input_character} == 0x000C or # FF
710     $self->{next_input_character} == 0x0020) { # SP
711     $before_leave->();
712     $self->{state} = 'after attribute name';
713    
714     if (@{$self->{char}}) {
715     $self->{next_input_character} = shift @{$self->{char}};
716     } else {
717     $self->{set_next_input_character}->($self);
718     }
719    
720     redo A;
721     } elsif ($self->{next_input_character} == 0x003D) { # =
722     $before_leave->();
723     $self->{state} = 'before attribute value';
724    
725     if (@{$self->{char}}) {
726     $self->{next_input_character} = shift @{$self->{char}};
727     } else {
728     $self->{set_next_input_character}->($self);
729     }
730    
731     redo A;
732     } elsif ($self->{next_input_character} == 0x003E) { # >
733     $before_leave->();
734     if ($self->{current_token}->{type} eq 'start tag') {
735     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
736     } elsif ($self->{current_token}->{type} eq 'end tag') {
737     $self->{content_model_flag} = 'PCDATA'; # MUST
738     if ($self->{current_token}->{attributes}) {
739 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
740 wakaba 1.1 }
741     } else {
742     die "$0: $self->{current_token}->{type}: Unknown token type";
743     }
744     $self->{state} = 'data';
745    
746     if (@{$self->{char}}) {
747     $self->{next_input_character} = shift @{$self->{char}};
748     } else {
749     $self->{set_next_input_character}->($self);
750     }
751    
752    
753     return ($self->{current_token}); # start tag or end tag
754     undef $self->{current_token};
755    
756     redo A;
757     } elsif (0x0041 <= $self->{next_input_character} and
758     $self->{next_input_character} <= 0x005A) { # A..Z
759     $self->{current_attribute}->{name} .= chr ($self->{next_input_character} + 0x0020);
760     ## Stay in the state
761    
762     if (@{$self->{char}}) {
763     $self->{next_input_character} = shift @{$self->{char}};
764     } else {
765     $self->{set_next_input_character}->($self);
766     }
767    
768     redo A;
769     } elsif ($self->{next_input_character} == 0x002F) { # /
770     $before_leave->();
771    
772     if (@{$self->{char}}) {
773     $self->{next_input_character} = shift @{$self->{char}};
774     } else {
775     $self->{set_next_input_character}->($self);
776     }
777    
778     if ($self->{next_input_character} == 0x003E and # >
779     $self->{current_token}->{type} eq 'start tag' and
780     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
781     # permitted slash
782     #
783     } else {
784 wakaba 1.3 $self->{parse_error}-> (type => 'nestc');
785 wakaba 1.1 }
786     $self->{state} = 'before attribute name';
787     # next-input-character is already done
788     redo A;
789 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
790 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed tag');
791 wakaba 1.1 $before_leave->();
792     if ($self->{current_token}->{type} eq 'start tag') {
793     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
794     } elsif ($self->{current_token}->{type} eq 'end tag') {
795     $self->{content_model_flag} = 'PCDATA'; # MUST
796     if ($self->{current_token}->{attributes}) {
797 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
798 wakaba 1.1 }
799     } else {
800     die "$0: $self->{current_token}->{type}: Unknown token type";
801     }
802     $self->{state} = 'data';
803     # reconsume
804    
805     return ($self->{current_token}); # start tag or end tag
806     undef $self->{current_token};
807    
808     redo A;
809     } else {
810     $self->{current_attribute}->{name} .= chr ($self->{next_input_character});
811     ## Stay in the state
812    
813     if (@{$self->{char}}) {
814     $self->{next_input_character} = shift @{$self->{char}};
815     } else {
816     $self->{set_next_input_character}->($self);
817     }
818    
819     redo A;
820     }
821     } elsif ($self->{state} eq 'after attribute name') {
822     if ($self->{next_input_character} == 0x0009 or # HT
823     $self->{next_input_character} == 0x000A or # LF
824     $self->{next_input_character} == 0x000B or # VT
825     $self->{next_input_character} == 0x000C or # FF
826     $self->{next_input_character} == 0x0020) { # SP
827     ## Stay in the state
828    
829     if (@{$self->{char}}) {
830     $self->{next_input_character} = shift @{$self->{char}};
831     } else {
832     $self->{set_next_input_character}->($self);
833     }
834    
835     redo A;
836     } elsif ($self->{next_input_character} == 0x003D) { # =
837     $self->{state} = 'before attribute value';
838    
839     if (@{$self->{char}}) {
840     $self->{next_input_character} = shift @{$self->{char}};
841     } else {
842     $self->{set_next_input_character}->($self);
843     }
844    
845     redo A;
846     } elsif ($self->{next_input_character} == 0x003E) { # >
847     if ($self->{current_token}->{type} eq 'start tag') {
848     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
849     } elsif ($self->{current_token}->{type} eq 'end tag') {
850     $self->{content_model_flag} = 'PCDATA'; # MUST
851     if ($self->{current_token}->{attributes}) {
852 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
853 wakaba 1.1 }
854     } else {
855     die "$0: $self->{current_token}->{type}: Unknown token type";
856     }
857     $self->{state} = 'data';
858    
859     if (@{$self->{char}}) {
860     $self->{next_input_character} = shift @{$self->{char}};
861     } else {
862     $self->{set_next_input_character}->($self);
863     }
864    
865    
866     return ($self->{current_token}); # start tag or end tag
867     undef $self->{current_token};
868    
869     redo A;
870     } elsif (0x0041 <= $self->{next_input_character} and
871     $self->{next_input_character} <= 0x005A) { # A..Z
872     $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
873     value => ''};
874     $self->{state} = 'attribute name';
875    
876     if (@{$self->{char}}) {
877     $self->{next_input_character} = shift @{$self->{char}};
878     } else {
879     $self->{set_next_input_character}->($self);
880     }
881    
882     redo A;
883     } elsif ($self->{next_input_character} == 0x002F) { # /
884    
885     if (@{$self->{char}}) {
886     $self->{next_input_character} = shift @{$self->{char}};
887     } else {
888     $self->{set_next_input_character}->($self);
889     }
890    
891     if ($self->{next_input_character} == 0x003E and # >
892     $self->{current_token}->{type} eq 'start tag' and
893     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
894     # permitted slash
895     #
896     } else {
897 wakaba 1.3 $self->{parse_error}-> (type => 'nestc');
898 wakaba 1.1 }
899     $self->{state} = 'before attribute name';
900     # next-input-character is already done
901     redo A;
902 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
903 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed tag');
904 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
905     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
906     } elsif ($self->{current_token}->{type} eq 'end tag') {
907     $self->{content_model_flag} = 'PCDATA'; # MUST
908     if ($self->{current_token}->{attributes}) {
909 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
910 wakaba 1.1 }
911     } else {
912     die "$0: $self->{current_token}->{type}: Unknown token type";
913     }
914     $self->{state} = 'data';
915     # reconsume
916    
917     return ($self->{current_token}); # start tag or end tag
918     undef $self->{current_token};
919    
920     redo A;
921     } else {
922     $self->{current_attribute} = {name => chr ($self->{next_input_character}),
923     value => ''};
924     $self->{state} = 'attribute name';
925    
926     if (@{$self->{char}}) {
927     $self->{next_input_character} = shift @{$self->{char}};
928     } else {
929     $self->{set_next_input_character}->($self);
930     }
931    
932     redo A;
933     }
934     } elsif ($self->{state} eq 'before attribute value') {
935     if ($self->{next_input_character} == 0x0009 or # HT
936     $self->{next_input_character} == 0x000A or # LF
937     $self->{next_input_character} == 0x000B or # VT
938     $self->{next_input_character} == 0x000C or # FF
939     $self->{next_input_character} == 0x0020) { # SP
940     ## Stay in the state
941    
942     if (@{$self->{char}}) {
943     $self->{next_input_character} = shift @{$self->{char}};
944     } else {
945     $self->{set_next_input_character}->($self);
946     }
947    
948     redo A;
949     } elsif ($self->{next_input_character} == 0x0022) { # "
950     $self->{state} = 'attribute value (double-quoted)';
951    
952     if (@{$self->{char}}) {
953     $self->{next_input_character} = shift @{$self->{char}};
954     } else {
955     $self->{set_next_input_character}->($self);
956     }
957    
958     redo A;
959     } elsif ($self->{next_input_character} == 0x0026) { # &
960     $self->{state} = 'attribute value (unquoted)';
961     ## reconsume
962     redo A;
963     } elsif ($self->{next_input_character} == 0x0027) { # '
964     $self->{state} = 'attribute value (single-quoted)';
965    
966     if (@{$self->{char}}) {
967     $self->{next_input_character} = shift @{$self->{char}};
968     } else {
969     $self->{set_next_input_character}->($self);
970     }
971    
972     redo A;
973     } elsif ($self->{next_input_character} == 0x003E) { # >
974     if ($self->{current_token}->{type} eq 'start tag') {
975     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
976     } elsif ($self->{current_token}->{type} eq 'end tag') {
977     $self->{content_model_flag} = 'PCDATA'; # MUST
978     if ($self->{current_token}->{attributes}) {
979 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
980 wakaba 1.1 }
981     } else {
982     die "$0: $self->{current_token}->{type}: Unknown token type";
983     }
984     $self->{state} = 'data';
985    
986     if (@{$self->{char}}) {
987     $self->{next_input_character} = shift @{$self->{char}};
988     } else {
989     $self->{set_next_input_character}->($self);
990     }
991    
992    
993     return ($self->{current_token}); # start tag or end tag
994     undef $self->{current_token};
995    
996     redo A;
997 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
998 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed tag');
999 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1000     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1001     } elsif ($self->{current_token}->{type} eq 'end tag') {
1002     $self->{content_model_flag} = 'PCDATA'; # MUST
1003     if ($self->{current_token}->{attributes}) {
1004 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
1005 wakaba 1.1 }
1006     } else {
1007     die "$0: $self->{current_token}->{type}: Unknown token type";
1008     }
1009     $self->{state} = 'data';
1010     ## reconsume
1011    
1012     return ($self->{current_token}); # start tag or end tag
1013     undef $self->{current_token};
1014    
1015     redo A;
1016     } else {
1017     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1018     $self->{state} = 'attribute value (unquoted)';
1019    
1020     if (@{$self->{char}}) {
1021     $self->{next_input_character} = shift @{$self->{char}};
1022     } else {
1023     $self->{set_next_input_character}->($self);
1024     }
1025    
1026     redo A;
1027     }
1028     } elsif ($self->{state} eq 'attribute value (double-quoted)') {
1029     if ($self->{next_input_character} == 0x0022) { # "
1030     $self->{state} = 'before attribute name';
1031    
1032     if (@{$self->{char}}) {
1033     $self->{next_input_character} = shift @{$self->{char}};
1034     } else {
1035     $self->{set_next_input_character}->($self);
1036     }
1037    
1038     redo A;
1039     } elsif ($self->{next_input_character} == 0x0026) { # &
1040     $self->{last_attribute_value_state} = 'attribute value (double-quoted)';
1041     $self->{state} = 'entity in attribute value';
1042    
1043     if (@{$self->{char}}) {
1044     $self->{next_input_character} = shift @{$self->{char}};
1045     } else {
1046     $self->{set_next_input_character}->($self);
1047     }
1048    
1049     redo A;
1050     } elsif ($self->{next_input_character} == -1) {
1051 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed attribute value');
1052 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1053     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1054     } elsif ($self->{current_token}->{type} eq 'end tag') {
1055     $self->{content_model_flag} = 'PCDATA'; # MUST
1056     if ($self->{current_token}->{attributes}) {
1057 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
1058 wakaba 1.1 }
1059     } else {
1060     die "$0: $self->{current_token}->{type}: Unknown token type";
1061     }
1062     $self->{state} = 'data';
1063     ## reconsume
1064    
1065     return ($self->{current_token}); # start tag or end tag
1066     undef $self->{current_token};
1067    
1068     redo A;
1069     } else {
1070     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1071     ## Stay in the state
1072    
1073     if (@{$self->{char}}) {
1074     $self->{next_input_character} = shift @{$self->{char}};
1075     } else {
1076     $self->{set_next_input_character}->($self);
1077     }
1078    
1079     redo A;
1080     }
1081     } elsif ($self->{state} eq 'attribute value (single-quoted)') {
1082     if ($self->{next_input_character} == 0x0027) { # '
1083     $self->{state} = 'before attribute name';
1084    
1085     if (@{$self->{char}}) {
1086     $self->{next_input_character} = shift @{$self->{char}};
1087     } else {
1088     $self->{set_next_input_character}->($self);
1089     }
1090    
1091     redo A;
1092     } elsif ($self->{next_input_character} == 0x0026) { # &
1093     $self->{last_attribute_value_state} = 'attribute value (single-quoted)';
1094     $self->{state} = 'entity in attribute value';
1095    
1096     if (@{$self->{char}}) {
1097     $self->{next_input_character} = shift @{$self->{char}};
1098     } else {
1099     $self->{set_next_input_character}->($self);
1100     }
1101    
1102     redo A;
1103     } elsif ($self->{next_input_character} == -1) {
1104 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed attribute value');
1105 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1106     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1107     } elsif ($self->{current_token}->{type} eq 'end tag') {
1108     $self->{content_model_flag} = 'PCDATA'; # MUST
1109     if ($self->{current_token}->{attributes}) {
1110 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
1111 wakaba 1.1 }
1112     } else {
1113     die "$0: $self->{current_token}->{type}: Unknown token type";
1114     }
1115     $self->{state} = 'data';
1116     ## reconsume
1117    
1118     return ($self->{current_token}); # start tag or end tag
1119     undef $self->{current_token};
1120    
1121     redo A;
1122     } else {
1123     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1124     ## Stay in the state
1125    
1126     if (@{$self->{char}}) {
1127     $self->{next_input_character} = shift @{$self->{char}};
1128     } else {
1129     $self->{set_next_input_character}->($self);
1130     }
1131    
1132     redo A;
1133     }
1134     } elsif ($self->{state} eq 'attribute value (unquoted)') {
1135     if ($self->{next_input_character} == 0x0009 or # HT
1136     $self->{next_input_character} == 0x000A or # LF
1137     $self->{next_input_character} == 0x000B or # HT
1138     $self->{next_input_character} == 0x000C or # FF
1139     $self->{next_input_character} == 0x0020) { # SP
1140     $self->{state} = 'before attribute name';
1141    
1142     if (@{$self->{char}}) {
1143     $self->{next_input_character} = shift @{$self->{char}};
1144     } else {
1145     $self->{set_next_input_character}->($self);
1146     }
1147    
1148     redo A;
1149     } elsif ($self->{next_input_character} == 0x0026) { # &
1150     $self->{last_attribute_value_state} = 'attribute value (unquoted)';
1151     $self->{state} = 'entity in attribute value';
1152    
1153     if (@{$self->{char}}) {
1154     $self->{next_input_character} = shift @{$self->{char}};
1155     } else {
1156     $self->{set_next_input_character}->($self);
1157     }
1158    
1159     redo A;
1160     } elsif ($self->{next_input_character} == 0x003E) { # >
1161     if ($self->{current_token}->{type} eq 'start tag') {
1162     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1163     } elsif ($self->{current_token}->{type} eq 'end tag') {
1164     $self->{content_model_flag} = 'PCDATA'; # MUST
1165     if ($self->{current_token}->{attributes}) {
1166 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
1167 wakaba 1.1 }
1168     } else {
1169     die "$0: $self->{current_token}->{type}: Unknown token type";
1170     }
1171     $self->{state} = 'data';
1172    
1173     if (@{$self->{char}}) {
1174     $self->{next_input_character} = shift @{$self->{char}};
1175     } else {
1176     $self->{set_next_input_character}->($self);
1177     }
1178    
1179    
1180     return ($self->{current_token}); # start tag or end tag
1181     undef $self->{current_token};
1182    
1183     redo A;
1184 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
1185 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed tag');
1186 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1187     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1188     } elsif ($self->{current_token}->{type} eq 'end tag') {
1189     $self->{content_model_flag} = 'PCDATA'; # MUST
1190     if ($self->{current_token}->{attributes}) {
1191 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
1192 wakaba 1.1 }
1193     } else {
1194     die "$0: $self->{current_token}->{type}: Unknown token type";
1195     }
1196     $self->{state} = 'data';
1197     ## reconsume
1198    
1199     return ($self->{current_token}); # start tag or end tag
1200     undef $self->{current_token};
1201    
1202     redo A;
1203     } else {
1204     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1205     ## Stay in the state
1206    
1207     if (@{$self->{char}}) {
1208     $self->{next_input_character} = shift @{$self->{char}};
1209     } else {
1210     $self->{set_next_input_character}->($self);
1211     }
1212    
1213     redo A;
1214     }
1215     } elsif ($self->{state} eq 'entity in attribute value') {
1216     my $token = $self->_tokenize_attempt_to_consume_an_entity;
1217    
1218     unless (defined $token) {
1219     $self->{current_attribute}->{value} .= '&';
1220     } else {
1221     $self->{current_attribute}->{value} .= $token->{data};
1222     ## ISSUE: spec says "append the returned character token to the current attribute's value"
1223     }
1224    
1225     $self->{state} = $self->{last_attribute_value_state};
1226     # next-input-character is already done
1227     redo A;
1228     } elsif ($self->{state} eq 'bogus comment') {
1229     ## (only happen if PCDATA state)
1230    
1231     my $token = {type => 'comment', data => ''};
1232    
1233     BC: {
1234     if ($self->{next_input_character} == 0x003E) { # >
1235     $self->{state} = 'data';
1236    
1237     if (@{$self->{char}}) {
1238     $self->{next_input_character} = shift @{$self->{char}};
1239     } else {
1240     $self->{set_next_input_character}->($self);
1241     }
1242    
1243    
1244     return ($token);
1245    
1246     redo A;
1247     } elsif ($self->{next_input_character} == -1) {
1248     $self->{state} = 'data';
1249     ## reconsume
1250    
1251     return ($token);
1252    
1253     redo A;
1254     } else {
1255     $token->{data} .= chr ($self->{next_input_character});
1256    
1257     if (@{$self->{char}}) {
1258     $self->{next_input_character} = shift @{$self->{char}};
1259     } else {
1260     $self->{set_next_input_character}->($self);
1261     }
1262    
1263     redo BC;
1264     }
1265     } # BC
1266     } elsif ($self->{state} eq 'markup declaration open') {
1267     ## (only happen if PCDATA state)
1268    
1269     my @next_char;
1270     push @next_char, $self->{next_input_character};
1271    
1272     if ($self->{next_input_character} == 0x002D) { # -
1273    
1274     if (@{$self->{char}}) {
1275     $self->{next_input_character} = shift @{$self->{char}};
1276     } else {
1277     $self->{set_next_input_character}->($self);
1278     }
1279    
1280     push @next_char, $self->{next_input_character};
1281     if ($self->{next_input_character} == 0x002D) { # -
1282     $self->{current_token} = {type => 'comment', data => ''};
1283     $self->{state} = 'comment';
1284    
1285     if (@{$self->{char}}) {
1286     $self->{next_input_character} = shift @{$self->{char}};
1287     } else {
1288     $self->{set_next_input_character}->($self);
1289     }
1290    
1291     redo A;
1292     }
1293     } elsif ($self->{next_input_character} == 0x0044 or # D
1294     $self->{next_input_character} == 0x0064) { # d
1295    
1296     if (@{$self->{char}}) {
1297     $self->{next_input_character} = shift @{$self->{char}};
1298     } else {
1299     $self->{set_next_input_character}->($self);
1300     }
1301    
1302     push @next_char, $self->{next_input_character};
1303     if ($self->{next_input_character} == 0x004F or # O
1304     $self->{next_input_character} == 0x006F) { # o
1305    
1306     if (@{$self->{char}}) {
1307     $self->{next_input_character} = shift @{$self->{char}};
1308     } else {
1309     $self->{set_next_input_character}->($self);
1310     }
1311    
1312     push @next_char, $self->{next_input_character};
1313     if ($self->{next_input_character} == 0x0043 or # C
1314     $self->{next_input_character} == 0x0063) { # c
1315    
1316     if (@{$self->{char}}) {
1317     $self->{next_input_character} = shift @{$self->{char}};
1318     } else {
1319     $self->{set_next_input_character}->($self);
1320     }
1321    
1322     push @next_char, $self->{next_input_character};
1323     if ($self->{next_input_character} == 0x0054 or # T
1324     $self->{next_input_character} == 0x0074) { # t
1325    
1326     if (@{$self->{char}}) {
1327     $self->{next_input_character} = shift @{$self->{char}};
1328     } else {
1329     $self->{set_next_input_character}->($self);
1330     }
1331    
1332     push @next_char, $self->{next_input_character};
1333     if ($self->{next_input_character} == 0x0059 or # Y
1334     $self->{next_input_character} == 0x0079) { # y
1335    
1336     if (@{$self->{char}}) {
1337     $self->{next_input_character} = shift @{$self->{char}};
1338     } else {
1339     $self->{set_next_input_character}->($self);
1340     }
1341    
1342     push @next_char, $self->{next_input_character};
1343     if ($self->{next_input_character} == 0x0050 or # P
1344     $self->{next_input_character} == 0x0070) { # p
1345    
1346     if (@{$self->{char}}) {
1347     $self->{next_input_character} = shift @{$self->{char}};
1348     } else {
1349     $self->{set_next_input_character}->($self);
1350     }
1351    
1352     push @next_char, $self->{next_input_character};
1353     if ($self->{next_input_character} == 0x0045 or # E
1354     $self->{next_input_character} == 0x0065) { # e
1355     ## ISSUE: What a stupid code this is!
1356     $self->{state} = 'DOCTYPE';
1357    
1358     if (@{$self->{char}}) {
1359     $self->{next_input_character} = shift @{$self->{char}};
1360     } else {
1361     $self->{set_next_input_character}->($self);
1362     }
1363    
1364     redo A;
1365     }
1366     }
1367     }
1368     }
1369     }
1370     }
1371     }
1372    
1373 wakaba 1.3 $self->{parse_error}-> (type => 'bogus comment open');
1374 wakaba 1.1 $self->{next_input_character} = shift @next_char;
1375     unshift @{$self->{char}}, (@next_char);
1376     $self->{state} = 'bogus comment';
1377     redo A;
1378    
1379     ## ISSUE: typos in spec: chacacters, is is a parse error
1380     ## ISSUE: spec is somewhat unclear on "is the first character that will be in the comment"; what is "that will be in the comment" is what the algorithm defines, isn't it?
1381     } elsif ($self->{state} eq 'comment') {
1382     if ($self->{next_input_character} == 0x002D) { # -
1383     $self->{state} = 'comment dash';
1384    
1385     if (@{$self->{char}}) {
1386     $self->{next_input_character} = shift @{$self->{char}};
1387     } else {
1388     $self->{set_next_input_character}->($self);
1389     }
1390    
1391     redo A;
1392     } elsif ($self->{next_input_character} == -1) {
1393 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed comment');
1394 wakaba 1.1 $self->{state} = 'data';
1395     ## reconsume
1396    
1397     return ($self->{current_token}); # comment
1398     undef $self->{current_token};
1399    
1400     redo A;
1401     } else {
1402     $self->{current_token}->{data} .= chr ($self->{next_input_character}); # comment
1403     ## Stay in the state
1404    
1405     if (@{$self->{char}}) {
1406     $self->{next_input_character} = shift @{$self->{char}};
1407     } else {
1408     $self->{set_next_input_character}->($self);
1409     }
1410    
1411     redo A;
1412     }
1413     } elsif ($self->{state} eq 'comment dash') {
1414     if ($self->{next_input_character} == 0x002D) { # -
1415     $self->{state} = 'comment end';
1416    
1417     if (@{$self->{char}}) {
1418     $self->{next_input_character} = shift @{$self->{char}};
1419     } else {
1420     $self->{set_next_input_character}->($self);
1421     }
1422    
1423     redo A;
1424     } elsif ($self->{next_input_character} == -1) {
1425 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed comment');
1426 wakaba 1.1 $self->{state} = 'data';
1427     ## reconsume
1428    
1429     return ($self->{current_token}); # comment
1430     undef $self->{current_token};
1431    
1432     redo A;
1433     } else {
1434     $self->{current_token}->{data} .= '-' . chr ($self->{next_input_character}); # comment
1435     $self->{state} = 'comment';
1436    
1437     if (@{$self->{char}}) {
1438     $self->{next_input_character} = shift @{$self->{char}};
1439     } else {
1440     $self->{set_next_input_character}->($self);
1441     }
1442    
1443     redo A;
1444     }
1445     } elsif ($self->{state} eq 'comment end') {
1446     if ($self->{next_input_character} == 0x003E) { # >
1447     $self->{state} = 'data';
1448    
1449     if (@{$self->{char}}) {
1450     $self->{next_input_character} = shift @{$self->{char}};
1451     } else {
1452     $self->{set_next_input_character}->($self);
1453     }
1454    
1455    
1456     return ($self->{current_token}); # comment
1457     undef $self->{current_token};
1458    
1459     redo A;
1460     } elsif ($self->{next_input_character} == 0x002D) { # -
1461 wakaba 1.3 $self->{parse_error}-> (type => 'dash in comment');
1462 wakaba 1.1 $self->{current_token}->{data} .= '-'; # comment
1463     ## Stay in the state
1464    
1465     if (@{$self->{char}}) {
1466     $self->{next_input_character} = shift @{$self->{char}};
1467     } else {
1468     $self->{set_next_input_character}->($self);
1469     }
1470    
1471     redo A;
1472     } elsif ($self->{next_input_character} == -1) {
1473 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed comment');
1474 wakaba 1.1 $self->{state} = 'data';
1475     ## reconsume
1476    
1477     return ($self->{current_token}); # comment
1478     undef $self->{current_token};
1479    
1480     redo A;
1481     } else {
1482 wakaba 1.3 $self->{parse_error}-> (type => 'dash in comment');
1483 wakaba 1.1 $self->{current_token}->{data} .= '--' . chr ($self->{next_input_character}); # comment
1484     $self->{state} = 'comment';
1485    
1486     if (@{$self->{char}}) {
1487     $self->{next_input_character} = shift @{$self->{char}};
1488     } else {
1489     $self->{set_next_input_character}->($self);
1490     }
1491    
1492     redo A;
1493     }
1494     } elsif ($self->{state} eq 'DOCTYPE') {
1495     if ($self->{next_input_character} == 0x0009 or # HT
1496     $self->{next_input_character} == 0x000A or # LF
1497     $self->{next_input_character} == 0x000B or # VT
1498     $self->{next_input_character} == 0x000C or # FF
1499     $self->{next_input_character} == 0x0020) { # SP
1500     $self->{state} = 'before DOCTYPE name';
1501    
1502     if (@{$self->{char}}) {
1503     $self->{next_input_character} = shift @{$self->{char}};
1504     } else {
1505     $self->{set_next_input_character}->($self);
1506     }
1507    
1508     redo A;
1509     } else {
1510 wakaba 1.3 $self->{parse_error}-> (type => 'no space before DOCTYPE name');
1511 wakaba 1.1 $self->{state} = 'before DOCTYPE name';
1512     ## reconsume
1513     redo A;
1514     }
1515     } elsif ($self->{state} eq 'before DOCTYPE name') {
1516     if ($self->{next_input_character} == 0x0009 or # HT
1517     $self->{next_input_character} == 0x000A or # LF
1518     $self->{next_input_character} == 0x000B or # VT
1519     $self->{next_input_character} == 0x000C or # FF
1520     $self->{next_input_character} == 0x0020) { # SP
1521     ## Stay in the state
1522    
1523     if (@{$self->{char}}) {
1524     $self->{next_input_character} = shift @{$self->{char}};
1525     } else {
1526     $self->{set_next_input_character}->($self);
1527     }
1528    
1529     redo A;
1530 wakaba 1.18 } elsif ($self->{next_input_character} == 0x003E) { # >
1531     $self->{parse_error}-> (type => 'no DOCTYPE name');
1532     $self->{state} = 'data';
1533    
1534     if (@{$self->{char}}) {
1535     $self->{next_input_character} = shift @{$self->{char}};
1536     } else {
1537     $self->{set_next_input_character}->($self);
1538     }
1539    
1540    
1541     return ({type => 'DOCTYPE'}); # incorrect
1542    
1543     redo A;
1544     } elsif ($self->{next_input_character} == -1) {
1545     $self->{parse_error}-> (type => 'no DOCTYPE name');
1546     $self->{state} = 'data';
1547     ## reconsume
1548    
1549     return ({type => 'DOCTYPE'}); # incorrect
1550    
1551     redo A;
1552     } else {
1553     $self->{current_token}
1554     = {type => 'DOCTYPE',
1555     name => chr ($self->{next_input_character}),
1556     correct => 1};
1557 wakaba 1.4 ## ISSUE: "Set the token's name name to the" in the spec
1558 wakaba 1.1 $self->{state} = 'DOCTYPE name';
1559    
1560     if (@{$self->{char}}) {
1561     $self->{next_input_character} = shift @{$self->{char}};
1562     } else {
1563     $self->{set_next_input_character}->($self);
1564     }
1565    
1566     redo A;
1567 wakaba 1.18 }
1568     } elsif ($self->{state} eq 'DOCTYPE name') {
1569     ## ISSUE: Redundant "First," in the spec.
1570     if ($self->{next_input_character} == 0x0009 or # HT
1571     $self->{next_input_character} == 0x000A or # LF
1572     $self->{next_input_character} == 0x000B or # VT
1573     $self->{next_input_character} == 0x000C or # FF
1574     $self->{next_input_character} == 0x0020) { # SP
1575     $self->{state} = 'after DOCTYPE name';
1576    
1577     if (@{$self->{char}}) {
1578     $self->{next_input_character} = shift @{$self->{char}};
1579     } else {
1580     $self->{set_next_input_character}->($self);
1581     }
1582    
1583     redo A;
1584 wakaba 1.1 } elsif ($self->{next_input_character} == 0x003E) { # >
1585     $self->{state} = 'data';
1586    
1587     if (@{$self->{char}}) {
1588     $self->{next_input_character} = shift @{$self->{char}};
1589     } else {
1590     $self->{set_next_input_character}->($self);
1591     }
1592    
1593    
1594 wakaba 1.18 return ($self->{current_token}); # DOCTYPE
1595     undef $self->{current_token};
1596 wakaba 1.1
1597     redo A;
1598 wakaba 1.18 } elsif ($self->{next_input_character} == -1) {
1599     $self->{parse_error}-> (type => 'unclosed DOCTYPE');
1600 wakaba 1.1 $self->{state} = 'data';
1601     ## reconsume
1602    
1603 wakaba 1.18 delete $self->{current_token}->{correct};
1604     return ($self->{current_token}); # DOCTYPE
1605     undef $self->{current_token};
1606 wakaba 1.1
1607     redo A;
1608     } else {
1609 wakaba 1.18 $self->{current_token}->{name}
1610     .= chr ($self->{next_input_character}); # DOCTYPE
1611     ## Stay in the state
1612 wakaba 1.1
1613     if (@{$self->{char}}) {
1614     $self->{next_input_character} = shift @{$self->{char}};
1615     } else {
1616     $self->{set_next_input_character}->($self);
1617     }
1618    
1619     redo A;
1620     }
1621 wakaba 1.18 } elsif ($self->{state} eq 'after DOCTYPE name') {
1622 wakaba 1.1 if ($self->{next_input_character} == 0x0009 or # HT
1623     $self->{next_input_character} == 0x000A or # LF
1624     $self->{next_input_character} == 0x000B or # VT
1625     $self->{next_input_character} == 0x000C or # FF
1626     $self->{next_input_character} == 0x0020) { # SP
1627 wakaba 1.18 ## Stay in the state
1628 wakaba 1.1
1629     if (@{$self->{char}}) {
1630     $self->{next_input_character} = shift @{$self->{char}};
1631     } else {
1632     $self->{set_next_input_character}->($self);
1633     }
1634    
1635     redo A;
1636     } elsif ($self->{next_input_character} == 0x003E) { # >
1637     $self->{state} = 'data';
1638    
1639     if (@{$self->{char}}) {
1640     $self->{next_input_character} = shift @{$self->{char}};
1641     } else {
1642     $self->{set_next_input_character}->($self);
1643     }
1644    
1645    
1646     return ($self->{current_token}); # DOCTYPE
1647     undef $self->{current_token};
1648    
1649     redo A;
1650 wakaba 1.18 } elsif ($self->{next_input_character} == -1) {
1651     $self->{parse_error}-> (type => 'unclosed DOCTYPE');
1652     $self->{state} = 'data';
1653     ## reconsume
1654    
1655     delete $self->{current_token}->{correct};
1656     return ($self->{current_token}); # DOCTYPE
1657     undef $self->{current_token};
1658    
1659     redo A;
1660     } elsif ($self->{next_input_character} == 0x0050 or # P
1661     $self->{next_input_character} == 0x0070) { # p
1662    
1663     if (@{$self->{char}}) {
1664     $self->{next_input_character} = shift @{$self->{char}};
1665     } else {
1666     $self->{set_next_input_character}->($self);
1667     }
1668    
1669     if ($self->{next_input_character} == 0x0055 or # U
1670     $self->{next_input_character} == 0x0075) { # u
1671    
1672     if (@{$self->{char}}) {
1673     $self->{next_input_character} = shift @{$self->{char}};
1674     } else {
1675     $self->{set_next_input_character}->($self);
1676     }
1677    
1678     if ($self->{next_input_character} == 0x0042 or # B
1679     $self->{next_input_character} == 0x0062) { # b
1680    
1681     if (@{$self->{char}}) {
1682     $self->{next_input_character} = shift @{$self->{char}};
1683     } else {
1684     $self->{set_next_input_character}->($self);
1685     }
1686    
1687     if ($self->{next_input_character} == 0x004C or # L
1688     $self->{next_input_character} == 0x006C) { # l
1689    
1690     if (@{$self->{char}}) {
1691     $self->{next_input_character} = shift @{$self->{char}};
1692     } else {
1693     $self->{set_next_input_character}->($self);
1694     }
1695    
1696     if ($self->{next_input_character} == 0x0049 or # I
1697     $self->{next_input_character} == 0x0069) { # i
1698    
1699     if (@{$self->{char}}) {
1700     $self->{next_input_character} = shift @{$self->{char}};
1701     } else {
1702     $self->{set_next_input_character}->($self);
1703     }
1704    
1705     if ($self->{next_input_character} == 0x0043 or # C
1706     $self->{next_input_character} == 0x0063) { # c
1707     $self->{state} = 'before DOCTYPE public identifier';
1708    
1709     if (@{$self->{char}}) {
1710     $self->{next_input_character} = shift @{$self->{char}};
1711     } else {
1712     $self->{set_next_input_character}->($self);
1713     }
1714    
1715     redo A;
1716     }
1717     }
1718     }
1719     }
1720     }
1721    
1722     #
1723     } elsif ($self->{next_input_character} == 0x0053 or # S
1724     $self->{next_input_character} == 0x0073) { # s
1725    
1726     if (@{$self->{char}}) {
1727     $self->{next_input_character} = shift @{$self->{char}};
1728     } else {
1729     $self->{set_next_input_character}->($self);
1730     }
1731    
1732     if ($self->{next_input_character} == 0x0059 or # Y
1733     $self->{next_input_character} == 0x0079) { # y
1734    
1735     if (@{$self->{char}}) {
1736     $self->{next_input_character} = shift @{$self->{char}};
1737     } else {
1738     $self->{set_next_input_character}->($self);
1739     }
1740    
1741     if ($self->{next_input_character} == 0x0053 or # S
1742     $self->{next_input_character} == 0x0073) { # s
1743    
1744     if (@{$self->{char}}) {
1745     $self->{next_input_character} = shift @{$self->{char}};
1746     } else {
1747     $self->{set_next_input_character}->($self);
1748     }
1749    
1750     if ($self->{next_input_character} == 0x0054 or # T
1751     $self->{next_input_character} == 0x0074) { # t
1752    
1753     if (@{$self->{char}}) {
1754     $self->{next_input_character} = shift @{$self->{char}};
1755     } else {
1756     $self->{set_next_input_character}->($self);
1757     }
1758    
1759     if ($self->{next_input_character} == 0x0045 or # E
1760     $self->{next_input_character} == 0x0065) { # e
1761    
1762     if (@{$self->{char}}) {
1763     $self->{next_input_character} = shift @{$self->{char}};
1764     } else {
1765     $self->{set_next_input_character}->($self);
1766     }
1767    
1768     if ($self->{next_input_character} == 0x004D or # M
1769     $self->{next_input_character} == 0x006D) { # m
1770     $self->{state} = 'before DOCTYPE system identifier';
1771    
1772     if (@{$self->{char}}) {
1773     $self->{next_input_character} = shift @{$self->{char}};
1774     } else {
1775     $self->{set_next_input_character}->($self);
1776     }
1777    
1778     redo A;
1779     }
1780     }
1781     }
1782     }
1783     }
1784    
1785     #
1786     } else {
1787    
1788     if (@{$self->{char}}) {
1789     $self->{next_input_character} = shift @{$self->{char}};
1790     } else {
1791     $self->{set_next_input_character}->($self);
1792     }
1793    
1794     #
1795     }
1796    
1797     $self->{parse_error}-> (type => 'string after DOCTYPE name');
1798     $self->{state} = 'bogus DOCTYPE';
1799     # next-input-character is already done
1800     redo A;
1801     } elsif ($self->{state} eq 'before DOCTYPE public identifier') {
1802     if ({
1803     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
1804     #0x000D => 1, # HT, LF, VT, FF, SP, CR
1805     }->{$self->{next_input_character}}) {
1806 wakaba 1.1 ## Stay in the state
1807    
1808     if (@{$self->{char}}) {
1809     $self->{next_input_character} = shift @{$self->{char}};
1810     } else {
1811     $self->{set_next_input_character}->($self);
1812     }
1813    
1814     redo A;
1815 wakaba 1.18 } elsif ($self->{next_input_character} eq 0x0022) { # "
1816     $self->{current_token}->{public_identifier} = ''; # DOCTYPE
1817     $self->{state} = 'DOCTYPE public identifier (double-quoted)';
1818    
1819     if (@{$self->{char}}) {
1820     $self->{next_input_character} = shift @{$self->{char}};
1821     } else {
1822     $self->{set_next_input_character}->($self);
1823     }
1824    
1825     redo A;
1826     } elsif ($self->{next_input_character} eq 0x0027) { # '
1827     $self->{current_token}->{public_identifier} = ''; # DOCTYPE
1828     $self->{state} = 'DOCTYPE public identifier (single-quoted)';
1829    
1830     if (@{$self->{char}}) {
1831     $self->{next_input_character} = shift @{$self->{char}};
1832     } else {
1833     $self->{set_next_input_character}->($self);
1834     }
1835    
1836     redo A;
1837     } elsif ($self->{next_input_character} eq 0x003E) { # >
1838     $self->{parse_error}-> (type => 'no PUBLIC literal');
1839    
1840     $self->{state} = 'data';
1841    
1842     if (@{$self->{char}}) {
1843     $self->{next_input_character} = shift @{$self->{char}};
1844     } else {
1845     $self->{set_next_input_character}->($self);
1846     }
1847    
1848    
1849     delete $self->{current_token}->{correct};
1850     return ($self->{current_token}); # DOCTYPE
1851     undef $self->{current_token};
1852    
1853     redo A;
1854 wakaba 1.1 } elsif ($self->{next_input_character} == -1) {
1855 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed DOCTYPE');
1856 wakaba 1.18
1857 wakaba 1.1 $self->{state} = 'data';
1858     ## reconsume
1859    
1860 wakaba 1.18 delete $self->{current_token}->{correct};
1861     return ($self->{current_token}); # DOCTYPE
1862 wakaba 1.1 undef $self->{current_token};
1863    
1864     redo A;
1865     } else {
1866 wakaba 1.18 $self->{parse_error}-> (type => 'string after PUBLIC');
1867     $self->{state} = 'bogus DOCTYPE';
1868    
1869     if (@{$self->{char}}) {
1870     $self->{next_input_character} = shift @{$self->{char}};
1871     } else {
1872     $self->{set_next_input_character}->($self);
1873     }
1874    
1875     redo A;
1876     }
1877     } elsif ($self->{state} eq 'DOCTYPE public identifier (double-quoted)') {
1878     if ($self->{next_input_character} == 0x0022) { # "
1879     $self->{state} = 'after DOCTYPE public identifier';
1880    
1881     if (@{$self->{char}}) {
1882     $self->{next_input_character} = shift @{$self->{char}};
1883     } else {
1884     $self->{set_next_input_character}->($self);
1885     }
1886    
1887     redo A;
1888     } elsif ($self->{next_input_character} == -1) {
1889     $self->{parse_error}-> (type => 'unclosed PUBLIC literal');
1890    
1891     $self->{state} = 'data';
1892     ## reconsume
1893    
1894     delete $self->{current_token}->{correct};
1895     return ($self->{current_token}); # DOCTYPE
1896     undef $self->{current_token};
1897    
1898     redo A;
1899     } else {
1900     $self->{current_token}->{public_identifier} # DOCTYPE
1901     .= chr $self->{next_input_character};
1902     ## Stay in the state
1903    
1904     if (@{$self->{char}}) {
1905     $self->{next_input_character} = shift @{$self->{char}};
1906     } else {
1907     $self->{set_next_input_character}->($self);
1908     }
1909    
1910     redo A;
1911     }
1912     } elsif ($self->{state} eq 'DOCTYPE public identifier (single-quoted)') {
1913     if ($self->{next_input_character} == 0x0027) { # '
1914     $self->{state} = 'after DOCTYPE public identifier';
1915    
1916     if (@{$self->{char}}) {
1917     $self->{next_input_character} = shift @{$self->{char}};
1918     } else {
1919     $self->{set_next_input_character}->($self);
1920     }
1921    
1922     redo A;
1923     } elsif ($self->{next_input_character} == -1) {
1924     $self->{parse_error}-> (type => 'unclosed PUBLIC literal');
1925    
1926     $self->{state} = 'data';
1927     ## reconsume
1928    
1929     delete $self->{current_token}->{correct};
1930     return ($self->{current_token}); # DOCTYPE
1931     undef $self->{current_token};
1932    
1933     redo A;
1934     } else {
1935     $self->{current_token}->{public_identifier} # DOCTYPE
1936     .= chr $self->{next_input_character};
1937     ## Stay in the state
1938    
1939     if (@{$self->{char}}) {
1940     $self->{next_input_character} = shift @{$self->{char}};
1941     } else {
1942     $self->{set_next_input_character}->($self);
1943     }
1944    
1945     redo A;
1946     }
1947     } elsif ($self->{state} eq 'after DOCTYPE public identifier') {
1948     if ({
1949     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
1950     #0x000D => 1, # HT, LF, VT, FF, SP, CR
1951     }->{$self->{next_input_character}}) {
1952 wakaba 1.1 ## Stay in the state
1953    
1954     if (@{$self->{char}}) {
1955     $self->{next_input_character} = shift @{$self->{char}};
1956     } else {
1957     $self->{set_next_input_character}->($self);
1958     }
1959    
1960     redo A;
1961 wakaba 1.18 } elsif ($self->{next_input_character} == 0x0022) { # "
1962     $self->{current_token}->{system_identifier} = ''; # DOCTYPE
1963     $self->{state} = 'DOCTYPE system identifier (double-quoted)';
1964    
1965     if (@{$self->{char}}) {
1966     $self->{next_input_character} = shift @{$self->{char}};
1967     } else {
1968     $self->{set_next_input_character}->($self);
1969     }
1970    
1971     redo A;
1972     } elsif ($self->{next_input_character} == 0x0027) { # '
1973     $self->{current_token}->{system_identifier} = ''; # DOCTYPE
1974     $self->{state} = 'DOCTYPE system identifier (single-quoted)';
1975    
1976     if (@{$self->{char}}) {
1977     $self->{next_input_character} = shift @{$self->{char}};
1978     } else {
1979     $self->{set_next_input_character}->($self);
1980     }
1981    
1982     redo A;
1983     } elsif ($self->{next_input_character} == 0x003E) { # >
1984     $self->{state} = 'data';
1985    
1986     if (@{$self->{char}}) {
1987     $self->{next_input_character} = shift @{$self->{char}};
1988     } else {
1989     $self->{set_next_input_character}->($self);
1990     }
1991    
1992    
1993     return ($self->{current_token}); # DOCTYPE
1994     undef $self->{current_token};
1995    
1996     redo A;
1997     } elsif ($self->{next_input_character} == -1) {
1998     $self->{parse_error}-> (type => 'unclosed DOCTYPE');
1999    
2000     $self->{state} = 'data';
2001     ## recomsume
2002    
2003     delete $self->{current_token}->{correct};
2004     return ($self->{current_token}); # DOCTYPE
2005     undef $self->{current_token};
2006    
2007     redo A;
2008     } else {
2009     $self->{parse_error}-> (type => 'string after PUBLIC literal');
2010     $self->{state} = 'bogus DOCTYPE';
2011    
2012     if (@{$self->{char}}) {
2013     $self->{next_input_character} = shift @{$self->{char}};
2014     } else {
2015     $self->{set_next_input_character}->($self);
2016     }
2017    
2018     redo A;
2019 wakaba 1.1 }
2020 wakaba 1.18 } elsif ($self->{state} eq 'before DOCTYPE system identifier') {
2021     if ({
2022     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2023     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2024     }->{$self->{next_input_character}}) {
2025 wakaba 1.1 ## Stay in the state
2026    
2027     if (@{$self->{char}}) {
2028     $self->{next_input_character} = shift @{$self->{char}};
2029     } else {
2030     $self->{set_next_input_character}->($self);
2031     }
2032    
2033     redo A;
2034 wakaba 1.18 } elsif ($self->{next_input_character} == 0x0022) { # "
2035     $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2036     $self->{state} = 'DOCTYPE system identifier (double-quoted)';
2037    
2038     if (@{$self->{char}}) {
2039     $self->{next_input_character} = shift @{$self->{char}};
2040     } else {
2041     $self->{set_next_input_character}->($self);
2042     }
2043    
2044     redo A;
2045     } elsif ($self->{next_input_character} == 0x0027) { # '
2046     $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2047     $self->{state} = 'DOCTYPE system identifier (single-quoted)';
2048    
2049     if (@{$self->{char}}) {
2050     $self->{next_input_character} = shift @{$self->{char}};
2051     } else {
2052     $self->{set_next_input_character}->($self);
2053     }
2054    
2055     redo A;
2056 wakaba 1.1 } elsif ($self->{next_input_character} == 0x003E) { # >
2057 wakaba 1.18 $self->{parse_error}-> (type => 'no SYSTEM literal');
2058 wakaba 1.1 $self->{state} = 'data';
2059    
2060     if (@{$self->{char}}) {
2061     $self->{next_input_character} = shift @{$self->{char}};
2062     } else {
2063     $self->{set_next_input_character}->($self);
2064     }
2065    
2066    
2067 wakaba 1.18 delete $self->{current_token}->{correct};
2068 wakaba 1.1 return ($self->{current_token}); # DOCTYPE
2069     undef $self->{current_token};
2070    
2071     redo A;
2072     } elsif ($self->{next_input_character} == -1) {
2073 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed DOCTYPE');
2074 wakaba 1.18
2075     $self->{state} = 'data';
2076     ## recomsume
2077    
2078     delete $self->{current_token}->{correct};
2079     return ($self->{current_token}); # DOCTYPE
2080     undef $self->{current_token};
2081    
2082     redo A;
2083     } else {
2084     $self->{parse_error}-> (type => 'string after PUBLIC literal');
2085     $self->{state} = 'bogus DOCTYPE';
2086    
2087     if (@{$self->{char}}) {
2088     $self->{next_input_character} = shift @{$self->{char}};
2089     } else {
2090     $self->{set_next_input_character}->($self);
2091     }
2092    
2093     redo A;
2094     }
2095     } elsif ($self->{state} eq 'DOCTYPE system identifier (double-quoted)') {
2096     if ($self->{next_input_character} == 0x0022) { # "
2097     $self->{state} = 'after DOCTYPE system identifier';
2098    
2099     if (@{$self->{char}}) {
2100     $self->{next_input_character} = shift @{$self->{char}};
2101     } else {
2102     $self->{set_next_input_character}->($self);
2103     }
2104    
2105     redo A;
2106     } elsif ($self->{next_input_character} == -1) {
2107     $self->{parse_error}-> (type => 'unclosed SYSTEM literal');
2108    
2109 wakaba 1.1 $self->{state} = 'data';
2110     ## reconsume
2111    
2112 wakaba 1.18 delete $self->{current_token}->{correct};
2113 wakaba 1.1 return ($self->{current_token}); # DOCTYPE
2114     undef $self->{current_token};
2115    
2116     redo A;
2117     } else {
2118 wakaba 1.18 $self->{current_token}->{system_identifier} # DOCTYPE
2119     .= chr $self->{next_input_character};
2120     ## Stay in the state
2121    
2122     if (@{$self->{char}}) {
2123     $self->{next_input_character} = shift @{$self->{char}};
2124     } else {
2125     $self->{set_next_input_character}->($self);
2126     }
2127    
2128     redo A;
2129     }
2130     } elsif ($self->{state} eq 'DOCTYPE system identifier (single-quoted)') {
2131     if ($self->{next_input_character} == 0x0027) { # '
2132     $self->{state} = 'after DOCTYPE system identifier';
2133    
2134     if (@{$self->{char}}) {
2135     $self->{next_input_character} = shift @{$self->{char}};
2136     } else {
2137     $self->{set_next_input_character}->($self);
2138     }
2139    
2140     redo A;
2141     } elsif ($self->{next_input_character} == -1) {
2142     $self->{parse_error}-> (type => 'unclosed SYSTEM literal');
2143    
2144     $self->{state} = 'data';
2145     ## reconsume
2146    
2147     delete $self->{current_token}->{correct};
2148     return ($self->{current_token}); # DOCTYPE
2149     undef $self->{current_token};
2150    
2151     redo A;
2152     } else {
2153     $self->{current_token}->{system_identifier} # DOCTYPE
2154     .= chr $self->{next_input_character};
2155     ## Stay in the state
2156    
2157     if (@{$self->{char}}) {
2158     $self->{next_input_character} = shift @{$self->{char}};
2159     } else {
2160     $self->{set_next_input_character}->($self);
2161     }
2162    
2163     redo A;
2164     }
2165     } elsif ($self->{state} eq 'after DOCTYPE system identifier') {
2166     if ({
2167     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2168     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2169     }->{$self->{next_input_character}}) {
2170     ## Stay in the state
2171    
2172     if (@{$self->{char}}) {
2173     $self->{next_input_character} = shift @{$self->{char}};
2174     } else {
2175     $self->{set_next_input_character}->($self);
2176     }
2177    
2178     redo A;
2179     } elsif ($self->{next_input_character} == 0x003E) { # >
2180     $self->{state} = 'data';
2181    
2182     if (@{$self->{char}}) {
2183     $self->{next_input_character} = shift @{$self->{char}};
2184     } else {
2185     $self->{set_next_input_character}->($self);
2186     }
2187    
2188    
2189     return ($self->{current_token}); # DOCTYPE
2190     undef $self->{current_token};
2191    
2192     redo A;
2193     } elsif ($self->{next_input_character} == -1) {
2194     $self->{parse_error}-> (type => 'unclosed DOCTYPE');
2195    
2196     $self->{state} = 'data';
2197     ## recomsume
2198    
2199     delete $self->{current_token}->{correct};
2200     return ($self->{current_token}); # DOCTYPE
2201     undef $self->{current_token};
2202    
2203     redo A;
2204     } else {
2205     $self->{parse_error}-> (type => 'string after SYSTEM literal');
2206 wakaba 1.1 $self->{state} = 'bogus DOCTYPE';
2207    
2208     if (@{$self->{char}}) {
2209     $self->{next_input_character} = shift @{$self->{char}};
2210     } else {
2211     $self->{set_next_input_character}->($self);
2212     }
2213    
2214     redo A;
2215     }
2216     } elsif ($self->{state} eq 'bogus DOCTYPE') {
2217     if ($self->{next_input_character} == 0x003E) { # >
2218     $self->{state} = 'data';
2219    
2220     if (@{$self->{char}}) {
2221     $self->{next_input_character} = shift @{$self->{char}};
2222     } else {
2223     $self->{set_next_input_character}->($self);
2224     }
2225    
2226    
2227 wakaba 1.18 delete $self->{current_token}->{correct};
2228 wakaba 1.1 return ($self->{current_token}); # DOCTYPE
2229     undef $self->{current_token};
2230    
2231     redo A;
2232     } elsif ($self->{next_input_character} == -1) {
2233 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed DOCTYPE');
2234 wakaba 1.1 $self->{state} = 'data';
2235     ## reconsume
2236    
2237 wakaba 1.18 delete $self->{current_token}->{correct};
2238 wakaba 1.1 return ($self->{current_token}); # DOCTYPE
2239     undef $self->{current_token};
2240    
2241     redo A;
2242     } else {
2243     ## Stay in the state
2244    
2245     if (@{$self->{char}}) {
2246     $self->{next_input_character} = shift @{$self->{char}};
2247     } else {
2248     $self->{set_next_input_character}->($self);
2249     }
2250    
2251     redo A;
2252     }
2253     } else {
2254     die "$0: $self->{state}: Unknown state";
2255     }
2256     } # A
2257    
2258     die "$0: _get_next_token: unexpected case";
2259     } # _get_next_token
2260    
2261     sub _tokenize_attempt_to_consume_an_entity ($) {
2262     my $self = shift;
2263 wakaba 1.20
2264     if ({
2265     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, # HT, LF, VT, FF,
2266     0x0020 => 1, 0x003C => 1, 0x0026 => 1, -1 => 1, # SP, <, & # 0x000D # CR
2267     }->{$self->{next_input_character}}) {
2268     ## Don't consume
2269     ## No error
2270     return undef;
2271     } elsif ($self->{next_input_character} == 0x0023) { # #
2272 wakaba 1.1
2273     if (@{$self->{char}}) {
2274     $self->{next_input_character} = shift @{$self->{char}};
2275     } else {
2276     $self->{set_next_input_character}->($self);
2277     }
2278    
2279     if ($self->{next_input_character} == 0x0078 or # x
2280     $self->{next_input_character} == 0x0058) { # X
2281 wakaba 1.4 my $num;
2282 wakaba 1.1 X: {
2283     my $x_char = $self->{next_input_character};
2284    
2285     if (@{$self->{char}}) {
2286     $self->{next_input_character} = shift @{$self->{char}};
2287     } else {
2288     $self->{set_next_input_character}->($self);
2289     }
2290    
2291     if (0x0030 <= $self->{next_input_character} and
2292     $self->{next_input_character} <= 0x0039) { # 0..9
2293     $num ||= 0;
2294     $num *= 0x10;
2295     $num += $self->{next_input_character} - 0x0030;
2296     redo X;
2297     } elsif (0x0061 <= $self->{next_input_character} and
2298     $self->{next_input_character} <= 0x0066) { # a..f
2299     ## ISSUE: the spec says U+0078, which is apparently incorrect
2300     $num ||= 0;
2301     $num *= 0x10;
2302     $num += $self->{next_input_character} - 0x0060 + 9;
2303     redo X;
2304     } elsif (0x0041 <= $self->{next_input_character} and
2305     $self->{next_input_character} <= 0x0046) { # A..F
2306     ## ISSUE: the spec says U+0058, which is apparently incorrect
2307     $num ||= 0;
2308     $num *= 0x10;
2309     $num += $self->{next_input_character} - 0x0040 + 9;
2310     redo X;
2311     } elsif (not defined $num) { # no hexadecimal digit
2312 wakaba 1.3 $self->{parse_error}-> (type => 'bare hcro');
2313 wakaba 1.1 $self->{next_input_character} = 0x0023; # #
2314     unshift @{$self->{char}}, ($x_char);
2315     return undef;
2316     } elsif ($self->{next_input_character} == 0x003B) { # ;
2317    
2318     if (@{$self->{char}}) {
2319     $self->{next_input_character} = shift @{$self->{char}};
2320     } else {
2321     $self->{set_next_input_character}->($self);
2322     }
2323    
2324     } else {
2325 wakaba 1.3 $self->{parse_error}-> (type => 'no refc');
2326 wakaba 1.1 }
2327    
2328     ## TODO: check the definition for |a valid Unicode character|.
2329 wakaba 1.4 ## <http://lists.whatwg.org/pipermail/whatwg-whatwg.org/2006-December/thread.html#8189>
2330 wakaba 1.1 if ($num > 1114111 or $num == 0) {
2331     $num = 0xFFFD; # REPLACEMENT CHARACTER
2332     ## ISSUE: Why this is not an error?
2333 wakaba 1.4 } elsif (0x80 <= $num and $num <= 0x9F) {
2334 wakaba 1.8 $self->{parse_error}-> (type => sprintf 'c1 entity:U+%04X', $num);
2335 wakaba 1.4 $num = $c1_entity_char->{$num};
2336 wakaba 1.1 }
2337    
2338     return {type => 'character', data => chr $num};
2339     } # X
2340     } elsif (0x0030 <= $self->{next_input_character} and
2341     $self->{next_input_character} <= 0x0039) { # 0..9
2342     my $code = $self->{next_input_character} - 0x0030;
2343    
2344     if (@{$self->{char}}) {
2345     $self->{next_input_character} = shift @{$self->{char}};
2346     } else {
2347     $self->{set_next_input_character}->($self);
2348     }
2349    
2350    
2351     while (0x0030 <= $self->{next_input_character} and
2352     $self->{next_input_character} <= 0x0039) { # 0..9
2353     $code *= 10;
2354     $code += $self->{next_input_character} - 0x0030;
2355    
2356    
2357     if (@{$self->{char}}) {
2358     $self->{next_input_character} = shift @{$self->{char}};
2359     } else {
2360     $self->{set_next_input_character}->($self);
2361     }
2362    
2363     }
2364    
2365     if ($self->{next_input_character} == 0x003B) { # ;
2366    
2367     if (@{$self->{char}}) {
2368     $self->{next_input_character} = shift @{$self->{char}};
2369     } else {
2370     $self->{set_next_input_character}->($self);
2371     }
2372    
2373     } else {
2374 wakaba 1.3 $self->{parse_error}-> (type => 'no refc');
2375 wakaba 1.1 }
2376    
2377     ## TODO: check the definition for |a valid Unicode character|.
2378     if ($code > 1114111 or $code == 0) {
2379     $code = 0xFFFD; # REPLACEMENT CHARACTER
2380     ## ISSUE: Why this is not an error?
2381 wakaba 1.4 } elsif (0x80 <= $code and $code <= 0x9F) {
2382 wakaba 1.8 $self->{parse_error}-> (type => sprintf 'c1 entity:U+%04X', $code);
2383 wakaba 1.4 $code = $c1_entity_char->{$code};
2384 wakaba 1.1 }
2385    
2386     return {type => 'character', data => chr $code};
2387     } else {
2388 wakaba 1.3 $self->{parse_error}-> (type => 'bare nero');
2389 wakaba 1.1 unshift @{$self->{char}}, ($self->{next_input_character});
2390     $self->{next_input_character} = 0x0023; # #
2391     return undef;
2392     }
2393     } elsif ((0x0041 <= $self->{next_input_character} and
2394     $self->{next_input_character} <= 0x005A) or
2395     (0x0061 <= $self->{next_input_character} and
2396     $self->{next_input_character} <= 0x007A)) {
2397     my $entity_name = chr $self->{next_input_character};
2398    
2399     if (@{$self->{char}}) {
2400     $self->{next_input_character} = shift @{$self->{char}};
2401     } else {
2402     $self->{set_next_input_character}->($self);
2403     }
2404    
2405    
2406     my $value = $entity_name;
2407     my $match;
2408 wakaba 1.16 require Whatpm::_NamedEntityList;
2409     our $EntityChar;
2410 wakaba 1.1
2411     while (length $entity_name < 10 and
2412     ## NOTE: Some number greater than the maximum length of entity name
2413 wakaba 1.16 ((0x0041 <= $self->{next_input_character} and # a
2414     $self->{next_input_character} <= 0x005A) or # x
2415     (0x0061 <= $self->{next_input_character} and # a
2416     $self->{next_input_character} <= 0x007A) or # z
2417     (0x0030 <= $self->{next_input_character} and # 0
2418     $self->{next_input_character} <= 0x0039) or # 9
2419     $self->{next_input_character} == 0x003B)) { # ;
2420 wakaba 1.1 $entity_name .= chr $self->{next_input_character};
2421 wakaba 1.16 if (defined $EntityChar->{$entity_name}) {
2422     $value = $EntityChar->{$entity_name};
2423     if ($self->{next_input_character} == 0x003B) { # ;
2424     $match = 1;
2425    
2426     if (@{$self->{char}}) {
2427     $self->{next_input_character} = shift @{$self->{char}};
2428     } else {
2429     $self->{set_next_input_character}->($self);
2430     }
2431    
2432     last;
2433     } else {
2434     $match = -1;
2435     }
2436 wakaba 1.1 } else {
2437     $value .= chr $self->{next_input_character};
2438     }
2439    
2440     if (@{$self->{char}}) {
2441     $self->{next_input_character} = shift @{$self->{char}};
2442     } else {
2443     $self->{set_next_input_character}->($self);
2444     }
2445    
2446     }
2447    
2448 wakaba 1.16 if ($match > 0) {
2449     return {type => 'character', data => $value};
2450     } elsif ($match < 0) {
2451     $self->{parse_error}-> (type => 'refc');
2452 wakaba 1.1 return {type => 'character', data => $value};
2453     } else {
2454 wakaba 1.3 $self->{parse_error}-> (type => 'bare ero');
2455 wakaba 1.1 ## NOTE: No characters are consumed in the spec.
2456     unshift @{$self->{token}}, ({type => 'character', data => $value});
2457     return undef;
2458     }
2459     } else {
2460     ## no characters are consumed
2461 wakaba 1.3 $self->{parse_error}-> (type => 'bare ero');
2462 wakaba 1.1 return undef;
2463     }
2464     } # _tokenize_attempt_to_consume_an_entity
2465    
2466     sub _initialize_tree_constructor ($) {
2467     my $self = shift;
2468     ## NOTE: $self->{document} MUST be specified before this method is called
2469     $self->{document}->strict_error_checking (0);
2470     ## TODO: Turn mutation events off # MUST
2471     ## TODO: Turn loose Document option (manakai extension) on
2472 wakaba 1.18 $self->{document}->manakai_is_html (1); # MUST
2473 wakaba 1.1 } # _initialize_tree_constructor
2474    
2475     sub _terminate_tree_constructor ($) {
2476     my $self = shift;
2477     $self->{document}->strict_error_checking (1);
2478     ## TODO: Turn mutation events on
2479     } # _terminate_tree_constructor
2480    
2481     ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
2482    
2483 wakaba 1.3 { # tree construction stage
2484     my $token;
2485    
2486 wakaba 1.1 sub _construct_tree ($) {
2487     my ($self) = @_;
2488    
2489     ## When an interactive UA render the $self->{document} available
2490     ## to the user, or when it begin accepting user input, are
2491     ## not defined.
2492    
2493     ## Append a character: collect it and all subsequent consecutive
2494     ## characters and insert one Text node whose data is concatenation
2495     ## of all those characters. # MUST
2496    
2497     $token = $self->_get_next_token;
2498    
2499 wakaba 1.3 $self->{insertion_mode} = 'before head';
2500     undef $self->{form_element};
2501     undef $self->{head_element};
2502     $self->{open_elements} = [];
2503     undef $self->{inner_html_node};
2504    
2505     $self->_tree_construction_initial; # MUST
2506     $self->_tree_construction_root_element;
2507     $self->_tree_construction_main;
2508     } # _construct_tree
2509    
2510     sub _tree_construction_initial ($) {
2511     my $self = shift;
2512 wakaba 1.18 INITIAL: {
2513     if ($token->{type} eq 'DOCTYPE') {
2514     ## NOTE: Conformance checkers MAY, instead of reporting "not HTML5"
2515     ## error, switch to a conformance checking mode for another
2516     ## language.
2517     my $doctype_name = $token->{name};
2518     $doctype_name = '' unless defined $doctype_name;
2519     $doctype_name =~ tr/a-z/A-Z/;
2520     if (not defined $token->{name} or # <!DOCTYPE>
2521     defined $token->{public_identifier} or
2522     defined $token->{system_identifier}) {
2523     $self->{parse_error}-> (type => 'not HTML5');
2524     } elsif ($doctype_name ne 'HTML') {
2525     ## ISSUE: ASCII case-insensitive? (in fact it does not matter)
2526     $self->{parse_error}-> (type => 'not HTML5');
2527     }
2528    
2529     my $doctype = $self->{document}->create_document_type_definition
2530     ($token->{name}); ## ISSUE: If name is missing (e.g. <!DOCTYPE>)?
2531     $doctype->public_id ($token->{public_identifier})
2532     if defined $token->{public_identifier};
2533     $doctype->system_id ($token->{system_identifier})
2534     if defined $token->{system_identifier};
2535     ## NOTE: Other DocumentType attributes are null or empty lists.
2536     ## ISSUE: internalSubset = null??
2537     $self->{document}->append_child ($doctype);
2538    
2539     if (not $token->{correct} or $doctype_name ne 'HTML') {
2540     $self->{document}->manakai_compat_mode ('quirks');
2541     } elsif (defined $token->{public_identifier}) {
2542     my $pubid = $token->{public_identifier};
2543     $pubid =~ tr/a-z/A-z/;
2544     if ({
2545     "+//SILMARIL//DTD HTML PRO V0R11 19970101//EN" => 1,
2546     "-//ADVASOFT LTD//DTD HTML 3.0 ASWEDIT + EXTENSIONS//EN" => 1,
2547     "-//AS//DTD HTML 3.0 ASWEDIT + EXTENSIONS//EN" => 1,
2548     "-//IETF//DTD HTML 2.0 LEVEL 1//EN" => 1,
2549     "-//IETF//DTD HTML 2.0 LEVEL 2//EN" => 1,
2550     "-//IETF//DTD HTML 2.0 STRICT LEVEL 1//EN" => 1,
2551     "-//IETF//DTD HTML 2.0 STRICT LEVEL 2//EN" => 1,
2552     "-//IETF//DTD HTML 2.0 STRICT//EN" => 1,
2553     "-//IETF//DTD HTML 2.0//EN" => 1,
2554     "-//IETF//DTD HTML 2.1E//EN" => 1,
2555     "-//IETF//DTD HTML 3.0//EN" => 1,
2556     "-//IETF//DTD HTML 3.0//EN//" => 1,
2557     "-//IETF//DTD HTML 3.2 FINAL//EN" => 1,
2558     "-//IETF//DTD HTML 3.2//EN" => 1,
2559     "-//IETF//DTD HTML 3//EN" => 1,
2560     "-//IETF//DTD HTML LEVEL 0//EN" => 1,
2561     "-//IETF//DTD HTML LEVEL 0//EN//2.0" => 1,
2562     "-//IETF//DTD HTML LEVEL 1//EN" => 1,
2563     "-//IETF//DTD HTML LEVEL 1//EN//2.0" => 1,
2564     "-//IETF//DTD HTML LEVEL 2//EN" => 1,
2565     "-//IETF//DTD HTML LEVEL 2//EN//2.0" => 1,
2566     "-//IETF//DTD HTML LEVEL 3//EN" => 1,
2567     "-//IETF//DTD HTML LEVEL 3//EN//3.0" => 1,
2568     "-//IETF//DTD HTML STRICT LEVEL 0//EN" => 1,
2569     "-//IETF//DTD HTML STRICT LEVEL 0//EN//2.0" => 1,
2570     "-//IETF//DTD HTML STRICT LEVEL 1//EN" => 1,
2571     "-//IETF//DTD HTML STRICT LEVEL 1//EN//2.0" => 1,
2572     "-//IETF//DTD HTML STRICT LEVEL 2//EN" => 1,
2573     "-//IETF//DTD HTML STRICT LEVEL 2//EN//2.0" => 1,
2574     "-//IETF//DTD HTML STRICT LEVEL 3//EN" => 1,
2575     "-//IETF//DTD HTML STRICT LEVEL 3//EN//3.0" => 1,
2576     "-//IETF//DTD HTML STRICT//EN" => 1,
2577     "-//IETF//DTD HTML STRICT//EN//2.0" => 1,
2578     "-//IETF//DTD HTML STRICT//EN//3.0" => 1,
2579     "-//IETF//DTD HTML//EN" => 1,
2580     "-//IETF//DTD HTML//EN//2.0" => 1,
2581     "-//IETF//DTD HTML//EN//3.0" => 1,
2582     "-//METRIUS//DTD METRIUS PRESENTATIONAL//EN" => 1,
2583     "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML STRICT//EN" => 1,
2584     "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML//EN" => 1,
2585     "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 TABLES//EN" => 1,
2586     "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML STRICT//EN" => 1,
2587     "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML//EN" => 1,
2588     "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 TABLES//EN" => 1,
2589     "-//NETSCAPE COMM. CORP.//DTD HTML//EN" => 1,
2590     "-//NETSCAPE COMM. CORP.//DTD STRICT HTML//EN" => 1,
2591     "-//O'REILLY AND ASSOCIATES//DTD HTML 2.0//EN" => 1,
2592     "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED 1.0//EN" => 1,
2593     "-//SPYGLASS//DTD HTML 2.0 EXTENDED//EN" => 1,
2594     "-//SQ//DTD HTML 2.0 HOTMETAL + EXTENSIONS//EN" => 1,
2595     "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA HTML//EN" => 1,
2596     "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA STRICT HTML//EN" => 1,
2597     "-//W3C//DTD HTML 3 1995-03-24//EN" => 1,
2598     "-//W3C//DTD HTML 3.2 DRAFT//EN" => 1,
2599     "-//W3C//DTD HTML 3.2 FINAL//EN" => 1,
2600     "-//W3C//DTD HTML 3.2//EN" => 1,
2601     "-//W3C//DTD HTML 3.2S DRAFT//EN" => 1,
2602     "-//W3C//DTD HTML 4.0 FRAMESET//EN" => 1,
2603     "-//W3C//DTD HTML 4.0 TRANSITIONAL//EN" => 1,
2604     "-//W3C//DTD HTML EXPERIMETNAL 19960712//EN" => 1,
2605     "-//W3C//DTD HTML EXPERIMENTAL 970421//EN" => 1,
2606     "-//W3C//DTD W3 HTML//EN" => 1,
2607     "-//W3O//DTD W3 HTML 3.0//EN" => 1,
2608     "-//W3O//DTD W3 HTML 3.0//EN//" => 1,
2609     "-//W3O//DTD W3 HTML STRICT 3.0//EN//" => 1,
2610     "-//WEBTECHS//DTD MOZILLA HTML 2.0//EN" => 1,
2611     "-//WEBTECHS//DTD MOZILLA HTML//EN" => 1,
2612     "-/W3C/DTD HTML 4.0 TRANSITIONAL/EN" => 1,
2613     "HTML" => 1,
2614     }->{$pubid}) {
2615     $self->{document}->manakai_compat_mode ('quirks');
2616     } elsif ($pubid eq "-//W3C//DTD HTML 4.01 FRAMESET//EN" or
2617     $pubid eq "-//W3C//DTD HTML 4.01 TRANSITIONAL//EN") {
2618     if (defined $token->{system_identifier}) {
2619     $self->{document}->manakai_compat_mode ('quirks');
2620     } else {
2621     $self->{document}->manakai_compat_mode ('limited quirks');
2622 wakaba 1.3 }
2623 wakaba 1.18 } elsif ($pubid eq "-//W3C//DTD XHTML 1.0 Frameset//EN" or
2624     $pubid eq "-//W3C//DTD XHTML 1.0 Transitional//EN") {
2625     $self->{document}->manakai_compat_mode ('limited quirks');
2626     }
2627     }
2628     if (defined $token->{system_identifier}) {
2629     my $sysid = $token->{system_identifier};
2630     $sysid =~ tr/A-Z/a-z/;
2631     if ($sysid eq "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
2632     $self->{document}->manakai_compat_mode ('quirks');
2633     }
2634     }
2635    
2636     ## Go to the root element phase.
2637     $token = $self->_get_next_token;
2638     return;
2639     } elsif ({
2640     'start tag' => 1,
2641     'end tag' => 1,
2642     'end-of-file' => 1,
2643     }->{$token->{type}}) {
2644     $self->{parse_error}-> (type => 'no DOCTYPE');
2645     $self->{document}->manakai_compat_mode ('quirks');
2646     ## Go to the root element phase
2647     ## reprocess
2648     return;
2649     } elsif ($token->{type} eq 'character') {
2650     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D
2651     ## Ignore the token
2652     unless (length $token->{data}) {
2653     ## Stay in the phase
2654     $token = $self->_get_next_token;
2655     redo INITIAL;
2656 wakaba 1.3 }
2657     }
2658 wakaba 1.18
2659     $self->{parse_error}-> (type => 'no DOCTYPE');
2660     $self->{document}->manakai_compat_mode ('quirks');
2661     ## Go to the root element phase
2662     ## reprocess
2663     return;
2664     } elsif ($token->{type} eq 'comment') {
2665     my $comment = $self->{document}->create_comment ($token->{data});
2666     $self->{document}->append_child ($comment);
2667    
2668     ## Stay in the phase.
2669     $token = $self->_get_next_token;
2670     redo INITIAL;
2671     } else {
2672     die "$0: $token->{type}: Unknown token";
2673     }
2674     } # INITIAL
2675 wakaba 1.3 } # _tree_construction_initial
2676    
2677     sub _tree_construction_root_element ($) {
2678     my $self = shift;
2679    
2680     B: {
2681     if ($token->{type} eq 'DOCTYPE') {
2682     $self->{parse_error}-> (type => 'in html:#DOCTYPE');
2683     ## Ignore the token
2684     ## Stay in the phase
2685     $token = $self->_get_next_token;
2686     redo B;
2687     } elsif ($token->{type} eq 'comment') {
2688     my $comment = $self->{document}->create_comment ($token->{data});
2689     $self->{document}->append_child ($comment);
2690     ## Stay in the phase
2691     $token = $self->_get_next_token;
2692     redo B;
2693     } elsif ($token->{type} eq 'character') {
2694     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
2695     $self->{document}->manakai_append_text ($1);
2696     ## ISSUE: DOM3 Core does not allow Document > Text
2697     unless (length $token->{data}) {
2698     ## Stay in the phase
2699     $token = $self->_get_next_token;
2700     redo B;
2701     }
2702     }
2703     #
2704     } elsif ({
2705     'start tag' => 1,
2706     'end tag' => 1,
2707     'end-of-file' => 1,
2708     }->{$token->{type}}) {
2709     ## ISSUE: There is an issue in the spec
2710     #
2711     } else {
2712     die "$0: $token->{type}: Unknown token";
2713     }
2714     my $root_element;
2715     $root_element = $self->{document}->create_element_ns
2716     (q<http://www.w3.org/1999/xhtml>, [undef, 'html']);
2717    
2718     $self->{document}->append_child ($root_element);
2719     push @{$self->{open_elements}}, [$root_element, 'html'];
2720     #$phase = 'main';
2721     ## reprocess
2722     #redo B;
2723     return;
2724     } # B
2725     } # _tree_construction_root_element
2726    
2727     sub _reset_insertion_mode ($) {
2728     my $self = shift;
2729    
2730     ## Step 1
2731     my $last;
2732    
2733     ## Step 2
2734     my $i = -1;
2735     my $node = $self->{open_elements}->[$i];
2736    
2737     ## Step 3
2738     S3: {
2739     $last = 1 if $self->{open_elements}->[0]->[0] eq $node->[0];
2740     if (defined $self->{inner_html_node}) {
2741     if ($self->{inner_html_node}->[1] eq 'td' or
2742     $self->{inner_html_node}->[1] eq 'th') {
2743     #
2744     } else {
2745     $node = $self->{inner_html_node};
2746     }
2747     }
2748    
2749     ## Step 4..13
2750     my $new_mode = {
2751     select => 'in select',
2752     td => 'in cell',
2753     th => 'in cell',
2754     tr => 'in row',
2755     tbody => 'in table body',
2756     thead => 'in table head',
2757     tfoot => 'in table foot',
2758     caption => 'in caption',
2759     colgroup => 'in column group',
2760     table => 'in table',
2761     head => 'in body', # not in head!
2762     body => 'in body',
2763     frameset => 'in frameset',
2764     }->{$node->[1]};
2765     $self->{insertion_mode} = $new_mode and return if defined $new_mode;
2766    
2767     ## Step 14
2768     if ($node->[1] eq 'html') {
2769     unless (defined $self->{head_element}) {
2770     $self->{insertion_mode} = 'before head';
2771     } else {
2772     $self->{insertion_mode} = 'after head';
2773     }
2774     return;
2775     }
2776    
2777     ## Step 15
2778     $self->{insertion_mode} = 'in body' and return if $last;
2779    
2780     ## Step 16
2781     $i--;
2782     $node = $self->{open_elements}->[$i];
2783    
2784     ## Step 17
2785     redo S3;
2786     } # S3
2787     } # _reset_insertion_mode
2788    
2789     sub _tree_construction_main ($) {
2790     my $self = shift;
2791    
2792     my $phase = 'main';
2793 wakaba 1.1
2794     my $active_formatting_elements = [];
2795    
2796     my $reconstruct_active_formatting_elements = sub { # MUST
2797     my $insert = shift;
2798    
2799     ## Step 1
2800     return unless @$active_formatting_elements;
2801    
2802     ## Step 3
2803     my $i = -1;
2804     my $entry = $active_formatting_elements->[$i];
2805    
2806     ## Step 2
2807     return if $entry->[0] eq '#marker';
2808 wakaba 1.3 for (@{$self->{open_elements}}) {
2809 wakaba 1.1 if ($entry->[0] eq $_->[0]) {
2810     return;
2811     }
2812     }
2813    
2814     S4: {
2815     ## Step 4
2816     last S4 if $active_formatting_elements->[0]->[0] eq $entry->[0];
2817    
2818     ## Step 5
2819     $i--;
2820     $entry = $active_formatting_elements->[$i];
2821    
2822     ## Step 6
2823     if ($entry->[0] eq '#marker') {
2824     #
2825     } else {
2826     my $in_open_elements;
2827 wakaba 1.3 OE: for (@{$self->{open_elements}}) {
2828 wakaba 1.1 if ($entry->[0] eq $_->[0]) {
2829     $in_open_elements = 1;
2830     last OE;
2831     }
2832     }
2833     if ($in_open_elements) {
2834     #
2835     } else {
2836     redo S4;
2837     }
2838     }
2839    
2840     ## Step 7
2841     $i++;
2842     $entry = $active_formatting_elements->[$i];
2843     } # S4
2844    
2845     S7: {
2846     ## Step 8
2847     my $clone = [$entry->[0]->clone_node (0), $entry->[1]];
2848    
2849     ## Step 9
2850     $insert->($clone->[0]);
2851 wakaba 1.3 push @{$self->{open_elements}}, $clone;
2852 wakaba 1.1
2853     ## Step 10
2854 wakaba 1.3 $active_formatting_elements->[$i] = $self->{open_elements}->[-1];
2855 wakaba 1.1
2856     ## Step 11
2857     unless ($clone->[0] eq $active_formatting_elements->[-1]->[0]) {
2858     ## Step 7'
2859     $i++;
2860     $entry = $active_formatting_elements->[$i];
2861    
2862     redo S7;
2863     }
2864     } # S7
2865     }; # $reconstruct_active_formatting_elements
2866    
2867     my $clear_up_to_marker = sub {
2868     for (reverse 0..$#$active_formatting_elements) {
2869     if ($active_formatting_elements->[$_]->[0] eq '#marker') {
2870     splice @$active_formatting_elements, $_;
2871     return;
2872     }
2873     }
2874     }; # $clear_up_to_marker
2875    
2876     my $style_start_tag = sub {
2877     my $style_el;
2878     $style_el = $self->{document}->create_element_ns
2879     (q<http://www.w3.org/1999/xhtml>, [undef, 'style']);
2880    
2881 wakaba 1.6 for my $attr_name (keys %{ $token->{attributes}}) {
2882     $style_el->set_attribute_ns (undef, [undef, $attr_name],
2883     $token->{attributes} ->{$attr_name}->{value});
2884     }
2885    
2886 wakaba 1.3 ## $self->{insertion_mode} eq 'in head' and ... (always true)
2887     (($self->{insertion_mode} eq 'in head' and defined $self->{head_element})
2888     ? $self->{head_element} : $self->{open_elements}->[-1]->[0])
2889 wakaba 1.1 ->append_child ($style_el);
2890     $self->{content_model_flag} = 'CDATA';
2891 wakaba 1.13 delete $self->{escape}; # MUST
2892 wakaba 1.1
2893     my $text = '';
2894     $token = $self->_get_next_token;
2895     while ($token->{type} eq 'character') {
2896     $text .= $token->{data};
2897     $token = $self->_get_next_token;
2898     } # stop if non-character token or tokenizer stops tokenising
2899     if (length $text) {
2900     $style_el->manakai_append_text ($text);
2901     }
2902    
2903     $self->{content_model_flag} = 'PCDATA';
2904    
2905     if ($token->{type} eq 'end tag' and $token->{tag_name} eq 'style') {
2906     ## Ignore the token
2907     } else {
2908 wakaba 1.3 $self->{parse_error}-> (type => 'in CDATA:#'.$token->{type});
2909 wakaba 1.1 ## ISSUE: And ignore?
2910     }
2911     $token = $self->_get_next_token;
2912     }; # $style_start_tag
2913    
2914     my $script_start_tag = sub {
2915     my $script_el;
2916    
2917     $script_el = $self->{document}->create_element_ns
2918     (q<http://www.w3.org/1999/xhtml>, [undef, 'script']);
2919    
2920     for my $attr_name (keys %{ $token->{attributes}}) {
2921     $script_el->set_attribute_ns (undef, [undef, $attr_name],
2922     $token->{attributes} ->{$attr_name}->{value});
2923     }
2924    
2925     ## TODO: mark as "parser-inserted"
2926    
2927     $self->{content_model_flag} = 'CDATA';
2928 wakaba 1.13 delete $self->{escape}; # MUST
2929 wakaba 1.1
2930     my $text = '';
2931     $token = $self->_get_next_token;
2932     while ($token->{type} eq 'character') {
2933     $text .= $token->{data};
2934     $token = $self->_get_next_token;
2935     } # stop if non-character token or tokenizer stops tokenising
2936     if (length $text) {
2937     $script_el->manakai_append_text ($text);
2938     }
2939    
2940     $self->{content_model_flag} = 'PCDATA';
2941    
2942     if ($token->{type} eq 'end tag' and
2943     $token->{tag_name} eq 'script') {
2944     ## Ignore the token
2945     } else {
2946 wakaba 1.3 $self->{parse_error}-> (type => 'in CDATA:#'.$token->{type});
2947 wakaba 1.1 ## ISSUE: And ignore?
2948     ## TODO: mark as "already executed"
2949     }
2950    
2951 wakaba 1.3 if (defined $self->{inner_html_node}) {
2952     ## TODO: mark as "already executed"
2953     } else {
2954 wakaba 1.1 ## TODO: $old_insertion_point = current insertion point
2955     ## TODO: insertion point = just before the next input character
2956    
2957 wakaba 1.3 (($self->{insertion_mode} eq 'in head' and defined $self->{head_element})
2958     ? $self->{head_element} : $self->{open_elements}->[-1]->[0])->append_child ($script_el);
2959 wakaba 1.1
2960     ## TODO: insertion point = $old_insertion_point (might be "undefined")
2961    
2962     ## TODO: if there is a script that will execute as soon as the parser resume, then...
2963     }
2964    
2965     $token = $self->_get_next_token;
2966     }; # $script_start_tag
2967    
2968     my $formatting_end_tag = sub {
2969     my $tag_name = shift;
2970    
2971     FET: {
2972     ## Step 1
2973     my $formatting_element;
2974     my $formatting_element_i_in_active;
2975     AFE: for (reverse 0..$#$active_formatting_elements) {
2976     if ($active_formatting_elements->[$_]->[1] eq $tag_name) {
2977     $formatting_element = $active_formatting_elements->[$_];
2978     $formatting_element_i_in_active = $_;
2979     last AFE;
2980     } elsif ($active_formatting_elements->[$_]->[0] eq '#marker') {
2981     last AFE;
2982     }
2983     } # AFE
2984     unless (defined $formatting_element) {
2985 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$tag_name);
2986 wakaba 1.1 ## Ignore the token
2987     $token = $self->_get_next_token;
2988     return;
2989     }
2990     ## has an element in scope
2991     my $in_scope = 1;
2992     my $formatting_element_i_in_open;
2993 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2994     my $node = $self->{open_elements}->[$_];
2995 wakaba 1.1 if ($node->[0] eq $formatting_element->[0]) {
2996     if ($in_scope) {
2997     $formatting_element_i_in_open = $_;
2998     last INSCOPE;
2999     } else { # in open elements but not in scope
3000 wakaba 1.4 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
3001 wakaba 1.1 ## Ignore the token
3002     $token = $self->_get_next_token;
3003     return;
3004     }
3005     } elsif ({
3006     table => 1, caption => 1, td => 1, th => 1,
3007     button => 1, marquee => 1, object => 1, html => 1,
3008     }->{$node->[1]}) {
3009     $in_scope = 0;
3010     }
3011     } # INSCOPE
3012     unless (defined $formatting_element_i_in_open) {
3013 wakaba 1.4 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
3014 wakaba 1.1 pop @$active_formatting_elements; # $formatting_element
3015     $token = $self->_get_next_token; ## TODO: ok?
3016     return;
3017     }
3018 wakaba 1.3 if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
3019 wakaba 1.4 $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3020 wakaba 1.1 }
3021    
3022     ## Step 2
3023     my $furthest_block;
3024     my $furthest_block_i_in_open;
3025 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3026     my $node = $self->{open_elements}->[$_];
3027 wakaba 1.1 if (not $formatting_category->{$node->[1]} and
3028     #not $phrasing_category->{$node->[1]} and
3029     ($special_category->{$node->[1]} or
3030     $scoping_category->{$node->[1]})) {
3031     $furthest_block = $node;
3032     $furthest_block_i_in_open = $_;
3033     } elsif ($node->[0] eq $formatting_element->[0]) {
3034     last OE;
3035     }
3036     } # OE
3037    
3038     ## Step 3
3039     unless (defined $furthest_block) { # MUST
3040 wakaba 1.3 splice @{$self->{open_elements}}, $formatting_element_i_in_open;
3041 wakaba 1.1 splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
3042     $token = $self->_get_next_token;
3043     return;
3044     }
3045    
3046     ## Step 4
3047 wakaba 1.3 my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1];
3048 wakaba 1.1
3049     ## Step 5
3050     my $furthest_block_parent = $furthest_block->[0]->parent_node;
3051     if (defined $furthest_block_parent) {
3052     $furthest_block_parent->remove_child ($furthest_block->[0]);
3053     }
3054    
3055     ## Step 6
3056     my $bookmark_prev_el
3057     = $active_formatting_elements->[$formatting_element_i_in_active - 1]
3058     ->[0];
3059    
3060     ## Step 7
3061     my $node = $furthest_block;
3062     my $node_i_in_open = $furthest_block_i_in_open;
3063     my $last_node = $furthest_block;
3064     S7: {
3065     ## Step 1
3066     $node_i_in_open--;
3067 wakaba 1.3 $node = $self->{open_elements}->[$node_i_in_open];
3068 wakaba 1.1
3069     ## Step 2
3070     my $node_i_in_active;
3071     S7S2: {
3072     for (reverse 0..$#$active_formatting_elements) {
3073     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
3074     $node_i_in_active = $_;
3075     last S7S2;
3076     }
3077     }
3078 wakaba 1.3 splice @{$self->{open_elements}}, $node_i_in_open, 1;
3079 wakaba 1.1 redo S7;
3080     } # S7S2
3081    
3082     ## Step 3
3083     last S7 if $node->[0] eq $formatting_element->[0];
3084    
3085     ## Step 4
3086     if ($last_node->[0] eq $furthest_block->[0]) {
3087     $bookmark_prev_el = $node->[0];
3088     }
3089    
3090     ## Step 5
3091     if ($node->[0]->has_child_nodes ()) {
3092     my $clone = [$node->[0]->clone_node (0), $node->[1]];
3093     $active_formatting_elements->[$node_i_in_active] = $clone;
3094 wakaba 1.3 $self->{open_elements}->[$node_i_in_open] = $clone;
3095 wakaba 1.1 $node = $clone;
3096     }
3097    
3098     ## Step 6
3099     $node->[0]->append_child ($last_node->[0]);
3100    
3101     ## Step 7
3102     $last_node = $node;
3103    
3104     ## Step 8
3105     redo S7;
3106     } # S7
3107    
3108     ## Step 8
3109     $common_ancestor_node->[0]->append_child ($last_node->[0]);
3110    
3111     ## Step 9
3112     my $clone = [$formatting_element->[0]->clone_node (0),
3113     $formatting_element->[1]];
3114    
3115     ## Step 10
3116     my @cn = @{$furthest_block->[0]->child_nodes};
3117     $clone->[0]->append_child ($_) for @cn;
3118    
3119     ## Step 11
3120     $furthest_block->[0]->append_child ($clone->[0]);
3121    
3122     ## Step 12
3123     my $i;
3124     AFE: for (reverse 0..$#$active_formatting_elements) {
3125     if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
3126     splice @$active_formatting_elements, $_, 1;
3127     $i-- and last AFE if defined $i;
3128     } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
3129     $i = $_;
3130     }
3131     } # AFE
3132     splice @$active_formatting_elements, $i + 1, 0, $clone;
3133    
3134     ## Step 13
3135     undef $i;
3136 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3137     if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
3138     splice @{$self->{open_elements}}, $_, 1;
3139 wakaba 1.1 $i-- and last OE if defined $i;
3140 wakaba 1.3 } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
3141 wakaba 1.1 $i = $_;
3142     }
3143     } # OE
3144 wakaba 1.3 splice @{$self->{open_elements}}, $i + 1, 1, $clone;
3145 wakaba 1.1
3146     ## Step 14
3147     redo FET;
3148     } # FET
3149     }; # $formatting_end_tag
3150    
3151     my $insert_to_current = sub {
3152 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child (shift);
3153 wakaba 1.1 }; # $insert_to_current
3154    
3155     my $insert_to_foster = sub {
3156     my $child = shift;
3157     if ({
3158     table => 1, tbody => 1, tfoot => 1,
3159     thead => 1, tr => 1,
3160 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3161 wakaba 1.1 # MUST
3162     my $foster_parent_element;
3163     my $next_sibling;
3164 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3165     if ($self->{open_elements}->[$_]->[1] eq 'table') {
3166     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
3167 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
3168     $foster_parent_element = $parent;
3169 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
3170 wakaba 1.1 } else {
3171     $foster_parent_element
3172 wakaba 1.3 = $self->{open_elements}->[$_ - 1]->[0];
3173 wakaba 1.1 }
3174     last OE;
3175     }
3176     } # OE
3177 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0]
3178 wakaba 1.1 unless defined $foster_parent_element;
3179     $foster_parent_element->insert_before
3180     ($child, $next_sibling);
3181     } else {
3182 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($child);
3183 wakaba 1.1 }
3184     }; # $insert_to_foster
3185    
3186     my $in_body = sub {
3187     my $insert = shift;
3188     if ($token->{type} eq 'start tag') {
3189     if ($token->{tag_name} eq 'script') {
3190     $script_start_tag->();
3191     return;
3192     } elsif ($token->{tag_name} eq 'style') {
3193     $style_start_tag->();
3194     return;
3195     } elsif ({
3196     base => 1, link => 1, meta => 1,
3197     }->{$token->{tag_name}}) {
3198 wakaba 1.3 $self->{parse_error}-> (type => 'in body:'.$token->{tag_name});
3199 wakaba 1.1 ## NOTE: This is an "as if in head" code clone
3200     my $el;
3201    
3202     $el = $self->{document}->create_element_ns
3203     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3204    
3205     for my $attr_name (keys %{ $token->{attributes}}) {
3206     $el->set_attribute_ns (undef, [undef, $attr_name],
3207     $token->{attributes} ->{$attr_name}->{value});
3208     }
3209    
3210 wakaba 1.3 if (defined $self->{head_element}) {
3211     $self->{head_element}->append_child ($el);
3212 wakaba 1.1 } else {
3213     $insert->($el);
3214     }
3215    
3216     $token = $self->_get_next_token;
3217     return;
3218     } elsif ($token->{tag_name} eq 'title') {
3219 wakaba 1.3 $self->{parse_error}-> (type => 'in body:title');
3220 wakaba 1.1 ## NOTE: There is an "as if in head" code clone
3221     my $title_el;
3222    
3223     $title_el = $self->{document}->create_element_ns
3224     (q<http://www.w3.org/1999/xhtml>, [undef, 'title']);
3225    
3226     for my $attr_name (keys %{ $token->{attributes}}) {
3227     $title_el->set_attribute_ns (undef, [undef, $attr_name],
3228     $token->{attributes} ->{$attr_name}->{value});
3229     }
3230    
3231 wakaba 1.3 (defined $self->{head_element} ? $self->{head_element} : $self->{open_elements}->[-1]->[0])
3232 wakaba 1.1 ->append_child ($title_el);
3233     $self->{content_model_flag} = 'RCDATA';
3234 wakaba 1.13 delete $self->{escape}; # MUST
3235 wakaba 1.1
3236     my $text = '';
3237     $token = $self->_get_next_token;
3238     while ($token->{type} eq 'character') {
3239     $text .= $token->{data};
3240     $token = $self->_get_next_token;
3241     }
3242     if (length $text) {
3243     $title_el->manakai_append_text ($text);
3244     }
3245    
3246     $self->{content_model_flag} = 'PCDATA';
3247    
3248     if ($token->{type} eq 'end tag' and
3249     $token->{tag_name} eq 'title') {
3250     ## Ignore the token
3251     } else {
3252 wakaba 1.3 $self->{parse_error}-> (type => 'in RCDATA:#'.$token->{type});
3253 wakaba 1.1 ## ISSUE: And ignore?
3254     }
3255     $token = $self->_get_next_token;
3256     return;
3257     } elsif ($token->{tag_name} eq 'body') {
3258 wakaba 1.3 $self->{parse_error}-> (type => 'in body:body');
3259 wakaba 1.1
3260 wakaba 1.3 if (@{$self->{open_elements}} == 1 or
3261     $self->{open_elements}->[1]->[1] ne 'body') {
3262 wakaba 1.1 ## Ignore the token
3263     } else {
3264 wakaba 1.3 my $body_el = $self->{open_elements}->[1]->[0];
3265 wakaba 1.1 for my $attr_name (keys %{$token->{attributes}}) {
3266     unless ($body_el->has_attribute_ns (undef, $attr_name)) {
3267     $body_el->set_attribute_ns
3268     (undef, [undef, $attr_name],
3269     $token->{attributes}->{$attr_name}->{value});
3270     }
3271     }
3272     }
3273     $token = $self->_get_next_token;
3274     return;
3275     } elsif ({
3276     address => 1, blockquote => 1, center => 1, dir => 1,
3277     div => 1, dl => 1, fieldset => 1, listing => 1,
3278     menu => 1, ol => 1, p => 1, ul => 1,
3279     pre => 1,
3280     }->{$token->{tag_name}}) {
3281     ## has a p element in scope
3282 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
3283 wakaba 1.1 if ($_->[1] eq 'p') {
3284     unshift @{$self->{token}}, $token;
3285     $token = {type => 'end tag', tag_name => 'p'};
3286     return;
3287     } elsif ({
3288     table => 1, caption => 1, td => 1, th => 1,
3289     button => 1, marquee => 1, object => 1, html => 1,
3290     }->{$_->[1]}) {
3291     last INSCOPE;
3292     }
3293     } # INSCOPE
3294    
3295    
3296     {
3297     my $el;
3298    
3299     $el = $self->{document}->create_element_ns
3300     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3301    
3302     for my $attr_name (keys %{ $token->{attributes}}) {
3303     $el->set_attribute_ns (undef, [undef, $attr_name],
3304     $token->{attributes} ->{$attr_name}->{value});
3305     }
3306    
3307     $insert->($el);
3308 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3309 wakaba 1.1 }
3310    
3311     if ($token->{tag_name} eq 'pre') {
3312     $token = $self->_get_next_token;
3313     if ($token->{type} eq 'character') {
3314     $token->{data} =~ s/^\x0A//;
3315     unless (length $token->{data}) {
3316     $token = $self->_get_next_token;
3317     }
3318     }
3319     } else {
3320     $token = $self->_get_next_token;
3321     }
3322     return;
3323     } elsif ($token->{tag_name} eq 'form') {
3324 wakaba 1.3 if (defined $self->{form_element}) {
3325     $self->{parse_error}-> (type => 'in form:form');
3326 wakaba 1.1 ## Ignore the token
3327 wakaba 1.7 $token = $self->_get_next_token;
3328     return;
3329 wakaba 1.1 } else {
3330     ## has a p element in scope
3331 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
3332 wakaba 1.1 if ($_->[1] eq 'p') {
3333     unshift @{$self->{token}}, $token;
3334     $token = {type => 'end tag', tag_name => 'p'};
3335     return;
3336     } elsif ({
3337     table => 1, caption => 1, td => 1, th => 1,
3338     button => 1, marquee => 1, object => 1, html => 1,
3339     }->{$_->[1]}) {
3340     last INSCOPE;
3341     }
3342     } # INSCOPE
3343    
3344    
3345     {
3346     my $el;
3347    
3348     $el = $self->{document}->create_element_ns
3349     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3350    
3351     for my $attr_name (keys %{ $token->{attributes}}) {
3352     $el->set_attribute_ns (undef, [undef, $attr_name],
3353     $token->{attributes} ->{$attr_name}->{value});
3354     }
3355    
3356     $insert->($el);
3357 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3358 wakaba 1.1 }
3359    
3360 wakaba 1.3 $self->{form_element} = $self->{open_elements}->[-1]->[0];
3361 wakaba 1.1 $token = $self->_get_next_token;
3362     return;
3363     }
3364     } elsif ($token->{tag_name} eq 'li') {
3365     ## has a p element in scope
3366 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
3367 wakaba 1.1 if ($_->[1] eq 'p') {
3368     unshift @{$self->{token}}, $token;
3369     $token = {type => 'end tag', tag_name => 'p'};
3370     return;
3371     } elsif ({
3372     table => 1, caption => 1, td => 1, th => 1,
3373     button => 1, marquee => 1, object => 1, html => 1,
3374     }->{$_->[1]}) {
3375     last INSCOPE;
3376     }
3377     } # INSCOPE
3378    
3379     ## Step 1
3380     my $i = -1;
3381 wakaba 1.3 my $node = $self->{open_elements}->[$i];
3382 wakaba 1.1 LI: {
3383     ## Step 2
3384     if ($node->[1] eq 'li') {
3385 wakaba 1.8 if ($i != -1) {
3386     $self->{parse_error}-> (type => 'end tag missing:'.
3387     $self->{open_elements}->[-1]->[1]);
3388     ## TODO: test
3389     }
3390 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3391 wakaba 1.1 last LI;
3392     }
3393    
3394     ## Step 3
3395     if (not $formatting_category->{$node->[1]} and
3396     #not $phrasing_category->{$node->[1]} and
3397     ($special_category->{$node->[1]} or
3398     $scoping_category->{$node->[1]}) and
3399     $node->[1] ne 'address' and $node->[1] ne 'div') {
3400     last LI;
3401     }
3402    
3403     ## Step 4
3404     $i--;
3405 wakaba 1.3 $node = $self->{open_elements}->[$i];
3406 wakaba 1.1 redo LI;
3407     } # LI
3408    
3409    
3410     {
3411     my $el;
3412    
3413     $el = $self->{document}->create_element_ns
3414     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3415    
3416     for my $attr_name (keys %{ $token->{attributes}}) {
3417     $el->set_attribute_ns (undef, [undef, $attr_name],
3418     $token->{attributes} ->{$attr_name}->{value});
3419     }
3420    
3421     $insert->($el);
3422 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3423 wakaba 1.1 }
3424    
3425     $token = $self->_get_next_token;
3426     return;
3427     } elsif ($token->{tag_name} eq 'dd' or $token->{tag_name} eq 'dt') {
3428     ## has a p element in scope
3429 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
3430 wakaba 1.1 if ($_->[1] eq 'p') {
3431     unshift @{$self->{token}}, $token;
3432     $token = {type => 'end tag', tag_name => 'p'};
3433     return;
3434     } elsif ({
3435     table => 1, caption => 1, td => 1, th => 1,
3436     button => 1, marquee => 1, object => 1, html => 1,
3437     }->{$_->[1]}) {
3438     last INSCOPE;
3439     }
3440     } # INSCOPE
3441    
3442     ## Step 1
3443     my $i = -1;
3444 wakaba 1.3 my $node = $self->{open_elements}->[$i];
3445 wakaba 1.1 LI: {
3446     ## Step 2
3447     if ($node->[1] eq 'dt' or $node->[1] eq 'dd') {
3448 wakaba 1.8 if ($i != -1) {
3449     $self->{parse_error}-> (type => 'end tag missing:'.
3450     $self->{open_elements}->[-1]->[1]);
3451     ## TODO: test
3452     }
3453 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3454 wakaba 1.1 last LI;
3455     }
3456    
3457     ## Step 3
3458     if (not $formatting_category->{$node->[1]} and
3459     #not $phrasing_category->{$node->[1]} and
3460     ($special_category->{$node->[1]} or
3461     $scoping_category->{$node->[1]}) and
3462     $node->[1] ne 'address' and $node->[1] ne 'div') {
3463     last LI;
3464     }
3465    
3466     ## Step 4
3467     $i--;
3468 wakaba 1.3 $node = $self->{open_elements}->[$i];
3469 wakaba 1.1 redo LI;
3470     } # LI
3471    
3472    
3473     {
3474     my $el;
3475    
3476     $el = $self->{document}->create_element_ns
3477     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3478    
3479     for my $attr_name (keys %{ $token->{attributes}}) {
3480     $el->set_attribute_ns (undef, [undef, $attr_name],
3481     $token->{attributes} ->{$attr_name}->{value});
3482     }
3483    
3484     $insert->($el);
3485 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3486 wakaba 1.1 }
3487    
3488     $token = $self->_get_next_token;
3489     return;
3490     } elsif ($token->{tag_name} eq 'plaintext') {
3491     ## has a p element in scope
3492 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
3493 wakaba 1.1 if ($_->[1] eq 'p') {
3494     unshift @{$self->{token}}, $token;
3495     $token = {type => 'end tag', tag_name => 'p'};
3496     return;
3497     } elsif ({
3498     table => 1, caption => 1, td => 1, th => 1,
3499     button => 1, marquee => 1, object => 1, html => 1,
3500     }->{$_->[1]}) {
3501     last INSCOPE;
3502     }
3503     } # INSCOPE
3504    
3505    
3506     {
3507     my $el;
3508    
3509     $el = $self->{document}->create_element_ns
3510     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3511    
3512     for my $attr_name (keys %{ $token->{attributes}}) {
3513     $el->set_attribute_ns (undef, [undef, $attr_name],
3514     $token->{attributes} ->{$attr_name}->{value});
3515     }
3516    
3517     $insert->($el);
3518 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3519 wakaba 1.1 }
3520    
3521    
3522     $self->{content_model_flag} = 'PLAINTEXT';
3523    
3524     $token = $self->_get_next_token;
3525     return;
3526     } elsif ({
3527     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
3528     }->{$token->{tag_name}}) {
3529     ## has a p element in scope
3530 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3531     my $node = $self->{open_elements}->[$_];
3532 wakaba 1.1 if ($node->[1] eq 'p') {
3533     unshift @{$self->{token}}, $token;
3534     $token = {type => 'end tag', tag_name => 'p'};
3535     return;
3536     } elsif ({
3537     table => 1, caption => 1, td => 1, th => 1,
3538     button => 1, marquee => 1, object => 1, html => 1,
3539     }->{$node->[1]}) {
3540     last INSCOPE;
3541     }
3542     } # INSCOPE
3543    
3544     ## has an element in scope
3545     my $i;
3546 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3547     my $node = $self->{open_elements}->[$_];
3548 wakaba 1.1 if ({
3549     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
3550     }->{$node->[1]}) {
3551     $i = $_;
3552     last INSCOPE;
3553     } elsif ({
3554     table => 1, caption => 1, td => 1, th => 1,
3555     button => 1, marquee => 1, object => 1, html => 1,
3556     }->{$node->[1]}) {
3557     last INSCOPE;
3558     }
3559     } # INSCOPE
3560    
3561     if (defined $i) {
3562 wakaba 1.3 $self->{parse_error}-> (type => 'in hn:hn');
3563     splice @{$self->{open_elements}}, $i;
3564 wakaba 1.1 }
3565    
3566    
3567     {
3568     my $el;
3569    
3570     $el = $self->{document}->create_element_ns
3571     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3572    
3573     for my $attr_name (keys %{ $token->{attributes}}) {
3574     $el->set_attribute_ns (undef, [undef, $attr_name],
3575     $token->{attributes} ->{$attr_name}->{value});
3576     }
3577    
3578     $insert->($el);
3579 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3580 wakaba 1.1 }
3581    
3582    
3583     $token = $self->_get_next_token;
3584     return;
3585     } elsif ($token->{tag_name} eq 'a') {
3586     AFE: for my $i (reverse 0..$#$active_formatting_elements) {
3587     my $node = $active_formatting_elements->[$i];
3588     if ($node->[1] eq 'a') {
3589 wakaba 1.3 $self->{parse_error}-> (type => 'in a:a');
3590 wakaba 1.1
3591     unshift @{$self->{token}}, $token;
3592     $token = {type => 'end tag', tag_name => 'a'};
3593     $formatting_end_tag->($token->{tag_name});
3594    
3595     AFE2: for (reverse 0..$#$active_formatting_elements) {
3596     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
3597     splice @$active_formatting_elements, $_, 1;
3598     last AFE2;
3599     }
3600     } # AFE2
3601 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3602     if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
3603     splice @{$self->{open_elements}}, $_, 1;
3604 wakaba 1.1 last OE;
3605     }
3606     } # OE
3607     last AFE;
3608     } elsif ($node->[0] eq '#marker') {
3609     last AFE;
3610     }
3611     } # AFE
3612    
3613     $reconstruct_active_formatting_elements->($insert_to_current);
3614    
3615    
3616     {
3617     my $el;
3618    
3619     $el = $self->{document}->create_element_ns
3620     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3621    
3622     for my $attr_name (keys %{ $token->{attributes}}) {
3623     $el->set_attribute_ns (undef, [undef, $attr_name],
3624     $token->{attributes} ->{$attr_name}->{value});
3625     }
3626    
3627     $insert->($el);
3628 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3629 wakaba 1.1 }
3630    
3631 wakaba 1.3 push @$active_formatting_elements, $self->{open_elements}->[-1];
3632 wakaba 1.1
3633     $token = $self->_get_next_token;
3634     return;
3635     } elsif ({
3636     b => 1, big => 1, em => 1, font => 1, i => 1,
3637 wakaba 1.19 s => 1, small => 1, strile => 1,
3638 wakaba 1.1 strong => 1, tt => 1, u => 1,
3639     }->{$token->{tag_name}}) {
3640     $reconstruct_active_formatting_elements->($insert_to_current);
3641    
3642    
3643     {
3644     my $el;
3645    
3646     $el = $self->{document}->create_element_ns
3647     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3648    
3649     for my $attr_name (keys %{ $token->{attributes}}) {
3650     $el->set_attribute_ns (undef, [undef, $attr_name],
3651     $token->{attributes} ->{$attr_name}->{value});
3652     }
3653    
3654     $insert->($el);
3655 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3656 wakaba 1.1 }
3657    
3658 wakaba 1.3 push @$active_formatting_elements, $self->{open_elements}->[-1];
3659 wakaba 1.1
3660     $token = $self->_get_next_token;
3661     return;
3662 wakaba 1.19 } elsif ($token->{tag_name} eq 'nobr') {
3663     $reconstruct_active_formatting_elements->($insert_to_current);
3664    
3665     ## has a |nobr| element in scope
3666     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3667     my $node = $self->{open_elements}->[$_];
3668     if ($node->[1] eq 'nobr') {
3669     unshift @{$self->{token}}, $token;
3670     $token = {type => 'end tag', tag_name => 'nobr'};
3671     return;
3672     } elsif ({
3673     table => 1, caption => 1, td => 1, th => 1,
3674     button => 1, marquee => 1, object => 1, html => 1,
3675     }->{$node->[1]}) {
3676     last INSCOPE;
3677     }
3678     } # INSCOPE
3679    
3680    
3681     {
3682     my $el;
3683    
3684     $el = $self->{document}->create_element_ns
3685     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3686    
3687     for my $attr_name (keys %{ $token->{attributes}}) {
3688     $el->set_attribute_ns (undef, [undef, $attr_name],
3689     $token->{attributes} ->{$attr_name}->{value});
3690     }
3691    
3692     $insert->($el);
3693     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3694     }
3695    
3696     push @$active_formatting_elements, $self->{open_elements}->[-1];
3697    
3698     $token = $self->_get_next_token;
3699     return;
3700 wakaba 1.1 } elsif ($token->{tag_name} eq 'button') {
3701     ## has a button element in scope
3702 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3703     my $node = $self->{open_elements}->[$_];
3704 wakaba 1.1 if ($node->[1] eq 'button') {
3705 wakaba 1.3 $self->{parse_error}-> (type => 'in button:button');
3706 wakaba 1.1 unshift @{$self->{token}}, $token;
3707     $token = {type => 'end tag', tag_name => 'button'};
3708     return;
3709     } elsif ({
3710     table => 1, caption => 1, td => 1, th => 1,
3711     button => 1, marquee => 1, object => 1, html => 1,
3712     }->{$node->[1]}) {
3713     last INSCOPE;
3714     }
3715     } # INSCOPE
3716    
3717     $reconstruct_active_formatting_elements->($insert_to_current);
3718    
3719    
3720     {
3721     my $el;
3722    
3723     $el = $self->{document}->create_element_ns
3724     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3725    
3726     for my $attr_name (keys %{ $token->{attributes}}) {
3727     $el->set_attribute_ns (undef, [undef, $attr_name],
3728     $token->{attributes} ->{$attr_name}->{value});
3729     }
3730    
3731     $insert->($el);
3732 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3733 wakaba 1.1 }
3734    
3735     push @$active_formatting_elements, ['#marker', ''];
3736    
3737     $token = $self->_get_next_token;
3738     return;
3739     } elsif ($token->{tag_name} eq 'marquee' or
3740     $token->{tag_name} eq 'object') {
3741     $reconstruct_active_formatting_elements->($insert_to_current);
3742    
3743    
3744     {
3745     my $el;
3746    
3747     $el = $self->{document}->create_element_ns
3748     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3749    
3750     for my $attr_name (keys %{ $token->{attributes}}) {
3751     $el->set_attribute_ns (undef, [undef, $attr_name],
3752     $token->{attributes} ->{$attr_name}->{value});
3753     }
3754    
3755     $insert->($el);
3756 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3757 wakaba 1.1 }
3758    
3759     push @$active_formatting_elements, ['#marker', ''];
3760    
3761     $token = $self->_get_next_token;
3762     return;
3763     } elsif ($token->{tag_name} eq 'xmp') {
3764     $reconstruct_active_formatting_elements->($insert_to_current);
3765    
3766    
3767     {
3768     my $el;
3769    
3770     $el = $self->{document}->create_element_ns
3771     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3772    
3773     for my $attr_name (keys %{ $token->{attributes}}) {
3774     $el->set_attribute_ns (undef, [undef, $attr_name],
3775     $token->{attributes} ->{$attr_name}->{value});
3776     }
3777    
3778     $insert->($el);
3779 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3780 wakaba 1.1 }
3781    
3782    
3783     $self->{content_model_flag} = 'CDATA';
3784 wakaba 1.13 delete $self->{escape}; # MUST
3785 wakaba 1.1
3786     $token = $self->_get_next_token;
3787     return;
3788     } elsif ($token->{tag_name} eq 'table') {
3789     ## has a p element in scope
3790 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
3791 wakaba 1.1 if ($_->[1] eq 'p') {
3792     unshift @{$self->{token}}, $token;
3793     $token = {type => 'end tag', tag_name => 'p'};
3794     return;
3795     } elsif ({
3796     table => 1, caption => 1, td => 1, th => 1,
3797     button => 1, marquee => 1, object => 1, html => 1,
3798     }->{$_->[1]}) {
3799     last INSCOPE;
3800     }
3801     } # INSCOPE
3802    
3803    
3804     {
3805     my $el;
3806    
3807     $el = $self->{document}->create_element_ns
3808     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3809    
3810     for my $attr_name (keys %{ $token->{attributes}}) {
3811     $el->set_attribute_ns (undef, [undef, $attr_name],
3812     $token->{attributes} ->{$attr_name}->{value});
3813     }
3814    
3815     $insert->($el);
3816 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3817 wakaba 1.1 }
3818    
3819    
3820 wakaba 1.3 $self->{insertion_mode} = 'in table';
3821 wakaba 1.1
3822     $token = $self->_get_next_token;
3823     return;
3824     } elsif ({
3825     area => 1, basefont => 1, bgsound => 1, br => 1,
3826     embed => 1, img => 1, param => 1, spacer => 1, wbr => 1,
3827     image => 1,
3828     }->{$token->{tag_name}}) {
3829     if ($token->{tag_name} eq 'image') {
3830 wakaba 1.3 $self->{parse_error}-> (type => 'image');
3831 wakaba 1.1 $token->{tag_name} = 'img';
3832     }
3833    
3834     $reconstruct_active_formatting_elements->($insert_to_current);
3835    
3836    
3837     {
3838     my $el;
3839    
3840     $el = $self->{document}->create_element_ns
3841     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3842    
3843     for my $attr_name (keys %{ $token->{attributes}}) {
3844     $el->set_attribute_ns (undef, [undef, $attr_name],
3845     $token->{attributes} ->{$attr_name}->{value});
3846     }
3847    
3848     $insert->($el);
3849 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3850 wakaba 1.1 }
3851    
3852 wakaba 1.3 pop @{$self->{open_elements}};
3853 wakaba 1.1
3854     $token = $self->_get_next_token;
3855     return;
3856     } elsif ($token->{tag_name} eq 'hr') {
3857     ## has a p element in scope
3858 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
3859 wakaba 1.1 if ($_->[1] eq 'p') {
3860     unshift @{$self->{token}}, $token;
3861     $token = {type => 'end tag', tag_name => 'p'};
3862     return;
3863     } elsif ({
3864     table => 1, caption => 1, td => 1, th => 1,
3865     button => 1, marquee => 1, object => 1, html => 1,
3866     }->{$_->[1]}) {
3867     last INSCOPE;
3868     }
3869     } # INSCOPE
3870    
3871    
3872     {
3873     my $el;
3874    
3875     $el = $self->{document}->create_element_ns
3876     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3877    
3878     for my $attr_name (keys %{ $token->{attributes}}) {
3879     $el->set_attribute_ns (undef, [undef, $attr_name],
3880     $token->{attributes} ->{$attr_name}->{value});
3881     }
3882    
3883     $insert->($el);
3884 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3885 wakaba 1.1 }
3886    
3887 wakaba 1.3 pop @{$self->{open_elements}};
3888 wakaba 1.1
3889     $token = $self->_get_next_token;
3890     return;
3891     } elsif ($token->{tag_name} eq 'input') {
3892     $reconstruct_active_formatting_elements->($insert_to_current);
3893    
3894    
3895     {
3896     my $el;
3897    
3898     $el = $self->{document}->create_element_ns
3899     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3900    
3901     for my $attr_name (keys %{ $token->{attributes}}) {
3902     $el->set_attribute_ns (undef, [undef, $attr_name],
3903     $token->{attributes} ->{$attr_name}->{value});
3904     }
3905    
3906     $insert->($el);
3907 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3908 wakaba 1.1 }
3909    
3910 wakaba 1.3 ## TODO: associate with $self->{form_element} if defined
3911     pop @{$self->{open_elements}};
3912 wakaba 1.1
3913     $token = $self->_get_next_token;
3914     return;
3915     } elsif ($token->{tag_name} eq 'isindex') {
3916 wakaba 1.3 $self->{parse_error}-> (type => 'isindex');
3917 wakaba 1.1
3918 wakaba 1.3 if (defined $self->{form_element}) {
3919 wakaba 1.1 ## Ignore the token
3920     $token = $self->_get_next_token;
3921     return;
3922     } else {
3923     my $at = $token->{attributes};
3924 wakaba 1.22 my $form_attrs;
3925     $form_attrs->{action} = $at->{action} if $at->{action};
3926     my $prompt_attr = $at->{prompt};
3927 wakaba 1.1 $at->{name} = {name => 'name', value => 'isindex'};
3928 wakaba 1.22 delete $at->{action};
3929     delete $at->{prompt};
3930 wakaba 1.1 my @tokens = (
3931 wakaba 1.22 {type => 'start tag', tag_name => 'form',
3932     attributes => $form_attrs},
3933 wakaba 1.1 {type => 'start tag', tag_name => 'hr'},
3934     {type => 'start tag', tag_name => 'p'},
3935     {type => 'start tag', tag_name => 'label'},
3936 wakaba 1.22 );
3937     if ($prompt_attr) {
3938     push @tokens, {type => 'character', data => $prompt_attr->{value}};
3939     } else {
3940     push @tokens, {type => 'character',
3941     data => 'This is a searchable index. Insert your search keywords here: '}; # SHOULD
3942     ## TODO: make this configurable
3943     }
3944     push @tokens,
3945 wakaba 1.1 {type => 'start tag', tag_name => 'input', attributes => $at},
3946     #{type => 'character', data => ''}, # SHOULD
3947     {type => 'end tag', tag_name => 'label'},
3948     {type => 'end tag', tag_name => 'p'},
3949     {type => 'start tag', tag_name => 'hr'},
3950 wakaba 1.22 {type => 'end tag', tag_name => 'form'};
3951 wakaba 1.1 $token = shift @tokens;
3952     unshift @{$self->{token}}, (@tokens);
3953     return;
3954     }
3955     } elsif ({
3956     textarea => 1,
3957 wakaba 1.5 iframe => 1,
3958 wakaba 1.1 noembed => 1,
3959     noframes => 1,
3960     noscript => 0, ## TODO: 1 if scripting is enabled
3961     }->{$token->{tag_name}}) {
3962     my $tag_name = $token->{tag_name};
3963     my $el;
3964    
3965     $el = $self->{document}->create_element_ns
3966     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3967    
3968     for my $attr_name (keys %{ $token->{attributes}}) {
3969     $el->set_attribute_ns (undef, [undef, $attr_name],
3970     $token->{attributes} ->{$attr_name}->{value});
3971     }
3972    
3973    
3974     if ($token->{tag_name} eq 'textarea') {
3975 wakaba 1.3 ## TODO: $self->{form_element} if defined
3976 wakaba 1.1 $self->{content_model_flag} = 'RCDATA';
3977     } else {
3978     $self->{content_model_flag} = 'CDATA';
3979     }
3980 wakaba 1.13 delete $self->{escape}; # MUST
3981 wakaba 1.1
3982     $insert->($el);
3983    
3984     my $text = '';
3985 wakaba 1.8 if ($token->{tag_name} eq 'textarea') {
3986     $token = $self->_get_next_token;
3987     if ($token->{type} eq 'character') {
3988     $token->{data} =~ s/^\x0A//;
3989     unless (length $token->{data}) {
3990     $token = $self->_get_next_token;
3991     }
3992     }
3993     } else {
3994     $token = $self->_get_next_token;
3995     }
3996 wakaba 1.1 while ($token->{type} eq 'character') {
3997     $text .= $token->{data};
3998     $token = $self->_get_next_token;
3999     }
4000     if (length $text) {
4001     $el->manakai_append_text ($text);
4002     }
4003    
4004     $self->{content_model_flag} = 'PCDATA';
4005    
4006     if ($token->{type} eq 'end tag' and
4007     $token->{tag_name} eq $tag_name) {
4008     ## Ignore the token
4009     } else {
4010 wakaba 1.10 if ($token->{tag_name} eq 'textarea') {
4011     $self->{parse_error}-> (type => 'in RCDATA:#'.$token->{type});
4012     } else {
4013 wakaba 1.3 $self->{parse_error}-> (type => 'in CDATA:#'.$token->{type});
4014     }
4015 wakaba 1.1 ## ISSUE: And ignore?
4016     }
4017     $token = $self->_get_next_token;
4018     return;
4019     } elsif ($token->{tag_name} eq 'select') {
4020     $reconstruct_active_formatting_elements->($insert_to_current);
4021    
4022    
4023     {
4024     my $el;
4025    
4026     $el = $self->{document}->create_element_ns
4027     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
4028    
4029     for my $attr_name (keys %{ $token->{attributes}}) {
4030     $el->set_attribute_ns (undef, [undef, $attr_name],
4031     $token->{attributes} ->{$attr_name}->{value});
4032     }
4033    
4034     $insert->($el);
4035 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
4036 wakaba 1.1 }
4037    
4038    
4039 wakaba 1.3 $self->{insertion_mode} = 'in select';
4040 wakaba 1.1 $token = $self->_get_next_token;
4041     return;
4042     } elsif ({
4043     caption => 1, col => 1, colgroup => 1, frame => 1,
4044     frameset => 1, head => 1, option => 1, optgroup => 1,
4045     tbody => 1, td => 1, tfoot => 1, th => 1,
4046     thead => 1, tr => 1,
4047     }->{$token->{tag_name}}) {
4048 wakaba 1.3 $self->{parse_error}-> (type => 'in body:'.$token->{tag_name});
4049 wakaba 1.1 ## Ignore the token
4050     $token = $self->_get_next_token;
4051     return;
4052    
4053     ## ISSUE: An issue on HTML5 new elements in the spec.
4054     } else {
4055     $reconstruct_active_formatting_elements->($insert_to_current);
4056    
4057    
4058     {
4059     my $el;
4060    
4061     $el = $self->{document}->create_element_ns
4062     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
4063    
4064     for my $attr_name (keys %{ $token->{attributes}}) {
4065     $el->set_attribute_ns (undef, [undef, $attr_name],
4066     $token->{attributes} ->{$attr_name}->{value});
4067     }
4068    
4069     $insert->($el);
4070 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
4071 wakaba 1.1 }
4072    
4073    
4074     $token = $self->_get_next_token;
4075     return;
4076     }
4077     } elsif ($token->{type} eq 'end tag') {
4078     if ($token->{tag_name} eq 'body') {
4079 wakaba 1.20 if (@{$self->{open_elements}} > 1 and
4080     $self->{open_elements}->[1]->[1] eq 'body') {
4081     for (@{$self->{open_elements}}) {
4082     unless ({
4083     dd => 1, dt => 1, li => 1, p => 1, td => 1,
4084     th => 1, tr => 1, body => 1, html => 1,
4085     }->{$_->[1]}) {
4086     $self->{parse_error}-> (type => 'not closed:'.$_->[1]);
4087     }
4088 wakaba 1.1 }
4089 wakaba 1.20
4090 wakaba 1.3 $self->{insertion_mode} = 'after body';
4091 wakaba 1.1 $token = $self->_get_next_token;
4092     return;
4093     } else {
4094 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4095 wakaba 1.1 ## Ignore the token
4096     $token = $self->_get_next_token;
4097     return;
4098     }
4099     } elsif ($token->{tag_name} eq 'html') {
4100 wakaba 1.3 if (@{$self->{open_elements}} > 1 and $self->{open_elements}->[1]->[1] eq 'body') {
4101 wakaba 1.1 ## ISSUE: There is an issue in the spec.
4102 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'body') {
4103     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[1]->[1]);
4104 wakaba 1.1 }
4105 wakaba 1.3 $self->{insertion_mode} = 'after body';
4106 wakaba 1.1 ## reprocess
4107     return;
4108     } else {
4109 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4110 wakaba 1.1 ## Ignore the token
4111     $token = $self->_get_next_token;
4112     return;
4113     }
4114     } elsif ({
4115     address => 1, blockquote => 1, center => 1, dir => 1,
4116     div => 1, dl => 1, fieldset => 1, listing => 1,
4117     menu => 1, ol => 1, pre => 1, ul => 1,
4118     p => 1,
4119     dd => 1, dt => 1, li => 1,
4120     button => 1, marquee => 1, object => 1,
4121     }->{$token->{tag_name}}) {
4122     ## has an element in scope
4123     my $i;
4124 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4125     my $node = $self->{open_elements}->[$_];
4126 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4127     ## generate implied end tags
4128     if ({
4129     dd => ($token->{tag_name} ne 'dd'),
4130     dt => ($token->{tag_name} ne 'dt'),
4131     li => ($token->{tag_name} ne 'li'),
4132     p => ($token->{tag_name} ne 'p'),
4133     td => 1, th => 1, tr => 1,
4134 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4135 wakaba 1.1 unshift @{$self->{token}}, $token;
4136     $token = {type => 'end tag',
4137 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4138 wakaba 1.1 return;
4139     }
4140     $i = $_;
4141     last INSCOPE unless $token->{tag_name} eq 'p';
4142     } elsif ({
4143     table => 1, caption => 1, td => 1, th => 1,
4144     button => 1, marquee => 1, object => 1, html => 1,
4145     }->{$node->[1]}) {
4146     last INSCOPE;
4147     }
4148     } # INSCOPE
4149    
4150 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
4151     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4152 wakaba 1.1 }
4153    
4154 wakaba 1.3 splice @{$self->{open_elements}}, $i if defined $i;
4155 wakaba 1.1 $clear_up_to_marker->()
4156     if {
4157     button => 1, marquee => 1, object => 1,
4158     }->{$token->{tag_name}};
4159 wakaba 1.12 $token = $self->_get_next_token;
4160     return;
4161     } elsif ($token->{tag_name} eq 'form') {
4162     ## has an element in scope
4163     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4164     my $node = $self->{open_elements}->[$_];
4165     if ($node->[1] eq $token->{tag_name}) {
4166     ## generate implied end tags
4167     if ({
4168     dd => 1, dt => 1, li => 1, p => 1,
4169     td => 1, th => 1, tr => 1,
4170     }->{$self->{open_elements}->[-1]->[1]}) {
4171     unshift @{$self->{token}}, $token;
4172     $token = {type => 'end tag',
4173     tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4174     return;
4175     }
4176     last INSCOPE;
4177     } elsif ({
4178     table => 1, caption => 1, td => 1, th => 1,
4179     button => 1, marquee => 1, object => 1, html => 1,
4180     }->{$node->[1]}) {
4181     last INSCOPE;
4182     }
4183     } # INSCOPE
4184    
4185     if ($self->{open_elements}->[-1]->[1] eq $token->{tag_name}) {
4186     pop @{$self->{open_elements}};
4187     } else {
4188     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4189     }
4190    
4191     undef $self->{form_element};
4192 wakaba 1.1 $token = $self->_get_next_token;
4193     return;
4194     } elsif ({
4195     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
4196     }->{$token->{tag_name}}) {
4197     ## has an element in scope
4198     my $i;
4199 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4200     my $node = $self->{open_elements}->[$_];
4201 wakaba 1.1 if ({
4202     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
4203     }->{$node->[1]}) {
4204     ## generate implied end tags
4205     if ({
4206     dd => 1, dt => 1, li => 1, p => 1,
4207     td => 1, th => 1, tr => 1,
4208 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4209 wakaba 1.1 unshift @{$self->{token}}, $token;
4210     $token = {type => 'end tag',
4211 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4212 wakaba 1.1 return;
4213     }
4214     $i = $_;
4215     last INSCOPE;
4216     } elsif ({
4217     table => 1, caption => 1, td => 1, th => 1,
4218     button => 1, marquee => 1, object => 1, html => 1,
4219     }->{$node->[1]}) {
4220     last INSCOPE;
4221     }
4222     } # INSCOPE
4223    
4224 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
4225     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4226 wakaba 1.1 }
4227    
4228 wakaba 1.3 splice @{$self->{open_elements}}, $i if defined $i;
4229 wakaba 1.1 $token = $self->_get_next_token;
4230     return;
4231     } elsif ({
4232     a => 1,
4233     b => 1, big => 1, em => 1, font => 1, i => 1,
4234     nobr => 1, s => 1, small => 1, strile => 1,
4235     strong => 1, tt => 1, u => 1,
4236     }->{$token->{tag_name}}) {
4237     $formatting_end_tag->($token->{tag_name});
4238 wakaba 1.8 ## TODO: <http://html5.org/tools/web-apps-tracker?from=883&to=884>
4239 wakaba 1.1 return;
4240     } elsif ({
4241     caption => 1, col => 1, colgroup => 1, frame => 1,
4242     frameset => 1, head => 1, option => 1, optgroup => 1,
4243     tbody => 1, td => 1, tfoot => 1, th => 1,
4244     thead => 1, tr => 1,
4245     area => 1, basefont => 1, bgsound => 1, br => 1,
4246     embed => 1, hr => 1, iframe => 1, image => 1,
4247 wakaba 1.5 img => 1, input => 1, isindex => 1, noembed => 1,
4248 wakaba 1.1 noframes => 1, param => 1, select => 1, spacer => 1,
4249     table => 1, textarea => 1, wbr => 1,
4250     noscript => 0, ## TODO: if scripting is enabled
4251     }->{$token->{tag_name}}) {
4252 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4253 wakaba 1.1 ## Ignore the token
4254     $token = $self->_get_next_token;
4255     return;
4256    
4257     ## ISSUE: Issue on HTML5 new elements in spec
4258    
4259     } else {
4260     ## Step 1
4261     my $node_i = -1;
4262 wakaba 1.3 my $node = $self->{open_elements}->[$node_i];
4263 wakaba 1.1
4264     ## Step 2
4265     S2: {
4266     if ($node->[1] eq $token->{tag_name}) {
4267     ## Step 1
4268     ## generate implied end tags
4269     if ({
4270     dd => 1, dt => 1, li => 1, p => 1,
4271     td => 1, th => 1, tr => 1,
4272 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4273 wakaba 1.1 unshift @{$self->{token}}, $token;
4274     $token = {type => 'end tag',
4275 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4276 wakaba 1.1 return;
4277     }
4278    
4279     ## Step 2
4280 wakaba 1.3 if ($token->{tag_name} ne $self->{open_elements}->[-1]->[1]) {
4281     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4282 wakaba 1.1 }
4283    
4284     ## Step 3
4285 wakaba 1.3 splice @{$self->{open_elements}}, $node_i;
4286    
4287     $token = $self->_get_next_token;
4288 wakaba 1.1 last S2;
4289     } else {
4290     ## Step 3
4291     if (not $formatting_category->{$node->[1]} and
4292     #not $phrasing_category->{$node->[1]} and
4293     ($special_category->{$node->[1]} or
4294     $scoping_category->{$node->[1]})) {
4295 wakaba 1.3 $self->{parse_error}-> (type => 'not closed:'.$node->[1]);
4296 wakaba 1.1 ## Ignore the token
4297     $token = $self->_get_next_token;
4298     last S2;
4299     }
4300     }
4301    
4302     ## Step 4
4303     $node_i--;
4304 wakaba 1.3 $node = $self->{open_elements}->[$node_i];
4305 wakaba 1.1
4306     ## Step 5;
4307     redo S2;
4308     } # S2
4309 wakaba 1.3 return;
4310 wakaba 1.1 }
4311     }
4312     }; # $in_body
4313    
4314     B: {
4315 wakaba 1.3 if ($phase eq 'main') {
4316 wakaba 1.1 if ($token->{type} eq 'DOCTYPE') {
4317 wakaba 1.3 $self->{parse_error}-> (type => 'in html:#DOCTYPE');
4318 wakaba 1.1 ## Ignore the token
4319     ## Stay in the phase
4320     $token = $self->_get_next_token;
4321     redo B;
4322     } elsif ($token->{type} eq 'start tag' and
4323     $token->{tag_name} eq 'html') {
4324     ## TODO: unless it is the first start tag token, parse-error
4325 wakaba 1.3 my $top_el = $self->{open_elements}->[0]->[0];
4326 wakaba 1.1 for my $attr_name (keys %{$token->{attributes}}) {
4327     unless ($top_el->has_attribute_ns (undef, $attr_name)) {
4328     $top_el->set_attribute_ns
4329     (undef, [undef, $attr_name],
4330     $token->{attributes}->{$attr_name}->{value});
4331     }
4332     }
4333     $token = $self->_get_next_token;
4334     redo B;
4335     } elsif ($token->{type} eq 'end-of-file') {
4336     ## Generate implied end tags
4337     if ({
4338     dd => 1, dt => 1, li => 1, p => 1, td => 1, th => 1, tr => 1,
4339 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4340 wakaba 1.1 unshift @{$self->{token}}, $token;
4341 wakaba 1.3 $token = {type => 'end tag', tag_name => $self->{open_elements}->[-1]->[1]};
4342 wakaba 1.1 redo B;
4343     }
4344    
4345 wakaba 1.3 if (@{$self->{open_elements}} > 2 or
4346     (@{$self->{open_elements}} == 2 and $self->{open_elements}->[1]->[1] ne 'body')) {
4347     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4348     } elsif (defined $self->{inner_html_node} and
4349     @{$self->{open_elements}} > 1 and
4350     $self->{open_elements}->[1]->[1] ne 'body') {
4351     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4352 wakaba 1.1 }
4353    
4354     ## Stop parsing
4355     last B;
4356    
4357     ## ISSUE: There is an issue in the spec.
4358     } else {
4359 wakaba 1.3 if ($self->{insertion_mode} eq 'before head') {
4360 wakaba 1.1 if ($token->{type} eq 'character') {
4361     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4362 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4363 wakaba 1.1 unless (length $token->{data}) {
4364     $token = $self->_get_next_token;
4365     redo B;
4366     }
4367     }
4368     ## As if <head>
4369    
4370 wakaba 1.3 $self->{head_element} = $self->{document}->create_element_ns
4371 wakaba 1.1 (q<http://www.w3.org/1999/xhtml>, [undef, 'head']);
4372    
4373 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4374     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
4375     $self->{insertion_mode} = 'in head';
4376 wakaba 1.1 ## reprocess
4377     redo B;
4378     } elsif ($token->{type} eq 'comment') {
4379     my $comment = $self->{document}->create_comment ($token->{data});
4380 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4381 wakaba 1.1 $token = $self->_get_next_token;
4382     redo B;
4383     } elsif ($token->{type} eq 'start tag') {
4384     my $attr = $token->{tag_name} eq 'head' ? $token->{attributes} : {};
4385    
4386 wakaba 1.3 $self->{head_element} = $self->{document}->create_element_ns
4387 wakaba 1.1 (q<http://www.w3.org/1999/xhtml>, [undef, 'head']);
4388    
4389     for my $attr_name (keys %{ $attr}) {
4390 wakaba 1.3 $self->{head_element}->set_attribute_ns (undef, [undef, $attr_name],
4391 wakaba 1.1 $attr ->{$attr_name}->{value});
4392     }
4393    
4394 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4395     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
4396     $self->{insertion_mode} = 'in head';
4397 wakaba 1.1 if ($token->{tag_name} eq 'head') {
4398     $token = $self->_get_next_token;
4399     #} elsif ({
4400     # base => 1, link => 1, meta => 1,
4401     # script => 1, style => 1, title => 1,
4402     # }->{$token->{tag_name}}) {
4403     # ## reprocess
4404     } else {
4405     ## reprocess
4406     }
4407     redo B;
4408     } elsif ($token->{type} eq 'end tag') {
4409 wakaba 1.21 if ({head => 1, body => 1, html => 1}->{$token->{tag_name}}) {
4410 wakaba 1.1 ## As if <head>
4411    
4412 wakaba 1.3 $self->{head_element} = $self->{document}->create_element_ns
4413 wakaba 1.1 (q<http://www.w3.org/1999/xhtml>, [undef, 'head']);
4414    
4415 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4416     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
4417     $self->{insertion_mode} = 'in head';
4418 wakaba 1.1 ## reprocess
4419     redo B;
4420     } else {
4421 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4422 wakaba 1.21 ## Ignore the token ## ISSUE: An issue in the spec.
4423 wakaba 1.1 $token = $self->_get_next_token;
4424     redo B;
4425     }
4426     } else {
4427     die "$0: $token->{type}: Unknown type";
4428     }
4429 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in head') {
4430 wakaba 1.1 if ($token->{type} eq 'character') {
4431     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4432 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4433 wakaba 1.1 unless (length $token->{data}) {
4434     $token = $self->_get_next_token;
4435     redo B;
4436     }
4437     }
4438    
4439     #
4440     } elsif ($token->{type} eq 'comment') {
4441     my $comment = $self->{document}->create_comment ($token->{data});
4442 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4443 wakaba 1.1 $token = $self->_get_next_token;
4444     redo B;
4445     } elsif ($token->{type} eq 'start tag') {
4446     if ($token->{tag_name} eq 'title') {
4447     ## NOTE: There is an "as if in head" code clone
4448     my $title_el;
4449    
4450     $title_el = $self->{document}->create_element_ns
4451     (q<http://www.w3.org/1999/xhtml>, [undef, 'title']);
4452    
4453     for my $attr_name (keys %{ $token->{attributes}}) {
4454     $title_el->set_attribute_ns (undef, [undef, $attr_name],
4455     $token->{attributes} ->{$attr_name}->{value});
4456     }
4457    
4458 wakaba 1.3 (defined $self->{head_element} ? $self->{head_element} : $self->{open_elements}->[-1]->[0])
4459 wakaba 1.1 ->append_child ($title_el);
4460     $self->{content_model_flag} = 'RCDATA';
4461 wakaba 1.13 delete $self->{escape}; # MUST
4462 wakaba 1.1
4463     my $text = '';
4464     $token = $self->_get_next_token;
4465     while ($token->{type} eq 'character') {
4466     $text .= $token->{data};
4467     $token = $self->_get_next_token;
4468     }
4469     if (length $text) {
4470     $title_el->manakai_append_text ($text);
4471     }
4472    
4473     $self->{content_model_flag} = 'PCDATA';
4474    
4475     if ($token->{type} eq 'end tag' and
4476     $token->{tag_name} eq 'title') {
4477     ## Ignore the token
4478     } else {
4479 wakaba 1.3 $self->{parse_error}-> (type => 'in RCDATA:#'.$token->{type});
4480 wakaba 1.1 ## ISSUE: And ignore?
4481     }
4482     $token = $self->_get_next_token;
4483     redo B;
4484     } elsif ($token->{tag_name} eq 'style') {
4485     $style_start_tag->();
4486     redo B;
4487     } elsif ($token->{tag_name} eq 'script') {
4488     $script_start_tag->();
4489     redo B;
4490     } elsif ({base => 1, link => 1, meta => 1}->{$token->{tag_name}}) {
4491     ## NOTE: There are "as if in head" code clones
4492     my $el;
4493    
4494     $el = $self->{document}->create_element_ns
4495     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
4496    
4497     for my $attr_name (keys %{ $token->{attributes}}) {
4498     $el->set_attribute_ns (undef, [undef, $attr_name],
4499     $token->{attributes} ->{$attr_name}->{value});
4500     }
4501    
4502 wakaba 1.3 (defined $self->{head_element} ? $self->{head_element} : $self->{open_elements}->[-1]->[0])
4503 wakaba 1.1 ->append_child ($el);
4504    
4505     $token = $self->_get_next_token;
4506     redo B;
4507     } elsif ($token->{tag_name} eq 'head') {
4508 wakaba 1.3 $self->{parse_error}-> (type => 'in head:head');
4509 wakaba 1.1 ## Ignore the token
4510     $token = $self->_get_next_token;
4511     redo B;
4512     } else {
4513     #
4514     }
4515     } elsif ($token->{type} eq 'end tag') {
4516     if ($token->{tag_name} eq 'head') {
4517 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'head') {
4518     pop @{$self->{open_elements}};
4519 wakaba 1.1 } else {
4520 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:head');
4521 wakaba 1.1 }
4522 wakaba 1.3 $self->{insertion_mode} = 'after head';
4523 wakaba 1.1 $token = $self->_get_next_token;
4524     redo B;
4525 wakaba 1.21 } elsif ($token->{tag_name} eq 'body' or
4526     $token->{tag_name} eq 'html') {
4527 wakaba 1.1 #
4528     } else {
4529 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4530 wakaba 1.1 ## Ignore the token
4531     $token = $self->_get_next_token;
4532     redo B;
4533     }
4534     } else {
4535     #
4536     }
4537    
4538 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'head') {
4539 wakaba 1.1 ## As if </head>
4540 wakaba 1.3 pop @{$self->{open_elements}};
4541 wakaba 1.1 }
4542 wakaba 1.3 $self->{insertion_mode} = 'after head';
4543 wakaba 1.1 ## reprocess
4544     redo B;
4545    
4546     ## ISSUE: An issue in the spec.
4547 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after head') {
4548 wakaba 1.1 if ($token->{type} eq 'character') {
4549     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4550 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4551 wakaba 1.1 unless (length $token->{data}) {
4552     $token = $self->_get_next_token;
4553     redo B;
4554     }
4555     }
4556    
4557     #
4558     } elsif ($token->{type} eq 'comment') {
4559     my $comment = $self->{document}->create_comment ($token->{data});
4560 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4561 wakaba 1.1 $token = $self->_get_next_token;
4562     redo B;
4563     } elsif ($token->{type} eq 'start tag') {
4564     if ($token->{tag_name} eq 'body') {
4565    
4566     {
4567     my $el;
4568    
4569     $el = $self->{document}->create_element_ns
4570     (q<http://www.w3.org/1999/xhtml>, [undef, 'body']);
4571    
4572     for my $attr_name (keys %{ $token->{attributes}}) {
4573     $el->set_attribute_ns (undef, [undef, $attr_name],
4574     $token->{attributes} ->{$attr_name}->{value});
4575     }
4576    
4577 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
4578     push @{$self->{open_elements}}, [$el, 'body'];
4579 wakaba 1.1 }
4580    
4581 wakaba 1.3 $self->{insertion_mode} = 'in body';
4582 wakaba 1.1 $token = $self->_get_next_token;
4583     redo B;
4584     } elsif ($token->{tag_name} eq 'frameset') {
4585    
4586     {
4587     my $el;
4588    
4589     $el = $self->{document}->create_element_ns
4590     (q<http://www.w3.org/1999/xhtml>, [undef, 'frameset']);
4591    
4592     for my $attr_name (keys %{ $token->{attributes}}) {
4593     $el->set_attribute_ns (undef, [undef, $attr_name],
4594     $token->{attributes} ->{$attr_name}->{value});
4595     }
4596    
4597 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
4598     push @{$self->{open_elements}}, [$el, 'frameset'];
4599 wakaba 1.1 }
4600    
4601 wakaba 1.3 $self->{insertion_mode} = 'in frameset';
4602 wakaba 1.1 $token = $self->_get_next_token;
4603     redo B;
4604     } elsif ({
4605     base => 1, link => 1, meta => 1,
4606 wakaba 1.3 script => 1, style => 1, title => 1,
4607 wakaba 1.1 }->{$token->{tag_name}}) {
4608 wakaba 1.3 $self->{parse_error}-> (type => 'after head:'.$token->{tag_name});
4609     $self->{insertion_mode} = 'in head';
4610 wakaba 1.1 ## reprocess
4611     redo B;
4612     } else {
4613     #
4614     }
4615     } else {
4616     #
4617     }
4618    
4619     ## As if <body>
4620    
4621     {
4622     my $el;
4623    
4624     $el = $self->{document}->create_element_ns
4625     (q<http://www.w3.org/1999/xhtml>, [undef, 'body']);
4626    
4627 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
4628     push @{$self->{open_elements}}, [$el, 'body'];
4629 wakaba 1.1 }
4630    
4631 wakaba 1.3 $self->{insertion_mode} = 'in body';
4632 wakaba 1.1 ## reprocess
4633     redo B;
4634 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in body') {
4635 wakaba 1.1 if ($token->{type} eq 'character') {
4636     ## NOTE: There is a code clone of "character in body".
4637     $reconstruct_active_formatting_elements->($insert_to_current);
4638    
4639 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4640 wakaba 1.1
4641     $token = $self->_get_next_token;
4642     redo B;
4643     } elsif ($token->{type} eq 'comment') {
4644     ## NOTE: There is a code clone of "comment in body".
4645     my $comment = $self->{document}->create_comment ($token->{data});
4646 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4647 wakaba 1.1 $token = $self->_get_next_token;
4648     redo B;
4649     } else {
4650     $in_body->($insert_to_current);
4651     redo B;
4652     }
4653 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in table') {
4654 wakaba 1.1 if ($token->{type} eq 'character') {
4655     ## NOTE: There are "character in table" code clones.
4656     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4657 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4658 wakaba 1.1
4659     unless (length $token->{data}) {
4660     $token = $self->_get_next_token;
4661     redo B;
4662     }
4663     }
4664    
4665 wakaba 1.3 $self->{parse_error}-> (type => 'in table:#character');
4666    
4667 wakaba 1.1 ## As if in body, but insert into foster parent element
4668     ## ISSUE: Spec says that "whenever a node would be inserted
4669     ## into the current node" while characters might not be
4670     ## result in a new Text node.
4671     $reconstruct_active_formatting_elements->($insert_to_foster);
4672    
4673     if ({
4674     table => 1, tbody => 1, tfoot => 1,
4675     thead => 1, tr => 1,
4676 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4677 wakaba 1.1 # MUST
4678     my $foster_parent_element;
4679     my $next_sibling;
4680     my $prev_sibling;
4681 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
4682     if ($self->{open_elements}->[$_]->[1] eq 'table') {
4683     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
4684 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
4685     $foster_parent_element = $parent;
4686 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
4687 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
4688     } else {
4689 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
4690 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
4691     }
4692     last OE;
4693     }
4694     } # OE
4695 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
4696 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
4697     unless defined $foster_parent_element;
4698     if (defined $prev_sibling and
4699     $prev_sibling->node_type == 3) {
4700     $prev_sibling->manakai_append_text ($token->{data});
4701     } else {
4702     $foster_parent_element->insert_before
4703     ($self->{document}->create_text_node ($token->{data}),
4704     $next_sibling);
4705     }
4706     } else {
4707 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4708 wakaba 1.1 }
4709    
4710     $token = $self->_get_next_token;
4711     redo B;
4712     } elsif ($token->{type} eq 'comment') {
4713     my $comment = $self->{document}->create_comment ($token->{data});
4714 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4715 wakaba 1.1 $token = $self->_get_next_token;
4716     redo B;
4717     } elsif ($token->{type} eq 'start tag') {
4718     if ({
4719     caption => 1,
4720     colgroup => 1,
4721     tbody => 1, tfoot => 1, thead => 1,
4722     }->{$token->{tag_name}}) {
4723     ## Clear back to table context
4724 wakaba 1.3 while ($self->{open_elements}->[-1]->[1] ne 'table' and
4725     $self->{open_elements}->[-1]->[1] ne 'html') {
4726     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4727     pop @{$self->{open_elements}};
4728 wakaba 1.1 }
4729    
4730     push @$active_formatting_elements, ['#marker', '']
4731     if $token->{tag_name} eq 'caption';
4732    
4733    
4734     {
4735     my $el;
4736    
4737     $el = $self->{document}->create_element_ns
4738     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
4739    
4740     for my $attr_name (keys %{ $token->{attributes}}) {
4741     $el->set_attribute_ns (undef, [undef, $attr_name],
4742     $token->{attributes} ->{$attr_name}->{value});
4743     }
4744    
4745 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
4746     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
4747 wakaba 1.1 }
4748    
4749 wakaba 1.3 $self->{insertion_mode} = {
4750 wakaba 1.1 caption => 'in caption',
4751     colgroup => 'in column group',
4752     tbody => 'in table body',
4753     tfoot => 'in table body',
4754     thead => 'in table body',
4755     }->{$token->{tag_name}};
4756     $token = $self->_get_next_token;
4757     redo B;
4758     } elsif ({
4759     col => 1,
4760     td => 1, th => 1, tr => 1,
4761     }->{$token->{tag_name}}) {
4762     ## Clear back to table context
4763 wakaba 1.3 while ($self->{open_elements}->[-1]->[1] ne 'table' and
4764     $self->{open_elements}->[-1]->[1] ne 'html') {
4765     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4766     pop @{$self->{open_elements}};
4767 wakaba 1.1 }
4768    
4769    
4770     {
4771     my $el;
4772    
4773     $el = $self->{document}->create_element_ns
4774     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name} eq 'col' ? 'colgroup' : 'tbody']);
4775    
4776 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
4777     push @{$self->{open_elements}}, [$el, $token->{tag_name} eq 'col' ? 'colgroup' : 'tbody'];
4778 wakaba 1.1 }
4779    
4780 wakaba 1.3 $self->{insertion_mode} = $token->{tag_name} eq 'col'
4781 wakaba 1.1 ? 'in column group' : 'in table body';
4782     ## reprocess
4783     redo B;
4784     } elsif ($token->{tag_name} eq 'table') {
4785     ## NOTE: There are code clones for this "table in table"
4786 wakaba 1.3 $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4787 wakaba 1.1
4788     ## As if </table>
4789     ## have a table element in table scope
4790     my $i;
4791 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4792     my $node = $self->{open_elements}->[$_];
4793 wakaba 1.1 if ($node->[1] eq 'table') {
4794     $i = $_;
4795     last INSCOPE;
4796     } elsif ({
4797     table => 1, html => 1,
4798     }->{$node->[1]}) {
4799     last INSCOPE;
4800     }
4801     } # INSCOPE
4802     unless (defined $i) {
4803 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:table');
4804 wakaba 1.1 ## Ignore tokens </table><table>
4805     $token = $self->_get_next_token;
4806     redo B;
4807     }
4808    
4809     ## generate implied end tags
4810     if ({
4811     dd => 1, dt => 1, li => 1, p => 1,
4812     td => 1, th => 1, tr => 1,
4813 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4814 wakaba 1.1 unshift @{$self->{token}}, $token; # <table>
4815     $token = {type => 'end tag', tag_name => 'table'};
4816     unshift @{$self->{token}}, $token;
4817     $token = {type => 'end tag',
4818 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4819 wakaba 1.1 redo B;
4820     }
4821    
4822 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
4823     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4824 wakaba 1.1 }
4825    
4826 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4827 wakaba 1.1
4828 wakaba 1.3 $self->_reset_insertion_mode;
4829 wakaba 1.1
4830     ## reprocess
4831     redo B;
4832     } else {
4833     #
4834     }
4835     } elsif ($token->{type} eq 'end tag') {
4836     if ($token->{tag_name} eq 'table') {
4837     ## have a table element in table scope
4838     my $i;
4839 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4840     my $node = $self->{open_elements}->[$_];
4841 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4842     $i = $_;
4843     last INSCOPE;
4844     } elsif ({
4845     table => 1, html => 1,
4846     }->{$node->[1]}) {
4847     last INSCOPE;
4848     }
4849     } # INSCOPE
4850     unless (defined $i) {
4851 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4852 wakaba 1.1 ## Ignore the token
4853     $token = $self->_get_next_token;
4854     redo B;
4855     }
4856    
4857     ## generate implied end tags
4858     if ({
4859     dd => 1, dt => 1, li => 1, p => 1,
4860     td => 1, th => 1, tr => 1,
4861 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4862 wakaba 1.1 unshift @{$self->{token}}, $token;
4863     $token = {type => 'end tag',
4864 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4865 wakaba 1.1 redo B;
4866     }
4867    
4868 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
4869     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4870 wakaba 1.1 }
4871    
4872 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4873 wakaba 1.1
4874 wakaba 1.3 $self->_reset_insertion_mode;
4875 wakaba 1.1
4876     $token = $self->_get_next_token;
4877     redo B;
4878     } elsif ({
4879     body => 1, caption => 1, col => 1, colgroup => 1,
4880     html => 1, tbody => 1, td => 1, tfoot => 1, th => 1,
4881     thead => 1, tr => 1,
4882     }->{$token->{tag_name}}) {
4883 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4884 wakaba 1.1 ## Ignore the token
4885     $token = $self->_get_next_token;
4886     redo B;
4887     } else {
4888     #
4889     }
4890     } else {
4891     #
4892     }
4893    
4894 wakaba 1.3 $self->{parse_error}-> (type => 'in table:'.$token->{tag_name});
4895 wakaba 1.1 $in_body->($insert_to_foster);
4896     redo B;
4897 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in caption') {
4898 wakaba 1.1 if ($token->{type} eq 'character') {
4899     ## NOTE: This is a code clone of "character in body".
4900     $reconstruct_active_formatting_elements->($insert_to_current);
4901    
4902 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4903 wakaba 1.1
4904     $token = $self->_get_next_token;
4905     redo B;
4906     } elsif ($token->{type} eq 'comment') {
4907     ## NOTE: This is a code clone of "comment in body".
4908     my $comment = $self->{document}->create_comment ($token->{data});
4909 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4910 wakaba 1.1 $token = $self->_get_next_token;
4911     redo B;
4912     } elsif ($token->{type} eq 'start tag') {
4913     if ({
4914     caption => 1, col => 1, colgroup => 1, tbody => 1,
4915     td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
4916     }->{$token->{tag_name}}) {
4917 wakaba 1.3 $self->{parse_error}-> (type => 'not closed:caption');
4918 wakaba 1.1
4919     ## As if </caption>
4920     ## have a table element in table scope
4921     my $i;
4922 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4923     my $node = $self->{open_elements}->[$_];
4924 wakaba 1.1 if ($node->[1] eq 'caption') {
4925     $i = $_;
4926     last INSCOPE;
4927     } elsif ({
4928     table => 1, html => 1,
4929     }->{$node->[1]}) {
4930     last INSCOPE;
4931     }
4932     } # INSCOPE
4933     unless (defined $i) {
4934 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:caption');
4935 wakaba 1.1 ## Ignore the token
4936     $token = $self->_get_next_token;
4937     redo B;
4938     }
4939    
4940     ## generate implied end tags
4941     if ({
4942     dd => 1, dt => 1, li => 1, p => 1,
4943     td => 1, th => 1, tr => 1,
4944 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4945 wakaba 1.1 unshift @{$self->{token}}, $token; # <?>
4946     $token = {type => 'end tag', tag_name => 'caption'};
4947     unshift @{$self->{token}}, $token;
4948     $token = {type => 'end tag',
4949 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4950 wakaba 1.1 redo B;
4951     }
4952    
4953 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
4954     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4955 wakaba 1.1 }
4956    
4957 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4958 wakaba 1.1
4959     $clear_up_to_marker->();
4960    
4961 wakaba 1.3 $self->{insertion_mode} = 'in table';
4962 wakaba 1.1
4963     ## reprocess
4964     redo B;
4965     } else {
4966     #
4967     }
4968     } elsif ($token->{type} eq 'end tag') {
4969     if ($token->{tag_name} eq 'caption') {
4970     ## have a table element in table scope
4971     my $i;
4972 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4973     my $node = $self->{open_elements}->[$_];
4974 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4975     $i = $_;
4976     last INSCOPE;
4977     } elsif ({
4978     table => 1, html => 1,
4979     }->{$node->[1]}) {
4980     last INSCOPE;
4981     }
4982     } # INSCOPE
4983     unless (defined $i) {
4984 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4985 wakaba 1.1 ## Ignore the token
4986     $token = $self->_get_next_token;
4987     redo B;
4988     }
4989    
4990     ## generate implied end tags
4991     if ({
4992     dd => 1, dt => 1, li => 1, p => 1,
4993     td => 1, th => 1, tr => 1,
4994 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4995 wakaba 1.1 unshift @{$self->{token}}, $token;
4996     $token = {type => 'end tag',
4997 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4998 wakaba 1.1 redo B;
4999     }
5000    
5001 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
5002     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5003 wakaba 1.1 }
5004    
5005 wakaba 1.3 splice @{$self->{open_elements}}, $i;
5006 wakaba 1.1
5007     $clear_up_to_marker->();
5008    
5009 wakaba 1.3 $self->{insertion_mode} = 'in table';
5010 wakaba 1.1
5011     $token = $self->_get_next_token;
5012     redo B;
5013     } elsif ($token->{tag_name} eq 'table') {
5014 wakaba 1.3 $self->{parse_error}-> (type => 'not closed:caption');
5015 wakaba 1.1
5016     ## As if </caption>
5017     ## have a table element in table scope
5018     my $i;
5019 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5020     my $node = $self->{open_elements}->[$_];
5021 wakaba 1.1 if ($node->[1] eq 'caption') {
5022     $i = $_;
5023     last INSCOPE;
5024     } elsif ({
5025     table => 1, html => 1,
5026     }->{$node->[1]}) {
5027     last INSCOPE;
5028     }
5029     } # INSCOPE
5030     unless (defined $i) {
5031 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:caption');
5032 wakaba 1.1 ## Ignore the token
5033     $token = $self->_get_next_token;
5034     redo B;
5035     }
5036    
5037     ## generate implied end tags
5038     if ({
5039     dd => 1, dt => 1, li => 1, p => 1,
5040     td => 1, th => 1, tr => 1,
5041 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5042 wakaba 1.1 unshift @{$self->{token}}, $token; # </table>
5043     $token = {type => 'end tag', tag_name => 'caption'};
5044     unshift @{$self->{token}}, $token;
5045     $token = {type => 'end tag',
5046 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
5047 wakaba 1.1 redo B;
5048     }
5049    
5050 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
5051     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5052 wakaba 1.1 }
5053    
5054 wakaba 1.3 splice @{$self->{open_elements}}, $i;
5055 wakaba 1.1
5056     $clear_up_to_marker->();
5057    
5058 wakaba 1.3 $self->{insertion_mode} = 'in table';
5059 wakaba 1.1
5060     ## reprocess
5061     redo B;
5062     } elsif ({
5063     body => 1, col => 1, colgroup => 1,
5064     html => 1, tbody => 1, td => 1, tfoot => 1,
5065     th => 1, thead => 1, tr => 1,
5066     }->{$token->{tag_name}}) {
5067 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5068 wakaba 1.1 ## Ignore the token
5069     redo B;
5070     } else {
5071     #
5072     }
5073     } else {
5074     #
5075     }
5076    
5077     $in_body->($insert_to_current);
5078     redo B;
5079 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in column group') {
5080 wakaba 1.1 if ($token->{type} eq 'character') {
5081     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5082 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5083 wakaba 1.1 unless (length $token->{data}) {
5084     $token = $self->_get_next_token;
5085     redo B;
5086     }
5087     }
5088    
5089     #
5090     } elsif ($token->{type} eq 'comment') {
5091     my $comment = $self->{document}->create_comment ($token->{data});
5092 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
5093 wakaba 1.1 $token = $self->_get_next_token;
5094     redo B;
5095     } elsif ($token->{type} eq 'start tag') {
5096     if ($token->{tag_name} eq 'col') {
5097    
5098     {
5099     my $el;
5100    
5101     $el = $self->{document}->create_element_ns
5102     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
5103    
5104     for my $attr_name (keys %{ $token->{attributes}}) {
5105     $el->set_attribute_ns (undef, [undef, $attr_name],
5106     $token->{attributes} ->{$attr_name}->{value});
5107     }
5108    
5109 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
5110     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
5111 wakaba 1.1 }
5112    
5113 wakaba 1.3 pop @{$self->{open_elements}};
5114 wakaba 1.1 $token = $self->_get_next_token;
5115     redo B;
5116     } else {
5117     #
5118     }
5119     } elsif ($token->{type} eq 'end tag') {
5120     if ($token->{tag_name} eq 'colgroup') {
5121 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html') {
5122     $self->{parse_error}-> (type => 'unmatched end tag:colgroup');
5123 wakaba 1.1 ## Ignore the token
5124     $token = $self->_get_next_token;
5125     redo B;
5126     } else {
5127 wakaba 1.3 pop @{$self->{open_elements}}; # colgroup
5128     $self->{insertion_mode} = 'in table';
5129 wakaba 1.1 $token = $self->_get_next_token;
5130     redo B;
5131     }
5132     } elsif ($token->{tag_name} eq 'col') {
5133 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:col');
5134 wakaba 1.1 ## Ignore the token
5135     $token = $self->_get_next_token;
5136     redo B;
5137     } else {
5138     #
5139     }
5140     } else {
5141     #
5142     }
5143    
5144     ## As if </colgroup>
5145 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html') {
5146     $self->{parse_error}-> (type => 'unmatched end tag:colgroup');
5147 wakaba 1.1 ## Ignore the token
5148     $token = $self->_get_next_token;
5149     redo B;
5150     } else {
5151 wakaba 1.3 pop @{$self->{open_elements}}; # colgroup
5152     $self->{insertion_mode} = 'in table';
5153 wakaba 1.1 ## reprocess
5154     redo B;
5155     }
5156 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in table body') {
5157 wakaba 1.1 if ($token->{type} eq 'character') {
5158     ## NOTE: This is a "character in table" code clone.
5159     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5160 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5161 wakaba 1.1
5162     unless (length $token->{data}) {
5163     $token = $self->_get_next_token;
5164     redo B;
5165     }
5166     }
5167    
5168 wakaba 1.3 $self->{parse_error}-> (type => 'in table:#character');
5169    
5170 wakaba 1.1 ## As if in body, but insert into foster parent element
5171     ## ISSUE: Spec says that "whenever a node would be inserted
5172     ## into the current node" while characters might not be
5173     ## result in a new Text node.
5174     $reconstruct_active_formatting_elements->($insert_to_foster);
5175    
5176     if ({
5177     table => 1, tbody => 1, tfoot => 1,
5178     thead => 1, tr => 1,
5179 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5180 wakaba 1.1 # MUST
5181     my $foster_parent_element;
5182     my $next_sibling;
5183     my $prev_sibling;
5184 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
5185     if ($self->{open_elements}->[$_]->[1] eq 'table') {
5186     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
5187 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
5188     $foster_parent_element = $parent;
5189 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
5190 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
5191     } else {
5192 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
5193 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
5194     }
5195     last OE;
5196     }
5197     } # OE
5198 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
5199 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
5200     unless defined $foster_parent_element;
5201     if (defined $prev_sibling and
5202     $prev_sibling->node_type == 3) {
5203     $prev_sibling->manakai_append_text ($token->{data});
5204     } else {
5205     $foster_parent_element->insert_before
5206     ($self->{document}->create_text_node ($token->{data}),
5207     $next_sibling);
5208     }
5209     } else {
5210 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
5211 wakaba 1.1 }
5212    
5213     $token = $self->_get_next_token;
5214     redo B;
5215     } elsif ($token->{type} eq 'comment') {
5216     ## Copied from 'in table'
5217     my $comment = $self->{document}->create_comment ($token->{data});
5218 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
5219 wakaba 1.1 $token = $self->_get_next_token;
5220     redo B;
5221     } elsif ($token->{type} eq 'start tag') {
5222     if ({
5223     tr => 1,
5224     th => 1, td => 1,
5225     }->{$token->{tag_name}}) {
5226 wakaba 1.3 unless ($token->{tag_name} eq 'tr') {
5227     $self->{parse_error}-> (type => 'missing start tag:tr');
5228     }
5229    
5230 wakaba 1.1 ## Clear back to table body context
5231     while (not {
5232     tbody => 1, tfoot => 1, thead => 1, html => 1,
5233 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5234     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5235     pop @{$self->{open_elements}};
5236 wakaba 1.1 }
5237    
5238 wakaba 1.3 $self->{insertion_mode} = 'in row';
5239 wakaba 1.1 if ($token->{tag_name} eq 'tr') {
5240    
5241     {
5242     my $el;
5243    
5244     $el = $self->{document}->create_element_ns
5245     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
5246    
5247     for my $attr_name (keys %{ $token->{attributes}}) {
5248     $el->set_attribute_ns (undef, [undef, $attr_name],
5249     $token->{attributes} ->{$attr_name}->{value});
5250     }
5251    
5252 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
5253     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
5254 wakaba 1.1 }
5255    
5256     $token = $self->_get_next_token;
5257     } else {
5258    
5259     {
5260     my $el;
5261    
5262     $el = $self->{document}->create_element_ns
5263     (q<http://www.w3.org/1999/xhtml>, [undef, 'tr']);
5264    
5265 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
5266     push @{$self->{open_elements}}, [$el, 'tr'];
5267 wakaba 1.1 }
5268    
5269     ## reprocess
5270     }
5271     redo B;
5272     } elsif ({
5273     caption => 1, col => 1, colgroup => 1,
5274     tbody => 1, tfoot => 1, thead => 1,
5275     }->{$token->{tag_name}}) {
5276     ## have an element in table scope
5277     my $i;
5278 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5279     my $node = $self->{open_elements}->[$_];
5280 wakaba 1.1 if ({
5281     tbody => 1, thead => 1, tfoot => 1,
5282     }->{$node->[1]}) {
5283     $i = $_;
5284     last INSCOPE;
5285     } elsif ({
5286     table => 1, html => 1,
5287     }->{$node->[1]}) {
5288     last INSCOPE;
5289     }
5290     } # INSCOPE
5291     unless (defined $i) {
5292 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5293 wakaba 1.1 ## Ignore the token
5294     $token = $self->_get_next_token;
5295     redo B;
5296     }
5297    
5298     ## Clear back to table body context
5299     while (not {
5300     tbody => 1, tfoot => 1, thead => 1, html => 1,
5301 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5302     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5303     pop @{$self->{open_elements}};
5304 wakaba 1.1 }
5305    
5306     ## As if <{current node}>
5307     ## have an element in table scope
5308     ## true by definition
5309    
5310     ## Clear back to table body context
5311     ## nop by definition
5312    
5313 wakaba 1.3 pop @{$self->{open_elements}};
5314     $self->{insertion_mode} = 'in table';
5315 wakaba 1.1 ## reprocess
5316     redo B;
5317     } elsif ($token->{tag_name} eq 'table') {
5318     ## NOTE: This is a code clone of "table in table"
5319 wakaba 1.3 $self->{parse_error}-> (type => 'not closed:table');
5320 wakaba 1.1
5321     ## As if </table>
5322     ## have a table element in table scope
5323     my $i;
5324 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5325     my $node = $self->{open_elements}->[$_];
5326 wakaba 1.1 if ($node->[1] eq 'table') {
5327     $i = $_;
5328     last INSCOPE;
5329     } elsif ({
5330     table => 1, html => 1,
5331     }->{$node->[1]}) {
5332     last INSCOPE;
5333     }
5334     } # INSCOPE
5335     unless (defined $i) {
5336 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:table');
5337 wakaba 1.1 ## Ignore tokens </table><table>
5338     $token = $self->_get_next_token;
5339     redo B;
5340     }
5341    
5342     ## generate implied end tags
5343     if ({
5344     dd => 1, dt => 1, li => 1, p => 1,
5345     td => 1, th => 1, tr => 1,
5346 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5347 wakaba 1.1 unshift @{$self->{token}}, $token; # <table>
5348     $token = {type => 'end tag', tag_name => 'table'};
5349     unshift @{$self->{token}}, $token;
5350     $token = {type => 'end tag',
5351 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
5352 wakaba 1.1 redo B;
5353     }
5354    
5355 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
5356     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5357 wakaba 1.1 }
5358    
5359 wakaba 1.3 splice @{$self->{open_elements}}, $i;
5360 wakaba 1.1
5361 wakaba 1.3 $self->_reset_insertion_mode;
5362 wakaba 1.1
5363     ## reprocess
5364     redo B;
5365     } else {
5366     #
5367     }
5368     } elsif ($token->{type} eq 'end tag') {
5369     if ({
5370     tbody => 1, tfoot => 1, thead => 1,
5371     }->{$token->{tag_name}}) {
5372     ## have an element in table scope
5373     my $i;
5374 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5375     my $node = $self->{open_elements}->[$_];
5376 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
5377     $i = $_;
5378     last INSCOPE;
5379     } elsif ({
5380     table => 1, html => 1,
5381     }->{$node->[1]}) {
5382     last INSCOPE;
5383     }
5384     } # INSCOPE
5385     unless (defined $i) {
5386 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5387 wakaba 1.1 ## Ignore the token
5388     $token = $self->_get_next_token;
5389     redo B;
5390     }
5391    
5392     ## Clear back to table body context
5393     while (not {
5394     tbody => 1, tfoot => 1, thead => 1, html => 1,
5395 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5396     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5397     pop @{$self->{open_elements}};
5398 wakaba 1.1 }
5399    
5400 wakaba 1.3 pop @{$self->{open_elements}};
5401     $self->{insertion_mode} = 'in table';
5402 wakaba 1.1 $token = $self->_get_next_token;
5403     redo B;
5404     } elsif ($token->{tag_name} eq 'table') {
5405     ## have an element in table scope
5406     my $i;
5407 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5408     my $node = $self->{open_elements}->[$_];
5409 wakaba 1.1 if ({
5410     tbody => 1, thead => 1, tfoot => 1,
5411     }->{$node->[1]}) {
5412     $i = $_;
5413     last INSCOPE;
5414     } elsif ({
5415     table => 1, html => 1,
5416     }->{$node->[1]}) {
5417     last INSCOPE;
5418     }
5419     } # INSCOPE
5420     unless (defined $i) {
5421 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5422 wakaba 1.1 ## Ignore the token
5423     $token = $self->_get_next_token;
5424     redo B;
5425     }
5426    
5427     ## Clear back to table body context
5428     while (not {
5429     tbody => 1, tfoot => 1, thead => 1, html => 1,
5430 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5431     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5432     pop @{$self->{open_elements}};
5433 wakaba 1.1 }
5434    
5435     ## As if <{current node}>
5436     ## have an element in table scope
5437     ## true by definition
5438    
5439     ## Clear back to table body context
5440     ## nop by definition
5441    
5442 wakaba 1.3 pop @{$self->{open_elements}};
5443     $self->{insertion_mode} = 'in table';
5444 wakaba 1.1 ## reprocess
5445     redo B;
5446     } elsif ({
5447     body => 1, caption => 1, col => 1, colgroup => 1,
5448     html => 1, td => 1, th => 1, tr => 1,
5449     }->{$token->{tag_name}}) {
5450 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5451 wakaba 1.1 ## Ignore the token
5452     $token = $self->_get_next_token;
5453     redo B;
5454     } else {
5455     #
5456     }
5457     } else {
5458     #
5459     }
5460    
5461     ## As if in table
5462 wakaba 1.3 $self->{parse_error}-> (type => 'in table:'.$token->{tag_name});
5463 wakaba 1.1 $in_body->($insert_to_foster);
5464     redo B;
5465 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in row') {
5466 wakaba 1.1 if ($token->{type} eq 'character') {
5467     ## NOTE: This is a "character in table" code clone.
5468     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5469 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5470 wakaba 1.1
5471     unless (length $token->{data}) {
5472     $token = $self->_get_next_token;
5473     redo B;
5474     }
5475     }
5476    
5477 wakaba 1.3 $self->{parse_error}-> (type => 'in table:#character');
5478    
5479 wakaba 1.1 ## As if in body, but insert into foster parent element
5480     ## ISSUE: Spec says that "whenever a node would be inserted
5481     ## into the current node" while characters might not be
5482     ## result in a new Text node.
5483     $reconstruct_active_formatting_elements->($insert_to_foster);
5484    
5485     if ({
5486     table => 1, tbody => 1, tfoot => 1,
5487     thead => 1, tr => 1,
5488 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5489 wakaba 1.1 # MUST
5490     my $foster_parent_element;
5491     my $next_sibling;
5492     my $prev_sibling;
5493 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
5494     if ($self->{open_elements}->[$_]->[1] eq 'table') {
5495     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
5496 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
5497     $foster_parent_element = $parent;
5498 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
5499 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
5500     } else {
5501 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
5502 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
5503     }
5504     last OE;
5505     }
5506     } # OE
5507 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
5508 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
5509     unless defined $foster_parent_element;
5510     if (defined $prev_sibling and
5511     $prev_sibling->node_type == 3) {
5512     $prev_sibling->manakai_append_text ($token->{data});
5513     } else {
5514     $foster_parent_element->insert_before
5515     ($self->{document}->create_text_node ($token->{data}),
5516     $next_sibling);
5517     }
5518     } else {
5519 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
5520 wakaba 1.1 }
5521    
5522     $token = $self->_get_next_token;
5523     redo B;
5524     } elsif ($token->{type} eq 'comment') {
5525     ## Copied from 'in table'
5526     my $comment = $self->{document}->create_comment ($token->{data});
5527 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
5528 wakaba 1.1 $token = $self->_get_next_token;
5529     redo B;
5530     } elsif ($token->{type} eq 'start tag') {
5531     if ($token->{tag_name} eq 'th' or
5532     $token->{tag_name} eq 'td') {
5533     ## Clear back to table row context
5534     while (not {
5535     tr => 1, html => 1,
5536 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5537     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5538     pop @{$self->{open_elements}};
5539 wakaba 1.1 }
5540    
5541    
5542     {
5543     my $el;
5544    
5545     $el = $self->{document}->create_element_ns
5546     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
5547    
5548     for my $attr_name (keys %{ $token->{attributes}}) {
5549     $el->set_attribute_ns (undef, [undef, $attr_name],
5550     $token->{attributes} ->{$attr_name}->{value});
5551     }
5552    
5553 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
5554     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
5555 wakaba 1.1 }
5556    
5557 wakaba 1.3 $self->{insertion_mode} = 'in cell';
5558 wakaba 1.1
5559     push @$active_formatting_elements, ['#marker', ''];
5560    
5561     $token = $self->_get_next_token;
5562     redo B;
5563     } elsif ({
5564     caption => 1, col => 1, colgroup => 1,
5565     tbody => 1, tfoot => 1, thead => 1, tr => 1,
5566     }->{$token->{tag_name}}) {
5567     ## As if </tr>
5568     ## have an element in table scope
5569     my $i;
5570 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5571     my $node = $self->{open_elements}->[$_];
5572 wakaba 1.1 if ($node->[1] eq 'tr') {
5573     $i = $_;
5574     last INSCOPE;
5575     } elsif ({
5576     table => 1, html => 1,
5577     }->{$node->[1]}) {
5578     last INSCOPE;
5579     }
5580     } # INSCOPE
5581     unless (defined $i) {
5582 wakaba 1.3 $self->{parse_error}-> (type => 'unmacthed end tag:'.$token->{tag_name});
5583 wakaba 1.1 ## Ignore the token
5584     $token = $self->_get_next_token;
5585     redo B;
5586     }
5587    
5588     ## Clear back to table row context
5589     while (not {
5590     tr => 1, html => 1,
5591 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5592     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5593     pop @{$self->{open_elements}};
5594 wakaba 1.1 }
5595    
5596 wakaba 1.3 pop @{$self->{open_elements}}; # tr
5597     $self->{insertion_mode} = 'in table body';
5598 wakaba 1.1 ## reprocess
5599     redo B;
5600     } elsif ($token->{tag_name} eq 'table') {
5601     ## NOTE: This is a code clone of "table in table"
5602 wakaba 1.3 $self->{parse_error}-> (type => 'not closed:table');
5603 wakaba 1.1
5604     ## As if </table>
5605     ## have a table element in table scope
5606     my $i;
5607 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5608     my $node = $self->{open_elements}->[$_];
5609 wakaba 1.1 if ($node->[1] eq 'table') {
5610     $i = $_;
5611     last INSCOPE;
5612     } elsif ({
5613     table => 1, html => 1,
5614     }->{$node->[1]}) {
5615     last INSCOPE;
5616     }
5617     } # INSCOPE
5618     unless (defined $i) {
5619 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:table');
5620 wakaba 1.1 ## Ignore tokens </table><table>
5621     $token = $self->_get_next_token;
5622     redo B;
5623     }
5624    
5625     ## generate implied end tags
5626     if ({
5627     dd => 1, dt => 1, li => 1, p => 1,
5628     td => 1, th => 1, tr => 1,
5629 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5630 wakaba 1.1 unshift @{$self->{token}}, $token; # <table>
5631     $token = {type => 'end tag', tag_name => 'table'};
5632     unshift @{$self->{token}}, $token;
5633     $token = {type => 'end tag',
5634 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
5635 wakaba 1.1 redo B;
5636     }
5637    
5638 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
5639     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5640 wakaba 1.1 }
5641    
5642 wakaba 1.3 splice @{$self->{open_elements}}, $i;
5643 wakaba 1.1
5644 wakaba 1.3 $self->_reset_insertion_mode;
5645 wakaba 1.1
5646     ## reprocess
5647     redo B;
5648     } else {
5649     #
5650     }
5651     } elsif ($token->{type} eq 'end tag') {
5652     if ($token->{tag_name} eq 'tr') {
5653     ## have an element in table scope
5654     my $i;
5655 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5656     my $node = $self->{open_elements}->[$_];
5657 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
5658     $i = $_;
5659     last INSCOPE;
5660     } elsif ({
5661     table => 1, html => 1,
5662     }->{$node->[1]}) {
5663     last INSCOPE;
5664     }
5665     } # INSCOPE
5666     unless (defined $i) {
5667 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5668 wakaba 1.1 ## Ignore the token
5669     $token = $self->_get_next_token;
5670     redo B;
5671     }
5672    
5673     ## Clear back to table row context
5674     while (not {
5675     tr => 1, html => 1,
5676 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5677     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5678     pop @{$self->{open_elements}};
5679 wakaba 1.1 }
5680    
5681 wakaba 1.3 pop @{$self->{open_elements}}; # tr
5682     $self->{insertion_mode} = 'in table body';
5683 wakaba 1.1 $token = $self->_get_next_token;
5684     redo B;
5685     } elsif ($token->{tag_name} eq 'table') {
5686     ## As if </tr>
5687     ## have an element in table scope
5688     my $i;
5689 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5690     my $node = $self->{open_elements}->[$_];
5691 wakaba 1.1 if ($node->[1] eq 'tr') {
5692     $i = $_;
5693     last INSCOPE;
5694     } elsif ({
5695     table => 1, html => 1,
5696     }->{$node->[1]}) {
5697     last INSCOPE;
5698     }
5699     } # INSCOPE
5700     unless (defined $i) {
5701 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{type});
5702 wakaba 1.1 ## Ignore the token
5703     $token = $self->_get_next_token;
5704     redo B;
5705     }
5706    
5707     ## Clear back to table row context
5708     while (not {
5709     tr => 1, html => 1,
5710 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5711     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5712     pop @{$self->{open_elements}};
5713 wakaba 1.1 }
5714    
5715 wakaba 1.3 pop @{$self->{open_elements}}; # tr
5716     $self->{insertion_mode} = 'in table body';
5717 wakaba 1.1 ## reprocess
5718     redo B;
5719     } elsif ({
5720     tbody => 1, tfoot => 1, thead => 1,
5721     }->{$token->{tag_name}}) {
5722     ## have an element in table scope
5723     my $i;
5724 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5725     my $node = $self->{open_elements}->[$_];
5726 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
5727     $i = $_;
5728     last INSCOPE;
5729     } elsif ({
5730     table => 1, html => 1,
5731     }->{$node->[1]}) {
5732     last INSCOPE;
5733     }
5734     } # INSCOPE
5735     unless (defined $i) {
5736 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5737 wakaba 1.1 ## Ignore the token
5738     $token = $self->_get_next_token;
5739     redo B;
5740     }
5741    
5742     ## As if </tr>
5743     ## have an element in table scope
5744     my $i;
5745 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5746     my $node = $self->{open_elements}->[$_];
5747 wakaba 1.1 if ($node->[1] eq 'tr') {
5748     $i = $_;
5749     last INSCOPE;
5750     } elsif ({
5751     table => 1, html => 1,
5752     }->{$node->[1]}) {
5753     last INSCOPE;
5754     }
5755     } # INSCOPE
5756     unless (defined $i) {
5757 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:tr');
5758 wakaba 1.1 ## Ignore the token
5759     $token = $self->_get_next_token;
5760     redo B;
5761     }
5762    
5763     ## Clear back to table row context
5764     while (not {
5765     tr => 1, html => 1,
5766 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5767     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5768     pop @{$self->{open_elements}};
5769 wakaba 1.1 }
5770    
5771 wakaba 1.3 pop @{$self->{open_elements}}; # tr
5772     $self->{insertion_mode} = 'in table body';
5773 wakaba 1.1 ## reprocess
5774     redo B;
5775     } elsif ({
5776     body => 1, caption => 1, col => 1,
5777     colgroup => 1, html => 1, td => 1, th => 1,
5778     }->{$token->{tag_name}}) {
5779 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5780 wakaba 1.1 ## Ignore the token
5781     $token = $self->_get_next_token;
5782     redo B;
5783     } else {
5784     #
5785     }
5786     } else {
5787     #
5788     }
5789    
5790     ## As if in table
5791 wakaba 1.3 $self->{parse_error}-> (type => 'in table:'.$token->{tag_name});
5792 wakaba 1.1 $in_body->($insert_to_foster);
5793     redo B;
5794 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in cell') {
5795 wakaba 1.1 if ($token->{type} eq 'character') {
5796     ## NOTE: This is a code clone of "character in body".
5797     $reconstruct_active_formatting_elements->($insert_to_current);
5798    
5799 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
5800 wakaba 1.1
5801     $token = $self->_get_next_token;
5802     redo B;
5803     } elsif ($token->{type} eq 'comment') {
5804     ## NOTE: This is a code clone of "comment in body".
5805     my $comment = $self->{document}->create_comment ($token->{data});
5806 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
5807 wakaba 1.1 $token = $self->_get_next_token;
5808     redo B;
5809     } elsif ($token->{type} eq 'start tag') {
5810     if ({
5811     caption => 1, col => 1, colgroup => 1,
5812     tbody => 1, td => 1, tfoot => 1, th => 1,
5813     thead => 1, tr => 1,
5814     }->{$token->{tag_name}}) {
5815     ## have an element in table scope
5816     my $tn;
5817 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5818     my $node = $self->{open_elements}->[$_];
5819 wakaba 1.1 if ($node->[1] eq 'td' or $node->[1] eq 'th') {
5820     $tn = $node->[1];
5821     last INSCOPE;
5822     } elsif ({
5823     table => 1, html => 1,
5824     }->{$node->[1]}) {
5825     last INSCOPE;
5826     }
5827     } # INSCOPE
5828     unless (defined $tn) {
5829 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5830 wakaba 1.1 ## Ignore the token
5831     $token = $self->_get_next_token;
5832     redo B;
5833     }
5834    
5835     ## Close the cell
5836     unshift @{$self->{token}}, $token; # <?>
5837     $token = {type => 'end tag', tag_name => $tn};
5838     redo B;
5839     } else {
5840     #
5841     }
5842     } elsif ($token->{type} eq 'end tag') {
5843     if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
5844     ## have an element in table scope
5845     my $i;
5846 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5847     my $node = $self->{open_elements}->[$_];
5848 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
5849     $i = $_;
5850     last INSCOPE;
5851     } elsif ({
5852     table => 1, html => 1,
5853     }->{$node->[1]}) {
5854     last INSCOPE;
5855     }
5856     } # INSCOPE
5857     unless (defined $i) {
5858 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5859 wakaba 1.1 ## Ignore the token
5860     $token = $self->_get_next_token;
5861     redo B;
5862     }
5863    
5864     ## generate implied end tags
5865     if ({
5866     dd => 1, dt => 1, li => 1, p => 1,
5867     td => ($token->{tag_name} eq 'th'),
5868     th => ($token->{tag_name} eq 'td'),
5869     tr => 1,
5870 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5871 wakaba 1.1 unshift @{$self->{token}}, $token;
5872     $token = {type => 'end tag',
5873 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
5874 wakaba 1.1 redo B;
5875     }
5876    
5877 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
5878     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5879 wakaba 1.1 }
5880    
5881 wakaba 1.3 splice @{$self->{open_elements}}, $i;
5882 wakaba 1.1
5883     $clear_up_to_marker->();
5884    
5885 wakaba 1.3 $self->{insertion_mode} = 'in row';
5886 wakaba 1.1
5887     $token = $self->_get_next_token;
5888     redo B;
5889     } elsif ({
5890     body => 1, caption => 1, col => 1,
5891     colgroup => 1, html => 1,
5892     }->{$token->{tag_name}}) {
5893 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5894 wakaba 1.1 ## Ignore the token
5895     $token = $self->_get_next_token;
5896     redo B;
5897     } elsif ({
5898     table => 1, tbody => 1, tfoot => 1,
5899     thead => 1, tr => 1,
5900     }->{$token->{tag_name}}) {
5901     ## have an element in table scope
5902     my $i;
5903     my $tn;
5904 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5905     my $node = $self->{open_elements}->[$_];
5906 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
5907     $i = $_;
5908     last INSCOPE;
5909     } elsif ($node->[1] eq 'td' or $node->[1] eq 'th') {
5910     $tn = $node->[1];
5911     ## NOTE: There is exactly one |td| or |th| element
5912     ## in scope in the stack of open elements by definition.
5913     } elsif ({
5914     table => 1, html => 1,
5915     }->{$node->[1]}) {
5916     last INSCOPE;
5917     }
5918     } # INSCOPE
5919     unless (defined $i) {
5920 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5921 wakaba 1.1 ## Ignore the token
5922     $token = $self->_get_next_token;
5923     redo B;
5924     }
5925    
5926     ## Close the cell
5927     unshift @{$self->{token}}, $token; # </?>
5928     $token = {type => 'end tag', tag_name => $tn};
5929     redo B;
5930     } else {
5931     #
5932     }
5933     } else {
5934     #
5935     }
5936    
5937     $in_body->($insert_to_current);
5938     redo B;
5939 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in select') {
5940 wakaba 1.1 if ($token->{type} eq 'character') {
5941 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
5942 wakaba 1.1 $token = $self->_get_next_token;
5943     redo B;
5944     } elsif ($token->{type} eq 'comment') {
5945     my $comment = $self->{document}->create_comment ($token->{data});
5946 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
5947 wakaba 1.1 $token = $self->_get_next_token;
5948     redo B;
5949     } elsif ($token->{type} eq 'start tag') {
5950     if ($token->{tag_name} eq 'option') {
5951 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
5952 wakaba 1.1 ## As if </option>
5953 wakaba 1.3 pop @{$self->{open_elements}};
5954 wakaba 1.1 }
5955    
5956    
5957     {
5958     my $el;
5959    
5960     $el = $self->{document}->create_element_ns
5961     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
5962    
5963     for my $attr_name (keys %{ $token->{attributes}}) {
5964     $el->set_attribute_ns (undef, [undef, $attr_name],
5965     $token->{attributes} ->{$attr_name}->{value});
5966     }
5967    
5968 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
5969     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
5970 wakaba 1.1 }
5971    
5972     $token = $self->_get_next_token;
5973     redo B;
5974     } elsif ($token->{tag_name} eq 'optgroup') {
5975 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
5976 wakaba 1.1 ## As if </option>
5977 wakaba 1.3 pop @{$self->{open_elements}};
5978 wakaba 1.1 }
5979    
5980 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
5981 wakaba 1.1 ## As if </optgroup>
5982 wakaba 1.3 pop @{$self->{open_elements}};
5983 wakaba 1.1 }
5984    
5985    
5986     {
5987     my $el;
5988    
5989     $el = $self->{document}->create_element_ns
5990     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
5991    
5992     for my $attr_name (keys %{ $token->{attributes}}) {
5993     $el->set_attribute_ns (undef, [undef, $attr_name],
5994     $token->{attributes} ->{$attr_name}->{value});
5995     }
5996    
5997 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
5998     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
5999 wakaba 1.1 }
6000    
6001     $token = $self->_get_next_token;
6002     redo B;
6003     } elsif ($token->{tag_name} eq 'select') {
6004 wakaba 1.3 $self->{parse_error}-> (type => 'not closed:select');
6005 wakaba 1.1 ## As if </select> instead
6006     ## have an element in table scope
6007     my $i;
6008 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6009     my $node = $self->{open_elements}->[$_];
6010 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
6011     $i = $_;
6012     last INSCOPE;
6013     } elsif ({
6014     table => 1, html => 1,
6015     }->{$node->[1]}) {
6016     last INSCOPE;
6017     }
6018     } # INSCOPE
6019     unless (defined $i) {
6020 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:select');
6021 wakaba 1.1 ## Ignore the token
6022     $token = $self->_get_next_token;
6023     redo B;
6024     }
6025    
6026 wakaba 1.3 splice @{$self->{open_elements}}, $i;
6027 wakaba 1.1
6028 wakaba 1.3 $self->_reset_insertion_mode;
6029 wakaba 1.1
6030     $token = $self->_get_next_token;
6031     redo B;
6032     } else {
6033     #
6034     }
6035     } elsif ($token->{type} eq 'end tag') {
6036     if ($token->{tag_name} eq 'optgroup') {
6037 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option' and
6038     $self->{open_elements}->[-2]->[1] eq 'optgroup') {
6039 wakaba 1.1 ## As if </option>
6040 wakaba 1.3 splice @{$self->{open_elements}}, -2;
6041     } elsif ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
6042     pop @{$self->{open_elements}};
6043 wakaba 1.1 } else {
6044 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
6045 wakaba 1.1 ## Ignore the token
6046     }
6047     $token = $self->_get_next_token;
6048     redo B;
6049     } elsif ($token->{tag_name} eq 'option') {
6050 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
6051     pop @{$self->{open_elements}};
6052 wakaba 1.1 } else {
6053 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
6054 wakaba 1.1 ## Ignore the token
6055     }
6056     $token = $self->_get_next_token;
6057     redo B;
6058     } elsif ($token->{tag_name} eq 'select') {
6059     ## have an element in table scope
6060     my $i;
6061 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6062     my $node = $self->{open_elements}->[$_];
6063 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
6064     $i = $_;
6065     last INSCOPE;
6066     } elsif ({
6067     table => 1, html => 1,
6068     }->{$node->[1]}) {
6069     last INSCOPE;
6070     }
6071     } # INSCOPE
6072     unless (defined $i) {
6073 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
6074 wakaba 1.1 ## Ignore the token
6075     $token = $self->_get_next_token;
6076     redo B;
6077     }
6078    
6079 wakaba 1.3 splice @{$self->{open_elements}}, $i;
6080 wakaba 1.1
6081 wakaba 1.3 $self->_reset_insertion_mode;
6082 wakaba 1.1
6083     $token = $self->_get_next_token;
6084     redo B;
6085     } elsif ({
6086     caption => 1, table => 1, tbody => 1,
6087     tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
6088     }->{$token->{tag_name}}) {
6089 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
6090 wakaba 1.1
6091     ## have an element in table scope
6092     my $i;
6093 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6094     my $node = $self->{open_elements}->[$_];
6095 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
6096     $i = $_;
6097     last INSCOPE;
6098     } elsif ({
6099     table => 1, html => 1,
6100     }->{$node->[1]}) {
6101     last INSCOPE;
6102     }
6103     } # INSCOPE
6104     unless (defined $i) {
6105     ## Ignore the token
6106     $token = $self->_get_next_token;
6107     redo B;
6108     }
6109    
6110     ## As if </select>
6111     ## have an element in table scope
6112     undef $i;
6113 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6114     my $node = $self->{open_elements}->[$_];
6115 wakaba 1.1 if ($node->[1] eq 'select') {
6116     $i = $_;
6117     last INSCOPE;
6118     } elsif ({
6119     table => 1, html => 1,
6120     }->{$node->[1]}) {
6121     last INSCOPE;
6122     }
6123     } # INSCOPE
6124     unless (defined $i) {
6125 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:select');
6126 wakaba 1.1 ## Ignore the </select> token
6127     $token = $self->_get_next_token; ## TODO: ok?
6128     redo B;
6129     }
6130    
6131 wakaba 1.3 splice @{$self->{open_elements}}, $i;
6132 wakaba 1.1
6133 wakaba 1.3 $self->_reset_insertion_mode;
6134 wakaba 1.1
6135     ## reprocess
6136     redo B;
6137     } else {
6138     #
6139     }
6140     } else {
6141     #
6142     }
6143    
6144 wakaba 1.3 $self->{parse_error}-> (type => 'in select:'.$token->{tag_name});
6145 wakaba 1.1 ## Ignore the token
6146     $token = $self->_get_next_token;
6147     redo B;
6148 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after body') {
6149 wakaba 1.1 if ($token->{type} eq 'character') {
6150     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
6151     ## As if in body
6152     $reconstruct_active_formatting_elements->($insert_to_current);
6153    
6154 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
6155 wakaba 1.1
6156     unless (length $token->{data}) {
6157     $token = $self->_get_next_token;
6158     redo B;
6159     }
6160     }
6161    
6162     #
6163 wakaba 1.3 $self->{parse_error}-> (type => 'after body:#'.$token->{type});
6164 wakaba 1.1 } elsif ($token->{type} eq 'comment') {
6165     my $comment = $self->{document}->create_comment ($token->{data});
6166 wakaba 1.3 $self->{open_elements}->[0]->[0]->append_child ($comment);
6167 wakaba 1.1 $token = $self->_get_next_token;
6168     redo B;
6169 wakaba 1.3 } elsif ($token->{type} eq 'start tag') {
6170     $self->{parse_error}-> (type => 'after body:'.$token->{tag_name});
6171     #
6172 wakaba 1.1 } elsif ($token->{type} eq 'end tag') {
6173     if ($token->{tag_name} eq 'html') {
6174 wakaba 1.3 if (defined $self->{inner_html_node}) {
6175     $self->{parse_error}-> (type => 'unmatched end tag:html');
6176     ## Ignore the token
6177     $token = $self->_get_next_token;
6178     redo B;
6179     } else {
6180     $phase = 'trailing end';
6181     $token = $self->_get_next_token;
6182     redo B;
6183     }
6184 wakaba 1.1 } else {
6185 wakaba 1.3 $self->{parse_error}-> (type => 'after body:/'.$token->{tag_name});
6186 wakaba 1.1 }
6187     } else {
6188 wakaba 1.3 $self->{parse_error}-> (type => 'after body:#'.$token->{type});
6189 wakaba 1.1 }
6190    
6191 wakaba 1.3 $self->{insertion_mode} = 'in body';
6192 wakaba 1.1 ## reprocess
6193     redo B;
6194 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in frameset') {
6195 wakaba 1.1 if ($token->{type} eq 'character') {
6196     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
6197 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
6198 wakaba 1.1
6199     unless (length $token->{data}) {
6200     $token = $self->_get_next_token;
6201     redo B;
6202     }
6203     }
6204    
6205     #
6206     } elsif ($token->{type} eq 'comment') {
6207     my $comment = $self->{document}->create_comment ($token->{data});
6208 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
6209 wakaba 1.1 $token = $self->_get_next_token;
6210     redo B;
6211     } elsif ($token->{type} eq 'start tag') {
6212     if ($token->{tag_name} eq 'frameset') {
6213    
6214     {
6215     my $el;
6216    
6217     $el = $self->{document}->create_element_ns
6218     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
6219    
6220     for my $attr_name (keys %{ $token->{attributes}}) {
6221     $el->set_attribute_ns (undef, [undef, $attr_name],
6222     $token->{attributes} ->{$attr_name}->{value});
6223     }
6224    
6225 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
6226     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
6227 wakaba 1.1 }
6228    
6229     $token = $self->_get_next_token;
6230     redo B;
6231     } elsif ($token->{tag_name} eq 'frame') {
6232    
6233     {
6234     my $el;
6235    
6236     $el = $self->{document}->create_element_ns
6237     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
6238    
6239     for my $attr_name (keys %{ $token->{attributes}}) {
6240     $el->set_attribute_ns (undef, [undef, $attr_name],
6241     $token->{attributes} ->{$attr_name}->{value});
6242     }
6243    
6244 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
6245     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
6246 wakaba 1.1 }
6247    
6248 wakaba 1.3 pop @{$self->{open_elements}};
6249 wakaba 1.1 $token = $self->_get_next_token;
6250     redo B;
6251     } elsif ($token->{tag_name} eq 'noframes') {
6252     $in_body->($insert_to_current);
6253     redo B;
6254     } else {
6255     #
6256     }
6257     } elsif ($token->{type} eq 'end tag') {
6258     if ($token->{tag_name} eq 'frameset') {
6259 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html' and
6260     @{$self->{open_elements}} == 1) {
6261     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
6262 wakaba 1.1 ## Ignore the token
6263     $token = $self->_get_next_token;
6264     } else {
6265 wakaba 1.3 pop @{$self->{open_elements}};
6266 wakaba 1.1 $token = $self->_get_next_token;
6267     }
6268    
6269     ## if not inner_html and
6270 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'frameset') {
6271     $self->{insertion_mode} = 'after frameset';
6272 wakaba 1.1 }
6273     redo B;
6274     } else {
6275     #
6276     }
6277     } else {
6278     #
6279     }
6280    
6281 wakaba 1.3 if (defined $token->{tag_name}) {
6282     $self->{parse_error}-> (type => 'in frameset:'.$token->{tag_name});
6283     } else {
6284     $self->{parse_error}-> (type => 'in frameset:#'.$token->{type});
6285     }
6286 wakaba 1.1 ## Ignore the token
6287     $token = $self->_get_next_token;
6288     redo B;
6289 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after frameset') {
6290 wakaba 1.1 if ($token->{type} eq 'character') {
6291     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
6292 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
6293 wakaba 1.1
6294     unless (length $token->{data}) {
6295     $token = $self->_get_next_token;
6296     redo B;
6297     }
6298     }
6299    
6300     #
6301     } elsif ($token->{type} eq 'comment') {
6302     my $comment = $self->{document}->create_comment ($token->{data});
6303 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
6304 wakaba 1.1 $token = $self->_get_next_token;
6305     redo B;
6306     } elsif ($token->{type} eq 'start tag') {
6307     if ($token->{tag_name} eq 'noframes') {
6308     $in_body->($insert_to_current);
6309     redo B;
6310     } else {
6311     #
6312     }
6313     } elsif ($token->{type} eq 'end tag') {
6314     if ($token->{tag_name} eq 'html') {
6315     $phase = 'trailing end';
6316     $token = $self->_get_next_token;
6317     redo B;
6318     } else {
6319     #
6320     }
6321     } else {
6322     #
6323     }
6324    
6325 wakaba 1.3 if (defined $token->{tag_name}) {
6326     $self->{parse_error}-> (type => 'after frameset:'.$token->{tag_name});
6327     } else {
6328     $self->{parse_error}-> (type => 'after frameset:#'.$token->{type});
6329     }
6330 wakaba 1.1 ## Ignore the token
6331     $token = $self->_get_next_token;
6332     redo B;
6333    
6334     ## ISSUE: An issue in spec there
6335     } else {
6336 wakaba 1.3 die "$0: $self->{insertion_mode}: Unknown insertion mode";
6337 wakaba 1.1 }
6338     }
6339     } elsif ($phase eq 'trailing end') {
6340     ## states in the main stage is preserved yet # MUST
6341    
6342     if ($token->{type} eq 'DOCTYPE') {
6343 wakaba 1.3 $self->{parse_error}-> (type => 'after html:#DOCTYPE');
6344 wakaba 1.1 ## Ignore the token
6345     $token = $self->_get_next_token;
6346     redo B;
6347     } elsif ($token->{type} eq 'comment') {
6348     my $comment = $self->{document}->create_comment ($token->{data});
6349     $self->{document}->append_child ($comment);
6350     $token = $self->_get_next_token;
6351     redo B;
6352     } elsif ($token->{type} eq 'character') {
6353     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
6354     my $data = $1;
6355     ## As if in the main phase.
6356     ## NOTE: The insertion mode in the main phase
6357     ## just before the phase has been changed to the trailing
6358     ## end phase is either "after body" or "after frameset".
6359     $reconstruct_active_formatting_elements->($insert_to_current)
6360     if $phase eq 'main';
6361    
6362 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($data);
6363 wakaba 1.1
6364     unless (length $token->{data}) {
6365     $token = $self->_get_next_token;
6366     redo B;
6367     }
6368     }
6369    
6370 wakaba 1.3 $self->{parse_error}-> (type => 'after html:#character');
6371 wakaba 1.1 $phase = 'main';
6372     ## reprocess
6373     redo B;
6374     } elsif ($token->{type} eq 'start tag' or
6375     $token->{type} eq 'end tag') {
6376 wakaba 1.3 $self->{parse_error}-> (type => 'after html:'.$token->{tag_name});
6377 wakaba 1.1 $phase = 'main';
6378     ## reprocess
6379     redo B;
6380     } elsif ($token->{type} eq 'end-of-file') {
6381     ## Stop parsing
6382     last B;
6383     } else {
6384     die "$0: $token->{type}: Unknown token";
6385     }
6386     }
6387     } # B
6388    
6389     ## Stop parsing # MUST
6390    
6391     ## TODO: script stuffs
6392 wakaba 1.3 } # _tree_construct_main
6393    
6394     sub set_inner_html ($$$) {
6395     my $class = shift;
6396     my $node = shift;
6397     my $s = \$_[0];
6398     my $onerror = $_[1];
6399    
6400     my $nt = $node->node_type;
6401     if ($nt == 9) {
6402     # MUST
6403    
6404     ## Step 1 # MUST
6405     ## TODO: If the document has an active parser, ...
6406     ## ISSUE: There is an issue in the spec.
6407    
6408     ## Step 2 # MUST
6409     my @cn = @{$node->child_nodes};
6410     for (@cn) {
6411     $node->remove_child ($_);
6412     }
6413    
6414     ## Step 3, 4, 5 # MUST
6415     $class->parse_string ($$s => $node, $onerror);
6416     } elsif ($nt == 1) {
6417     ## TODO: If non-html element
6418    
6419     ## NOTE: Most of this code is copied from |parse_string|
6420    
6421     ## Step 1 # MUST
6422 wakaba 1.14 my $this_doc = $node->owner_document;
6423     my $doc = $this_doc->implementation->create_document;
6424 wakaba 1.18 $doc->manakai_is_html (1);
6425 wakaba 1.3 my $p = $class->new;
6426     $p->{document} = $doc;
6427    
6428     ## Step 9 # MUST
6429     my $i = 0;
6430     my $line = 1;
6431     my $column = 0;
6432     $p->{set_next_input_character} = sub {
6433     my $self = shift;
6434 wakaba 1.14
6435     pop @{$self->{prev_input_character}};
6436     unshift @{$self->{prev_input_character}}, $self->{next_input_character};
6437    
6438 wakaba 1.3 $self->{next_input_character} = -1 and return if $i >= length $$s;
6439     $self->{next_input_character} = ord substr $$s, $i++, 1;
6440     $column++;
6441 wakaba 1.4
6442     if ($self->{next_input_character} == 0x000A) { # LF
6443     $line++;
6444     $column = 0;
6445     } elsif ($self->{next_input_character} == 0x000D) { # CR
6446 wakaba 1.15 $i++ if substr ($$s, $i, 1) eq "\x0A";
6447 wakaba 1.3 $self->{next_input_character} = 0x000A; # LF # MUST
6448     $line++;
6449 wakaba 1.4 $column = 0;
6450 wakaba 1.3 } elsif ($self->{next_input_character} > 0x10FFFF) {
6451     $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
6452     } elsif ($self->{next_input_character} == 0x0000) { # NULL
6453 wakaba 1.14 $self->{parse_error}-> (type => 'NULL');
6454 wakaba 1.3 $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
6455     }
6456     };
6457 wakaba 1.14 $p->{prev_input_character} = [-1, -1, -1];
6458     $p->{next_input_character} = -1;
6459 wakaba 1.3
6460     my $ponerror = $onerror || sub {
6461     my (%opt) = @_;
6462     warn "Parse error ($opt{type}) at line $opt{line} column $opt{column}\n";
6463     };
6464     $p->{parse_error} = sub {
6465     $ponerror->(@_, line => $line, column => $column);
6466     };
6467    
6468     $p->_initialize_tokenizer;
6469     $p->_initialize_tree_constructor;
6470    
6471     ## Step 2
6472     my $node_ln = $node->local_name;
6473     $p->{content_model_flag} = {
6474     title => 'RCDATA',
6475     textarea => 'RCDATA',
6476     style => 'CDATA',
6477     script => 'CDATA',
6478     xmp => 'CDATA',
6479     iframe => 'CDATA',
6480     noembed => 'CDATA',
6481     noframes => 'CDATA',
6482     noscript => 'CDATA',
6483     plaintext => 'PLAINTEXT',
6484     }->{$node_ln} || 'PCDATA';
6485     ## ISSUE: What is "the name of the element"? local name?
6486    
6487     $p->{inner_html_node} = [$node, $node_ln];
6488    
6489     ## Step 4
6490     my $root = $doc->create_element_ns
6491     ('http://www.w3.org/1999/xhtml', [undef, 'html']);
6492    
6493     ## Step 5 # MUST
6494     $doc->append_child ($root);
6495    
6496     ## Step 6 # MUST
6497     push @{$p->{open_elements}}, [$root, 'html'];
6498    
6499     undef $p->{head_element};
6500    
6501     ## Step 7 # MUST
6502     $p->_reset_insertion_mode;
6503    
6504     ## Step 8 # MUST
6505     my $anode = $node;
6506     AN: while (defined $anode) {
6507     if ($anode->node_type == 1) {
6508     my $nsuri = $anode->namespace_uri;
6509     if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml') {
6510     if ($anode->local_name eq 'form') { ## TODO: case?
6511     $p->{form_element} = $anode;
6512     last AN;
6513     }
6514     }
6515     }
6516     $anode = $anode->parent_node;
6517     } # AN
6518    
6519     ## Step 3 # MUST
6520     ## Step 10 # MUST
6521     {
6522     my $self = $p;
6523     $token = $self->_get_next_token;
6524     }
6525     $p->_tree_construction_main;
6526    
6527     ## Step 11 # MUST
6528     my @cn = @{$node->child_nodes};
6529     for (@cn) {
6530     $node->remove_child ($_);
6531     }
6532     ## ISSUE: mutation events? read-only?
6533    
6534     ## Step 12 # MUST
6535     @cn = @{$root->child_nodes};
6536     for (@cn) {
6537 wakaba 1.14 $this_doc->adopt_node ($_);
6538 wakaba 1.3 $node->append_child ($_);
6539     }
6540 wakaba 1.14 ## ISSUE: mutation events?
6541 wakaba 1.3
6542     $p->_terminate_tree_constructor;
6543     } else {
6544     die "$0: |set_inner_html| is not defined for node of type $nt";
6545     }
6546     } # set_inner_html
6547    
6548     } # tree construction stage
6549 wakaba 1.1
6550     sub get_inner_html ($$$) {
6551 wakaba 1.3 my (undef, $node, $on_error) = @_;
6552 wakaba 1.1
6553     ## Step 1
6554     my $s = '';
6555    
6556     my $in_cdata;
6557     my $parent = $node;
6558     while (defined $parent) {
6559     if ($parent->node_type == 1 and
6560     $parent->namespace_uri eq 'http://www.w3.org/1999/xhtml' and
6561     {
6562     style => 1, script => 1, xmp => 1, iframe => 1,
6563     noembed => 1, noframes => 1, noscript => 1,
6564     }->{$parent->local_name}) { ## TODO: case thingy
6565     $in_cdata = 1;
6566     }
6567     $parent = $parent->parent_node;
6568     }
6569    
6570     ## Step 2
6571     my @node = @{$node->child_nodes};
6572     C: while (@node) {
6573     my $child = shift @node;
6574     unless (ref $child) {
6575     if ($child eq 'cdata-out') {
6576     $in_cdata = 0;
6577     } else {
6578     $s .= $child; # end tag
6579     }
6580     next C;
6581     }
6582    
6583     my $nt = $child->node_type;
6584     if ($nt == 1) { # Element
6585     my $tag_name = lc $child->tag_name; ## ISSUE: Definition of "lowercase"
6586     $s .= '<' . $tag_name;
6587    
6588     ## ISSUE: Non-html elements
6589    
6590     my @attrs = @{$child->attributes}; # sort order MUST be stable
6591     for my $attr (@attrs) { # order is implementation dependent
6592     my $attr_name = lc $attr->name; ## ISSUE: Definition of "lowercase"
6593     $s .= ' ' . $attr_name . '="';
6594     my $attr_value = $attr->value;
6595     ## escape
6596     $attr_value =~ s/&/&amp;/g;
6597     $attr_value =~ s/</&lt;/g;
6598     $attr_value =~ s/>/&gt;/g;
6599     $attr_value =~ s/"/&quot;/g;
6600     $s .= $attr_value . '"';
6601     }
6602     $s .= '>';
6603    
6604     next C if {
6605     area => 1, base => 1, basefont => 1, bgsound => 1,
6606     br => 1, col => 1, embed => 1, frame => 1, hr => 1,
6607     img => 1, input => 1, link => 1, meta => 1, param => 1,
6608     spacer => 1, wbr => 1,
6609     }->{$tag_name};
6610    
6611     if (not $in_cdata and {
6612     style => 1, script => 1, xmp => 1, iframe => 1,
6613     noembed => 1, noframes => 1, noscript => 1,
6614     }->{$tag_name}) {
6615     unshift @node, 'cdata-out';
6616     $in_cdata = 1;
6617     }
6618    
6619     unshift @node, @{$child->child_nodes}, '</' . $tag_name . '>';
6620     } elsif ($nt == 3 or $nt == 4) {
6621     if ($in_cdata) {
6622     $s .= $child->data;
6623     } else {
6624     my $value = $child->data;
6625     $value =~ s/&/&amp;/g;
6626     $value =~ s/</&lt;/g;
6627     $value =~ s/>/&gt;/g;
6628     $value =~ s/"/&quot;/g;
6629     $s .= $value;
6630     }
6631     } elsif ($nt == 8) {
6632     $s .= '<!--' . $child->data . '-->';
6633     } elsif ($nt == 10) {
6634     $s .= '<!DOCTYPE ' . $child->name . '>';
6635     } elsif ($nt == 5) { # entrefs
6636     push @node, @{$child->child_nodes};
6637     } else {
6638     $on_error->($child) if defined $on_error;
6639     }
6640     ## ISSUE: This code does not support PIs.
6641     } # C
6642    
6643     ## Step 3
6644     return \$s;
6645     } # get_inner_html
6646    
6647     1;
6648 wakaba 1.22 # $Date: 2007/06/23 14:34:39 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24