/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.21 - (hide annotations) (download)
Sat Jun 23 14:34:39 2007 UTC (17 years, 4 months ago) by wakaba
Branch: MAIN
Changes since 1.20: +6 -5 lines
++ whatpm/Whatpm/ChangeLog	23 Jun 2007 14:34:34 -0000
	* HTML.pm.src: HTML5 revision 918 (</head>, </body>, </html>).

2007-06-23  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.2 package Whatpm::HTML;
2 wakaba 1.1 use strict;
3 wakaba 1.21 our $VERSION=do{my @r=(q$Revision: 1.20 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.1
5 wakaba 1.18 ## ISSUE:
6     ## var doc = implementation.createDocument (null, null, null);
7     ## doc.write ('');
8     ## alert (doc.compatMode);
9 wakaba 1.1
## Start tags on which a trailing "/" immediately before ">" is accepted
## by the tokenizer; for any other tag (or any end tag) the tokenizer
## reports a |nestc| parse error when it meets the "/".
my $permitted_slash_tag_name = {
  map { ($_ => 1) } qw(
    base link meta hr br img embed param area col input
  )
};
23    
## Replacement code points for the code points 0x80..0x9F.
## Positions that have no replacement map to U+FFFD REPLACEMENT CHARACTER.
## NOTE(review): the table is not used in the visible part of the file;
## presumably it is consulted while resolving numeric character
## references -- confirm against the entity-consuming code.
my $c1_entity_char = do {
  my @replacement = (
    ## 0x80 .. 0x87
    0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
    ## 0x88 .. 0x8F
    0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD,
    ## 0x90 .. 0x97
    0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
    ## 0x98 .. 0x9F
    0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178,
  );
  my %map;
  @map{0x80 .. 0x9F} = @replacement;
  \%map;
}; # $c1_entity_char
58 wakaba 1.1
## Names of the elements that belong to the "special" category
## (elements in none of the three category tables are "phrasing").
my $special_category = {
  map { ($_ => 1) } qw(
    address area base basefont bgsound blockquote body br center col
    colgroup dd dir div dl dt embed fieldset form frame frameset
    h1 h2 h3 h4 h5 h6 head hr iframe image img input isindex li link
    listing menu meta noembed noframes noscript ol optgroup option p
    param plaintext pre script select spacer style tbody textarea
    tfoot thead title tr ul wbr
  )
};
## Names of the elements that belong to the "scoping" category.
my $scoping_category = {
  map { ($_ => 1) } qw(button caption html marquee object table td th)
};
## Names of the elements that belong to the "formatting" category.
## The |strike| entry was previously misspelled as |strile|, so the
## <strike> element was never recognised as a formatting element.
my $formatting_category = {
  a => 1, b => 1, big => 1, em => 1, font => 1, i => 1, nobr => 1,
  s => 1, small => 1, strike => 1, strong => 1, tt => 1, u => 1,
};
79     # $phrasing_category: all other elements
80    
## $document = $class_or_parser->parse_string ($string, $document [, $onerror])
##
## Creates a fresh parser with |new|, stores $document in it, installs
## the input-character and parse-error callbacks for $string, runs the
## tokenizer and tree constructor, and returns the stored document.
##
##   $string   - Text to parse; it is read through a reference, not copied.
##   $document - Stored as |$self->{document}| and returned at the end.
##   $onerror  - Optional code reference invoked for every parse error
##               with the error's own key/value pairs followed by
##               |line => ..., column => ...|.  When it is omitted (or
##               false) the error is reported with |warn|.
sub parse_string ($$$;$) {
  my $self = shift->new;
  my $input = \$_[0];
  $self->{document} = $_[1];

  ## NOTE: |set_inner_html| copies most of this method's code

  ## Read position and human-readable location, shared by the two
  ## closures below.
  my ($pos, $line, $column) = (0, 1, 0);

  $self->{set_next_input_character} = sub {
    my $parser = shift;

    ## Remember the character being replaced: drop the oldest entry of
    ## the three-entry history and put the current character in front.
    my $history = $parser->{prev_input_character};
    pop @$history;
    unshift @$history, $parser->{next_input_character};

    ## End of input is signalled by -1.
    if ($pos >= length $$input) {
      $parser->{next_input_character} = -1;
      return;
    }

    my $code = ord substr $$input, $pos++, 1;
    $parser->{next_input_character} = $code;
    $column++;

    if ($code == 0x000A or $code == 0x000D) { # LF or CR
      if ($code == 0x000D) { # CR
        ## A CR, or a CR LF pair, is handed over as a single LF.
        $pos++ if substr ($$input, $pos, 1) eq "\x0A";
        $parser->{next_input_character} = 0x000A; # LF # MUST
      }
      $line++;
      $column = 0;
    } elsif ($code > 0x10FFFF) {
      $parser->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    } elsif ($code == 0x0000) { # NULL
      $parser->{parse_error}-> (type => 'NULL');
      $parser->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    }
  };
  $self->{prev_input_character} = [-1, -1, -1];
  $self->{next_input_character} = -1;

  ## Error reporting: the caller's handler if given, else a warning.
  my $onerror = $_[2];
  $onerror ||= sub {
    my %info = @_;
    warn "Parse error ($info{type}) at line $info{line} column $info{column}\n";
  };
  $self->{parse_error} = sub {
    $onerror->(@_, line => $line, column => $column);
  };

  $self->_initialize_tokenizer;
  $self->_initialize_tree_constructor;
  $self->_construct_tree;
  $self->_terminate_tree_constructor;

  return $self->{document};
} # parse_string
134    
## $parser = $class->new
##
## Creates an empty parser object with default callbacks:
##
##   set_next_input_character - Reports end of input, i.e. sets
##       |next_input_character| of the parser it is called with to -1.
##   parse_error              - Ignores the error.
##
## The input callback works on the parser passed as its first argument
## (every call site in this file invokes it as
## |$self->{set_next_input_character}->($self)|) instead of closing over
## $self.  Closing over $self made the object refer to itself through
## the closure, so parser objects could never be freed.
sub new ($) {
  my $class = shift;
  my $self = bless {}, $class;
  $self->{set_next_input_character} = sub {
    my $parser = shift;
    $parser->{next_input_character} = -1;
  };
  $self->{parse_error} = sub {
    #
  };
  return $self;
} # new
146    
## Implementations MUST act as if they used the state machine described
## in the spec.
148    
## $self->_initialize_tokenizer
##
## Puts the tokenizer into its start condition: "data" state, PCDATA
## content model, no token or attribute under construction, empty
## pushed-back character buffer and empty pending-token queue.  It then
## loads the first input character through the
## |set_next_input_character| callback.
## |$self->{escape}| is left untouched.
sub _initialize_tokenizer ($) {
  my $self = shift;

  $self->{state} = 'data'; # MUST
  $self->{content_model_flag} = 'PCDATA';

  ## |current_token| is a start tag, end tag, comment, or DOCTYPE token
  ## while one is being built; nothing is being built yet.
  $self->{$_} = undef for qw(
    current_token
    current_attribute
    last_emitted_start_tag_name
    last_attribute_value_state
  );

  ## The buffer of pushed-back characters has just been emptied, so the
  ## first character always comes from the input callback.
  $self->{char} = [];
  $self->{set_next_input_character}->($self);

  $self->{token} = [];
} # _initialize_tokenizer
169    
170     ## A token has:
171     ## ->{type} eq 'DOCTYPE', 'start tag', 'end tag', 'comment',
172     ## 'character', or 'end-of-file'
173 wakaba 1.18 ## ->{name} (DOCTYPE, start tag (tag name), end tag (tag name))
174     ## ->{public_identifier} (DOCTYPE)
175     ## ->{system_identifier} (DOCTYPE)
176     ## ->{correct} == 1 or 0 (DOCTYPE)
177 wakaba 1.1 ## ->{attributes} isa HASH (start tag, end tag)
178     ## ->{data} (comment, character)
179    
180     ## Emitted token MUST immediately be handled by the tree construction state.
181    
182     ## Before each step, UA MAY check to see if either one of the scripts in
183     ## "list of scripts that will execute as soon as possible" or the first
184     ## script in the "list of scripts that will execute asynchronously",
185     ## has completed loading. If one has, then it MUST be executed
186     ## and removed from the list.
187    
188     sub _get_next_token ($) {
189     my $self = shift;
190     if (@{$self->{token}}) {
191     return shift @{$self->{token}};
192     }
193    
194     A: {
195     if ($self->{state} eq 'data') {
196     if ($self->{next_input_character} == 0x0026) { # &
197     if ($self->{content_model_flag} eq 'PCDATA' or
198     $self->{content_model_flag} eq 'RCDATA') {
199     $self->{state} = 'entity data';
200    
201     if (@{$self->{char}}) {
202     $self->{next_input_character} = shift @{$self->{char}};
203     } else {
204     $self->{set_next_input_character}->($self);
205     }
206    
207     redo A;
208     } else {
209     #
210     }
211 wakaba 1.13 } elsif ($self->{next_input_character} == 0x002D) { # -
212     if ($self->{content_model_flag} eq 'RCDATA' or
213     $self->{content_model_flag} eq 'CDATA') {
214     unless ($self->{escape}) {
215     if ($self->{prev_input_character}->[0] == 0x002D and # -
216     $self->{prev_input_character}->[1] == 0x0021 and # !
217     $self->{prev_input_character}->[2] == 0x003C) { # <
218     $self->{escape} = 1;
219     }
220     }
221     }
222    
223     #
224 wakaba 1.1 } elsif ($self->{next_input_character} == 0x003C) { # <
225 wakaba 1.13 if ($self->{content_model_flag} eq 'PCDATA' or
226     (($self->{content_model_flag} eq 'CDATA' or
227     $self->{content_model_flag} eq 'RCDATA') and
228     not $self->{escape})) {
229 wakaba 1.1 $self->{state} = 'tag open';
230    
231     if (@{$self->{char}}) {
232     $self->{next_input_character} = shift @{$self->{char}};
233     } else {
234     $self->{set_next_input_character}->($self);
235     }
236    
237     redo A;
238     } else {
239     #
240     }
241 wakaba 1.13 } elsif ($self->{next_input_character} == 0x003E) { # >
242     if ($self->{escape} and
243     ($self->{content_model_flag} eq 'RCDATA' or
244     $self->{content_model_flag} eq 'CDATA')) {
245     if ($self->{prev_input_character}->[0] == 0x002D and # -
246     $self->{prev_input_character}->[1] == 0x002D) { # -
247     delete $self->{escape};
248     }
249     }
250    
251     #
252 wakaba 1.1 } elsif ($self->{next_input_character} == -1) {
253     return ({type => 'end-of-file'});
254     last A; ## TODO: ok?
255     }
256     # Anything else
257     my $token = {type => 'character',
258     data => chr $self->{next_input_character}};
259     ## Stay in the data state
260    
261     if (@{$self->{char}}) {
262     $self->{next_input_character} = shift @{$self->{char}};
263     } else {
264     $self->{set_next_input_character}->($self);
265     }
266    
267    
268     return ($token);
269    
270     redo A;
271     } elsif ($self->{state} eq 'entity data') {
272     ## (cannot happen in CDATA state)
273    
274     my $token = $self->_tokenize_attempt_to_consume_an_entity;
275    
276     $self->{state} = 'data';
277     # next-input-character is already done
278    
279     unless (defined $token) {
280     return ({type => 'character', data => '&'});
281     } else {
282     return ($token);
283     }
284    
285     redo A;
286     } elsif ($self->{state} eq 'tag open') {
287     if ($self->{content_model_flag} eq 'RCDATA' or
288     $self->{content_model_flag} eq 'CDATA') {
289     if ($self->{next_input_character} == 0x002F) { # /
290    
291     if (@{$self->{char}}) {
292     $self->{next_input_character} = shift @{$self->{char}};
293     } else {
294     $self->{set_next_input_character}->($self);
295     }
296    
297     $self->{state} = 'close tag open';
298     redo A;
299     } else {
300     ## reconsume
301     $self->{state} = 'data';
302    
303     return ({type => 'character', data => '<'});
304    
305     redo A;
306     }
307     } elsif ($self->{content_model_flag} eq 'PCDATA') {
308     if ($self->{next_input_character} == 0x0021) { # !
309     $self->{state} = 'markup declaration open';
310    
311     if (@{$self->{char}}) {
312     $self->{next_input_character} = shift @{$self->{char}};
313     } else {
314     $self->{set_next_input_character}->($self);
315     }
316    
317     redo A;
318     } elsif ($self->{next_input_character} == 0x002F) { # /
319     $self->{state} = 'close tag open';
320    
321     if (@{$self->{char}}) {
322     $self->{next_input_character} = shift @{$self->{char}};
323     } else {
324     $self->{set_next_input_character}->($self);
325     }
326    
327     redo A;
328     } elsif (0x0041 <= $self->{next_input_character} and
329     $self->{next_input_character} <= 0x005A) { # A..Z
330     $self->{current_token}
331     = {type => 'start tag',
332     tag_name => chr ($self->{next_input_character} + 0x0020)};
333     $self->{state} = 'tag name';
334    
335     if (@{$self->{char}}) {
336     $self->{next_input_character} = shift @{$self->{char}};
337     } else {
338     $self->{set_next_input_character}->($self);
339     }
340    
341     redo A;
342     } elsif (0x0061 <= $self->{next_input_character} and
343     $self->{next_input_character} <= 0x007A) { # a..z
344     $self->{current_token} = {type => 'start tag',
345     tag_name => chr ($self->{next_input_character})};
346     $self->{state} = 'tag name';
347    
348     if (@{$self->{char}}) {
349     $self->{next_input_character} = shift @{$self->{char}};
350     } else {
351     $self->{set_next_input_character}->($self);
352     }
353    
354     redo A;
355     } elsif ($self->{next_input_character} == 0x003E) { # >
356 wakaba 1.3 $self->{parse_error}-> (type => 'empty start tag');
357 wakaba 1.1 $self->{state} = 'data';
358    
359     if (@{$self->{char}}) {
360     $self->{next_input_character} = shift @{$self->{char}};
361     } else {
362     $self->{set_next_input_character}->($self);
363     }
364    
365    
366     return ({type => 'character', data => '<>'});
367    
368     redo A;
369     } elsif ($self->{next_input_character} == 0x003F) { # ?
370 wakaba 1.3 $self->{parse_error}-> (type => 'pio');
371 wakaba 1.1 $self->{state} = 'bogus comment';
372     ## $self->{next_input_character} is intentionally left as is
373     redo A;
374     } else {
375 wakaba 1.3 $self->{parse_error}-> (type => 'bare stago');
376 wakaba 1.1 $self->{state} = 'data';
377     ## reconsume
378    
379     return ({type => 'character', data => '<'});
380    
381     redo A;
382     }
383     } else {
384     die "$0: $self->{content_model_flag}: Unknown content model flag";
385     }
386     } elsif ($self->{state} eq 'close tag open') {
387     if ($self->{content_model_flag} eq 'RCDATA' or
388     $self->{content_model_flag} eq 'CDATA') {
389     my @next_char;
390     TAGNAME: for (my $i = 0; $i < length $self->{last_emitted_start_tag_name}; $i++) {
391     push @next_char, $self->{next_input_character};
392     my $c = ord substr ($self->{last_emitted_start_tag_name}, $i, 1);
393     my $C = 0x0061 <= $c && $c <= 0x007A ? $c - 0x0020 : $c;
394     if ($self->{next_input_character} == $c or $self->{next_input_character} == $C) {
395    
396     if (@{$self->{char}}) {
397     $self->{next_input_character} = shift @{$self->{char}};
398     } else {
399     $self->{set_next_input_character}->($self);
400     }
401    
402     next TAGNAME;
403     } else {
404     $self->{next_input_character} = shift @next_char; # reconsume
405     unshift @{$self->{char}}, (@next_char);
406     $self->{state} = 'data';
407    
408     return ({type => 'character', data => '</'});
409    
410     redo A;
411     }
412     }
413     push @next_char, $self->{next_input_character};
414    
415     unless ($self->{next_input_character} == 0x0009 or # HT
416     $self->{next_input_character} == 0x000A or # LF
417     $self->{next_input_character} == 0x000B or # VT
418     $self->{next_input_character} == 0x000C or # FF
419     $self->{next_input_character} == 0x0020 or # SP
420     $self->{next_input_character} == 0x003E or # >
421     $self->{next_input_character} == 0x002F or # /
422     $self->{next_input_character} == -1) {
423     $self->{next_input_character} = shift @next_char; # reconsume
424     unshift @{$self->{char}}, (@next_char);
425     $self->{state} = 'data';
426    
427     return ({type => 'character', data => '</'});
428    
429     redo A;
430     } else {
431     $self->{next_input_character} = shift @next_char;
432     unshift @{$self->{char}}, (@next_char);
433     # and consume...
434     }
435     }
436    
437     if (0x0041 <= $self->{next_input_character} and
438     $self->{next_input_character} <= 0x005A) { # A..Z
439     $self->{current_token} = {type => 'end tag',
440     tag_name => chr ($self->{next_input_character} + 0x0020)};
441     $self->{state} = 'tag name';
442    
443     if (@{$self->{char}}) {
444     $self->{next_input_character} = shift @{$self->{char}};
445     } else {
446     $self->{set_next_input_character}->($self);
447     }
448    
449     redo A;
450     } elsif (0x0061 <= $self->{next_input_character} and
451     $self->{next_input_character} <= 0x007A) { # a..z
452     $self->{current_token} = {type => 'end tag',
453     tag_name => chr ($self->{next_input_character})};
454     $self->{state} = 'tag name';
455    
456     if (@{$self->{char}}) {
457     $self->{next_input_character} = shift @{$self->{char}};
458     } else {
459     $self->{set_next_input_character}->($self);
460     }
461    
462     redo A;
463     } elsif ($self->{next_input_character} == 0x003E) { # >
464 wakaba 1.3 $self->{parse_error}-> (type => 'empty end tag');
465 wakaba 1.1 $self->{state} = 'data';
466    
467     if (@{$self->{char}}) {
468     $self->{next_input_character} = shift @{$self->{char}};
469     } else {
470     $self->{set_next_input_character}->($self);
471     }
472    
473     redo A;
474     } elsif ($self->{next_input_character} == -1) {
475 wakaba 1.3 $self->{parse_error}-> (type => 'bare etago');
476 wakaba 1.1 $self->{state} = 'data';
477     # reconsume
478    
479     return ({type => 'character', data => '</'});
480    
481     redo A;
482     } else {
483 wakaba 1.3 $self->{parse_error}-> (type => 'bogus end tag');
484 wakaba 1.1 $self->{state} = 'bogus comment';
485     ## $self->{next_input_character} is intentionally left as is
486     redo A;
487     }
488     } elsif ($self->{state} eq 'tag name') {
489     if ($self->{next_input_character} == 0x0009 or # HT
490     $self->{next_input_character} == 0x000A or # LF
491     $self->{next_input_character} == 0x000B or # VT
492     $self->{next_input_character} == 0x000C or # FF
493     $self->{next_input_character} == 0x0020) { # SP
494     $self->{state} = 'before attribute name';
495    
496     if (@{$self->{char}}) {
497     $self->{next_input_character} = shift @{$self->{char}};
498     } else {
499     $self->{set_next_input_character}->($self);
500     }
501    
502     redo A;
503     } elsif ($self->{next_input_character} == 0x003E) { # >
504     if ($self->{current_token}->{type} eq 'start tag') {
505     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
506     } elsif ($self->{current_token}->{type} eq 'end tag') {
507     $self->{content_model_flag} = 'PCDATA'; # MUST
508     if ($self->{current_token}->{attributes}) {
509 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
510 wakaba 1.1 }
511     } else {
512     die "$0: $self->{current_token}->{type}: Unknown token type";
513     }
514     $self->{state} = 'data';
515    
516     if (@{$self->{char}}) {
517     $self->{next_input_character} = shift @{$self->{char}};
518     } else {
519     $self->{set_next_input_character}->($self);
520     }
521    
522    
523     return ($self->{current_token}); # start tag or end tag
524     undef $self->{current_token};
525    
526     redo A;
527     } elsif (0x0041 <= $self->{next_input_character} and
528     $self->{next_input_character} <= 0x005A) { # A..Z
529     $self->{current_token}->{tag_name} .= chr ($self->{next_input_character} + 0x0020);
530     # start tag or end tag
531     ## Stay in this state
532    
533     if (@{$self->{char}}) {
534     $self->{next_input_character} = shift @{$self->{char}};
535     } else {
536     $self->{set_next_input_character}->($self);
537     }
538    
539     redo A;
540 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
541 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed tag');
542 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
543     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
544     } elsif ($self->{current_token}->{type} eq 'end tag') {
545     $self->{content_model_flag} = 'PCDATA'; # MUST
546     if ($self->{current_token}->{attributes}) {
547 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
548 wakaba 1.1 }
549     } else {
550     die "$0: $self->{current_token}->{type}: Unknown token type";
551     }
552     $self->{state} = 'data';
553     # reconsume
554    
555     return ($self->{current_token}); # start tag or end tag
556     undef $self->{current_token};
557    
558     redo A;
559     } elsif ($self->{next_input_character} == 0x002F) { # /
560    
561     if (@{$self->{char}}) {
562     $self->{next_input_character} = shift @{$self->{char}};
563     } else {
564     $self->{set_next_input_character}->($self);
565     }
566    
567     if ($self->{next_input_character} == 0x003E and # >
568     $self->{current_token}->{type} eq 'start tag' and
569     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
570     # permitted slash
571     #
572     } else {
573 wakaba 1.3 $self->{parse_error}-> (type => 'nestc');
574 wakaba 1.1 }
575     $self->{state} = 'before attribute name';
576     # next-input-character is already done
577     redo A;
578     } else {
579     $self->{current_token}->{tag_name} .= chr $self->{next_input_character};
580     # start tag or end tag
581     ## Stay in the state
582    
583     if (@{$self->{char}}) {
584     $self->{next_input_character} = shift @{$self->{char}};
585     } else {
586     $self->{set_next_input_character}->($self);
587     }
588    
589     redo A;
590     }
591     } elsif ($self->{state} eq 'before attribute name') {
592     if ($self->{next_input_character} == 0x0009 or # HT
593     $self->{next_input_character} == 0x000A or # LF
594     $self->{next_input_character} == 0x000B or # VT
595     $self->{next_input_character} == 0x000C or # FF
596     $self->{next_input_character} == 0x0020) { # SP
597     ## Stay in the state
598    
599     if (@{$self->{char}}) {
600     $self->{next_input_character} = shift @{$self->{char}};
601     } else {
602     $self->{set_next_input_character}->($self);
603     }
604    
605     redo A;
606     } elsif ($self->{next_input_character} == 0x003E) { # >
607     if ($self->{current_token}->{type} eq 'start tag') {
608     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
609     } elsif ($self->{current_token}->{type} eq 'end tag') {
610     $self->{content_model_flag} = 'PCDATA'; # MUST
611     if ($self->{current_token}->{attributes}) {
612 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
613 wakaba 1.1 }
614     } else {
615     die "$0: $self->{current_token}->{type}: Unknown token type";
616     }
617     $self->{state} = 'data';
618    
619     if (@{$self->{char}}) {
620     $self->{next_input_character} = shift @{$self->{char}};
621     } else {
622     $self->{set_next_input_character}->($self);
623     }
624    
625    
626     return ($self->{current_token}); # start tag or end tag
627     undef $self->{current_token};
628    
629     redo A;
630     } elsif (0x0041 <= $self->{next_input_character} and
631     $self->{next_input_character} <= 0x005A) { # A..Z
632     $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
633     value => ''};
634     $self->{state} = 'attribute name';
635    
636     if (@{$self->{char}}) {
637     $self->{next_input_character} = shift @{$self->{char}};
638     } else {
639     $self->{set_next_input_character}->($self);
640     }
641    
642     redo A;
643     } elsif ($self->{next_input_character} == 0x002F) { # /
644    
645     if (@{$self->{char}}) {
646     $self->{next_input_character} = shift @{$self->{char}};
647     } else {
648     $self->{set_next_input_character}->($self);
649     }
650    
651     if ($self->{next_input_character} == 0x003E and # >
652     $self->{current_token}->{type} eq 'start tag' and
653     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
654     # permitted slash
655     #
656     } else {
657 wakaba 1.3 $self->{parse_error}-> (type => 'nestc');
658 wakaba 1.1 }
659     ## Stay in the state
660     # next-input-character is already done
661     redo A;
662 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
663 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed tag');
664 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
665     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
666     } elsif ($self->{current_token}->{type} eq 'end tag') {
667     $self->{content_model_flag} = 'PCDATA'; # MUST
668     if ($self->{current_token}->{attributes}) {
669 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
670 wakaba 1.1 }
671     } else {
672     die "$0: $self->{current_token}->{type}: Unknown token type";
673     }
674     $self->{state} = 'data';
675     # reconsume
676    
677     return ($self->{current_token}); # start tag or end tag
678     undef $self->{current_token};
679    
680     redo A;
681     } else {
682     $self->{current_attribute} = {name => chr ($self->{next_input_character}),
683     value => ''};
684     $self->{state} = 'attribute name';
685    
686     if (@{$self->{char}}) {
687     $self->{next_input_character} = shift @{$self->{char}};
688     } else {
689     $self->{set_next_input_character}->($self);
690     }
691    
692     redo A;
693     }
694     } elsif ($self->{state} eq 'attribute name') {
695     my $before_leave = sub {
696     if (exists $self->{current_token}->{attributes} # start tag or end tag
697     ->{$self->{current_attribute}->{name}}) { # MUST
698 wakaba 1.3 $self->{parse_error}-> (type => 'dupulicate attribute');
699 wakaba 1.1 ## Discard $self->{current_attribute} # MUST
700     } else {
701     $self->{current_token}->{attributes}->{$self->{current_attribute}->{name}}
702     = $self->{current_attribute};
703     }
704     }; # $before_leave
705    
706     if ($self->{next_input_character} == 0x0009 or # HT
707     $self->{next_input_character} == 0x000A or # LF
708     $self->{next_input_character} == 0x000B or # VT
709     $self->{next_input_character} == 0x000C or # FF
710     $self->{next_input_character} == 0x0020) { # SP
711     $before_leave->();
712     $self->{state} = 'after attribute name';
713    
714     if (@{$self->{char}}) {
715     $self->{next_input_character} = shift @{$self->{char}};
716     } else {
717     $self->{set_next_input_character}->($self);
718     }
719    
720     redo A;
721     } elsif ($self->{next_input_character} == 0x003D) { # =
722     $before_leave->();
723     $self->{state} = 'before attribute value';
724    
725     if (@{$self->{char}}) {
726     $self->{next_input_character} = shift @{$self->{char}};
727     } else {
728     $self->{set_next_input_character}->($self);
729     }
730    
731     redo A;
732     } elsif ($self->{next_input_character} == 0x003E) { # >
733     $before_leave->();
734     if ($self->{current_token}->{type} eq 'start tag') {
735     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
736     } elsif ($self->{current_token}->{type} eq 'end tag') {
737     $self->{content_model_flag} = 'PCDATA'; # MUST
738     if ($self->{current_token}->{attributes}) {
739 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
740 wakaba 1.1 }
741     } else {
742     die "$0: $self->{current_token}->{type}: Unknown token type";
743     }
744     $self->{state} = 'data';
745    
746     if (@{$self->{char}}) {
747     $self->{next_input_character} = shift @{$self->{char}};
748     } else {
749     $self->{set_next_input_character}->($self);
750     }
751    
752    
753     return ($self->{current_token}); # start tag or end tag
754     undef $self->{current_token};
755    
756     redo A;
757     } elsif (0x0041 <= $self->{next_input_character} and
758     $self->{next_input_character} <= 0x005A) { # A..Z
759     $self->{current_attribute}->{name} .= chr ($self->{next_input_character} + 0x0020);
760     ## Stay in the state
761    
762     if (@{$self->{char}}) {
763     $self->{next_input_character} = shift @{$self->{char}};
764     } else {
765     $self->{set_next_input_character}->($self);
766     }
767    
768     redo A;
769     } elsif ($self->{next_input_character} == 0x002F) { # /
770     $before_leave->();
771    
772     if (@{$self->{char}}) {
773     $self->{next_input_character} = shift @{$self->{char}};
774     } else {
775     $self->{set_next_input_character}->($self);
776     }
777    
778     if ($self->{next_input_character} == 0x003E and # >
779     $self->{current_token}->{type} eq 'start tag' and
780     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
781     # permitted slash
782     #
783     } else {
784 wakaba 1.3 $self->{parse_error}-> (type => 'nestc');
785 wakaba 1.1 }
786     $self->{state} = 'before attribute name';
787     # next-input-character is already done
788     redo A;
789 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
790 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed tag');
791 wakaba 1.1 $before_leave->();
792     if ($self->{current_token}->{type} eq 'start tag') {
793     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
794     } elsif ($self->{current_token}->{type} eq 'end tag') {
795     $self->{content_model_flag} = 'PCDATA'; # MUST
796     if ($self->{current_token}->{attributes}) {
797 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
798 wakaba 1.1 }
799     } else {
800     die "$0: $self->{current_token}->{type}: Unknown token type";
801     }
802     $self->{state} = 'data';
803     # reconsume
804    
805     return ($self->{current_token}); # start tag or end tag
806     undef $self->{current_token};
807    
808     redo A;
809     } else {
810     $self->{current_attribute}->{name} .= chr ($self->{next_input_character});
811     ## Stay in the state
812    
813     if (@{$self->{char}}) {
814     $self->{next_input_character} = shift @{$self->{char}};
815     } else {
816     $self->{set_next_input_character}->($self);
817     }
818    
819     redo A;
820     }
821     } elsif ($self->{state} eq 'after attribute name') {
822     if ($self->{next_input_character} == 0x0009 or # HT
823     $self->{next_input_character} == 0x000A or # LF
824     $self->{next_input_character} == 0x000B or # VT
825     $self->{next_input_character} == 0x000C or # FF
826     $self->{next_input_character} == 0x0020) { # SP
827     ## Stay in the state
828    
829     if (@{$self->{char}}) {
830     $self->{next_input_character} = shift @{$self->{char}};
831     } else {
832     $self->{set_next_input_character}->($self);
833     }
834    
835     redo A;
836     } elsif ($self->{next_input_character} == 0x003D) { # =
837     $self->{state} = 'before attribute value';
838    
839     if (@{$self->{char}}) {
840     $self->{next_input_character} = shift @{$self->{char}};
841     } else {
842     $self->{set_next_input_character}->($self);
843     }
844    
845     redo A;
846     } elsif ($self->{next_input_character} == 0x003E) { # >
847     if ($self->{current_token}->{type} eq 'start tag') {
848     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
849     } elsif ($self->{current_token}->{type} eq 'end tag') {
850     $self->{content_model_flag} = 'PCDATA'; # MUST
851     if ($self->{current_token}->{attributes}) {
852 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
853 wakaba 1.1 }
854     } else {
855     die "$0: $self->{current_token}->{type}: Unknown token type";
856     }
857     $self->{state} = 'data';
858    
859     if (@{$self->{char}}) {
860     $self->{next_input_character} = shift @{$self->{char}};
861     } else {
862     $self->{set_next_input_character}->($self);
863     }
864    
865    
866     return ($self->{current_token}); # start tag or end tag
867     undef $self->{current_token};
868    
869     redo A;
870     } elsif (0x0041 <= $self->{next_input_character} and
871     $self->{next_input_character} <= 0x005A) { # A..Z
872     $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
873     value => ''};
874     $self->{state} = 'attribute name';
875    
876     if (@{$self->{char}}) {
877     $self->{next_input_character} = shift @{$self->{char}};
878     } else {
879     $self->{set_next_input_character}->($self);
880     }
881    
882     redo A;
883     } elsif ($self->{next_input_character} == 0x002F) { # /
884    
885     if (@{$self->{char}}) {
886     $self->{next_input_character} = shift @{$self->{char}};
887     } else {
888     $self->{set_next_input_character}->($self);
889     }
890    
891     if ($self->{next_input_character} == 0x003E and # >
892     $self->{current_token}->{type} eq 'start tag' and
893     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
894     # permitted slash
895     #
896     } else {
897 wakaba 1.3 $self->{parse_error}-> (type => 'nestc');
898 wakaba 1.1 }
899     $self->{state} = 'before attribute name';
900     # next-input-character is already done
901     redo A;
902 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
903 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed tag');
904 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
905     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
906     } elsif ($self->{current_token}->{type} eq 'end tag') {
907     $self->{content_model_flag} = 'PCDATA'; # MUST
908     if ($self->{current_token}->{attributes}) {
909 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
910 wakaba 1.1 }
911     } else {
912     die "$0: $self->{current_token}->{type}: Unknown token type";
913     }
914     $self->{state} = 'data';
915     # reconsume
916    
917     return ($self->{current_token}); # start tag or end tag
918     undef $self->{current_token};
919    
920     redo A;
921     } else {
922     $self->{current_attribute} = {name => chr ($self->{next_input_character}),
923     value => ''};
924     $self->{state} = 'attribute name';
925    
926     if (@{$self->{char}}) {
927     $self->{next_input_character} = shift @{$self->{char}};
928     } else {
929     $self->{set_next_input_character}->($self);
930     }
931    
932     redo A;
933     }
934     } elsif ($self->{state} eq 'before attribute value') {
        ## Tokenizer state 'before attribute value'.
        ##   HT / LF / VT / FF / SP : skipped; the state is unchanged.
        ##   '"' or "'"  : advance and switch to the double-/single-quoted
        ##                 attribute value state.
        ##   '&'         : switch to 'attribute value (unquoted)' WITHOUT
        ##                 advancing (the '&' is reconsumed there).
        ##   '>'         : finish the tag (see below), advance, switch to
        ##                 'data' and return the current token.
        ##   -1          : (presumably the end-of-input marker -- confirm)
        ##                 report 'unclosed tag', finish the tag, switch to
        ##                 'data' without advancing and return the token.
        ##   otherwise   : append the character to the current attribute's
        ##                 value and switch to 'attribute value (unquoted)'.
        ## "Finish the tag": a start tag records its name in
        ## last_emitted_start_tag_name; an end tag resets content_model_flag
        ## to 'PCDATA' and reports 'end tag attribute' if it has attributes;
        ## any other token type is fatal (die).
        ## NOTE: the `undef ...; redo A;` statements that follow each
        ## `return` below can never execute.
935     if ($self->{next_input_character} == 0x0009 or # HT
936     $self->{next_input_character} == 0x000A or # LF
937     $self->{next_input_character} == 0x000B or # VT
938     $self->{next_input_character} == 0x000C or # FF
939     $self->{next_input_character} == 0x0020) { # SP
940     ## Stay in the state
941    
942     if (@{$self->{char}}) {
943     $self->{next_input_character} = shift @{$self->{char}};
944     } else {
945     $self->{set_next_input_character}->($self);
946     }
947    
948     redo A;
949     } elsif ($self->{next_input_character} == 0x0022) { # "
950     $self->{state} = 'attribute value (double-quoted)';
951    
952     if (@{$self->{char}}) {
953     $self->{next_input_character} = shift @{$self->{char}};
954     } else {
955     $self->{set_next_input_character}->($self);
956     }
957    
958     redo A;
959     } elsif ($self->{next_input_character} == 0x0026) { # &
960     $self->{state} = 'attribute value (unquoted)';
961     ## reconsume
962     redo A;
963     } elsif ($self->{next_input_character} == 0x0027) { # '
964     $self->{state} = 'attribute value (single-quoted)';
965    
966     if (@{$self->{char}}) {
967     $self->{next_input_character} = shift @{$self->{char}};
968     } else {
969     $self->{set_next_input_character}->($self);
970     }
971    
972     redo A;
973     } elsif ($self->{next_input_character} == 0x003E) { # >
974     if ($self->{current_token}->{type} eq 'start tag') {
975     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
976     } elsif ($self->{current_token}->{type} eq 'end tag') {
977     $self->{content_model_flag} = 'PCDATA'; # MUST
978     if ($self->{current_token}->{attributes}) {
979 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
980 wakaba 1.1 }
981     } else {
982     die "$0: $self->{current_token}->{type}: Unknown token type";
983     }
984     $self->{state} = 'data';
985    
986     if (@{$self->{char}}) {
987     $self->{next_input_character} = shift @{$self->{char}};
988     } else {
989     $self->{set_next_input_character}->($self);
990     }
991    
992    
993     return ($self->{current_token}); # start tag or end tag
994     undef $self->{current_token};
995    
996     redo A;
997 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
998 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed tag');
999 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1000     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1001     } elsif ($self->{current_token}->{type} eq 'end tag') {
1002     $self->{content_model_flag} = 'PCDATA'; # MUST
1003     if ($self->{current_token}->{attributes}) {
1004 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
1005 wakaba 1.1 }
1006     } else {
1007     die "$0: $self->{current_token}->{type}: Unknown token type";
1008     }
1009     $self->{state} = 'data';
1010     ## reconsume
1011    
1012     return ($self->{current_token}); # start tag or end tag
1013     undef $self->{current_token};
1014    
1015     redo A;
1016     } else {
1017     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1018     $self->{state} = 'attribute value (unquoted)';
1019    
1020     if (@{$self->{char}}) {
1021     $self->{next_input_character} = shift @{$self->{char}};
1022     } else {
1023     $self->{set_next_input_character}->($self);
1024     }
1025    
1026     redo A;
1027     }
1028     } elsif ($self->{state} eq 'attribute value (double-quoted)') {
         ## Tokenizer state 'attribute value (double-quoted)'.
         ##   '"'       : closing quote; advance and switch to
         ##               'before attribute name'.
         ##   '&'       : remember this state in last_attribute_value_state,
         ##               advance and switch to 'entity in attribute value'.
         ##   -1        : (presumably the end-of-input marker -- confirm)
         ##               report 'unclosed attribute value', finish the tag
         ##               (record last_emitted_start_tag_name for a start tag;
         ##               reset content_model_flag to 'PCDATA' for an end tag),
         ##               switch to 'data' without advancing and return the
         ##               current token.
         ##   otherwise : append the character to the current attribute's
         ##               value and stay in this state.
         ## NOTE: the statements after `return` below are never executed.
1029     if ($self->{next_input_character} == 0x0022) { # "
1030     $self->{state} = 'before attribute name';
1031    
1032     if (@{$self->{char}}) {
1033     $self->{next_input_character} = shift @{$self->{char}};
1034     } else {
1035     $self->{set_next_input_character}->($self);
1036     }
1037    
1038     redo A;
1039     } elsif ($self->{next_input_character} == 0x0026) { # &
1040     $self->{last_attribute_value_state} = 'attribute value (double-quoted)';
1041     $self->{state} = 'entity in attribute value';
1042    
1043     if (@{$self->{char}}) {
1044     $self->{next_input_character} = shift @{$self->{char}};
1045     } else {
1046     $self->{set_next_input_character}->($self);
1047     }
1048    
1049     redo A;
1050     } elsif ($self->{next_input_character} == -1) {
1051 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed attribute value');
1052 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1053     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1054     } elsif ($self->{current_token}->{type} eq 'end tag') {
1055     $self->{content_model_flag} = 'PCDATA'; # MUST
1056     if ($self->{current_token}->{attributes}) {
1057 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
1058 wakaba 1.1 }
1059     } else {
1060     die "$0: $self->{current_token}->{type}: Unknown token type";
1061     }
1062     $self->{state} = 'data';
1063     ## reconsume
1064    
1065     return ($self->{current_token}); # start tag or end tag
1066     undef $self->{current_token};
1067    
1068     redo A;
1069     } else {
1070     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1071     ## Stay in the state
1072    
1073     if (@{$self->{char}}) {
1074     $self->{next_input_character} = shift @{$self->{char}};
1075     } else {
1076     $self->{set_next_input_character}->($self);
1077     }
1078    
1079     redo A;
1080     }
1081     } elsif ($self->{state} eq 'attribute value (single-quoted)') {
         ## Tokenizer state 'attribute value (single-quoted)'.
         ## Same shape as the double-quoted state, with "'" as terminator:
         ##   "'"       : closing quote; advance and switch to
         ##               'before attribute name'.
         ##   '&'       : remember this state in last_attribute_value_state,
         ##               advance and switch to 'entity in attribute value'.
         ##   -1        : (presumably the end-of-input marker -- confirm)
         ##               report 'unclosed attribute value', finish the tag,
         ##               switch to 'data' without advancing and return the
         ##               current token.
         ##   otherwise : append the character to the current attribute's
         ##               value and stay in this state.
         ## NOTE: the statements after `return` below are never executed.
1082     if ($self->{next_input_character} == 0x0027) { # '
1083     $self->{state} = 'before attribute name';
1084    
1085     if (@{$self->{char}}) {
1086     $self->{next_input_character} = shift @{$self->{char}};
1087     } else {
1088     $self->{set_next_input_character}->($self);
1089     }
1090    
1091     redo A;
1092     } elsif ($self->{next_input_character} == 0x0026) { # &
1093     $self->{last_attribute_value_state} = 'attribute value (single-quoted)';
1094     $self->{state} = 'entity in attribute value';
1095    
1096     if (@{$self->{char}}) {
1097     $self->{next_input_character} = shift @{$self->{char}};
1098     } else {
1099     $self->{set_next_input_character}->($self);
1100     }
1101    
1102     redo A;
1103     } elsif ($self->{next_input_character} == -1) {
1104 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed attribute value');
1105 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1106     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1107     } elsif ($self->{current_token}->{type} eq 'end tag') {
1108     $self->{content_model_flag} = 'PCDATA'; # MUST
1109     if ($self->{current_token}->{attributes}) {
1110 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
1111 wakaba 1.1 }
1112     } else {
1113     die "$0: $self->{current_token}->{type}: Unknown token type";
1114     }
1115     $self->{state} = 'data';
1116     ## reconsume
1117    
1118     return ($self->{current_token}); # start tag or end tag
1119     undef $self->{current_token};
1120    
1121     redo A;
1122     } else {
1123     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1124     ## Stay in the state
1125    
1126     if (@{$self->{char}}) {
1127     $self->{next_input_character} = shift @{$self->{char}};
1128     } else {
1129     $self->{set_next_input_character}->($self);
1130     }
1131    
1132     redo A;
1133     }
1134     } elsif ($self->{state} eq 'attribute value (unquoted)') {
         ## Tokenizer state 'attribute value (unquoted)'.
         ##   HT / LF / VT / FF / SP : end of the value; advance and switch
         ##               to 'before attribute name'.
         ##   '&'       : remember this state in last_attribute_value_state,
         ##               advance and switch to 'entity in attribute value'.
         ##   '>'       : finish the tag, advance, switch to 'data' and
         ##               return the current token.
         ##   -1        : (presumably the end-of-input marker -- confirm)
         ##               report 'unclosed tag', finish the tag, switch to
         ##               'data' without advancing and return the token.
         ##   otherwise : append the character to the current attribute's
         ##               value and stay in this state.
         ## NOTE: the statements after each `return` below are never executed.
1135     if ($self->{next_input_character} == 0x0009 or # HT
1136     $self->{next_input_character} == 0x000A or # LF
1137     $self->{next_input_character} == 0x000B or # VT
1138     $self->{next_input_character} == 0x000C or # FF
1139     $self->{next_input_character} == 0x0020) { # SP
1140     $self->{state} = 'before attribute name';
1141    
1142     if (@{$self->{char}}) {
1143     $self->{next_input_character} = shift @{$self->{char}};
1144     } else {
1145     $self->{set_next_input_character}->($self);
1146     }
1147    
1148     redo A;
1149     } elsif ($self->{next_input_character} == 0x0026) { # &
1150     $self->{last_attribute_value_state} = 'attribute value (unquoted)';
1151     $self->{state} = 'entity in attribute value';
1152    
1153     if (@{$self->{char}}) {
1154     $self->{next_input_character} = shift @{$self->{char}};
1155     } else {
1156     $self->{set_next_input_character}->($self);
1157     }
1158    
1159     redo A;
1160     } elsif ($self->{next_input_character} == 0x003E) { # >
1161     if ($self->{current_token}->{type} eq 'start tag') {
1162     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1163     } elsif ($self->{current_token}->{type} eq 'end tag') {
1164     $self->{content_model_flag} = 'PCDATA'; # MUST
1165     if ($self->{current_token}->{attributes}) {
1166 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
1167 wakaba 1.1 }
1168     } else {
1169     die "$0: $self->{current_token}->{type}: Unknown token type";
1170     }
1171     $self->{state} = 'data';
1172    
1173     if (@{$self->{char}}) {
1174     $self->{next_input_character} = shift @{$self->{char}};
1175     } else {
1176     $self->{set_next_input_character}->($self);
1177     }
1178    
1179    
1180     return ($self->{current_token}); # start tag or end tag
1181     undef $self->{current_token};
1182    
1183     redo A;
1184 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
1185 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed tag');
1186 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1187     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1188     } elsif ($self->{current_token}->{type} eq 'end tag') {
1189     $self->{content_model_flag} = 'PCDATA'; # MUST
1190     if ($self->{current_token}->{attributes}) {
1191 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
1192 wakaba 1.1 }
1193     } else {
1194     die "$0: $self->{current_token}->{type}: Unknown token type";
1195     }
1196     $self->{state} = 'data';
1197     ## reconsume
1198    
1199     return ($self->{current_token}); # start tag or end tag
1200     undef $self->{current_token};
1201    
1202     redo A;
1203     } else {
1204     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1205     ## Stay in the state
1206    
1207     if (@{$self->{char}}) {
1208     $self->{next_input_character} = shift @{$self->{char}};
1209     } else {
1210     $self->{set_next_input_character}->($self);
1211     }
1212    
1213     redo A;
1214     }
1215     } elsif ($self->{state} eq 'entity in attribute value') {
         ## Tokenizer state 'entity in attribute value'.
         ## Calls _tokenize_attempt_to_consume_an_entity.  If it returns
         ## undef, a literal '&' is appended to the current attribute's
         ## value; otherwise the returned token's {data} is appended.
         ## The tokenizer then resumes in whichever attribute value state
         ## was saved in last_attribute_value_state.  The input position is
         ## not advanced here (the helper is relied on for that, per the
         ## comment below -- its body is not visible in this section).
1216     my $token = $self->_tokenize_attempt_to_consume_an_entity;
1217    
1218     unless (defined $token) {
1219     $self->{current_attribute}->{value} .= '&';
1220     } else {
1221     $self->{current_attribute}->{value} .= $token->{data};
1222     ## ISSUE: spec says "append the returned character token to the current attribute's value"
1223     }
1224    
1225     $self->{state} = $self->{last_attribute_value_state};
1226     # next-input-character is already done
1227     redo A;
1228     } elsif ($self->{state} eq 'bogus comment') {
1229     ## (only happen if PCDATA state)
         ## Tokenizer state 'bogus comment'.
         ## Builds a local comment token and, in the inner loop BC, appends
         ## every character to its data until one of:
         ##   '>' : advance past it, switch to 'data', return the token;
         ##   -1  : (presumably the end-of-input marker -- confirm) switch
         ##         to 'data' without advancing, return the token.
         ## The terminating '>' is not included in the comment data.
         ## NOTE: the `redo A;` after each `return` is never executed.
1230    
1231     my $token = {type => 'comment', data => ''};
1232    
1233     BC: {
1234     if ($self->{next_input_character} == 0x003E) { # >
1235     $self->{state} = 'data';
1236    
1237     if (@{$self->{char}}) {
1238     $self->{next_input_character} = shift @{$self->{char}};
1239     } else {
1240     $self->{set_next_input_character}->($self);
1241     }
1242    
1243    
1244     return ($token);
1245    
1246     redo A;
1247     } elsif ($self->{next_input_character} == -1) {
1248     $self->{state} = 'data';
1249     ## reconsume
1250    
1251     return ($token);
1252    
1253     redo A;
1254     } else {
1255     $token->{data} .= chr ($self->{next_input_character});
1256    
1257     if (@{$self->{char}}) {
1258     $self->{next_input_character} = shift @{$self->{char}};
1259     } else {
1260     $self->{set_next_input_character}->($self);
1261     }
1262    
1263     redo BC;
1264     }
1265     } # BC
1266     } elsif ($self->{state} eq 'markup declaration open') {
1267     ## (only happen if PCDATA state)
         ## Tokenizer state 'markup declaration open'.
         ## Looks ahead for one of two openers, recording every character it
         ## examines in @next_char so the input can be restored on failure:
         ##   "--"      : start a fresh comment token, advance and switch
         ##               to 'comment'.
         ##   "DOCTYPE" : (each letter matched in either case) advance and
         ##               switch to 'DOCTYPE'.
         ## If neither matches, 'bogus comment open' is reported, the first
         ## recorded character becomes next_input_character again, the
         ## remaining recorded characters are pushed back onto the front of
         ## $self->{char}, and the state becomes 'bogus comment'.
1268    
1269     my @next_char;
1270     push @next_char, $self->{next_input_character};
1271    
1272     if ($self->{next_input_character} == 0x002D) { # -
1273    
1274     if (@{$self->{char}}) {
1275     $self->{next_input_character} = shift @{$self->{char}};
1276     } else {
1277     $self->{set_next_input_character}->($self);
1278     }
1279    
1280     push @next_char, $self->{next_input_character};
1281     if ($self->{next_input_character} == 0x002D) { # -
1282     $self->{current_token} = {type => 'comment', data => ''};
1283     $self->{state} = 'comment';
1284    
1285     if (@{$self->{char}}) {
1286     $self->{next_input_character} = shift @{$self->{char}};
1287     } else {
1288     $self->{set_next_input_character}->($self);
1289     }
1290    
1291     redo A;
1292     }
1293     } elsif ($self->{next_input_character} == 0x0044 or # D
1294     $self->{next_input_character} == 0x0064) { # d
1295    
1296     if (@{$self->{char}}) {
1297     $self->{next_input_character} = shift @{$self->{char}};
1298     } else {
1299     $self->{set_next_input_character}->($self);
1300     }
1301    
1302     push @next_char, $self->{next_input_character};
1303     if ($self->{next_input_character} == 0x004F or # O
1304     $self->{next_input_character} == 0x006F) { # o
1305    
1306     if (@{$self->{char}}) {
1307     $self->{next_input_character} = shift @{$self->{char}};
1308     } else {
1309     $self->{set_next_input_character}->($self);
1310     }
1311    
1312     push @next_char, $self->{next_input_character};
1313     if ($self->{next_input_character} == 0x0043 or # C
1314     $self->{next_input_character} == 0x0063) { # c
1315    
1316     if (@{$self->{char}}) {
1317     $self->{next_input_character} = shift @{$self->{char}};
1318     } else {
1319     $self->{set_next_input_character}->($self);
1320     }
1321    
1322     push @next_char, $self->{next_input_character};
1323     if ($self->{next_input_character} == 0x0054 or # T
1324     $self->{next_input_character} == 0x0074) { # t
1325    
1326     if (@{$self->{char}}) {
1327     $self->{next_input_character} = shift @{$self->{char}};
1328     } else {
1329     $self->{set_next_input_character}->($self);
1330     }
1331    
1332     push @next_char, $self->{next_input_character};
1333     if ($self->{next_input_character} == 0x0059 or # Y
1334     $self->{next_input_character} == 0x0079) { # y
1335    
1336     if (@{$self->{char}}) {
1337     $self->{next_input_character} = shift @{$self->{char}};
1338     } else {
1339     $self->{set_next_input_character}->($self);
1340     }
1341    
1342     push @next_char, $self->{next_input_character};
1343     if ($self->{next_input_character} == 0x0050 or # P
1344     $self->{next_input_character} == 0x0070) { # p
1345    
1346     if (@{$self->{char}}) {
1347     $self->{next_input_character} = shift @{$self->{char}};
1348     } else {
1349     $self->{set_next_input_character}->($self);
1350     }
1351    
1352     push @next_char, $self->{next_input_character};
1353     if ($self->{next_input_character} == 0x0045 or # E
1354     $self->{next_input_character} == 0x0065) { # e
1355     ## ISSUE: What a stupid code this is!
1356     $self->{state} = 'DOCTYPE';
1357    
1358     if (@{$self->{char}}) {
1359     $self->{next_input_character} = shift @{$self->{char}};
1360     } else {
1361     $self->{set_next_input_character}->($self);
1362     }
1363    
1364     redo A;
1365     }
1366     }
1367     }
1368     }
1369     }
1370     }
1371     }
1372    
1373 wakaba 1.3 $self->{parse_error}-> (type => 'bogus comment open');
1374 wakaba 1.1 $self->{next_input_character} = shift @next_char;
1375     unshift @{$self->{char}}, (@next_char);
1376     $self->{state} = 'bogus comment';
1377     redo A;
1378    
1379     ## ISSUE: typos in spec: chacacters, is is a parse error
1380     ## ISSUE: spec is somewhat unclear on "is the first character that will be in the comment"; what is "that will be in the comment" is what the algorithm defines, isn't it?
1381     } elsif ($self->{state} eq 'comment') {
         ## Tokenizer state 'comment'.
         ##   '-'       : advance and switch to 'comment dash'.
         ##   -1        : (presumably the end-of-input marker -- confirm)
         ##               report 'unclosed comment', switch to 'data'
         ##               without advancing and return the comment token.
         ##   otherwise : append the character to the comment data and stay
         ##               in this state.
         ## NOTE: the statements after `return` below are never executed.
1382     if ($self->{next_input_character} == 0x002D) { # -
1383     $self->{state} = 'comment dash';
1384    
1385     if (@{$self->{char}}) {
1386     $self->{next_input_character} = shift @{$self->{char}};
1387     } else {
1388     $self->{set_next_input_character}->($self);
1389     }
1390    
1391     redo A;
1392     } elsif ($self->{next_input_character} == -1) {
1393 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed comment');
1394 wakaba 1.1 $self->{state} = 'data';
1395     ## reconsume
1396    
1397     return ($self->{current_token}); # comment
1398     undef $self->{current_token};
1399    
1400     redo A;
1401     } else {
1402     $self->{current_token}->{data} .= chr ($self->{next_input_character}); # comment
1403     ## Stay in the state
1404    
1405     if (@{$self->{char}}) {
1406     $self->{next_input_character} = shift @{$self->{char}};
1407     } else {
1408     $self->{set_next_input_character}->($self);
1409     }
1410    
1411     redo A;
1412     }
1413     } elsif ($self->{state} eq 'comment dash') {
         ## Tokenizer state 'comment dash' (one '-' seen inside a comment).
         ##   '-'       : advance and switch to 'comment end'.
         ##   -1        : (presumably the end-of-input marker -- confirm)
         ##               report 'unclosed comment', switch to 'data'
         ##               without advancing and return the comment token
         ##               (the pending '-' is not added to its data).
         ##   otherwise : append '-' plus the character to the comment data
         ##               and go back to 'comment'.
         ## NOTE: the statements after `return` below are never executed.
1414     if ($self->{next_input_character} == 0x002D) { # -
1415     $self->{state} = 'comment end';
1416    
1417     if (@{$self->{char}}) {
1418     $self->{next_input_character} = shift @{$self->{char}};
1419     } else {
1420     $self->{set_next_input_character}->($self);
1421     }
1422    
1423     redo A;
1424     } elsif ($self->{next_input_character} == -1) {
1425 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed comment');
1426 wakaba 1.1 $self->{state} = 'data';
1427     ## reconsume
1428    
1429     return ($self->{current_token}); # comment
1430     undef $self->{current_token};
1431    
1432     redo A;
1433     } else {
1434     $self->{current_token}->{data} .= '-' . chr ($self->{next_input_character}); # comment
1435     $self->{state} = 'comment';
1436    
1437     if (@{$self->{char}}) {
1438     $self->{next_input_character} = shift @{$self->{char}};
1439     } else {
1440     $self->{set_next_input_character}->($self);
1441     }
1442    
1443     redo A;
1444     }
1445     } elsif ($self->{state} eq 'comment end') {
         ## Tokenizer state 'comment end' ("--" seen inside a comment).
         ##   '>'       : advance, switch to 'data' and return the comment
         ##               token.
         ##   '-'       : report 'dash in comment', append a single '-' to
         ##               the comment data and stay in this state.
         ##   -1        : (presumably the end-of-input marker -- confirm)
         ##               report 'unclosed comment', switch to 'data'
         ##               without advancing and return the comment token.
         ##   otherwise : report 'dash in comment', append '--' plus the
         ##               character to the comment data and go back to
         ##               'comment'.
         ## NOTE: the statements after each `return` below are never executed.
1446     if ($self->{next_input_character} == 0x003E) { # >
1447     $self->{state} = 'data';
1448    
1449     if (@{$self->{char}}) {
1450     $self->{next_input_character} = shift @{$self->{char}};
1451     } else {
1452     $self->{set_next_input_character}->($self);
1453     }
1454    
1455    
1456     return ($self->{current_token}); # comment
1457     undef $self->{current_token};
1458    
1459     redo A;
1460     } elsif ($self->{next_input_character} == 0x002D) { # -
1461 wakaba 1.3 $self->{parse_error}-> (type => 'dash in comment');
1462 wakaba 1.1 $self->{current_token}->{data} .= '-'; # comment
1463     ## Stay in the state
1464    
1465     if (@{$self->{char}}) {
1466     $self->{next_input_character} = shift @{$self->{char}};
1467     } else {
1468     $self->{set_next_input_character}->($self);
1469     }
1470    
1471     redo A;
1472     } elsif ($self->{next_input_character} == -1) {
1473 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed comment');
1474 wakaba 1.1 $self->{state} = 'data';
1475     ## reconsume
1476    
1477     return ($self->{current_token}); # comment
1478     undef $self->{current_token};
1479    
1480     redo A;
1481     } else {
1482 wakaba 1.3 $self->{parse_error}-> (type => 'dash in comment');
1483 wakaba 1.1 $self->{current_token}->{data} .= '--' . chr ($self->{next_input_character}); # comment
1484     $self->{state} = 'comment';
1485    
1486     if (@{$self->{char}}) {
1487     $self->{next_input_character} = shift @{$self->{char}};
1488     } else {
1489     $self->{set_next_input_character}->($self);
1490     }
1491    
1492     redo A;
1493     }
1494     } elsif ($self->{state} eq 'DOCTYPE') {
         ## Tokenizer state 'DOCTYPE' (entered right after the "DOCTYPE"
         ## keyword was matched in 'markup declaration open').
         ##   HT / LF / VT / FF / SP : advance and switch to
         ##               'before DOCTYPE name'.
         ##   otherwise : report 'no space before DOCTYPE name' and switch
         ##               to 'before DOCTYPE name' without advancing.
1495     if ($self->{next_input_character} == 0x0009 or # HT
1496     $self->{next_input_character} == 0x000A or # LF
1497     $self->{next_input_character} == 0x000B or # VT
1498     $self->{next_input_character} == 0x000C or # FF
1499     $self->{next_input_character} == 0x0020) { # SP
1500     $self->{state} = 'before DOCTYPE name';
1501    
1502     if (@{$self->{char}}) {
1503     $self->{next_input_character} = shift @{$self->{char}};
1504     } else {
1505     $self->{set_next_input_character}->($self);
1506     }
1507    
1508     redo A;
1509     } else {
1510 wakaba 1.3 $self->{parse_error}-> (type => 'no space before DOCTYPE name');
1511 wakaba 1.1 $self->{state} = 'before DOCTYPE name';
1512     ## reconsume
1513     redo A;
1514     }
1515     } elsif ($self->{state} eq 'before DOCTYPE name') {
         ## Tokenizer state 'before DOCTYPE name'.
         ##   HT / LF / VT / FF / SP : skipped; the state is unchanged.
         ##   '>'       : report 'no DOCTYPE name', advance, switch to
         ##               'data' and return a bare {type => 'DOCTYPE'} token
         ##               (no name and no `correct` flag).
         ##   -1        : (presumably the end-of-input marker -- confirm)
         ##               same as '>' but without advancing.
         ##   otherwise : start a new DOCTYPE token whose name is this
         ##               character and whose `correct` flag is set, advance
         ##               and switch to 'DOCTYPE name'.
         ## NOTE: the `redo A;` after each `return` is never executed.
1516     if ($self->{next_input_character} == 0x0009 or # HT
1517     $self->{next_input_character} == 0x000A or # LF
1518     $self->{next_input_character} == 0x000B or # VT
1519     $self->{next_input_character} == 0x000C or # FF
1520     $self->{next_input_character} == 0x0020) { # SP
1521     ## Stay in the state
1522    
1523     if (@{$self->{char}}) {
1524     $self->{next_input_character} = shift @{$self->{char}};
1525     } else {
1526     $self->{set_next_input_character}->($self);
1527     }
1528    
1529     redo A;
1530 wakaba 1.18 } elsif ($self->{next_input_character} == 0x003E) { # >
1531     $self->{parse_error}-> (type => 'no DOCTYPE name');
1532     $self->{state} = 'data';
1533    
1534     if (@{$self->{char}}) {
1535     $self->{next_input_character} = shift @{$self->{char}};
1536     } else {
1537     $self->{set_next_input_character}->($self);
1538     }
1539    
1540    
1541     return ({type => 'DOCTYPE'}); # incorrect
1542    
1543     redo A;
1544     } elsif ($self->{next_input_character} == -1) {
1545     $self->{parse_error}-> (type => 'no DOCTYPE name');
1546     $self->{state} = 'data';
1547     ## reconsume
1548    
1549     return ({type => 'DOCTYPE'}); # incorrect
1550    
1551     redo A;
1552     } else {
1553     $self->{current_token}
1554     = {type => 'DOCTYPE',
1555     name => chr ($self->{next_input_character}),
1556     correct => 1};
1557 wakaba 1.4 ## ISSUE: "Set the token's name name to the" in the spec
1558 wakaba 1.1 $self->{state} = 'DOCTYPE name';
1559    
1560     if (@{$self->{char}}) {
1561     $self->{next_input_character} = shift @{$self->{char}};
1562     } else {
1563     $self->{set_next_input_character}->($self);
1564     }
1565    
1566     redo A;
1567 wakaba 1.18 }
1568     } elsif ($self->{state} eq 'DOCTYPE name') {
         ## Tokenizer state 'DOCTYPE name'.
         ##   HT / LF / VT / FF / SP : advance and switch to
         ##               'after DOCTYPE name'.
         ##   '>'       : advance, switch to 'data' and return the DOCTYPE
         ##               token.
         ##   -1        : (presumably the end-of-input marker -- confirm)
         ##               report 'unclosed DOCTYPE', switch to 'data'
         ##               without advancing, remove the token's `correct`
         ##               flag and return it.
         ##   otherwise : append the character to the token's name and stay
         ##               in this state.
         ## NOTE: the statements after each `return` below are never executed.
1569     ## ISSUE: Redundant "First," in the spec.
1570     if ($self->{next_input_character} == 0x0009 or # HT
1571     $self->{next_input_character} == 0x000A or # LF
1572     $self->{next_input_character} == 0x000B or # VT
1573     $self->{next_input_character} == 0x000C or # FF
1574     $self->{next_input_character} == 0x0020) { # SP
1575     $self->{state} = 'after DOCTYPE name';
1576    
1577     if (@{$self->{char}}) {
1578     $self->{next_input_character} = shift @{$self->{char}};
1579     } else {
1580     $self->{set_next_input_character}->($self);
1581     }
1582    
1583     redo A;
1584 wakaba 1.1 } elsif ($self->{next_input_character} == 0x003E) { # >
1585     $self->{state} = 'data';
1586    
1587     if (@{$self->{char}}) {
1588     $self->{next_input_character} = shift @{$self->{char}};
1589     } else {
1590     $self->{set_next_input_character}->($self);
1591     }
1592    
1593    
1594 wakaba 1.18 return ($self->{current_token}); # DOCTYPE
1595     undef $self->{current_token};
1596 wakaba 1.1
1597     redo A;
1598 wakaba 1.18 } elsif ($self->{next_input_character} == -1) {
1599     $self->{parse_error}-> (type => 'unclosed DOCTYPE');
1600 wakaba 1.1 $self->{state} = 'data';
1601     ## reconsume
1602    
1603 wakaba 1.18 delete $self->{current_token}->{correct};
1604     return ($self->{current_token}); # DOCTYPE
1605     undef $self->{current_token};
1606 wakaba 1.1
1607     redo A;
1608     } else {
1609 wakaba 1.18 $self->{current_token}->{name}
1610     .= chr ($self->{next_input_character}); # DOCTYPE
1611     ## Stay in the state
1612 wakaba 1.1
1613     if (@{$self->{char}}) {
1614     $self->{next_input_character} = shift @{$self->{char}};
1615     } else {
1616     $self->{set_next_input_character}->($self);
1617     }
1618    
1619     redo A;
1620     }
1621 wakaba 1.18 } elsif ($self->{state} eq 'after DOCTYPE name') {
         ## Tokenizer state 'after DOCTYPE name'.
         ##   HT / LF / VT / FF / SP : skipped; the state is unchanged.
         ##   '>'       : advance, switch to 'data' and return the DOCTYPE
         ##               token.
         ##   -1        : (presumably the end-of-input marker -- confirm)
         ##               report 'unclosed DOCTYPE', switch to 'data'
         ##               without advancing, remove the token's `correct`
         ##               flag and return it.
         ##   "PUBLIC"  : (each letter matched in either case) advance and
         ##               switch to 'before DOCTYPE public identifier'.
         ##   "SYSTEM"  : (each letter matched in either case) advance and
         ##               switch to 'before DOCTYPE system identifier'.
         ##   otherwise : advance past one character.
         ## Every path that does not `redo A` or `return` above falls
         ## through to the tail of the block, which reports
         ## 'string after DOCTYPE name' and switches to 'bogus DOCTYPE'.
         ## Unlike 'markup declaration open', characters consumed by a
         ## partial keyword match are not pushed back onto $self->{char}.
         ## NOTE: the statements after each `return` below are never executed.
1622 wakaba 1.1 if ($self->{next_input_character} == 0x0009 or # HT
1623     $self->{next_input_character} == 0x000A or # LF
1624     $self->{next_input_character} == 0x000B or # VT
1625     $self->{next_input_character} == 0x000C or # FF
1626     $self->{next_input_character} == 0x0020) { # SP
1627 wakaba 1.18 ## Stay in the state
1628 wakaba 1.1
1629     if (@{$self->{char}}) {
1630     $self->{next_input_character} = shift @{$self->{char}};
1631     } else {
1632     $self->{set_next_input_character}->($self);
1633     }
1634    
1635     redo A;
1636     } elsif ($self->{next_input_character} == 0x003E) { # >
1637     $self->{state} = 'data';
1638    
1639     if (@{$self->{char}}) {
1640     $self->{next_input_character} = shift @{$self->{char}};
1641     } else {
1642     $self->{set_next_input_character}->($self);
1643     }
1644    
1645    
1646     return ($self->{current_token}); # DOCTYPE
1647     undef $self->{current_token};
1648    
1649     redo A;
1650 wakaba 1.18 } elsif ($self->{next_input_character} == -1) {
1651     $self->{parse_error}-> (type => 'unclosed DOCTYPE');
1652     $self->{state} = 'data';
1653     ## reconsume
1654    
1655     delete $self->{current_token}->{correct};
1656     return ($self->{current_token}); # DOCTYPE
1657     undef $self->{current_token};
1658    
1659     redo A;
1660     } elsif ($self->{next_input_character} == 0x0050 or # P
1661     $self->{next_input_character} == 0x0070) { # p
1662    
1663     if (@{$self->{char}}) {
1664     $self->{next_input_character} = shift @{$self->{char}};
1665     } else {
1666     $self->{set_next_input_character}->($self);
1667     }
1668    
1669     if ($self->{next_input_character} == 0x0055 or # U
1670     $self->{next_input_character} == 0x0075) { # u
1671    
1672     if (@{$self->{char}}) {
1673     $self->{next_input_character} = shift @{$self->{char}};
1674     } else {
1675     $self->{set_next_input_character}->($self);
1676     }
1677    
1678     if ($self->{next_input_character} == 0x0042 or # B
1679     $self->{next_input_character} == 0x0062) { # b
1680    
1681     if (@{$self->{char}}) {
1682     $self->{next_input_character} = shift @{$self->{char}};
1683     } else {
1684     $self->{set_next_input_character}->($self);
1685     }
1686    
1687     if ($self->{next_input_character} == 0x004C or # L
1688     $self->{next_input_character} == 0x006C) { # l
1689    
1690     if (@{$self->{char}}) {
1691     $self->{next_input_character} = shift @{$self->{char}};
1692     } else {
1693     $self->{set_next_input_character}->($self);
1694     }
1695    
1696     if ($self->{next_input_character} == 0x0049 or # I
1697     $self->{next_input_character} == 0x0069) { # i
1698    
1699     if (@{$self->{char}}) {
1700     $self->{next_input_character} = shift @{$self->{char}};
1701     } else {
1702     $self->{set_next_input_character}->($self);
1703     }
1704    
1705     if ($self->{next_input_character} == 0x0043 or # C
1706     $self->{next_input_character} == 0x0063) { # c
1707     $self->{state} = 'before DOCTYPE public identifier';
1708    
1709     if (@{$self->{char}}) {
1710     $self->{next_input_character} = shift @{$self->{char}};
1711     } else {
1712     $self->{set_next_input_character}->($self);
1713     }
1714    
1715     redo A;
1716     }
1717     }
1718     }
1719     }
1720     }
1721    
1722     #
1723     } elsif ($self->{next_input_character} == 0x0053 or # S
1724     $self->{next_input_character} == 0x0073) { # s
1725    
1726     if (@{$self->{char}}) {
1727     $self->{next_input_character} = shift @{$self->{char}};
1728     } else {
1729     $self->{set_next_input_character}->($self);
1730     }
1731    
1732     if ($self->{next_input_character} == 0x0059 or # Y
1733     $self->{next_input_character} == 0x0079) { # y
1734    
1735     if (@{$self->{char}}) {
1736     $self->{next_input_character} = shift @{$self->{char}};
1737     } else {
1738     $self->{set_next_input_character}->($self);
1739     }
1740    
1741     if ($self->{next_input_character} == 0x0053 or # S
1742     $self->{next_input_character} == 0x0073) { # s
1743    
1744     if (@{$self->{char}}) {
1745     $self->{next_input_character} = shift @{$self->{char}};
1746     } else {
1747     $self->{set_next_input_character}->($self);
1748     }
1749    
1750     if ($self->{next_input_character} == 0x0054 or # T
1751     $self->{next_input_character} == 0x0074) { # t
1752    
1753     if (@{$self->{char}}) {
1754     $self->{next_input_character} = shift @{$self->{char}};
1755     } else {
1756     $self->{set_next_input_character}->($self);
1757     }
1758    
1759     if ($self->{next_input_character} == 0x0045 or # E
1760     $self->{next_input_character} == 0x0065) { # e
1761    
1762     if (@{$self->{char}}) {
1763     $self->{next_input_character} = shift @{$self->{char}};
1764     } else {
1765     $self->{set_next_input_character}->($self);
1766     }
1767    
1768     if ($self->{next_input_character} == 0x004D or # M
1769     $self->{next_input_character} == 0x006D) { # m
1770     $self->{state} = 'before DOCTYPE system identifier';
1771    
1772     if (@{$self->{char}}) {
1773     $self->{next_input_character} = shift @{$self->{char}};
1774     } else {
1775     $self->{set_next_input_character}->($self);
1776     }
1777    
1778     redo A;
1779     }
1780     }
1781     }
1782     }
1783     }
1784    
1785     #
1786     } else {
1787    
1788     if (@{$self->{char}}) {
1789     $self->{next_input_character} = shift @{$self->{char}};
1790     } else {
1791     $self->{set_next_input_character}->($self);
1792     }
1793    
1794     #
1795     }
1796    
1797     $self->{parse_error}-> (type => 'string after DOCTYPE name');
1798     $self->{state} = 'bogus DOCTYPE';
1799     # next-input-character is already done
1800     redo A;
1801     } elsif ($self->{state} eq 'before DOCTYPE public identifier') {
1802     if ({
1803     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
1804     #0x000D => 1, # HT, LF, VT, FF, SP, CR
1805     }->{$self->{next_input_character}}) {
1806 wakaba 1.1 ## Stay in the state
1807    
1808     if (@{$self->{char}}) {
1809     $self->{next_input_character} = shift @{$self->{char}};
1810     } else {
1811     $self->{set_next_input_character}->($self);
1812     }
1813    
1814     redo A;
1815 wakaba 1.18 } elsif ($self->{next_input_character} eq 0x0022) { # "
1816     $self->{current_token}->{public_identifier} = ''; # DOCTYPE
1817     $self->{state} = 'DOCTYPE public identifier (double-quoted)';
1818    
1819     if (@{$self->{char}}) {
1820     $self->{next_input_character} = shift @{$self->{char}};
1821     } else {
1822     $self->{set_next_input_character}->($self);
1823     }
1824    
1825     redo A;
1826     } elsif ($self->{next_input_character} eq 0x0027) { # '
1827     $self->{current_token}->{public_identifier} = ''; # DOCTYPE
1828     $self->{state} = 'DOCTYPE public identifier (single-quoted)';
1829    
1830     if (@{$self->{char}}) {
1831     $self->{next_input_character} = shift @{$self->{char}};
1832     } else {
1833     $self->{set_next_input_character}->($self);
1834     }
1835    
1836     redo A;
1837     } elsif ($self->{next_input_character} eq 0x003E) { # >
1838     $self->{parse_error}-> (type => 'no PUBLIC literal');
1839    
1840     $self->{state} = 'data';
1841    
1842     if (@{$self->{char}}) {
1843     $self->{next_input_character} = shift @{$self->{char}};
1844     } else {
1845     $self->{set_next_input_character}->($self);
1846     }
1847    
1848    
1849     delete $self->{current_token}->{correct};
1850     return ($self->{current_token}); # DOCTYPE
1851     undef $self->{current_token};
1852    
1853     redo A;
1854 wakaba 1.1 } elsif ($self->{next_input_character} == -1) {
1855 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed DOCTYPE');
1856 wakaba 1.18
1857 wakaba 1.1 $self->{state} = 'data';
1858     ## reconsume
1859    
1860 wakaba 1.18 delete $self->{current_token}->{correct};
1861     return ($self->{current_token}); # DOCTYPE
1862 wakaba 1.1 undef $self->{current_token};
1863    
1864     redo A;
1865     } else {
1866 wakaba 1.18 $self->{parse_error}-> (type => 'string after PUBLIC');
1867     $self->{state} = 'bogus DOCTYPE';
1868    
1869     if (@{$self->{char}}) {
1870     $self->{next_input_character} = shift @{$self->{char}};
1871     } else {
1872     $self->{set_next_input_character}->($self);
1873     }
1874    
1875     redo A;
1876     }
1877     } elsif ($self->{state} eq 'DOCTYPE public identifier (double-quoted)') {
1878     if ($self->{next_input_character} == 0x0022) { # "
1879     $self->{state} = 'after DOCTYPE public identifier';
1880    
1881     if (@{$self->{char}}) {
1882     $self->{next_input_character} = shift @{$self->{char}};
1883     } else {
1884     $self->{set_next_input_character}->($self);
1885     }
1886    
1887     redo A;
1888     } elsif ($self->{next_input_character} == -1) {
1889     $self->{parse_error}-> (type => 'unclosed PUBLIC literal');
1890    
1891     $self->{state} = 'data';
1892     ## reconsume
1893    
1894     delete $self->{current_token}->{correct};
1895     return ($self->{current_token}); # DOCTYPE
1896     undef $self->{current_token};
1897    
1898     redo A;
1899     } else {
1900     $self->{current_token}->{public_identifier} # DOCTYPE
1901     .= chr $self->{next_input_character};
1902     ## Stay in the state
1903    
1904     if (@{$self->{char}}) {
1905     $self->{next_input_character} = shift @{$self->{char}};
1906     } else {
1907     $self->{set_next_input_character}->($self);
1908     }
1909    
1910     redo A;
1911     }
1912     } elsif ($self->{state} eq 'DOCTYPE public identifier (single-quoted)') {
1913     if ($self->{next_input_character} == 0x0027) { # '
1914     $self->{state} = 'after DOCTYPE public identifier';
1915    
1916     if (@{$self->{char}}) {
1917     $self->{next_input_character} = shift @{$self->{char}};
1918     } else {
1919     $self->{set_next_input_character}->($self);
1920     }
1921    
1922     redo A;
1923     } elsif ($self->{next_input_character} == -1) {
1924     $self->{parse_error}-> (type => 'unclosed PUBLIC literal');
1925    
1926     $self->{state} = 'data';
1927     ## reconsume
1928    
1929     delete $self->{current_token}->{correct};
1930     return ($self->{current_token}); # DOCTYPE
1931     undef $self->{current_token};
1932    
1933     redo A;
1934     } else {
1935     $self->{current_token}->{public_identifier} # DOCTYPE
1936     .= chr $self->{next_input_character};
1937     ## Stay in the state
1938    
1939     if (@{$self->{char}}) {
1940     $self->{next_input_character} = shift @{$self->{char}};
1941     } else {
1942     $self->{set_next_input_character}->($self);
1943     }
1944    
1945     redo A;
1946     }
1947     } elsif ($self->{state} eq 'after DOCTYPE public identifier') {
1948     if ({
1949     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
1950     #0x000D => 1, # HT, LF, VT, FF, SP, CR
1951     }->{$self->{next_input_character}}) {
1952 wakaba 1.1 ## Stay in the state
1953    
1954     if (@{$self->{char}}) {
1955     $self->{next_input_character} = shift @{$self->{char}};
1956     } else {
1957     $self->{set_next_input_character}->($self);
1958     }
1959    
1960     redo A;
1961 wakaba 1.18 } elsif ($self->{next_input_character} == 0x0022) { # "
1962     $self->{current_token}->{system_identifier} = ''; # DOCTYPE
1963     $self->{state} = 'DOCTYPE system identifier (double-quoted)';
1964    
1965     if (@{$self->{char}}) {
1966     $self->{next_input_character} = shift @{$self->{char}};
1967     } else {
1968     $self->{set_next_input_character}->($self);
1969     }
1970    
1971     redo A;
1972     } elsif ($self->{next_input_character} == 0x0027) { # '
1973     $self->{current_token}->{system_identifier} = ''; # DOCTYPE
1974     $self->{state} = 'DOCTYPE system identifier (single-quoted)';
1975    
1976     if (@{$self->{char}}) {
1977     $self->{next_input_character} = shift @{$self->{char}};
1978     } else {
1979     $self->{set_next_input_character}->($self);
1980     }
1981    
1982     redo A;
1983     } elsif ($self->{next_input_character} == 0x003E) { # >
1984     $self->{state} = 'data';
1985    
1986     if (@{$self->{char}}) {
1987     $self->{next_input_character} = shift @{$self->{char}};
1988     } else {
1989     $self->{set_next_input_character}->($self);
1990     }
1991    
1992    
1993     return ($self->{current_token}); # DOCTYPE
1994     undef $self->{current_token};
1995    
1996     redo A;
1997     } elsif ($self->{next_input_character} == -1) {
1998     $self->{parse_error}-> (type => 'unclosed DOCTYPE');
1999    
2000     $self->{state} = 'data';
2001     ## recomsume
2002    
2003     delete $self->{current_token}->{correct};
2004     return ($self->{current_token}); # DOCTYPE
2005     undef $self->{current_token};
2006    
2007     redo A;
2008     } else {
2009     $self->{parse_error}-> (type => 'string after PUBLIC literal');
2010     $self->{state} = 'bogus DOCTYPE';
2011    
2012     if (@{$self->{char}}) {
2013     $self->{next_input_character} = shift @{$self->{char}};
2014     } else {
2015     $self->{set_next_input_character}->($self);
2016     }
2017    
2018     redo A;
2019 wakaba 1.1 }
2020 wakaba 1.18 } elsif ($self->{state} eq 'before DOCTYPE system identifier') {
2021     if ({
2022     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2023     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2024     }->{$self->{next_input_character}}) {
2025 wakaba 1.1 ## Stay in the state
2026    
2027     if (@{$self->{char}}) {
2028     $self->{next_input_character} = shift @{$self->{char}};
2029     } else {
2030     $self->{set_next_input_character}->($self);
2031     }
2032    
2033     redo A;
2034 wakaba 1.18 } elsif ($self->{next_input_character} == 0x0022) { # "
2035     $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2036     $self->{state} = 'DOCTYPE system identifier (double-quoted)';
2037    
2038     if (@{$self->{char}}) {
2039     $self->{next_input_character} = shift @{$self->{char}};
2040     } else {
2041     $self->{set_next_input_character}->($self);
2042     }
2043    
2044     redo A;
2045     } elsif ($self->{next_input_character} == 0x0027) { # '
2046     $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2047     $self->{state} = 'DOCTYPE system identifier (single-quoted)';
2048    
2049     if (@{$self->{char}}) {
2050     $self->{next_input_character} = shift @{$self->{char}};
2051     } else {
2052     $self->{set_next_input_character}->($self);
2053     }
2054    
2055     redo A;
2056 wakaba 1.1 } elsif ($self->{next_input_character} == 0x003E) { # >
2057 wakaba 1.18 $self->{parse_error}-> (type => 'no SYSTEM literal');
2058 wakaba 1.1 $self->{state} = 'data';
2059    
2060     if (@{$self->{char}}) {
2061     $self->{next_input_character} = shift @{$self->{char}};
2062     } else {
2063     $self->{set_next_input_character}->($self);
2064     }
2065    
2066    
2067 wakaba 1.18 delete $self->{current_token}->{correct};
2068 wakaba 1.1 return ($self->{current_token}); # DOCTYPE
2069     undef $self->{current_token};
2070    
2071     redo A;
2072     } elsif ($self->{next_input_character} == -1) {
2073 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed DOCTYPE');
2074 wakaba 1.18
2075     $self->{state} = 'data';
2076     ## recomsume
2077    
2078     delete $self->{current_token}->{correct};
2079     return ($self->{current_token}); # DOCTYPE
2080     undef $self->{current_token};
2081    
2082     redo A;
2083     } else {
2084     $self->{parse_error}-> (type => 'string after PUBLIC literal');
2085     $self->{state} = 'bogus DOCTYPE';
2086    
2087     if (@{$self->{char}}) {
2088     $self->{next_input_character} = shift @{$self->{char}};
2089     } else {
2090     $self->{set_next_input_character}->($self);
2091     }
2092    
2093     redo A;
2094     }
2095     } elsif ($self->{state} eq 'DOCTYPE system identifier (double-quoted)') {
2096     if ($self->{next_input_character} == 0x0022) { # "
2097     $self->{state} = 'after DOCTYPE system identifier';
2098    
2099     if (@{$self->{char}}) {
2100     $self->{next_input_character} = shift @{$self->{char}};
2101     } else {
2102     $self->{set_next_input_character}->($self);
2103     }
2104    
2105     redo A;
2106     } elsif ($self->{next_input_character} == -1) {
2107     $self->{parse_error}-> (type => 'unclosed SYSTEM literal');
2108    
2109 wakaba 1.1 $self->{state} = 'data';
2110     ## reconsume
2111    
2112 wakaba 1.18 delete $self->{current_token}->{correct};
2113 wakaba 1.1 return ($self->{current_token}); # DOCTYPE
2114     undef $self->{current_token};
2115    
2116     redo A;
2117     } else {
2118 wakaba 1.18 $self->{current_token}->{system_identifier} # DOCTYPE
2119     .= chr $self->{next_input_character};
2120     ## Stay in the state
2121    
2122     if (@{$self->{char}}) {
2123     $self->{next_input_character} = shift @{$self->{char}};
2124     } else {
2125     $self->{set_next_input_character}->($self);
2126     }
2127    
2128     redo A;
2129     }
2130     } elsif ($self->{state} eq 'DOCTYPE system identifier (single-quoted)') {
2131     if ($self->{next_input_character} == 0x0027) { # '
2132     $self->{state} = 'after DOCTYPE system identifier';
2133    
2134     if (@{$self->{char}}) {
2135     $self->{next_input_character} = shift @{$self->{char}};
2136     } else {
2137     $self->{set_next_input_character}->($self);
2138     }
2139    
2140     redo A;
2141     } elsif ($self->{next_input_character} == -1) {
2142     $self->{parse_error}-> (type => 'unclosed SYSTEM literal');
2143    
2144     $self->{state} = 'data';
2145     ## reconsume
2146    
2147     delete $self->{current_token}->{correct};
2148     return ($self->{current_token}); # DOCTYPE
2149     undef $self->{current_token};
2150    
2151     redo A;
2152     } else {
2153     $self->{current_token}->{system_identifier} # DOCTYPE
2154     .= chr $self->{next_input_character};
2155     ## Stay in the state
2156    
2157     if (@{$self->{char}}) {
2158     $self->{next_input_character} = shift @{$self->{char}};
2159     } else {
2160     $self->{set_next_input_character}->($self);
2161     }
2162    
2163     redo A;
2164     }
2165     } elsif ($self->{state} eq 'after DOCTYPE system identifier') {
2166     if ({
2167     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2168     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2169     }->{$self->{next_input_character}}) {
2170     ## Stay in the state
2171    
2172     if (@{$self->{char}}) {
2173     $self->{next_input_character} = shift @{$self->{char}};
2174     } else {
2175     $self->{set_next_input_character}->($self);
2176     }
2177    
2178     redo A;
2179     } elsif ($self->{next_input_character} == 0x003E) { # >
2180     $self->{state} = 'data';
2181    
2182     if (@{$self->{char}}) {
2183     $self->{next_input_character} = shift @{$self->{char}};
2184     } else {
2185     $self->{set_next_input_character}->($self);
2186     }
2187    
2188    
2189     return ($self->{current_token}); # DOCTYPE
2190     undef $self->{current_token};
2191    
2192     redo A;
2193     } elsif ($self->{next_input_character} == -1) {
2194     $self->{parse_error}-> (type => 'unclosed DOCTYPE');
2195    
2196     $self->{state} = 'data';
2197     ## recomsume
2198    
2199     delete $self->{current_token}->{correct};
2200     return ($self->{current_token}); # DOCTYPE
2201     undef $self->{current_token};
2202    
2203     redo A;
2204     } else {
2205     $self->{parse_error}-> (type => 'string after SYSTEM literal');
2206 wakaba 1.1 $self->{state} = 'bogus DOCTYPE';
2207    
2208     if (@{$self->{char}}) {
2209     $self->{next_input_character} = shift @{$self->{char}};
2210     } else {
2211     $self->{set_next_input_character}->($self);
2212     }
2213    
2214     redo A;
2215     }
2216     } elsif ($self->{state} eq 'bogus DOCTYPE') {
2217     if ($self->{next_input_character} == 0x003E) { # >
2218     $self->{state} = 'data';
2219    
2220     if (@{$self->{char}}) {
2221     $self->{next_input_character} = shift @{$self->{char}};
2222     } else {
2223     $self->{set_next_input_character}->($self);
2224     }
2225    
2226    
2227 wakaba 1.18 delete $self->{current_token}->{correct};
2228 wakaba 1.1 return ($self->{current_token}); # DOCTYPE
2229     undef $self->{current_token};
2230    
2231     redo A;
2232     } elsif ($self->{next_input_character} == -1) {
2233 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed DOCTYPE');
2234 wakaba 1.1 $self->{state} = 'data';
2235     ## reconsume
2236    
2237 wakaba 1.18 delete $self->{current_token}->{correct};
2238 wakaba 1.1 return ($self->{current_token}); # DOCTYPE
2239     undef $self->{current_token};
2240    
2241     redo A;
2242     } else {
2243     ## Stay in the state
2244    
2245     if (@{$self->{char}}) {
2246     $self->{next_input_character} = shift @{$self->{char}};
2247     } else {
2248     $self->{set_next_input_character}->($self);
2249     }
2250    
2251     redo A;
2252     }
2253     } else {
2254     die "$0: $self->{state}: Unknown state";
2255     }
2256     } # A
2257    
2258     die "$0: _get_next_token: unexpected case";
2259     } # _get_next_token
2260    
## Attempt to consume an entity (character reference).  On entry
## |$self->{next_input_character}| is the character that follows the
## "&".
##
## Returns a |character| token (a hash reference with |type| and |data|
## keys) when a reference has been consumed, or |undef| when the "&" has
## to be treated as literal text.  Whenever |undef| is returned for a
## numeric reference, the characters that were read ahead are pushed
## back onto |$self->{char}| so that the caller sees them again.
sub _tokenize_attempt_to_consume_an_entity ($) {
  my $self = shift;

  ## Replace |next_input_character| by the next queued character, or
  ## ask the input callback for one when the queue is empty.
  my $advance = sub {
    if (@{$self->{char}}) {
      $self->{next_input_character} = shift @{$self->{char}};
    } else {
      $self->{set_next_input_character}->($self);
    }
  };

  ## Consume an optional ";" terminator; its absence is a parse error.
  my $consume_refc = sub {
    if ($self->{next_input_character} == 0x003B) { # ;
      $advance->();
    } else {
      $self->{parse_error}-> (type => 'no refc');
    }
  };

  ## Turn the code point of a numeric reference into a |character|
  ## token, shared by the hexadecimal and the decimal forms.
  my $numeric_token = sub {
    my $code = shift;
    ## TODO: check the definition for |a valid Unicode character|.
    ## <http://lists.whatwg.org/pipermail/whatwg-whatwg.org/2006-December/thread.html#8189>
    if ($code > 1114111 or $code == 0) {
      $code = 0xFFFD; # REPLACEMENT CHARACTER
      ## ISSUE: Why this is not an error?
    } elsif (0x80 <= $code and $code <= 0x9F) {
      ## References into the C1 range are remapped through the
      ## |$c1_entity_char| table.
      $self->{parse_error}-> (type => sprintf 'c1 entity:U+%04X', $code);
      $code = $c1_entity_char->{$code};
    }
    return {type => 'character', data => chr $code};
  };

  if ({
    0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, # HT, LF, VT, FF,
    0x0020 => 1, 0x003C => 1, 0x0026 => 1, -1 => 1, # SP, <, & # 0x000D # CR
  }->{$self->{next_input_character}}) {
    ## Don't consume
    ## No error
    return undef;
  } elsif ($self->{next_input_character} == 0x0023) { # #
    $advance->();

    if ($self->{next_input_character} == 0x0078 or # x
        $self->{next_input_character} == 0x0058) { # X
      ## Remember which of "x" / "X" was used so that it can be pushed
      ## back unchanged if no hexadecimal digit follows.
      my $x_char = $self->{next_input_character};
      my $num;
      $advance->();

      HEXDIGIT: {
        my $c = $self->{next_input_character};
        my $digit;
        if (0x0030 <= $c and $c <= 0x0039) { # 0..9
          $digit = $c - 0x0030;
        } elsif (0x0061 <= $c and $c <= 0x0066) { # a..f
          ## ISSUE: the spec says U+0078, which is apparently incorrect
          $digit = $c - 0x0061 + 10;
        } elsif (0x0041 <= $c and $c <= 0x0046) { # A..F
          ## ISSUE: the spec says U+0058, which is apparently incorrect
          $digit = $c - 0x0041 + 10;
        } else {
          last HEXDIGIT;
        }
        $num = ($num || 0) * 0x10 + $digit;
        $advance->();
        redo HEXDIGIT;
      } # HEXDIGIT

      unless (defined $num) { # no hexadecimal digit
        $self->{parse_error}-> (type => 'bare hcro');
        ## Push back both the "x" and the character that followed it;
        ## pushing back only the "x" would silently drop a character
        ## from the input stream.
        unshift @{$self->{char}}, ($x_char, $self->{next_input_character});
        $self->{next_input_character} = 0x0023; # #
        return undef;
      }

      $consume_refc->();
      return $numeric_token->($num);
    } elsif (0x0030 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x0039) { # 0..9
      my $code = $self->{next_input_character} - 0x0030;
      $advance->();

      while (0x0030 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x0039) { # 0..9
        $code *= 10;
        $code += $self->{next_input_character} - 0x0030;
        $advance->();
      }

      $consume_refc->();
      return $numeric_token->($code);
    } else {
      $self->{parse_error}-> (type => 'bare nero');
      unshift @{$self->{char}}, ($self->{next_input_character});
      $self->{next_input_character} = 0x0023; # #
      return undef;
    }
  } elsif ((0x0041 <= $self->{next_input_character} and
            $self->{next_input_character} <= 0x005A) or
           (0x0061 <= $self->{next_input_character} and
            $self->{next_input_character} <= 0x007A)) {
    my $entity_name = chr $self->{next_input_character};
    $advance->();

    ## |$value| holds the text to emit: the replacement text of the
    ## last name that matched, followed by any characters read after it.
    my $value = $entity_name;
    ## 1: matched including ";", -1: matched without ";", 0: no match.
    my $match = 0;
    require Whatpm::_NamedEntityList;
    our $EntityChar;

    while (length $entity_name < 10 and
           ## NOTE: Some number greater than the maximum length of entity name
           ((0x0041 <= $self->{next_input_character} and # A
             $self->{next_input_character} <= 0x005A) or # Z
            (0x0061 <= $self->{next_input_character} and # a
             $self->{next_input_character} <= 0x007A) or # z
            (0x0030 <= $self->{next_input_character} and # 0
             $self->{next_input_character} <= 0x0039) or # 9
            $self->{next_input_character} == 0x003B)) { # ;
      $entity_name .= chr $self->{next_input_character};
      if (defined $EntityChar->{$entity_name}) {
        $value = $EntityChar->{$entity_name};
        if ($self->{next_input_character} == 0x003B) { # ;
          $match = 1;
          $advance->();
          last;
        } else {
          $match = -1;
        }
      } else {
        $value .= chr $self->{next_input_character};
      }
      $advance->();
    }

    if ($match > 0) {
      return {type => 'character', data => $value};
    } elsif ($match < 0) {
      $self->{parse_error}-> (type => 'refc');
      return {type => 'character', data => $value};
    } else {
      $self->{parse_error}-> (type => 'bare ero');
      ## NOTE: No characters are consumed in the spec.
      ## The characters read so far are queued as a character token on
      ## |$self->{token}| instead of being pushed back.
      unshift @{$self->{token}}, ({type => 'character', data => $value});
      return undef;
    }
  } else {
    ## no characters are consumed
    $self->{parse_error}-> (type => 'bare ero');
    return undef;
  }
} # _tokenize_attempt_to_consume_an_entity
2465    
## Prepare the document for tree construction.
## NOTE: $self->{document} MUST be specified before this method is called
sub _initialize_tree_constructor ($) {
  my ($self) = @_;
  my $document = $self->{document};

  ## Relax DOM error checking while the tree is being built.
  $document->strict_error_checking (0);
  ## TODO: Turn mutation events off # MUST
  ## TODO: Turn loose Document option (manakai extension) on

  ## Flag the document as an HTML document. # MUST
  return $document->manakai_is_html (1);
} # _initialize_tree_constructor
2474    
## Undo the relaxation made for tree construction: turn the document's
## strict error checking back on.
sub _terminate_tree_constructor ($) {
  my ($self) = @_;
  my $document = $self->{document};
  ## TODO: Turn mutation events on
  return $document->strict_error_checking (1);
} # _terminate_tree_constructor
2480    
2481     ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
2482    
2483 wakaba 1.3 { # tree construction stage
2484     my $token;
2485    
## Entry point of the tree construction stage: fetch the first token,
## reset the tree-construction state kept on the parser object and run
## the three phases in order.
sub _construct_tree ($) {
  my $self = shift;

  ## When an interactive UA render the $self->{document} available
  ## to the user, or when it begin accepting user input, are
  ## not defined.

  ## Append a character: collect it and all subsequent consecutive
  ## characters and insert one Text node whose data is concatenation
  ## of all those characters. # MUST

  ## The phases below all read the current token from the shared
  ## |$token| variable, so it has to be filled before they run.
  $token = $self->_get_next_token;

  ## Fresh tree-construction state.
  $self->{insertion_mode} = 'before head';
  $self->{open_elements} = [];
  $self->{$_} = undef for qw(form_element head_element inner_html_node);

  $self->_tree_construction_initial; # MUST
  $self->_tree_construction_root_element;
  return $self->_tree_construction_main;
} # _construct_tree
2509    
## The "initial" phase of tree construction.
##
## Consumes leading white space and comments, then either processes the
## DOCTYPE token (creating the document type node and choosing the
## document's compatibility mode) or, when anything else comes first,
## reports 'no DOCTYPE' and puts the document into quirks mode.
##
## The current token is read from, and the next token is stored into,
## the |$token| variable shared by the tree construction subs.  When the
## sub returns without fetching a new token, the current token is meant
## to be reprocessed by the next phase.
sub _tree_construction_initial ($) {
  my $self = shift;
  INITIAL: {
    if ($token->{type} eq 'DOCTYPE') {
      ## NOTE: Conformance checkers MAY, instead of reporting "not HTML5"
      ## error, switch to a conformance checking mode for another
      ## language.
      my $doctype_name = $token->{name};
      $doctype_name = '' unless defined $doctype_name;
      $doctype_name =~ tr/a-z/A-Z/;
      if (not defined $token->{name} or # <!DOCTYPE>
          defined $token->{public_identifier} or
          defined $token->{system_identifier}) {
        $self->{parse_error}-> (type => 'not HTML5');
      } elsif ($doctype_name ne 'HTML') {
        ## ISSUE: ASCII case-insensitive? (in fact it does not matter)
        $self->{parse_error}-> (type => 'not HTML5');
      }

      my $doctype = $self->{document}->create_document_type_definition
          ($token->{name}); ## ISSUE: If name is missing (e.g. <!DOCTYPE>)?
      $doctype->public_id ($token->{public_identifier})
          if defined $token->{public_identifier};
      $doctype->system_id ($token->{system_identifier})
          if defined $token->{system_identifier};
      ## NOTE: Other DocumentType attributes are null or empty lists.
      ## ISSUE: internalSubset = null??
      $self->{document}->append_child ($doctype);

      if (not $token->{correct} or $doctype_name ne 'HTML') {
        $self->{document}->manakai_compat_mode ('quirks');
      } elsif (defined $token->{public_identifier}) {
        ## Public identifiers are compared ASCII case-insensitively, so
        ## every literal below has to be written in upper case.
        my $pubid = $token->{public_identifier};
        $pubid =~ tr/a-z/A-Z/;
        if ({
          "+//SILMARIL//DTD HTML PRO V0R11 19970101//EN" => 1,
          "-//ADVASOFT LTD//DTD HTML 3.0 ASWEDIT + EXTENSIONS//EN" => 1,
          "-//AS//DTD HTML 3.0 ASWEDIT + EXTENSIONS//EN" => 1,
          "-//IETF//DTD HTML 2.0 LEVEL 1//EN" => 1,
          "-//IETF//DTD HTML 2.0 LEVEL 2//EN" => 1,
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 1//EN" => 1,
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 2//EN" => 1,
          "-//IETF//DTD HTML 2.0 STRICT//EN" => 1,
          "-//IETF//DTD HTML 2.0//EN" => 1,
          "-//IETF//DTD HTML 2.1E//EN" => 1,
          "-//IETF//DTD HTML 3.0//EN" => 1,
          "-//IETF//DTD HTML 3.0//EN//" => 1,
          "-//IETF//DTD HTML 3.2 FINAL//EN" => 1,
          "-//IETF//DTD HTML 3.2//EN" => 1,
          "-//IETF//DTD HTML 3//EN" => 1,
          "-//IETF//DTD HTML LEVEL 0//EN" => 1,
          "-//IETF//DTD HTML LEVEL 0//EN//2.0" => 1,
          "-//IETF//DTD HTML LEVEL 1//EN" => 1,
          "-//IETF//DTD HTML LEVEL 1//EN//2.0" => 1,
          "-//IETF//DTD HTML LEVEL 2//EN" => 1,
          "-//IETF//DTD HTML LEVEL 2//EN//2.0" => 1,
          "-//IETF//DTD HTML LEVEL 3//EN" => 1,
          "-//IETF//DTD HTML LEVEL 3//EN//3.0" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 0//EN" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 0//EN//2.0" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 1//EN" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 1//EN//2.0" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 2//EN" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 2//EN//2.0" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 3//EN" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 3//EN//3.0" => 1,
          "-//IETF//DTD HTML STRICT//EN" => 1,
          "-//IETF//DTD HTML STRICT//EN//2.0" => 1,
          "-//IETF//DTD HTML STRICT//EN//3.0" => 1,
          "-//IETF//DTD HTML//EN" => 1,
          "-//IETF//DTD HTML//EN//2.0" => 1,
          "-//IETF//DTD HTML//EN//3.0" => 1,
          "-//METRIUS//DTD METRIUS PRESENTATIONAL//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML STRICT//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 TABLES//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML STRICT//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 TABLES//EN" => 1,
          "-//NETSCAPE COMM. CORP.//DTD HTML//EN" => 1,
          "-//NETSCAPE COMM. CORP.//DTD STRICT HTML//EN" => 1,
          "-//O'REILLY AND ASSOCIATES//DTD HTML 2.0//EN" => 1,
          "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED 1.0//EN" => 1,
          "-//SPYGLASS//DTD HTML 2.0 EXTENDED//EN" => 1,
          "-//SQ//DTD HTML 2.0 HOTMETAL + EXTENSIONS//EN" => 1,
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA HTML//EN" => 1,
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA STRICT HTML//EN" => 1,
          "-//W3C//DTD HTML 3 1995-03-24//EN" => 1,
          "-//W3C//DTD HTML 3.2 DRAFT//EN" => 1,
          "-//W3C//DTD HTML 3.2 FINAL//EN" => 1,
          "-//W3C//DTD HTML 3.2//EN" => 1,
          "-//W3C//DTD HTML 3.2S DRAFT//EN" => 1,
          "-//W3C//DTD HTML 4.0 FRAMESET//EN" => 1,
          "-//W3C//DTD HTML 4.0 TRANSITIONAL//EN" => 1,
          "-//W3C//DTD HTML EXPERIMETNAL 19960712//EN" => 1,
          "-//W3C//DTD HTML EXPERIMENTAL 970421//EN" => 1,
          "-//W3C//DTD W3 HTML//EN" => 1,
          "-//W3O//DTD W3 HTML 3.0//EN" => 1,
          "-//W3O//DTD W3 HTML 3.0//EN//" => 1,
          "-//W3O//DTD W3 HTML STRICT 3.0//EN//" => 1,
          "-//WEBTECHS//DTD MOZILLA HTML 2.0//EN" => 1,
          "-//WEBTECHS//DTD MOZILLA HTML//EN" => 1,
          "-/W3C/DTD HTML 4.0 TRANSITIONAL/EN" => 1,
          "HTML" => 1,
        }->{$pubid}) {
          $self->{document}->manakai_compat_mode ('quirks');
        } elsif ($pubid eq "-//W3C//DTD HTML 4.01 FRAMESET//EN" or
                 $pubid eq "-//W3C//DTD HTML 4.01 TRANSITIONAL//EN") {
          ## HTML 4.01 Frameset / Transitional: quirks only when the
          ## system identifier is missing, limited quirks otherwise.
          if (defined $token->{system_identifier}) {
            $self->{document}->manakai_compat_mode ('limited quirks');
          } else {
            $self->{document}->manakai_compat_mode ('quirks');
          }
        } elsif ($pubid eq "-//W3C//DTD XHTML 1.0 FRAMESET//EN" or
                 $pubid eq "-//W3C//DTD XHTML 1.0 TRANSITIONAL//EN") {
          $self->{document}->manakai_compat_mode ('limited quirks');
        }
      }
      if (defined $token->{system_identifier}) {
        ## This check runs last, so a matching system identifier puts
        ## the document into quirks mode whatever was chosen above.
        my $sysid = $token->{system_identifier};
        $sysid =~ tr/A-Z/a-z/;
        if ($sysid eq "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
          $self->{document}->manakai_compat_mode ('quirks');
        }
      }

      ## Go to the root element phase.
      $token = $self->_get_next_token;
      return;
    } elsif ({
              'start tag' => 1,
              'end tag' => 1,
              'end-of-file' => 1,
             }->{$token->{type}}) {
      $self->{parse_error}-> (type => 'no DOCTYPE');
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the root element phase
      ## reprocess
      return;
    } elsif ($token->{type} eq 'character') {
      ## Leading white space is dropped from the token; a token made of
      ## white space only is ignored altogether.
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D
        ## Ignore the token
        unless (length $token->{data}) {
          ## Stay in the phase
          $token = $self->_get_next_token;
          redo INITIAL;
        }
      }

      $self->{parse_error}-> (type => 'no DOCTYPE');
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the root element phase
      ## reprocess
      return;
    } elsif ($token->{type} eq 'comment') {
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);

      ## Stay in the phase.
      $token = $self->_get_next_token;
      redo INITIAL;
    } else {
      die "$0: $token->{type}: Unknown token";
    }
  } # INITIAL
} # _tree_construction_initial
2676    
## Root element phase of tree construction.
##
## Works on the current token (the file-scoped $token):
##   - DOCTYPE tokens raise a parse error and are dropped;
##   - comment tokens become comment children of the document;
##   - leading white space of a character token is appended to the
##     document as text.
## Any other start tag, end tag, end-of-file, or character token with
## non-space data ends the phase: an |html| element is created,
## appended to the document, and pushed onto the stack of open
## elements.  The token that ended the phase is left in $token so that
## the caller reprocesses it.  Dies on an unknown token type.
sub _tree_construction_root_element ($) {
  my $self = shift;

  PHASE: while (1) {
    my $type = $token->{type};

    if ($type eq 'DOCTYPE') {
      $self->{parse_error}-> (type => 'in html:#DOCTYPE');
      ## Ignore the token and stay in the phase.
      $token = $self->_get_next_token;
      next PHASE;
    }

    if ($type eq 'comment') {
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);
      ## Stay in the phase.
      $token = $self->_get_next_token;
      next PHASE;
    }

    if ($type eq 'character') {
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
        ## ISSUE: DOM3 Core does not allow Document > Text
        $self->{document}->manakai_append_text ($1);
        unless (length $token->{data}) {
          ## The token was white space only; stay in the phase.
          $token = $self->_get_next_token;
          next PHASE;
        }
      }
      ## Remaining non-space data is handled after the root exists.
      last PHASE;
    }

    ## ISSUE: There is an issue in the spec
    last PHASE
        if $type eq 'start tag' or
           $type eq 'end tag' or
           $type eq 'end-of-file';

    die "$0: $token->{type}: Unknown token";
  } # PHASE

  ## Create the root element and make it the current node.
  my $root_element = $self->{document}->create_element_ns
      (q<http://www.w3.org/1999/xhtml>, [undef, 'html']);
  $self->{document}->append_child ($root_element);
  push @{$self->{open_elements}}, [$root_element, 'html'];

  ## The current token is reprocessed by the caller.
  return;
} # _tree_construction_root_element
2726    
## Reset the insertion mode appropriately.
##
## Walks the stack of open elements ($self->{open_elements}, a list of
## [node, local name] pairs) from the current node towards the root and
## sets $self->{insertion_mode} according to the first element name
## that determines a mode.
##
## In the fragment (innerHTML) case, $self->{inner_html_node} holds the
## context element as a [node, local name] pair.  It stands in for the
## node under inspection ONLY when that node is the first entry of the
## stack, and only if the context element is neither |td| nor |th|.
## Substituting it on every iteration would hide every element on the
## stack and always yield 'in body'.
##
## Reads $self->{head_element} to choose between 'before head' and
## 'after head' when the |html| element is reached.  Returns nothing.
sub _reset_insertion_mode ($) {
  my $self = shift;

  ## Step 1
  my $last;

  ## Step 2
  my $i = -1;
  my $node = $self->{open_elements}->[$i];

  ## Step 3
  S3: {
    if ($self->{open_elements}->[0]->[0] eq $node->[0]) {
      $last = 1;

      ## innerHTML case: inspect the context element instead of the
      ## root of the stack.
      if (defined $self->{inner_html_node} and
          $self->{inner_html_node}->[1] ne 'td' and
          $self->{inner_html_node}->[1] ne 'th') {
        $node = $self->{inner_html_node};
      }
    }

    ## Step 4..13
    my $new_mode = {
      select => 'in select',
      td => 'in cell',
      th => 'in cell',
      tr => 'in row',
      tbody => 'in table body',
      thead => 'in table head',
      tfoot => 'in table foot',
      caption => 'in caption',
      colgroup => 'in column group',
      table => 'in table',
      head => 'in body', # not in head!
      body => 'in body',
      frameset => 'in frameset',
    }->{$node->[1]};
    if (defined $new_mode) {
      $self->{insertion_mode} = $new_mode;
      return;
    }

    ## Step 14
    if ($node->[1] eq 'html') {
      if (defined $self->{head_element}) {
        $self->{insertion_mode} = 'after head';
      } else {
        $self->{insertion_mode} = 'before head';
      }
      return;
    }

    ## Step 15
    if ($last) {
      $self->{insertion_mode} = 'in body';
      return;
    }

    ## Step 16
    $i--;
    $node = $self->{open_elements}->[$i];

    ## Step 17
    redo S3;
  } # S3
} # _reset_insertion_mode
2788    
2789     sub _tree_construction_main ($) {
2790     my $self = shift;
2791    
2792     my $phase = 'main';
2793 wakaba 1.1
2794     my $active_formatting_elements = [];
2795    
## Reconstruct the active formatting elements.
##
## Takes one argument: a code reference that is called with a freshly
## cloned element node and inserts it into the tree.
##
## Does nothing if the list of active formatting elements is empty, or
## if its last entry is a marker or is still on the stack of open
## elements.  Otherwise it rewinds to the entry after the nearest
## earlier entry that is a marker or is still open (or to the first
## entry), then walks forward to the end of the list; each entry on the
## way is shallow-cloned, the clone is inserted and pushed onto the
## stack of open elements, and the list entry is replaced by the clone.
my $reconstruct_active_formatting_elements = sub { # MUST
  my $insert = shift;

  ## Step 1
  return unless @$active_formatting_elements;

  ## Step 3
  my $i = -1;
  my $entry = $active_formatting_elements->[$i];

  ## Step 2
  return if $entry->[0] eq '#marker';
  return if grep { $entry->[0] eq $_->[0] } @{$self->{open_elements}};

  ## Step 4: rewind until the first entry of the list is reached...
  REWIND: until ($active_formatting_elements->[0]->[0] eq $entry->[0]) {
    ## Step 5
    $i--;
    $entry = $active_formatting_elements->[$i];

    ## Step 6: ...or until a marker or a still-open element is found.
    unless ($entry->[0] eq '#marker') {
      my $is_open
          = grep { $entry->[0] eq $_->[0] } @{$self->{open_elements}};
      next REWIND unless $is_open;
    }

    ## Step 7: that entry needs no clone; start with the one after it.
    $i++;
    $entry = $active_formatting_elements->[$i];
    last REWIND;
  } # REWIND

  CLONE: while (1) {
    ## Step 8
    my $clone = [$entry->[0]->clone_node (0), $entry->[1]];

    ## Step 9
    $insert->($clone->[0]);
    push @{$self->{open_elements}}, $clone;

    ## Step 10
    $active_formatting_elements->[$i] = $self->{open_elements}->[-1];

    ## Step 11: done once the last entry of the list has been replaced.
    last CLONE if $clone->[0] eq $active_formatting_elements->[-1]->[0];

    ## Step 7'
    $i++;
    $entry = $active_formatting_elements->[$i];
  } # CLONE
}; # $reconstruct_active_formatting_elements
2866    
## Clear the list of active formatting elements up to the last marker.
##
## Entries are removed from the end of the list one at a time until a
## marker entry (whose first member is the string '#marker') has been
## removed.  If the list contains no marker at all, every entry is
## removed, as the step-by-step definition of the algorithm implies;
## a backwards scan that only truncates on a hit would leave stale
## entries behind in that case.
my $clear_up_to_marker = sub {
  while (@$active_formatting_elements) {
    my $entry = pop @$active_formatting_elements;
    return if $entry->[0] eq '#marker';
  }
  return;
}; # $clear_up_to_marker
2875    
## Process a |style| start tag held in the current token ($token).
##
## Creates the element with the token's attributes and appends it to
## the head element (when the insertion mode is 'in head' and a head
## element exists) or otherwise to the current node.  The tokenizer is
## switched to CDATA, all following character tokens are gathered into
## a single text node, and the tokenizer is switched back to PCDATA.
## Anything other than the matching end tag raises a parse error.
## On return $token holds the token after the one that ended the text.
my $style_start_tag = sub {
  my $style_el = $self->{document}->create_element_ns
      (q<http://www.w3.org/1999/xhtml>, [undef, 'style']);

  for my $attr_name (keys %{$token->{attributes}}) {
    my $attr_value = $token->{attributes}->{$attr_name}->{value};
    $style_el->set_attribute_ns (undef, [undef, $attr_name], $attr_value);
  }

  ## $self->{insertion_mode} eq 'in head' and ... (always true)
  my $parent;
  if ($self->{insertion_mode} eq 'in head' and
      defined $self->{head_element}) {
    $parent = $self->{head_element};
  } else {
    $parent = $self->{open_elements}->[-1]->[0];
  }
  $parent->append_child ($style_el);

  $self->{content_model_flag} = 'CDATA';
  delete $self->{escape}; # MUST

  ## Gather character data; stops at the first non-character token or
  ## when the tokenizer stops tokenising.
  my @chunks;
  COLLECT: while (1) {
    $token = $self->_get_next_token;
    last COLLECT unless $token->{type} eq 'character';
    push @chunks, $token->{data};
  } # COLLECT
  my $text = join '', @chunks;
  $style_el->manakai_append_text ($text) if length $text;

  $self->{content_model_flag} = 'PCDATA';

  ## The matching end tag is silently ignored; anything else is an
  ## error.
  unless ($token->{type} eq 'end tag' and $token->{tag_name} eq 'style') {
    $self->{parse_error}-> (type => 'in CDATA:#'.$token->{type});
    ## ISSUE: And ignore?
  }
  $token = $self->_get_next_token;
}; # $style_start_tag
2913    
## Process a |script| start tag held in the current token ($token).
##
## Creates the element with the token's attributes, switches the
## tokenizer to CDATA, gathers all following character tokens into a
## single text node of the element, and switches back to PCDATA.
## Anything other than the matching end tag raises a parse error.
## Unless an innerHTML context node is set, the element is appended to
## the head element (insertion mode 'in head' with a head element) or
## otherwise to the current node; with an innerHTML context node the
## element is not appended here.  On return $token holds the token
## after the one that ended the text.
my $script_start_tag = sub {
  my $script_el = $self->{document}->create_element_ns
      (q<http://www.w3.org/1999/xhtml>, [undef, 'script']);

  for my $attr_name (keys %{$token->{attributes}}) {
    my $attr_value = $token->{attributes}->{$attr_name}->{value};
    $script_el->set_attribute_ns (undef, [undef, $attr_name], $attr_value);
  }

  ## TODO: mark as "parser-inserted"

  $self->{content_model_flag} = 'CDATA';
  delete $self->{escape}; # MUST

  ## Gather character data; stops at the first non-character token or
  ## when the tokenizer stops tokenising.
  my @chunks;
  COLLECT: while (1) {
    $token = $self->_get_next_token;
    last COLLECT unless $token->{type} eq 'character';
    push @chunks, $token->{data};
  } # COLLECT
  my $text = join '', @chunks;
  $script_el->manakai_append_text ($text) if length $text;

  $self->{content_model_flag} = 'PCDATA';

  ## The matching end tag is silently ignored; anything else is an
  ## error.
  unless ($token->{type} eq 'end tag' and
          $token->{tag_name} eq 'script') {
    $self->{parse_error}-> (type => 'in CDATA:#'.$token->{type});
    ## ISSUE: And ignore?
    ## TODO: mark as "already executed"
  }

  unless (defined $self->{inner_html_node}) {
    ## TODO: $old_insertion_point = current insertion point
    ## TODO: insertion point = just before the next input character

    my $parent;
    if ($self->{insertion_mode} eq 'in head' and
        defined $self->{head_element}) {
      $parent = $self->{head_element};
    } else {
      $parent = $self->{open_elements}->[-1]->[0];
    }
    $parent->append_child ($script_el);

    ## TODO: insertion point = $old_insertion_point (might be "undefined")

    ## TODO: if there is a script that will execute as soon as the parser resume, then...
  } else {
    ## TODO: mark as "already executed"
  }

  $token = $self->_get_next_token;
}; # $script_start_tag
2967    
## Process an end tag of a formatting element (the steps the HTML5
## specification nicknames the "adoption agency algorithm").
##
## Takes one argument, the tag name of the end tag.  Works on the list
## of active formatting elements ($active_formatting_elements) and the
## stack of open elements ($self->{open_elements}); both hold
## [node, local name] pairs, and markers in the former have '#marker'
## as their first member.
##
## Outline of one pass (the whole series is repeated via |redo FET|
## until one of the early exits below is taken):
##   1. Find the formatting element: the last list entry with the given
##      tag name after the last marker.  It must also be on the stack
##      and in scope; otherwise a parse error is raised and the token
##      is dropped.
##   2. Find the furthest block: the special or scoping element on the
##      stack nearest below the formatting element.
##   3. Without a furthest block, pop the stack up to and including the
##      formatting element, drop it from the list, and finish.
##   4..13. Otherwise re-parent the furthest block and the formatting
##      elements between it and the formatting element, and replace the
##      formatting element by a clone placed inside the furthest block.
##
## Every exit fetches the next token into $token.
my $formatting_end_tag = sub {
  my $tag_name = shift;

  FET: {
    ## Step 1
    my $formatting_element;
    my $formatting_element_i_in_active;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[1] eq $tag_name) {
        $formatting_element = $active_formatting_elements->[$_];
        $formatting_element_i_in_active = $_;
        last AFE;
      } elsif ($active_formatting_elements->[$_]->[0] eq '#marker') {
        last AFE;
      }
    } # AFE
    unless (defined $formatting_element) {
      $self->{parse_error}-> (type => 'unmatched end tag:'.$tag_name);
      ## Ignore the token
      $token = $self->_get_next_token;
      return;
    }
    ## has an element in scope
    my $in_scope = 1;
    my $formatting_element_i_in_open;
    INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if ($node->[0] eq $formatting_element->[0]) {
        if ($in_scope) {
          $formatting_element_i_in_open = $_;
          last INSCOPE;
        } else { # in open elements but not in scope
          $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
          ## Ignore the token
          $token = $self->_get_next_token;
          return;
        }
      } elsif ({
                table => 1, caption => 1, td => 1, th => 1,
                button => 1, marquee => 1, object => 1, html => 1,
               }->{$node->[1]}) {
        $in_scope = 0;
      }
    } # INSCOPE
    unless (defined $formatting_element_i_in_open) {
      $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
      ## Remove the formatting element itself.  It is not necessarily
      ## the last entry of the list, so it is removed by its index
      ## rather than popped.
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      $token = $self->_get_next_token; ## TODO: ok?
      return;
    }
    if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
      $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
    }

    ## Step 2
    ## Scanning from the current node upwards and overwriting on every
    ## hit leaves the candidate closest to the formatting element.
    my $furthest_block;
    my $furthest_block_i_in_open;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if (not $formatting_category->{$node->[1]} and
          #not $phrasing_category->{$node->[1]} and
          ($special_category->{$node->[1]} or
           $scoping_category->{$node->[1]})) {
        $furthest_block = $node;
        $furthest_block_i_in_open = $_;
      } elsif ($node->[0] eq $formatting_element->[0]) {
        last OE;
      }
    } # OE

    ## Step 3
    unless (defined $furthest_block) { # MUST
      splice @{$self->{open_elements}}, $formatting_element_i_in_open;
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      $token = $self->_get_next_token;
      return;
    }

    ## Step 4
    my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1];

    ## Step 5
    my $furthest_block_parent = $furthest_block->[0]->parent_node;
    if (defined $furthest_block_parent) {
      $furthest_block_parent->remove_child ($furthest_block->[0]);
    }

    ## Step 6
    ## The bookmark is kept as the node of the list entry that precedes
    ## the position at which the clone is inserted in step 12.
    ## NOTE(review): if the formatting element is the first entry, the
    ## index -1 below wraps around to the last entry of the list --
    ## confirm that this is intended.
    my $bookmark_prev_el
        = $active_formatting_elements->[$formatting_element_i_in_active - 1]
            ->[0];

    ## Step 7
    my $node = $furthest_block;
    my $node_i_in_open = $furthest_block_i_in_open;
    my $last_node = $furthest_block;
    S7: {
      ## Step 1
      $node_i_in_open--;
      $node = $self->{open_elements}->[$node_i_in_open];

      ## Step 2
      ## A node that is not in the list of active formatting elements
      ## is dropped from the stack, and the next node up is examined.
      my $node_i_in_active;
      S7S2: {
        for (reverse 0..$#$active_formatting_elements) {
          if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
            $node_i_in_active = $_;
            last S7S2;
          }
        }
        splice @{$self->{open_elements}}, $node_i_in_open, 1;
        redo S7;
      } # S7S2

      ## Step 3
      last S7 if $node->[0] eq $formatting_element->[0];

      ## Step 4
      if ($last_node->[0] eq $furthest_block->[0]) {
        $bookmark_prev_el = $node->[0];
      }

      ## Step 5
      if ($node->[0]->has_child_nodes ()) {
        my $clone = [$node->[0]->clone_node (0), $node->[1]];
        $active_formatting_elements->[$node_i_in_active] = $clone;
        $self->{open_elements}->[$node_i_in_open] = $clone;
        $node = $clone;
      }

      ## Step 6
      $node->[0]->append_child ($last_node->[0]);

      ## Step 7
      $last_node = $node;

      ## Step 8
      redo S7;
    } # S7

    ## Step 8
    $common_ancestor_node->[0]->append_child ($last_node->[0]);

    ## Step 9
    my $clone = [$formatting_element->[0]->clone_node (0),
                 $formatting_element->[1]];

    ## Step 10
    ## Take a snapshot of the child list before the children are moved.
    my @cn = @{$furthest_block->[0]->child_nodes};
    $clone->[0]->append_child ($_) for @cn;

    ## Step 11
    $furthest_block->[0]->append_child ($clone->[0]);

    ## Step 12
    ## Remove the formatting element from the list and insert the clone
    ## right after the bookmarked entry.  When the bookmark has already
    ## been seen (it has a higher index), its index shifts down by one
    ## as the formatting element is removed.
    my $i;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
        splice @$active_formatting_elements, $_, 1;
        $i-- and last AFE if defined $i;
      } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
        $i = $_;
      }
    } # AFE
    splice @$active_formatting_elements, $i + 1, 0, $clone;

    ## Step 13
    ## Remove the formatting element from the stack and INSERT the
    ## clone immediately below the furthest block.  The splice length
    ## must be 0: a length of 1 would overwrite whatever element
    ## follows the furthest block on the stack.
    undef $i;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
        splice @{$self->{open_elements}}, $_, 1;
        $i-- and last OE if defined $i;
      } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
        $i = $_;
      }
    } # OE
    splice @{$self->{open_elements}}, $i + 1, 0, $clone;

    ## Step 14
    redo FET;
  } # FET
}; # $formatting_end_tag
3150    
## Insertion callback: appends the given node as the last child of the
## current node (the last entry on the stack of open elements).
my $insert_to_current = sub {
  my $child = shift;
  my $current_node = $self->{open_elements}->[-1]->[0];
  $current_node->append_child ($child);
}; # $insert_to_current
3154    
## Insertion callback with foster parenting.
##
## If the current node is not one of |table|, |tbody|, |tfoot|,
## |thead|, or |tr|, the given node is simply appended to the current
## node.  Otherwise the node is placed relative to the last |table| on
## the stack of open elements:
##   - if that table has a parent whose node type is 1 (element), the
##     node is inserted into the parent just before the table;
##   - otherwise it is appended to the element just above the table on
##     the stack;
##   - if no |table| is on the stack, it is appended to the first
##     element on the stack.
my $insert_to_foster = sub {
  my $child = shift;
  my $stack = $self->{open_elements};

  my $current_name = $stack->[-1]->[1];
  unless (grep { $current_name eq $_ } qw(table tbody tfoot thead tr)) {
    return $stack->[-1]->[0]->append_child ($child);
  }

  # MUST
  my $foster_parent_element;
  my $next_sibling;
  TABLE: for my $idx (reverse 0..$#$stack) {
    next TABLE unless $stack->[$idx]->[1] eq 'table';

    my $parent = $stack->[$idx]->[0]->parent_node;
    if (defined $parent and $parent->node_type == 1) {
      $foster_parent_element = $parent;
      $next_sibling = $stack->[$idx]->[0];
    } else {
      $foster_parent_element = $stack->[$idx - 1]->[0];
    }
    last TABLE;
  } # TABLE

  $foster_parent_element = $stack->[0]->[0]
      unless defined $foster_parent_element;

  ## An undefined $next_sibling is passed through unchanged.
  $foster_parent_element->insert_before ($child, $next_sibling);
}; # $insert_to_foster
3185    
3186     my $in_body = sub {
3187     my $insert = shift;
3188     if ($token->{type} eq 'start tag') {
3189     if ($token->{tag_name} eq 'script') {
3190     $script_start_tag->();
3191     return;
3192     } elsif ($token->{tag_name} eq 'style') {
3193     $style_start_tag->();
3194     return;
3195     } elsif ({
3196     base => 1, link => 1, meta => 1,
3197     }->{$token->{tag_name}}) {
3198 wakaba 1.3 $self->{parse_error}-> (type => 'in body:'.$token->{tag_name});
3199 wakaba 1.1 ## NOTE: This is an "as if in head" code clone
3200     my $el;
3201    
3202     $el = $self->{document}->create_element_ns
3203     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3204    
3205     for my $attr_name (keys %{ $token->{attributes}}) {
3206     $el->set_attribute_ns (undef, [undef, $attr_name],
3207     $token->{attributes} ->{$attr_name}->{value});
3208     }
3209    
3210 wakaba 1.3 if (defined $self->{head_element}) {
3211     $self->{head_element}->append_child ($el);
3212 wakaba 1.1 } else {
3213     $insert->($el);
3214     }
3215    
3216     $token = $self->_get_next_token;
3217     return;
3218     } elsif ($token->{tag_name} eq 'title') {
3219 wakaba 1.3 $self->{parse_error}-> (type => 'in body:title');
3220 wakaba 1.1 ## NOTE: There is an "as if in head" code clone
3221     my $title_el;
3222    
3223     $title_el = $self->{document}->create_element_ns
3224     (q<http://www.w3.org/1999/xhtml>, [undef, 'title']);
3225    
3226     for my $attr_name (keys %{ $token->{attributes}}) {
3227     $title_el->set_attribute_ns (undef, [undef, $attr_name],
3228     $token->{attributes} ->{$attr_name}->{value});
3229     }
3230    
3231 wakaba 1.3 (defined $self->{head_element} ? $self->{head_element} : $self->{open_elements}->[-1]->[0])
3232 wakaba 1.1 ->append_child ($title_el);
3233     $self->{content_model_flag} = 'RCDATA';
3234 wakaba 1.13 delete $self->{escape}; # MUST
3235 wakaba 1.1
3236     my $text = '';
3237     $token = $self->_get_next_token;
3238     while ($token->{type} eq 'character') {
3239     $text .= $token->{data};
3240     $token = $self->_get_next_token;
3241     }
3242     if (length $text) {
3243     $title_el->manakai_append_text ($text);
3244     }
3245    
3246     $self->{content_model_flag} = 'PCDATA';
3247    
3248     if ($token->{type} eq 'end tag' and
3249     $token->{tag_name} eq 'title') {
3250     ## Ignore the token
3251     } else {
3252 wakaba 1.3 $self->{parse_error}-> (type => 'in RCDATA:#'.$token->{type});
3253 wakaba 1.1 ## ISSUE: And ignore?
3254     }
3255     $token = $self->_get_next_token;
3256     return;
3257     } elsif ($token->{tag_name} eq 'body') {
3258 wakaba 1.3 $self->{parse_error}-> (type => 'in body:body');
3259 wakaba 1.1
3260 wakaba 1.3 if (@{$self->{open_elements}} == 1 or
3261     $self->{open_elements}->[1]->[1] ne 'body') {
3262 wakaba 1.1 ## Ignore the token
3263     } else {
3264 wakaba 1.3 my $body_el = $self->{open_elements}->[1]->[0];
3265 wakaba 1.1 for my $attr_name (keys %{$token->{attributes}}) {
3266     unless ($body_el->has_attribute_ns (undef, $attr_name)) {
3267     $body_el->set_attribute_ns
3268     (undef, [undef, $attr_name],
3269     $token->{attributes}->{$attr_name}->{value});
3270     }
3271     }
3272     }
3273     $token = $self->_get_next_token;
3274     return;
3275     } elsif ({
3276     address => 1, blockquote => 1, center => 1, dir => 1,
3277     div => 1, dl => 1, fieldset => 1, listing => 1,
3278     menu => 1, ol => 1, p => 1, ul => 1,
3279     pre => 1,
3280     }->{$token->{tag_name}}) {
3281     ## has a p element in scope
3282 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
3283 wakaba 1.1 if ($_->[1] eq 'p') {
3284     unshift @{$self->{token}}, $token;
3285     $token = {type => 'end tag', tag_name => 'p'};
3286     return;
3287     } elsif ({
3288     table => 1, caption => 1, td => 1, th => 1,
3289     button => 1, marquee => 1, object => 1, html => 1,
3290     }->{$_->[1]}) {
3291     last INSCOPE;
3292     }
3293     } # INSCOPE
3294    
3295    
3296     {
3297     my $el;
3298    
3299     $el = $self->{document}->create_element_ns
3300     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3301    
3302     for my $attr_name (keys %{ $token->{attributes}}) {
3303     $el->set_attribute_ns (undef, [undef, $attr_name],
3304     $token->{attributes} ->{$attr_name}->{value});
3305     }
3306    
3307     $insert->($el);
3308 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3309 wakaba 1.1 }
3310    
3311     if ($token->{tag_name} eq 'pre') {
3312     $token = $self->_get_next_token;
3313     if ($token->{type} eq 'character') {
3314     $token->{data} =~ s/^\x0A//;
3315     unless (length $token->{data}) {
3316     $token = $self->_get_next_token;
3317     }
3318     }
3319     } else {
3320     $token = $self->_get_next_token;
3321     }
3322     return;
3323     } elsif ($token->{tag_name} eq 'form') {
3324 wakaba 1.3 if (defined $self->{form_element}) {
3325     $self->{parse_error}-> (type => 'in form:form');
3326 wakaba 1.1 ## Ignore the token
3327 wakaba 1.7 $token = $self->_get_next_token;
3328     return;
3329 wakaba 1.1 } else {
3330     ## has a p element in scope
3331 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
3332 wakaba 1.1 if ($_->[1] eq 'p') {
3333     unshift @{$self->{token}}, $token;
3334     $token = {type => 'end tag', tag_name => 'p'};
3335     return;
3336     } elsif ({
3337     table => 1, caption => 1, td => 1, th => 1,
3338     button => 1, marquee => 1, object => 1, html => 1,
3339     }->{$_->[1]}) {
3340     last INSCOPE;
3341     }
3342     } # INSCOPE
3343    
3344    
3345     {
3346     my $el;
3347    
3348     $el = $self->{document}->create_element_ns
3349     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3350    
3351     for my $attr_name (keys %{ $token->{attributes}}) {
3352     $el->set_attribute_ns (undef, [undef, $attr_name],
3353     $token->{attributes} ->{$attr_name}->{value});
3354     }
3355    
3356     $insert->($el);
3357 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3358 wakaba 1.1 }
3359    
3360 wakaba 1.3 $self->{form_element} = $self->{open_elements}->[-1]->[0];
3361 wakaba 1.1 $token = $self->_get_next_token;
3362     return;
3363     }
3364     } elsif ($token->{tag_name} eq 'li') {
3365     ## has a p element in scope
3366 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
3367 wakaba 1.1 if ($_->[1] eq 'p') {
3368     unshift @{$self->{token}}, $token;
3369     $token = {type => 'end tag', tag_name => 'p'};
3370     return;
3371     } elsif ({
3372     table => 1, caption => 1, td => 1, th => 1,
3373     button => 1, marquee => 1, object => 1, html => 1,
3374     }->{$_->[1]}) {
3375     last INSCOPE;
3376     }
3377     } # INSCOPE
3378    
3379     ## Step 1
3380     my $i = -1;
3381 wakaba 1.3 my $node = $self->{open_elements}->[$i];
3382 wakaba 1.1 LI: {
3383     ## Step 2
3384     if ($node->[1] eq 'li') {
3385 wakaba 1.8 if ($i != -1) {
3386     $self->{parse_error}-> (type => 'end tag missing:'.
3387     $self->{open_elements}->[-1]->[1]);
3388     ## TODO: test
3389     }
3390 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3391 wakaba 1.1 last LI;
3392     }
3393    
3394     ## Step 3
3395     if (not $formatting_category->{$node->[1]} and
3396     #not $phrasing_category->{$node->[1]} and
3397     ($special_category->{$node->[1]} or
3398     $scoping_category->{$node->[1]}) and
3399     $node->[1] ne 'address' and $node->[1] ne 'div') {
3400     last LI;
3401     }
3402    
3403     ## Step 4
3404     $i--;
3405 wakaba 1.3 $node = $self->{open_elements}->[$i];
3406 wakaba 1.1 redo LI;
3407     } # LI
3408    
3409    
3410     {
3411     my $el;
3412    
3413     $el = $self->{document}->create_element_ns
3414     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3415    
3416     for my $attr_name (keys %{ $token->{attributes}}) {
3417     $el->set_attribute_ns (undef, [undef, $attr_name],
3418     $token->{attributes} ->{$attr_name}->{value});
3419     }
3420    
3421     $insert->($el);
3422 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3423 wakaba 1.1 }
3424    
3425     $token = $self->_get_next_token;
3426     return;
3427     } elsif ($token->{tag_name} eq 'dd' or $token->{tag_name} eq 'dt') {
3428     ## has a p element in scope
3429 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
3430 wakaba 1.1 if ($_->[1] eq 'p') {
3431     unshift @{$self->{token}}, $token;
3432     $token = {type => 'end tag', tag_name => 'p'};
3433     return;
3434     } elsif ({
3435     table => 1, caption => 1, td => 1, th => 1,
3436     button => 1, marquee => 1, object => 1, html => 1,
3437     }->{$_->[1]}) {
3438     last INSCOPE;
3439     }
3440     } # INSCOPE
3441    
3442     ## Step 1
3443     my $i = -1;
3444 wakaba 1.3 my $node = $self->{open_elements}->[$i];
3445 wakaba 1.1 LI: {
3446     ## Step 2
3447     if ($node->[1] eq 'dt' or $node->[1] eq 'dd') {
3448 wakaba 1.8 if ($i != -1) {
3449     $self->{parse_error}-> (type => 'end tag missing:'.
3450     $self->{open_elements}->[-1]->[1]);
3451     ## TODO: test
3452     }
3453 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3454 wakaba 1.1 last LI;
3455     }
3456    
3457     ## Step 3
3458     if (not $formatting_category->{$node->[1]} and
3459     #not $phrasing_category->{$node->[1]} and
3460     ($special_category->{$node->[1]} or
3461     $scoping_category->{$node->[1]}) and
3462     $node->[1] ne 'address' and $node->[1] ne 'div') {
3463     last LI;
3464     }
3465    
3466     ## Step 4
3467     $i--;
3468 wakaba 1.3 $node = $self->{open_elements}->[$i];
3469 wakaba 1.1 redo LI;
3470     } # LI
3471    
3472    
3473     {
3474     my $el;
3475    
3476     $el = $self->{document}->create_element_ns
3477     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3478    
3479     for my $attr_name (keys %{ $token->{attributes}}) {
3480     $el->set_attribute_ns (undef, [undef, $attr_name],
3481     $token->{attributes} ->{$attr_name}->{value});
3482     }
3483    
3484     $insert->($el);
3485 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3486 wakaba 1.1 }
3487    
3488     $token = $self->_get_next_token;
3489     return;
3490     } elsif ($token->{tag_name} eq 'plaintext') {
3491     ## has a p element in scope
3492 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
3493 wakaba 1.1 if ($_->[1] eq 'p') {
3494     unshift @{$self->{token}}, $token;
3495     $token = {type => 'end tag', tag_name => 'p'};
3496     return;
3497     } elsif ({
3498     table => 1, caption => 1, td => 1, th => 1,
3499     button => 1, marquee => 1, object => 1, html => 1,
3500     }->{$_->[1]}) {
3501     last INSCOPE;
3502     }
3503     } # INSCOPE
3504    
3505    
3506     {
3507     my $el;
3508    
3509     $el = $self->{document}->create_element_ns
3510     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3511    
3512     for my $attr_name (keys %{ $token->{attributes}}) {
3513     $el->set_attribute_ns (undef, [undef, $attr_name],
3514     $token->{attributes} ->{$attr_name}->{value});
3515     }
3516    
3517     $insert->($el);
3518 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3519 wakaba 1.1 }
3520    
3521    
3522     $self->{content_model_flag} = 'PLAINTEXT';
3523    
3524     $token = $self->_get_next_token;
3525     return;
3526     } elsif ({
3527     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
3528     }->{$token->{tag_name}}) {
3529     ## has a p element in scope
3530 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3531     my $node = $self->{open_elements}->[$_];
3532 wakaba 1.1 if ($node->[1] eq 'p') {
3533     unshift @{$self->{token}}, $token;
3534     $token = {type => 'end tag', tag_name => 'p'};
3535     return;
3536     } elsif ({
3537     table => 1, caption => 1, td => 1, th => 1,
3538     button => 1, marquee => 1, object => 1, html => 1,
3539     }->{$node->[1]}) {
3540     last INSCOPE;
3541     }
3542     } # INSCOPE
3543    
3544     ## has an element in scope
3545     my $i;
3546 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3547     my $node = $self->{open_elements}->[$_];
3548 wakaba 1.1 if ({
3549     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
3550     }->{$node->[1]}) {
3551     $i = $_;
3552     last INSCOPE;
3553     } elsif ({
3554     table => 1, caption => 1, td => 1, th => 1,
3555     button => 1, marquee => 1, object => 1, html => 1,
3556     }->{$node->[1]}) {
3557     last INSCOPE;
3558     }
3559     } # INSCOPE
3560    
3561     if (defined $i) {
3562 wakaba 1.3 $self->{parse_error}-> (type => 'in hn:hn');
3563     splice @{$self->{open_elements}}, $i;
3564 wakaba 1.1 }
3565    
3566    
3567     {
3568     my $el;
3569    
3570     $el = $self->{document}->create_element_ns
3571     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3572    
3573     for my $attr_name (keys %{ $token->{attributes}}) {
3574     $el->set_attribute_ns (undef, [undef, $attr_name],
3575     $token->{attributes} ->{$attr_name}->{value});
3576     }
3577    
3578     $insert->($el);
3579 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3580 wakaba 1.1 }
3581    
3582    
3583     $token = $self->_get_next_token;
3584     return;
3585     } elsif ($token->{tag_name} eq 'a') {
3586     AFE: for my $i (reverse 0..$#$active_formatting_elements) {
3587     my $node = $active_formatting_elements->[$i];
3588     if ($node->[1] eq 'a') {
3589 wakaba 1.3 $self->{parse_error}-> (type => 'in a:a');
3590 wakaba 1.1
3591     unshift @{$self->{token}}, $token;
3592     $token = {type => 'end tag', tag_name => 'a'};
3593     $formatting_end_tag->($token->{tag_name});
3594    
3595     AFE2: for (reverse 0..$#$active_formatting_elements) {
3596     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
3597     splice @$active_formatting_elements, $_, 1;
3598     last AFE2;
3599     }
3600     } # AFE2
3601 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3602     if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
3603     splice @{$self->{open_elements}}, $_, 1;
3604 wakaba 1.1 last OE;
3605     }
3606     } # OE
3607     last AFE;
3608     } elsif ($node->[0] eq '#marker') {
3609     last AFE;
3610     }
3611     } # AFE
3612    
3613     $reconstruct_active_formatting_elements->($insert_to_current);
3614    
3615    
3616     {
3617     my $el;
3618    
3619     $el = $self->{document}->create_element_ns
3620     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3621    
3622     for my $attr_name (keys %{ $token->{attributes}}) {
3623     $el->set_attribute_ns (undef, [undef, $attr_name],
3624     $token->{attributes} ->{$attr_name}->{value});
3625     }
3626    
3627     $insert->($el);
3628 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3629 wakaba 1.1 }
3630    
3631 wakaba 1.3 push @$active_formatting_elements, $self->{open_elements}->[-1];
3632 wakaba 1.1
3633     $token = $self->_get_next_token;
3634     return;
3635     } elsif ({
3636     b => 1, big => 1, em => 1, font => 1, i => 1,
3637 wakaba 1.19 s => 1, small => 1, strile => 1,
3638 wakaba 1.1 strong => 1, tt => 1, u => 1,
3639     }->{$token->{tag_name}}) {
3640     $reconstruct_active_formatting_elements->($insert_to_current);
3641    
3642    
3643     {
3644     my $el;
3645    
3646     $el = $self->{document}->create_element_ns
3647     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3648    
3649     for my $attr_name (keys %{ $token->{attributes}}) {
3650     $el->set_attribute_ns (undef, [undef, $attr_name],
3651     $token->{attributes} ->{$attr_name}->{value});
3652     }
3653    
3654     $insert->($el);
3655 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3656 wakaba 1.1 }
3657    
3658 wakaba 1.3 push @$active_formatting_elements, $self->{open_elements}->[-1];
3659 wakaba 1.1
3660     $token = $self->_get_next_token;
3661     return;
3662 wakaba 1.19 } elsif ($token->{tag_name} eq 'nobr') {
3663     $reconstruct_active_formatting_elements->($insert_to_current);
3664    
3665     ## has a |nobr| element in scope
3666     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3667     my $node = $self->{open_elements}->[$_];
3668     if ($node->[1] eq 'nobr') {
3669     unshift @{$self->{token}}, $token;
3670     $token = {type => 'end tag', tag_name => 'nobr'};
3671     return;
3672     } elsif ({
3673     table => 1, caption => 1, td => 1, th => 1,
3674     button => 1, marquee => 1, object => 1, html => 1,
3675     }->{$node->[1]}) {
3676     last INSCOPE;
3677     }
3678     } # INSCOPE
3679    
3680    
3681     {
3682     my $el;
3683    
3684     $el = $self->{document}->create_element_ns
3685     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3686    
3687     for my $attr_name (keys %{ $token->{attributes}}) {
3688     $el->set_attribute_ns (undef, [undef, $attr_name],
3689     $token->{attributes} ->{$attr_name}->{value});
3690     }
3691    
3692     $insert->($el);
3693     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3694     }
3695    
3696     push @$active_formatting_elements, $self->{open_elements}->[-1];
3697    
3698     $token = $self->_get_next_token;
3699     return;
3700 wakaba 1.1 } elsif ($token->{tag_name} eq 'button') {
3701     ## has a button element in scope
3702 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3703     my $node = $self->{open_elements}->[$_];
3704 wakaba 1.1 if ($node->[1] eq 'button') {
3705 wakaba 1.3 $self->{parse_error}-> (type => 'in button:button');
3706 wakaba 1.1 unshift @{$self->{token}}, $token;
3707     $token = {type => 'end tag', tag_name => 'button'};
3708     return;
3709     } elsif ({
3710     table => 1, caption => 1, td => 1, th => 1,
3711     button => 1, marquee => 1, object => 1, html => 1,
3712     }->{$node->[1]}) {
3713     last INSCOPE;
3714     }
3715     } # INSCOPE
3716    
3717     $reconstruct_active_formatting_elements->($insert_to_current);
3718    
3719    
3720     {
3721     my $el;
3722    
3723     $el = $self->{document}->create_element_ns
3724     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3725    
3726     for my $attr_name (keys %{ $token->{attributes}}) {
3727     $el->set_attribute_ns (undef, [undef, $attr_name],
3728     $token->{attributes} ->{$attr_name}->{value});
3729     }
3730    
3731     $insert->($el);
3732 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3733 wakaba 1.1 }
3734    
3735     push @$active_formatting_elements, ['#marker', ''];
3736    
3737     $token = $self->_get_next_token;
3738     return;
3739     } elsif ($token->{tag_name} eq 'marquee' or
3740     $token->{tag_name} eq 'object') {
3741     $reconstruct_active_formatting_elements->($insert_to_current);
3742    
3743    
3744     {
3745     my $el;
3746    
3747     $el = $self->{document}->create_element_ns
3748     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3749    
3750     for my $attr_name (keys %{ $token->{attributes}}) {
3751     $el->set_attribute_ns (undef, [undef, $attr_name],
3752     $token->{attributes} ->{$attr_name}->{value});
3753     }
3754    
3755     $insert->($el);
3756 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3757 wakaba 1.1 }
3758    
3759     push @$active_formatting_elements, ['#marker', ''];
3760    
3761     $token = $self->_get_next_token;
3762     return;
3763     } elsif ($token->{tag_name} eq 'xmp') {
3764     $reconstruct_active_formatting_elements->($insert_to_current);
3765    
3766    
3767     {
3768     my $el;
3769    
3770     $el = $self->{document}->create_element_ns
3771     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3772    
3773     for my $attr_name (keys %{ $token->{attributes}}) {
3774     $el->set_attribute_ns (undef, [undef, $attr_name],
3775     $token->{attributes} ->{$attr_name}->{value});
3776     }
3777    
3778     $insert->($el);
3779 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3780 wakaba 1.1 }
3781    
3782    
3783     $self->{content_model_flag} = 'CDATA';
3784 wakaba 1.13 delete $self->{escape}; # MUST
3785 wakaba 1.1
3786     $token = $self->_get_next_token;
3787     return;
3788     } elsif ($token->{tag_name} eq 'table') {
3789     ## has a p element in scope
3790 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
3791 wakaba 1.1 if ($_->[1] eq 'p') {
3792     unshift @{$self->{token}}, $token;
3793     $token = {type => 'end tag', tag_name => 'p'};
3794     return;
3795     } elsif ({
3796     table => 1, caption => 1, td => 1, th => 1,
3797     button => 1, marquee => 1, object => 1, html => 1,
3798     }->{$_->[1]}) {
3799     last INSCOPE;
3800     }
3801     } # INSCOPE
3802    
3803    
3804     {
3805     my $el;
3806    
3807     $el = $self->{document}->create_element_ns
3808     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3809    
3810     for my $attr_name (keys %{ $token->{attributes}}) {
3811     $el->set_attribute_ns (undef, [undef, $attr_name],
3812     $token->{attributes} ->{$attr_name}->{value});
3813     }
3814    
3815     $insert->($el);
3816 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3817 wakaba 1.1 }
3818    
3819    
3820 wakaba 1.3 $self->{insertion_mode} = 'in table';
3821 wakaba 1.1
3822     $token = $self->_get_next_token;
3823     return;
3824     } elsif ({
3825     area => 1, basefont => 1, bgsound => 1, br => 1,
3826     embed => 1, img => 1, param => 1, spacer => 1, wbr => 1,
3827     image => 1,
3828     }->{$token->{tag_name}}) {
3829     if ($token->{tag_name} eq 'image') {
3830 wakaba 1.3 $self->{parse_error}-> (type => 'image');
3831 wakaba 1.1 $token->{tag_name} = 'img';
3832     }
3833    
3834     $reconstruct_active_formatting_elements->($insert_to_current);
3835    
3836    
3837     {
3838     my $el;
3839    
3840     $el = $self->{document}->create_element_ns
3841     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3842    
3843     for my $attr_name (keys %{ $token->{attributes}}) {
3844     $el->set_attribute_ns (undef, [undef, $attr_name],
3845     $token->{attributes} ->{$attr_name}->{value});
3846     }
3847    
3848     $insert->($el);
3849 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3850 wakaba 1.1 }
3851    
3852 wakaba 1.3 pop @{$self->{open_elements}};
3853 wakaba 1.1
3854     $token = $self->_get_next_token;
3855     return;
3856     } elsif ($token->{tag_name} eq 'hr') {
3857     ## has a p element in scope
3858 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
3859 wakaba 1.1 if ($_->[1] eq 'p') {
3860     unshift @{$self->{token}}, $token;
3861     $token = {type => 'end tag', tag_name => 'p'};
3862     return;
3863     } elsif ({
3864     table => 1, caption => 1, td => 1, th => 1,
3865     button => 1, marquee => 1, object => 1, html => 1,
3866     }->{$_->[1]}) {
3867     last INSCOPE;
3868     }
3869     } # INSCOPE
3870    
3871    
3872     {
3873     my $el;
3874    
3875     $el = $self->{document}->create_element_ns
3876     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3877    
3878     for my $attr_name (keys %{ $token->{attributes}}) {
3879     $el->set_attribute_ns (undef, [undef, $attr_name],
3880     $token->{attributes} ->{$attr_name}->{value});
3881     }
3882    
3883     $insert->($el);
3884 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3885 wakaba 1.1 }
3886    
3887 wakaba 1.3 pop @{$self->{open_elements}};
3888 wakaba 1.1
3889     $token = $self->_get_next_token;
3890     return;
3891     } elsif ($token->{tag_name} eq 'input') {
3892     $reconstruct_active_formatting_elements->($insert_to_current);
3893    
3894    
3895     {
3896     my $el;
3897    
3898     $el = $self->{document}->create_element_ns
3899     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3900    
3901     for my $attr_name (keys %{ $token->{attributes}}) {
3902     $el->set_attribute_ns (undef, [undef, $attr_name],
3903     $token->{attributes} ->{$attr_name}->{value});
3904     }
3905    
3906     $insert->($el);
3907 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3908 wakaba 1.1 }
3909    
3910 wakaba 1.3 ## TODO: associate with $self->{form_element} if defined
3911     pop @{$self->{open_elements}};
3912 wakaba 1.1
3913     $token = $self->_get_next_token;
3914     return;
3915     } elsif ($token->{tag_name} eq 'isindex') {
3916 wakaba 1.3 $self->{parse_error}-> (type => 'isindex');
3917 wakaba 1.1
3918 wakaba 1.3 if (defined $self->{form_element}) {
3919 wakaba 1.1 ## Ignore the token
3920     $token = $self->_get_next_token;
3921     return;
3922     } else {
3923     my $at = $token->{attributes};
3924     $at->{name} = {name => 'name', value => 'isindex'};
3925     my @tokens = (
3926     {type => 'start tag', tag_name => 'form'},
3927     {type => 'start tag', tag_name => 'hr'},
3928     {type => 'start tag', tag_name => 'p'},
3929     {type => 'start tag', tag_name => 'label'},
3930     {type => 'character',
3931     data => 'This is a searchable index. Insert your search keywords here: '}, # SHOULD
3932     ## TODO: make this configurable
3933     {type => 'start tag', tag_name => 'input', attributes => $at},
3934     #{type => 'character', data => ''}, # SHOULD
3935     {type => 'end tag', tag_name => 'label'},
3936     {type => 'end tag', tag_name => 'p'},
3937     {type => 'start tag', tag_name => 'hr'},
3938     {type => 'end tag', tag_name => 'form'},
3939     );
3940     $token = shift @tokens;
3941     unshift @{$self->{token}}, (@tokens);
3942     return;
3943     }
3944     } elsif ({
3945     textarea => 1,
3946 wakaba 1.5 iframe => 1,
3947 wakaba 1.1 noembed => 1,
3948     noframes => 1,
3949     noscript => 0, ## TODO: 1 if scripting is enabled
3950     }->{$token->{tag_name}}) {
3951     my $tag_name = $token->{tag_name};
3952     my $el;
3953    
3954     $el = $self->{document}->create_element_ns
3955     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3956    
3957     for my $attr_name (keys %{ $token->{attributes}}) {
3958     $el->set_attribute_ns (undef, [undef, $attr_name],
3959     $token->{attributes} ->{$attr_name}->{value});
3960     }
3961    
3962    
3963     if ($token->{tag_name} eq 'textarea') {
3964 wakaba 1.3 ## TODO: $self->{form_element} if defined
3965 wakaba 1.1 $self->{content_model_flag} = 'RCDATA';
3966     } else {
3967     $self->{content_model_flag} = 'CDATA';
3968     }
3969 wakaba 1.13 delete $self->{escape}; # MUST
3970 wakaba 1.1
3971     $insert->($el);
3972    
3973     my $text = '';
3974 wakaba 1.8 if ($token->{tag_name} eq 'textarea') {
3975     $token = $self->_get_next_token;
3976     if ($token->{type} eq 'character') {
3977     $token->{data} =~ s/^\x0A//;
3978     unless (length $token->{data}) {
3979     $token = $self->_get_next_token;
3980     }
3981     }
3982     } else {
3983     $token = $self->_get_next_token;
3984     }
3985 wakaba 1.1 while ($token->{type} eq 'character') {
3986     $text .= $token->{data};
3987     $token = $self->_get_next_token;
3988     }
3989     if (length $text) {
3990     $el->manakai_append_text ($text);
3991     }
3992    
3993     $self->{content_model_flag} = 'PCDATA';
3994    
3995     if ($token->{type} eq 'end tag' and
3996     $token->{tag_name} eq $tag_name) {
3997     ## Ignore the token
3998     } else {
3999 wakaba 1.10 if ($token->{tag_name} eq 'textarea') {
4000     $self->{parse_error}-> (type => 'in RCDATA:#'.$token->{type});
4001     } else {
4002 wakaba 1.3 $self->{parse_error}-> (type => 'in CDATA:#'.$token->{type});
4003     }
4004 wakaba 1.1 ## ISSUE: And ignore?
4005     }
4006     $token = $self->_get_next_token;
4007     return;
4008     } elsif ($token->{tag_name} eq 'select') {
4009     $reconstruct_active_formatting_elements->($insert_to_current);
4010    
4011    
4012     {
4013     my $el;
4014    
4015     $el = $self->{document}->create_element_ns
4016     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
4017    
4018     for my $attr_name (keys %{ $token->{attributes}}) {
4019     $el->set_attribute_ns (undef, [undef, $attr_name],
4020     $token->{attributes} ->{$attr_name}->{value});
4021     }
4022    
4023     $insert->($el);
4024 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
4025 wakaba 1.1 }
4026    
4027    
4028 wakaba 1.3 $self->{insertion_mode} = 'in select';
4029 wakaba 1.1 $token = $self->_get_next_token;
4030     return;
4031     } elsif ({
4032     caption => 1, col => 1, colgroup => 1, frame => 1,
4033     frameset => 1, head => 1, option => 1, optgroup => 1,
4034     tbody => 1, td => 1, tfoot => 1, th => 1,
4035     thead => 1, tr => 1,
4036     }->{$token->{tag_name}}) {
4037 wakaba 1.3 $self->{parse_error}-> (type => 'in body:'.$token->{tag_name});
4038 wakaba 1.1 ## Ignore the token
4039     $token = $self->_get_next_token;
4040     return;
4041    
4042     ## ISSUE: An issue on HTML5 new elements in the spec.
4043     } else {
4044     $reconstruct_active_formatting_elements->($insert_to_current);
4045    
4046    
4047     {
4048     my $el;
4049    
4050     $el = $self->{document}->create_element_ns
4051     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
4052    
4053     for my $attr_name (keys %{ $token->{attributes}}) {
4054     $el->set_attribute_ns (undef, [undef, $attr_name],
4055     $token->{attributes} ->{$attr_name}->{value});
4056     }
4057    
4058     $insert->($el);
4059 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
4060 wakaba 1.1 }
4061    
4062    
4063     $token = $self->_get_next_token;
4064     return;
4065     }
4066     } elsif ($token->{type} eq 'end tag') {
4067     if ($token->{tag_name} eq 'body') {
4068 wakaba 1.20 if (@{$self->{open_elements}} > 1 and
4069     $self->{open_elements}->[1]->[1] eq 'body') {
4070     for (@{$self->{open_elements}}) {
4071     unless ({
4072     dd => 1, dt => 1, li => 1, p => 1, td => 1,
4073     th => 1, tr => 1, body => 1, html => 1,
4074     }->{$_->[1]}) {
4075     $self->{parse_error}-> (type => 'not closed:'.$_->[1]);
4076     }
4077 wakaba 1.1 }
4078 wakaba 1.20
4079 wakaba 1.3 $self->{insertion_mode} = 'after body';
4080 wakaba 1.1 $token = $self->_get_next_token;
4081     return;
4082     } else {
4083 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4084 wakaba 1.1 ## Ignore the token
4085     $token = $self->_get_next_token;
4086     return;
4087     }
4088     } elsif ($token->{tag_name} eq 'html') {
4089 wakaba 1.3 if (@{$self->{open_elements}} > 1 and $self->{open_elements}->[1]->[1] eq 'body') {
4090 wakaba 1.1 ## ISSUE: There is an issue in the spec.
4091 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'body') {
4092     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[1]->[1]);
4093 wakaba 1.1 }
4094 wakaba 1.3 $self->{insertion_mode} = 'after body';
4095 wakaba 1.1 ## reprocess
4096     return;
4097     } else {
4098 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4099 wakaba 1.1 ## Ignore the token
4100     $token = $self->_get_next_token;
4101     return;
4102     }
4103     } elsif ({
4104     address => 1, blockquote => 1, center => 1, dir => 1,
4105     div => 1, dl => 1, fieldset => 1, listing => 1,
4106     menu => 1, ol => 1, pre => 1, ul => 1,
4107     p => 1,
4108     dd => 1, dt => 1, li => 1,
4109     button => 1, marquee => 1, object => 1,
4110     }->{$token->{tag_name}}) {
4111     ## has an element in scope
4112     my $i;
4113 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4114     my $node = $self->{open_elements}->[$_];
4115 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4116     ## generate implied end tags
4117     if ({
4118     dd => ($token->{tag_name} ne 'dd'),
4119     dt => ($token->{tag_name} ne 'dt'),
4120     li => ($token->{tag_name} ne 'li'),
4121     p => ($token->{tag_name} ne 'p'),
4122     td => 1, th => 1, tr => 1,
4123 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4124 wakaba 1.1 unshift @{$self->{token}}, $token;
4125     $token = {type => 'end tag',
4126 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4127 wakaba 1.1 return;
4128     }
4129     $i = $_;
4130     last INSCOPE unless $token->{tag_name} eq 'p';
4131     } elsif ({
4132     table => 1, caption => 1, td => 1, th => 1,
4133     button => 1, marquee => 1, object => 1, html => 1,
4134     }->{$node->[1]}) {
4135     last INSCOPE;
4136     }
4137     } # INSCOPE
4138    
4139 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
4140     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4141 wakaba 1.1 }
4142    
4143 wakaba 1.3 splice @{$self->{open_elements}}, $i if defined $i;
4144 wakaba 1.1 $clear_up_to_marker->()
4145     if {
4146     button => 1, marquee => 1, object => 1,
4147     }->{$token->{tag_name}};
4148 wakaba 1.12 $token = $self->_get_next_token;
4149     return;
4150     } elsif ($token->{tag_name} eq 'form') {
4151     ## has an element in scope
4152     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4153     my $node = $self->{open_elements}->[$_];
4154     if ($node->[1] eq $token->{tag_name}) {
4155     ## generate implied end tags
4156     if ({
4157     dd => 1, dt => 1, li => 1, p => 1,
4158     td => 1, th => 1, tr => 1,
4159     }->{$self->{open_elements}->[-1]->[1]}) {
4160     unshift @{$self->{token}}, $token;
4161     $token = {type => 'end tag',
4162     tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4163     return;
4164     }
4165     last INSCOPE;
4166     } elsif ({
4167     table => 1, caption => 1, td => 1, th => 1,
4168     button => 1, marquee => 1, object => 1, html => 1,
4169     }->{$node->[1]}) {
4170     last INSCOPE;
4171     }
4172     } # INSCOPE
4173    
4174     if ($self->{open_elements}->[-1]->[1] eq $token->{tag_name}) {
4175     pop @{$self->{open_elements}};
4176     } else {
4177     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4178     }
4179    
4180     undef $self->{form_element};
4181 wakaba 1.1 $token = $self->_get_next_token;
4182     return;
4183     } elsif ({
4184     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
4185     }->{$token->{tag_name}}) {
4186     ## has an element in scope
4187     my $i;
4188 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4189     my $node = $self->{open_elements}->[$_];
4190 wakaba 1.1 if ({
4191     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
4192     }->{$node->[1]}) {
4193     ## generate implied end tags
4194     if ({
4195     dd => 1, dt => 1, li => 1, p => 1,
4196     td => 1, th => 1, tr => 1,
4197 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4198 wakaba 1.1 unshift @{$self->{token}}, $token;
4199     $token = {type => 'end tag',
4200 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4201 wakaba 1.1 return;
4202     }
4203     $i = $_;
4204     last INSCOPE;
4205     } elsif ({
4206     table => 1, caption => 1, td => 1, th => 1,
4207     button => 1, marquee => 1, object => 1, html => 1,
4208     }->{$node->[1]}) {
4209     last INSCOPE;
4210     }
4211     } # INSCOPE
4212    
4213 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
4214     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4215 wakaba 1.1 }
4216    
4217 wakaba 1.3 splice @{$self->{open_elements}}, $i if defined $i;
4218 wakaba 1.1 $token = $self->_get_next_token;
4219     return;
4220     } elsif ({
4221     a => 1,
4222     b => 1, big => 1, em => 1, font => 1, i => 1,
4223     nobr => 1, s => 1, small => 1, strile => 1,
4224     strong => 1, tt => 1, u => 1,
4225     }->{$token->{tag_name}}) {
4226     $formatting_end_tag->($token->{tag_name});
4227 wakaba 1.8 ## TODO: <http://html5.org/tools/web-apps-tracker?from=883&to=884>
4228 wakaba 1.1 return;
4229     } elsif ({
4230     caption => 1, col => 1, colgroup => 1, frame => 1,
4231     frameset => 1, head => 1, option => 1, optgroup => 1,
4232     tbody => 1, td => 1, tfoot => 1, th => 1,
4233     thead => 1, tr => 1,
4234     area => 1, basefont => 1, bgsound => 1, br => 1,
4235     embed => 1, hr => 1, iframe => 1, image => 1,
4236 wakaba 1.5 img => 1, input => 1, isindex => 1, noembed => 1,
4237 wakaba 1.1 noframes => 1, param => 1, select => 1, spacer => 1,
4238     table => 1, textarea => 1, wbr => 1,
4239     noscript => 0, ## TODO: if scripting is enabled
4240     }->{$token->{tag_name}}) {
4241 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4242 wakaba 1.1 ## Ignore the token
4243     $token = $self->_get_next_token;
4244     return;
4245    
4246     ## ISSUE: Issue on HTML5 new elements in spec
4247    
4248     } else {
4249     ## Step 1
4250     my $node_i = -1;
4251 wakaba 1.3 my $node = $self->{open_elements}->[$node_i];
4252 wakaba 1.1
4253     ## Step 2
4254     S2: {
4255     if ($node->[1] eq $token->{tag_name}) {
4256     ## Step 1
4257     ## generate implied end tags
4258     if ({
4259     dd => 1, dt => 1, li => 1, p => 1,
4260     td => 1, th => 1, tr => 1,
4261 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4262 wakaba 1.1 unshift @{$self->{token}}, $token;
4263     $token = {type => 'end tag',
4264 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4265 wakaba 1.1 return;
4266     }
4267    
4268     ## Step 2
4269 wakaba 1.3 if ($token->{tag_name} ne $self->{open_elements}->[-1]->[1]) {
4270     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4271 wakaba 1.1 }
4272    
4273     ## Step 3
4274 wakaba 1.3 splice @{$self->{open_elements}}, $node_i;
4275    
4276     $token = $self->_get_next_token;
4277 wakaba 1.1 last S2;
4278     } else {
4279     ## Step 3
4280     if (not $formatting_category->{$node->[1]} and
4281     #not $phrasing_category->{$node->[1]} and
4282     ($special_category->{$node->[1]} or
4283     $scoping_category->{$node->[1]})) {
4284 wakaba 1.3 $self->{parse_error}-> (type => 'not closed:'.$node->[1]);
4285 wakaba 1.1 ## Ignore the token
4286     $token = $self->_get_next_token;
4287     last S2;
4288     }
4289     }
4290    
4291     ## Step 4
4292     $node_i--;
4293 wakaba 1.3 $node = $self->{open_elements}->[$node_i];
4294 wakaba 1.1
4295     ## Step 5;
4296     redo S2;
4297     } # S2
4298 wakaba 1.3 return;
4299 wakaba 1.1 }
4300     }
4301     }; # $in_body
4302    
4303     B: {
4304 wakaba 1.3 if ($phase eq 'main') {
4305 wakaba 1.1 if ($token->{type} eq 'DOCTYPE') {
4306 wakaba 1.3 $self->{parse_error}-> (type => 'in html:#DOCTYPE');
4307 wakaba 1.1 ## Ignore the token
4308     ## Stay in the phase
4309     $token = $self->_get_next_token;
4310     redo B;
4311     } elsif ($token->{type} eq 'start tag' and
4312     $token->{tag_name} eq 'html') {
4313     ## TODO: unless it is the first start tag token, parse-error
4314 wakaba 1.3 my $top_el = $self->{open_elements}->[0]->[0];
4315 wakaba 1.1 for my $attr_name (keys %{$token->{attributes}}) {
4316     unless ($top_el->has_attribute_ns (undef, $attr_name)) {
4317     $top_el->set_attribute_ns
4318     (undef, [undef, $attr_name],
4319     $token->{attributes}->{$attr_name}->{value});
4320     }
4321     }
4322     $token = $self->_get_next_token;
4323     redo B;
4324     } elsif ($token->{type} eq 'end-of-file') {
4325     ## Generate implied end tags
4326     if ({
4327     dd => 1, dt => 1, li => 1, p => 1, td => 1, th => 1, tr => 1,
4328 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4329 wakaba 1.1 unshift @{$self->{token}}, $token;
4330 wakaba 1.3 $token = {type => 'end tag', tag_name => $self->{open_elements}->[-1]->[1]};
4331 wakaba 1.1 redo B;
4332     }
4333    
4334 wakaba 1.3 if (@{$self->{open_elements}} > 2 or
4335     (@{$self->{open_elements}} == 2 and $self->{open_elements}->[1]->[1] ne 'body')) {
4336     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4337     } elsif (defined $self->{inner_html_node} and
4338     @{$self->{open_elements}} > 1 and
4339     $self->{open_elements}->[1]->[1] ne 'body') {
4340     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4341 wakaba 1.1 }
4342    
4343     ## Stop parsing
4344     last B;
4345    
4346     ## ISSUE: There is an issue in the spec.
4347     } else {
4348 wakaba 1.3 if ($self->{insertion_mode} eq 'before head') {
4349 wakaba 1.1 if ($token->{type} eq 'character') {
4350     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4351 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4352 wakaba 1.1 unless (length $token->{data}) {
4353     $token = $self->_get_next_token;
4354     redo B;
4355     }
4356     }
4357     ## As if <head>
4358    
4359 wakaba 1.3 $self->{head_element} = $self->{document}->create_element_ns
4360 wakaba 1.1 (q<http://www.w3.org/1999/xhtml>, [undef, 'head']);
4361    
4362 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4363     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
4364     $self->{insertion_mode} = 'in head';
4365 wakaba 1.1 ## reprocess
4366     redo B;
4367     } elsif ($token->{type} eq 'comment') {
4368     my $comment = $self->{document}->create_comment ($token->{data});
4369 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4370 wakaba 1.1 $token = $self->_get_next_token;
4371     redo B;
4372     } elsif ($token->{type} eq 'start tag') {
4373     my $attr = $token->{tag_name} eq 'head' ? $token->{attributes} : {};
4374    
4375 wakaba 1.3 $self->{head_element} = $self->{document}->create_element_ns
4376 wakaba 1.1 (q<http://www.w3.org/1999/xhtml>, [undef, 'head']);
4377    
4378     for my $attr_name (keys %{ $attr}) {
4379 wakaba 1.3 $self->{head_element}->set_attribute_ns (undef, [undef, $attr_name],
4380 wakaba 1.1 $attr ->{$attr_name}->{value});
4381     }
4382    
4383 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4384     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
4385     $self->{insertion_mode} = 'in head';
4386 wakaba 1.1 if ($token->{tag_name} eq 'head') {
4387     $token = $self->_get_next_token;
4388     #} elsif ({
4389     # base => 1, link => 1, meta => 1,
4390     # script => 1, style => 1, title => 1,
4391     # }->{$token->{tag_name}}) {
4392     # ## reprocess
4393     } else {
4394     ## reprocess
4395     }
4396     redo B;
4397     } elsif ($token->{type} eq 'end tag') {
4398 wakaba 1.21 if ({head => 1, body => 1, html => 1}->{$token->{tag_name}}) {
4399 wakaba 1.1 ## As if <head>
4400    
4401 wakaba 1.3 $self->{head_element} = $self->{document}->create_element_ns
4402 wakaba 1.1 (q<http://www.w3.org/1999/xhtml>, [undef, 'head']);
4403    
4404 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4405     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
4406     $self->{insertion_mode} = 'in head';
4407 wakaba 1.1 ## reprocess
4408     redo B;
4409     } else {
4410 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4411 wakaba 1.21 ## Ignore the token ## ISSUE: An issue in the spec.
4412 wakaba 1.1 $token = $self->_get_next_token;
4413     redo B;
4414     }
4415     } else {
4416     die "$0: $token->{type}: Unknown type";
4417     }
4418 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in head') {
4419 wakaba 1.1 if ($token->{type} eq 'character') {
4420     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4421 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4422 wakaba 1.1 unless (length $token->{data}) {
4423     $token = $self->_get_next_token;
4424     redo B;
4425     }
4426     }
4427    
4428     #
4429     } elsif ($token->{type} eq 'comment') {
4430     my $comment = $self->{document}->create_comment ($token->{data});
4431 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4432 wakaba 1.1 $token = $self->_get_next_token;
4433     redo B;
4434     } elsif ($token->{type} eq 'start tag') {
4435     if ($token->{tag_name} eq 'title') {
4436     ## NOTE: There is an "as if in head" code clone
4437     my $title_el;
4438    
4439     $title_el = $self->{document}->create_element_ns
4440     (q<http://www.w3.org/1999/xhtml>, [undef, 'title']);
4441    
4442     for my $attr_name (keys %{ $token->{attributes}}) {
4443     $title_el->set_attribute_ns (undef, [undef, $attr_name],
4444     $token->{attributes} ->{$attr_name}->{value});
4445     }
4446    
4447 wakaba 1.3 (defined $self->{head_element} ? $self->{head_element} : $self->{open_elements}->[-1]->[0])
4448 wakaba 1.1 ->append_child ($title_el);
4449     $self->{content_model_flag} = 'RCDATA';
4450 wakaba 1.13 delete $self->{escape}; # MUST
4451 wakaba 1.1
4452     my $text = '';
4453     $token = $self->_get_next_token;
4454     while ($token->{type} eq 'character') {
4455     $text .= $token->{data};
4456     $token = $self->_get_next_token;
4457     }
4458     if (length $text) {
4459     $title_el->manakai_append_text ($text);
4460     }
4461    
4462     $self->{content_model_flag} = 'PCDATA';
4463    
4464     if ($token->{type} eq 'end tag' and
4465     $token->{tag_name} eq 'title') {
4466     ## Ignore the token
4467     } else {
4468 wakaba 1.3 $self->{parse_error}-> (type => 'in RCDATA:#'.$token->{type});
4469 wakaba 1.1 ## ISSUE: And ignore?
4470     }
4471     $token = $self->_get_next_token;
4472     redo B;
4473     } elsif ($token->{tag_name} eq 'style') {
4474     $style_start_tag->();
4475     redo B;
4476     } elsif ($token->{tag_name} eq 'script') {
4477     $script_start_tag->();
4478     redo B;
4479     } elsif ({base => 1, link => 1, meta => 1}->{$token->{tag_name}}) {
4480     ## NOTE: There are "as if in head" code clones
4481     my $el;
4482    
4483     $el = $self->{document}->create_element_ns
4484     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
4485    
4486     for my $attr_name (keys %{ $token->{attributes}}) {
4487     $el->set_attribute_ns (undef, [undef, $attr_name],
4488     $token->{attributes} ->{$attr_name}->{value});
4489     }
4490    
4491 wakaba 1.3 (defined $self->{head_element} ? $self->{head_element} : $self->{open_elements}->[-1]->[0])
4492 wakaba 1.1 ->append_child ($el);
4493    
4494     $token = $self->_get_next_token;
4495     redo B;
4496     } elsif ($token->{tag_name} eq 'head') {
4497 wakaba 1.3 $self->{parse_error}-> (type => 'in head:head');
4498 wakaba 1.1 ## Ignore the token
4499     $token = $self->_get_next_token;
4500     redo B;
4501     } else {
4502     #
4503     }
4504     } elsif ($token->{type} eq 'end tag') {
4505     if ($token->{tag_name} eq 'head') {
4506 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'head') {
4507     pop @{$self->{open_elements}};
4508 wakaba 1.1 } else {
4509 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:head');
4510 wakaba 1.1 }
4511 wakaba 1.3 $self->{insertion_mode} = 'after head';
4512 wakaba 1.1 $token = $self->_get_next_token;
4513     redo B;
4514 wakaba 1.21 } elsif ($token->{tag_name} eq 'body' or
4515     $token->{tag_name} eq 'html') {
4516 wakaba 1.1 #
4517     } else {
4518 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4519 wakaba 1.1 ## Ignore the token
4520     $token = $self->_get_next_token;
4521     redo B;
4522     }
4523     } else {
4524     #
4525     }
4526    
4527 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'head') {
4528 wakaba 1.1 ## As if </head>
4529 wakaba 1.3 pop @{$self->{open_elements}};
4530 wakaba 1.1 }
4531 wakaba 1.3 $self->{insertion_mode} = 'after head';
4532 wakaba 1.1 ## reprocess
4533     redo B;
4534    
4535     ## ISSUE: An issue in the spec.
4536 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after head') {
4537 wakaba 1.1 if ($token->{type} eq 'character') {
4538     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4539 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4540 wakaba 1.1 unless (length $token->{data}) {
4541     $token = $self->_get_next_token;
4542     redo B;
4543     }
4544     }
4545    
4546     #
4547     } elsif ($token->{type} eq 'comment') {
4548     my $comment = $self->{document}->create_comment ($token->{data});
4549 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4550 wakaba 1.1 $token = $self->_get_next_token;
4551     redo B;
4552     } elsif ($token->{type} eq 'start tag') {
4553     if ($token->{tag_name} eq 'body') {
4554    
4555     {
4556     my $el;
4557    
4558     $el = $self->{document}->create_element_ns
4559     (q<http://www.w3.org/1999/xhtml>, [undef, 'body']);
4560    
4561     for my $attr_name (keys %{ $token->{attributes}}) {
4562     $el->set_attribute_ns (undef, [undef, $attr_name],
4563     $token->{attributes} ->{$attr_name}->{value});
4564     }
4565    
4566 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
4567     push @{$self->{open_elements}}, [$el, 'body'];
4568 wakaba 1.1 }
4569    
4570 wakaba 1.3 $self->{insertion_mode} = 'in body';
4571 wakaba 1.1 $token = $self->_get_next_token;
4572     redo B;
4573     } elsif ($token->{tag_name} eq 'frameset') {
4574    
4575     {
4576     my $el;
4577    
4578     $el = $self->{document}->create_element_ns
4579     (q<http://www.w3.org/1999/xhtml>, [undef, 'frameset']);
4580    
4581     for my $attr_name (keys %{ $token->{attributes}}) {
4582     $el->set_attribute_ns (undef, [undef, $attr_name],
4583     $token->{attributes} ->{$attr_name}->{value});
4584     }
4585    
4586 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
4587     push @{$self->{open_elements}}, [$el, 'frameset'];
4588 wakaba 1.1 }
4589    
4590 wakaba 1.3 $self->{insertion_mode} = 'in frameset';
4591 wakaba 1.1 $token = $self->_get_next_token;
4592     redo B;
4593     } elsif ({
4594     base => 1, link => 1, meta => 1,
4595 wakaba 1.3 script => 1, style => 1, title => 1,
4596 wakaba 1.1 }->{$token->{tag_name}}) {
4597 wakaba 1.3 $self->{parse_error}-> (type => 'after head:'.$token->{tag_name});
4598     $self->{insertion_mode} = 'in head';
4599 wakaba 1.1 ## reprocess
4600     redo B;
4601     } else {
4602     #
4603     }
4604     } else {
4605     #
4606     }
4607    
4608     ## As if <body>
4609    
4610     {
4611     my $el;
4612    
4613     $el = $self->{document}->create_element_ns
4614     (q<http://www.w3.org/1999/xhtml>, [undef, 'body']);
4615    
4616 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
4617     push @{$self->{open_elements}}, [$el, 'body'];
4618 wakaba 1.1 }
4619    
4620 wakaba 1.3 $self->{insertion_mode} = 'in body';
4621 wakaba 1.1 ## reprocess
4622     redo B;
4623 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in body') {
4624 wakaba 1.1 if ($token->{type} eq 'character') {
4625     ## NOTE: There is a code clone of "character in body".
4626     $reconstruct_active_formatting_elements->($insert_to_current);
4627    
4628 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4629 wakaba 1.1
4630     $token = $self->_get_next_token;
4631     redo B;
4632     } elsif ($token->{type} eq 'comment') {
4633     ## NOTE: There is a code clone of "comment in body".
4634     my $comment = $self->{document}->create_comment ($token->{data});
4635 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4636 wakaba 1.1 $token = $self->_get_next_token;
4637     redo B;
4638     } else {
4639     $in_body->($insert_to_current);
4640     redo B;
4641     }
4642 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in table') {
4643 wakaba 1.1 if ($token->{type} eq 'character') {
4644     ## NOTE: There are "character in table" code clones.
4645     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4646 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4647 wakaba 1.1
4648     unless (length $token->{data}) {
4649     $token = $self->_get_next_token;
4650     redo B;
4651     }
4652     }
4653    
4654 wakaba 1.3 $self->{parse_error}-> (type => 'in table:#character');
4655    
4656 wakaba 1.1 ## As if in body, but insert into the foster parent element.
4657     ## ISSUE: The spec says "whenever a node would be inserted
4658     ## into the current node", but appending characters might not
4659     ## result in a new Text node being inserted.
4660     $reconstruct_active_formatting_elements->($insert_to_foster);
4661    
4662     if ({
4663     table => 1, tbody => 1, tfoot => 1,
4664     thead => 1, tr => 1,
4665 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4666 wakaba 1.1 # MUST
4667     my $foster_parent_element;
4668     my $next_sibling;
4669     my $prev_sibling;
4670 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
4671     if ($self->{open_elements}->[$_]->[1] eq 'table') {
4672     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
4673 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
4674     $foster_parent_element = $parent;
4675 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
4676 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
4677     } else {
4678 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
4679 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
4680     }
4681     last OE;
4682     }
4683     } # OE
4684 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
4685 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
4686     unless defined $foster_parent_element;
4687     if (defined $prev_sibling and
4688     $prev_sibling->node_type == 3) {
4689     $prev_sibling->manakai_append_text ($token->{data});
4690     } else {
4691     $foster_parent_element->insert_before
4692     ($self->{document}->create_text_node ($token->{data}),
4693     $next_sibling);
4694     }
4695     } else {
4696 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4697 wakaba 1.1 }
4698    
4699     $token = $self->_get_next_token;
4700     redo B;
4701     } elsif ($token->{type} eq 'comment') {
4702     my $comment = $self->{document}->create_comment ($token->{data});
4703 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4704 wakaba 1.1 $token = $self->_get_next_token;
4705     redo B;
4706     } elsif ($token->{type} eq 'start tag') {
4707     if ({
4708     caption => 1,
4709     colgroup => 1,
4710     tbody => 1, tfoot => 1, thead => 1,
4711     }->{$token->{tag_name}}) {
4712     ## Clear back to table context
4713 wakaba 1.3 while ($self->{open_elements}->[-1]->[1] ne 'table' and
4714     $self->{open_elements}->[-1]->[1] ne 'html') {
4715     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4716     pop @{$self->{open_elements}};
4717 wakaba 1.1 }
4718    
4719     push @$active_formatting_elements, ['#marker', '']
4720     if $token->{tag_name} eq 'caption';
4721    
4722    
4723     {
4724     my $el;
4725    
4726     $el = $self->{document}->create_element_ns
4727     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
4728    
4729     for my $attr_name (keys %{ $token->{attributes}}) {
4730     $el->set_attribute_ns (undef, [undef, $attr_name],
4731     $token->{attributes} ->{$attr_name}->{value});
4732     }
4733    
4734 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
4735     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
4736 wakaba 1.1 }
4737    
4738 wakaba 1.3 $self->{insertion_mode} = {
4739 wakaba 1.1 caption => 'in caption',
4740     colgroup => 'in column group',
4741     tbody => 'in table body',
4742     tfoot => 'in table body',
4743     thead => 'in table body',
4744     }->{$token->{tag_name}};
4745     $token = $self->_get_next_token;
4746     redo B;
4747     } elsif ({
4748     col => 1,
4749     td => 1, th => 1, tr => 1,
4750     }->{$token->{tag_name}}) {
4751     ## Clear back to table context
4752 wakaba 1.3 while ($self->{open_elements}->[-1]->[1] ne 'table' and
4753     $self->{open_elements}->[-1]->[1] ne 'html') {
4754     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4755     pop @{$self->{open_elements}};
4756 wakaba 1.1 }
4757    
4758    
4759     {
4760     my $el;
4761    
4762     $el = $self->{document}->create_element_ns
4763     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name} eq 'col' ? 'colgroup' : 'tbody']);
4764    
4765 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
4766     push @{$self->{open_elements}}, [$el, $token->{tag_name} eq 'col' ? 'colgroup' : 'tbody'];
4767 wakaba 1.1 }
4768    
4769 wakaba 1.3 $self->{insertion_mode} = $token->{tag_name} eq 'col'
4770 wakaba 1.1 ? 'in column group' : 'in table body';
4771     ## reprocess
4772     redo B;
4773     } elsif ($token->{tag_name} eq 'table') {
4774     ## NOTE: There are code clones for this "table in table"
4775 wakaba 1.3 $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4776 wakaba 1.1
4777     ## As if </table>
4778     ## have a table element in table scope
4779     my $i;
4780 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4781     my $node = $self->{open_elements}->[$_];
4782 wakaba 1.1 if ($node->[1] eq 'table') {
4783     $i = $_;
4784     last INSCOPE;
4785     } elsif ({
4786     table => 1, html => 1,
4787     }->{$node->[1]}) {
4788     last INSCOPE;
4789     }
4790     } # INSCOPE
4791     unless (defined $i) {
4792 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:table');
4793 wakaba 1.1 ## Ignore tokens </table><table>
4794     $token = $self->_get_next_token;
4795     redo B;
4796     }
4797    
4798     ## generate implied end tags
4799     if ({
4800     dd => 1, dt => 1, li => 1, p => 1,
4801     td => 1, th => 1, tr => 1,
4802 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4803 wakaba 1.1 unshift @{$self->{token}}, $token; # <table>
4804     $token = {type => 'end tag', tag_name => 'table'};
4805     unshift @{$self->{token}}, $token;
4806     $token = {type => 'end tag',
4807 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4808 wakaba 1.1 redo B;
4809     }
4810    
4811 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
4812     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4813 wakaba 1.1 }
4814    
4815 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4816 wakaba 1.1
4817 wakaba 1.3 $self->_reset_insertion_mode;
4818 wakaba 1.1
4819     ## reprocess
4820     redo B;
4821     } else {
4822     #
4823     }
4824     } elsif ($token->{type} eq 'end tag') {
4825     if ($token->{tag_name} eq 'table') {
4826     ## have a table element in table scope
4827     my $i;
4828 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4829     my $node = $self->{open_elements}->[$_];
4830 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4831     $i = $_;
4832     last INSCOPE;
4833     } elsif ({
4834     table => 1, html => 1,
4835     }->{$node->[1]}) {
4836     last INSCOPE;
4837     }
4838     } # INSCOPE
4839     unless (defined $i) {
4840 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4841 wakaba 1.1 ## Ignore the token
4842     $token = $self->_get_next_token;
4843     redo B;
4844     }
4845    
4846     ## generate implied end tags
4847     if ({
4848     dd => 1, dt => 1, li => 1, p => 1,
4849     td => 1, th => 1, tr => 1,
4850 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4851 wakaba 1.1 unshift @{$self->{token}}, $token;
4852     $token = {type => 'end tag',
4853 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4854 wakaba 1.1 redo B;
4855     }
4856    
4857 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
4858     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4859 wakaba 1.1 }
4860    
4861 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4862 wakaba 1.1
4863 wakaba 1.3 $self->_reset_insertion_mode;
4864 wakaba 1.1
4865     $token = $self->_get_next_token;
4866     redo B;
4867     } elsif ({
4868     body => 1, caption => 1, col => 1, colgroup => 1,
4869     html => 1, tbody => 1, td => 1, tfoot => 1, th => 1,
4870     thead => 1, tr => 1,
4871     }->{$token->{tag_name}}) {
4872 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4873 wakaba 1.1 ## Ignore the token
4874     $token = $self->_get_next_token;
4875     redo B;
4876     } else {
4877     #
4878     }
4879     } else {
4880     #
4881     }
4882    
4883 wakaba 1.3 $self->{parse_error}-> (type => 'in table:'.$token->{tag_name});
4884 wakaba 1.1 $in_body->($insert_to_foster);
4885     redo B;
4886 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in caption') {
4887 wakaba 1.1 if ($token->{type} eq 'character') {
4888     ## NOTE: This is a code clone of "character in body".
4889     $reconstruct_active_formatting_elements->($insert_to_current);
4890    
4891 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4892 wakaba 1.1
4893     $token = $self->_get_next_token;
4894     redo B;
4895     } elsif ($token->{type} eq 'comment') {
4896     ## NOTE: This is a code clone of "comment in body".
4897     my $comment = $self->{document}->create_comment ($token->{data});
4898 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4899 wakaba 1.1 $token = $self->_get_next_token;
4900     redo B;
4901     } elsif ($token->{type} eq 'start tag') {
4902     if ({
4903     caption => 1, col => 1, colgroup => 1, tbody => 1,
4904     td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
4905     }->{$token->{tag_name}}) {
4906 wakaba 1.3 $self->{parse_error}-> (type => 'not closed:caption');
4907 wakaba 1.1
4908     ## As if </caption>
4909     ## have a caption element in table scope
4910     my $i;
4911 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4912     my $node = $self->{open_elements}->[$_];
4913 wakaba 1.1 if ($node->[1] eq 'caption') {
4914     $i = $_;
4915     last INSCOPE;
4916     } elsif ({
4917     table => 1, html => 1,
4918     }->{$node->[1]}) {
4919     last INSCOPE;
4920     }
4921     } # INSCOPE
4922     unless (defined $i) {
4923 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:caption');
4924 wakaba 1.1 ## Ignore the token
4925     $token = $self->_get_next_token;
4926     redo B;
4927     }
4928    
4929     ## generate implied end tags
4930     if ({
4931     dd => 1, dt => 1, li => 1, p => 1,
4932     td => 1, th => 1, tr => 1,
4933 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4934 wakaba 1.1 unshift @{$self->{token}}, $token; # <?>
4935     $token = {type => 'end tag', tag_name => 'caption'};
4936     unshift @{$self->{token}}, $token;
4937     $token = {type => 'end tag',
4938 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4939 wakaba 1.1 redo B;
4940     }
4941    
4942 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
4943     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4944 wakaba 1.1 }
4945    
4946 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4947 wakaba 1.1
4948     $clear_up_to_marker->();
4949    
4950 wakaba 1.3 $self->{insertion_mode} = 'in table';
4951 wakaba 1.1
4952     ## reprocess
4953     redo B;
4954     } else {
4955     #
4956     }
4957     } elsif ($token->{type} eq 'end tag') {
4958     if ($token->{tag_name} eq 'caption') {
4959     ## have a caption element in table scope
4960     my $i;
4961 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4962     my $node = $self->{open_elements}->[$_];
4963 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4964     $i = $_;
4965     last INSCOPE;
4966     } elsif ({
4967     table => 1, html => 1,
4968     }->{$node->[1]}) {
4969     last INSCOPE;
4970     }
4971     } # INSCOPE
4972     unless (defined $i) {
4973 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4974 wakaba 1.1 ## Ignore the token
4975     $token = $self->_get_next_token;
4976     redo B;
4977     }
4978    
4979     ## generate implied end tags
4980     if ({
4981     dd => 1, dt => 1, li => 1, p => 1,
4982     td => 1, th => 1, tr => 1,
4983 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4984 wakaba 1.1 unshift @{$self->{token}}, $token;
4985     $token = {type => 'end tag',
4986 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4987 wakaba 1.1 redo B;
4988     }
4989    
4990 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
4991     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4992 wakaba 1.1 }
4993    
4994 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4995 wakaba 1.1
4996     $clear_up_to_marker->();
4997    
4998 wakaba 1.3 $self->{insertion_mode} = 'in table';
4999 wakaba 1.1
5000     $token = $self->_get_next_token;
5001     redo B;
5002     } elsif ($token->{tag_name} eq 'table') {
5003 wakaba 1.3 $self->{parse_error}-> (type => 'not closed:caption');
5004 wakaba 1.1
5005     ## As if </caption>
5006     ## have a caption element in table scope
5007     my $i;
5008 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5009     my $node = $self->{open_elements}->[$_];
5010 wakaba 1.1 if ($node->[1] eq 'caption') {
5011     $i = $_;
5012     last INSCOPE;
5013     } elsif ({
5014     table => 1, html => 1,
5015     }->{$node->[1]}) {
5016     last INSCOPE;
5017     }
5018     } # INSCOPE
5019     unless (defined $i) {
5020 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:caption');
5021 wakaba 1.1 ## Ignore the token
5022     $token = $self->_get_next_token;
5023     redo B;
5024     }
5025    
5026     ## generate implied end tags
5027     if ({
5028     dd => 1, dt => 1, li => 1, p => 1,
5029     td => 1, th => 1, tr => 1,
5030 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5031 wakaba 1.1 unshift @{$self->{token}}, $token; # </table>
5032     $token = {type => 'end tag', tag_name => 'caption'};
5033     unshift @{$self->{token}}, $token;
5034     $token = {type => 'end tag',
5035 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
5036 wakaba 1.1 redo B;
5037     }
5038    
5039 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
5040     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5041 wakaba 1.1 }
5042    
5043 wakaba 1.3 splice @{$self->{open_elements}}, $i;
5044 wakaba 1.1
5045     $clear_up_to_marker->();
5046    
5047 wakaba 1.3 $self->{insertion_mode} = 'in table';
5048 wakaba 1.1
5049     ## reprocess
5050     redo B;
5051     } elsif ({
5052     body => 1, col => 1, colgroup => 1,
5053     html => 1, tbody => 1, td => 1, tfoot => 1,
5054     th => 1, thead => 1, tr => 1,
5055     }->{$token->{tag_name}}) {
5056 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5057 wakaba 1.1 ## Ignore the token: fetch the next one first, or |redo B| would re-dispatch this same end tag forever.
5058     $token = $self->_get_next_token; redo B;
5059     } else {
5060     #
5061     }
5062     } else {
5063     #
5064     }
5065    
5066     $in_body->($insert_to_current);
5067     redo B;
5068 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in column group') {
5069 wakaba 1.1 if ($token->{type} eq 'character') {
5070     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5071 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5072 wakaba 1.1 unless (length $token->{data}) {
5073     $token = $self->_get_next_token;
5074     redo B;
5075     }
5076     }
5077    
5078     #
5079     } elsif ($token->{type} eq 'comment') {
5080     my $comment = $self->{document}->create_comment ($token->{data});
5081 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
5082 wakaba 1.1 $token = $self->_get_next_token;
5083     redo B;
5084     } elsif ($token->{type} eq 'start tag') {
5085     if ($token->{tag_name} eq 'col') {
5086    
5087     {
5088     my $el;
5089    
5090     $el = $self->{document}->create_element_ns
5091     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
5092    
5093     for my $attr_name (keys %{ $token->{attributes}}) {
5094     $el->set_attribute_ns (undef, [undef, $attr_name],
5095     $token->{attributes} ->{$attr_name}->{value});
5096     }
5097    
5098 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
5099     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
5100 wakaba 1.1 }
5101    
5102 wakaba 1.3 pop @{$self->{open_elements}};
5103 wakaba 1.1 $token = $self->_get_next_token;
5104     redo B;
5105     } else {
5106     #
5107     }
5108     } elsif ($token->{type} eq 'end tag') {
5109     if ($token->{tag_name} eq 'colgroup') {
5110 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html') {
5111     $self->{parse_error}-> (type => 'unmatched end tag:colgroup');
5112 wakaba 1.1 ## Ignore the token
5113     $token = $self->_get_next_token;
5114     redo B;
5115     } else {
5116 wakaba 1.3 pop @{$self->{open_elements}}; # colgroup
5117     $self->{insertion_mode} = 'in table';
5118 wakaba 1.1 $token = $self->_get_next_token;
5119     redo B;
5120     }
5121     } elsif ($token->{tag_name} eq 'col') {
5122 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:col');
5123 wakaba 1.1 ## Ignore the token
5124     $token = $self->_get_next_token;
5125     redo B;
5126     } else {
5127     #
5128     }
5129     } else {
5130     #
5131     }
5132    
5133     ## As if </colgroup>
5134 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html') {
5135     $self->{parse_error}-> (type => 'unmatched end tag:colgroup');
5136 wakaba 1.1 ## Ignore the token
5137     $token = $self->_get_next_token;
5138     redo B;
5139     } else {
5140 wakaba 1.3 pop @{$self->{open_elements}}; # colgroup
5141     $self->{insertion_mode} = 'in table';
5142 wakaba 1.1 ## reprocess
5143     redo B;
5144     }
5145 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in table body') {
5146 wakaba 1.1 if ($token->{type} eq 'character') {
5147     ## NOTE: This is a "character in table" code clone.
5148     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5149 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5150 wakaba 1.1
5151     unless (length $token->{data}) {
5152     $token = $self->_get_next_token;
5153     redo B;
5154     }
5155     }
5156    
5157 wakaba 1.3 $self->{parse_error}-> (type => 'in table:#character');
5158    
5159 wakaba 1.1 ## As if in body, but insert into the foster parent element.
5160     ## ISSUE: The spec says "whenever a node would be inserted
5161     ## into the current node", but appending characters might not
5162     ## result in a new Text node being inserted.
5163     $reconstruct_active_formatting_elements->($insert_to_foster);
5164    
5165     if ({
5166     table => 1, tbody => 1, tfoot => 1,
5167     thead => 1, tr => 1,
5168 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5169 wakaba 1.1 # MUST
5170     my $foster_parent_element;
5171     my $next_sibling;
5172     my $prev_sibling;
5173 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
5174     if ($self->{open_elements}->[$_]->[1] eq 'table') {
5175     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
5176 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
5177     $foster_parent_element = $parent;
5178 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
5179 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
5180     } else {
5181 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
5182 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
5183     }
5184     last OE;
5185     }
5186     } # OE
5187 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
5188 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
5189     unless defined $foster_parent_element;
5190     if (defined $prev_sibling and
5191     $prev_sibling->node_type == 3) {
5192     $prev_sibling->manakai_append_text ($token->{data});
5193     } else {
5194     $foster_parent_element->insert_before
5195     ($self->{document}->create_text_node ($token->{data}),
5196     $next_sibling);
5197     }
5198     } else {
5199 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
5200 wakaba 1.1 }
5201    
5202     $token = $self->_get_next_token;
5203     redo B;
5204     } elsif ($token->{type} eq 'comment') {
5205     ## Copied from 'in table'
5206     my $comment = $self->{document}->create_comment ($token->{data});
5207 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
5208 wakaba 1.1 $token = $self->_get_next_token;
5209     redo B;
5210     } elsif ($token->{type} eq 'start tag') {
5211     if ({
5212     tr => 1,
5213     th => 1, td => 1,
5214     }->{$token->{tag_name}}) {
5215 wakaba 1.3 unless ($token->{tag_name} eq 'tr') {
5216     $self->{parse_error}-> (type => 'missing start tag:tr');
5217     }
5218    
5219 wakaba 1.1 ## Clear back to table body context
5220     while (not {
5221     tbody => 1, tfoot => 1, thead => 1, html => 1,
5222 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5223     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5224     pop @{$self->{open_elements}};
5225 wakaba 1.1 }
5226    
5227 wakaba 1.3 $self->{insertion_mode} = 'in row';
5228 wakaba 1.1 if ($token->{tag_name} eq 'tr') {
5229    
5230     {
5231     my $el;
5232    
5233     $el = $self->{document}->create_element_ns
5234     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
5235    
5236     for my $attr_name (keys %{ $token->{attributes}}) {
5237     $el->set_attribute_ns (undef, [undef, $attr_name],
5238     $token->{attributes} ->{$attr_name}->{value});
5239     }
5240    
5241 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
5242     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
5243 wakaba 1.1 }
5244    
5245     $token = $self->_get_next_token;
5246     } else {
5247    
5248     {
5249     my $el;
5250    
5251     $el = $self->{document}->create_element_ns
5252     (q<http://www.w3.org/1999/xhtml>, [undef, 'tr']);
5253    
5254 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
5255     push @{$self->{open_elements}}, [$el, 'tr'];
5256 wakaba 1.1 }
5257    
5258     ## reprocess
5259     }
5260     redo B;
5261     } elsif ({
5262     caption => 1, col => 1, colgroup => 1,
5263     tbody => 1, tfoot => 1, thead => 1,
5264     }->{$token->{tag_name}}) {
5265     ## have an element in table scope
5266     my $i;
5267 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5268     my $node = $self->{open_elements}->[$_];
5269 wakaba 1.1 if ({
5270     tbody => 1, thead => 1, tfoot => 1,
5271     }->{$node->[1]}) {
5272     $i = $_;
5273     last INSCOPE;
5274     } elsif ({
5275     table => 1, html => 1,
5276     }->{$node->[1]}) {
5277     last INSCOPE;
5278     }
5279     } # INSCOPE
5280     unless (defined $i) {
5281 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5282 wakaba 1.1 ## Ignore the token
5283     $token = $self->_get_next_token;
5284     redo B;
5285     }
5286    
5287     ## Clear back to table body context
5288     while (not {
5289     tbody => 1, tfoot => 1, thead => 1, html => 1,
5290 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5291     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5292     pop @{$self->{open_elements}};
5293 wakaba 1.1 }
5294    
5295     ## As if <{current node}>
5296     ## have an element in table scope
5297     ## true by definition
5298    
5299     ## Clear back to table body context
5300     ## nop by definition
5301    
5302 wakaba 1.3 pop @{$self->{open_elements}};
5303     $self->{insertion_mode} = 'in table';
5304 wakaba 1.1 ## reprocess
5305     redo B;
5306     } elsif ($token->{tag_name} eq 'table') {
5307     ## NOTE: This is a code clone of "table in table"
5308 wakaba 1.3 $self->{parse_error}-> (type => 'not closed:table');
5309 wakaba 1.1
5310     ## As if </table>
5311     ## have a table element in table scope
5312     my $i;
5313 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5314     my $node = $self->{open_elements}->[$_];
5315 wakaba 1.1 if ($node->[1] eq 'table') {
5316     $i = $_;
5317     last INSCOPE;
5318     } elsif ({
5319     table => 1, html => 1,
5320     }->{$node->[1]}) {
5321     last INSCOPE;
5322     }
5323     } # INSCOPE
5324     unless (defined $i) {
5325 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:table');
5326 wakaba 1.1 ## Ignore tokens </table><table>
5327     $token = $self->_get_next_token;
5328     redo B;
5329     }
5330    
5331     ## generate implied end tags
5332     if ({
5333     dd => 1, dt => 1, li => 1, p => 1,
5334     td => 1, th => 1, tr => 1,
5335 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5336 wakaba 1.1 unshift @{$self->{token}}, $token; # <table>
5337     $token = {type => 'end tag', tag_name => 'table'};
5338     unshift @{$self->{token}}, $token;
5339     $token = {type => 'end tag',
5340 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
5341 wakaba 1.1 redo B;
5342     }
5343    
5344 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
5345     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5346 wakaba 1.1 }
5347    
5348 wakaba 1.3 splice @{$self->{open_elements}}, $i;
5349 wakaba 1.1
5350 wakaba 1.3 $self->_reset_insertion_mode;
5351 wakaba 1.1
5352     ## reprocess
5353     redo B;
5354     } else {
5355     #
5356     }
5357     } elsif ($token->{type} eq 'end tag') {
5358     if ({
5359     tbody => 1, tfoot => 1, thead => 1,
5360     }->{$token->{tag_name}}) {
5361     ## have an element in table scope
5362     my $i;
5363 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5364     my $node = $self->{open_elements}->[$_];
5365 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
5366     $i = $_;
5367     last INSCOPE;
5368     } elsif ({
5369     table => 1, html => 1,
5370     }->{$node->[1]}) {
5371     last INSCOPE;
5372     }
5373     } # INSCOPE
5374     unless (defined $i) {
5375 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5376 wakaba 1.1 ## Ignore the token
5377     $token = $self->_get_next_token;
5378     redo B;
5379     }
5380    
5381     ## Clear back to table body context
5382     while (not {
5383     tbody => 1, tfoot => 1, thead => 1, html => 1,
5384 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5385     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5386     pop @{$self->{open_elements}};
5387 wakaba 1.1 }
5388    
5389 wakaba 1.3 pop @{$self->{open_elements}};
5390     $self->{insertion_mode} = 'in table';
5391 wakaba 1.1 $token = $self->_get_next_token;
5392     redo B;
5393     } elsif ($token->{tag_name} eq 'table') {
5394     ## have an element in table scope
5395     my $i;
5396 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5397     my $node = $self->{open_elements}->[$_];
5398 wakaba 1.1 if ({
5399     tbody => 1, thead => 1, tfoot => 1,
5400     }->{$node->[1]}) {
5401     $i = $_;
5402     last INSCOPE;
5403     } elsif ({
5404     table => 1, html => 1,
5405     }->{$node->[1]}) {
5406     last INSCOPE;
5407     }
5408     } # INSCOPE
5409     unless (defined $i) {
5410 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5411 wakaba 1.1 ## Ignore the token
5412     $token = $self->_get_next_token;
5413     redo B;
5414     }
5415    
5416     ## Clear back to table body context
5417     while (not {
5418     tbody => 1, tfoot => 1, thead => 1, html => 1,
5419 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5420     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5421     pop @{$self->{open_elements}};
5422 wakaba 1.1 }
5423    
5424     ## As if <{current node}>
5425     ## have an element in table scope
5426     ## true by definition
5427    
5428     ## Clear back to table body context
5429     ## nop by definition
5430    
5431 wakaba 1.3 pop @{$self->{open_elements}};
5432     $self->{insertion_mode} = 'in table';
5433 wakaba 1.1 ## reprocess
5434     redo B;
5435     } elsif ({
5436     body => 1, caption => 1, col => 1, colgroup => 1,
5437     html => 1, td => 1, th => 1, tr => 1,
5438     }->{$token->{tag_name}}) {
5439 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5440 wakaba 1.1 ## Ignore the token
5441     $token = $self->_get_next_token;
5442     redo B;
5443     } else {
5444     #
5445     }
5446     } else {
5447     #
5448     }
5449    
5450     ## As if in table
5451 wakaba 1.3 $self->{parse_error}-> (type => 'in table:'.$token->{tag_name});
5452 wakaba 1.1 $in_body->($insert_to_foster);
5453     redo B;
5454 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in row') {
5455 wakaba 1.1 if ($token->{type} eq 'character') {
5456     ## NOTE: This is a "character in table" code clone.
5457     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5458 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5459 wakaba 1.1
5460     unless (length $token->{data}) {
5461     $token = $self->_get_next_token;
5462     redo B;
5463     }
5464     }
5465    
5466 wakaba 1.3 $self->{parse_error}-> (type => 'in table:#character');
5467    
5468 wakaba 1.1 ## As if in body, but insert into foster parent element
5469     ## ISSUE: Spec says that "whenever a node would be inserted
5470     ## into the current node" while characters might not be
5471     ## result in a new Text node.
5472     $reconstruct_active_formatting_elements->($insert_to_foster);
5473    
5474     if ({
5475     table => 1, tbody => 1, tfoot => 1,
5476     thead => 1, tr => 1,
5477 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5478 wakaba 1.1 # MUST
5479     my $foster_parent_element;
5480     my $next_sibling;
5481     my $prev_sibling;
5482 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
5483     if ($self->{open_elements}->[$_]->[1] eq 'table') {
5484     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
5485 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
5486     $foster_parent_element = $parent;
5487 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
5488 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
5489     } else {
5490 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
5491 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
5492     }
5493     last OE;
5494     }
5495     } # OE
5496 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
5497 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
5498     unless defined $foster_parent_element;
5499     if (defined $prev_sibling and
5500     $prev_sibling->node_type == 3) {
5501     $prev_sibling->manakai_append_text ($token->{data});
5502     } else {
5503     $foster_parent_element->insert_before
5504     ($self->{document}->create_text_node ($token->{data}),
5505     $next_sibling);
5506     }
5507     } else {
5508 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
5509 wakaba 1.1 }
5510    
5511     $token = $self->_get_next_token;
5512     redo B;
5513     } elsif ($token->{type} eq 'comment') {
5514     ## Copied from 'in table'
5515     my $comment = $self->{document}->create_comment ($token->{data});
5516 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
5517 wakaba 1.1 $token = $self->_get_next_token;
5518     redo B;
5519     } elsif ($token->{type} eq 'start tag') {
5520     if ($token->{tag_name} eq 'th' or
5521     $token->{tag_name} eq 'td') {
5522     ## Clear back to table row context
5523     while (not {
5524     tr => 1, html => 1,
5525 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5526     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5527     pop @{$self->{open_elements}};
5528 wakaba 1.1 }
5529    
5530    
5531     {
5532     my $el;
5533    
5534     $el = $self->{document}->create_element_ns
5535     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
5536    
5537     for my $attr_name (keys %{ $token->{attributes}}) {
5538     $el->set_attribute_ns (undef, [undef, $attr_name],
5539     $token->{attributes} ->{$attr_name}->{value});
5540     }
5541    
5542 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
5543     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
5544 wakaba 1.1 }
5545    
5546 wakaba 1.3 $self->{insertion_mode} = 'in cell';
5547 wakaba 1.1
5548     push @$active_formatting_elements, ['#marker', ''];
5549    
5550     $token = $self->_get_next_token;
5551     redo B;
5552     } elsif ({
5553     caption => 1, col => 1, colgroup => 1,
5554     tbody => 1, tfoot => 1, thead => 1, tr => 1,
5555     }->{$token->{tag_name}}) {
5556     ## As if </tr>
5557     ## have an element in table scope
5558     my $i;
5559 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5560     my $node = $self->{open_elements}->[$_];
5561 wakaba 1.1 if ($node->[1] eq 'tr') {
5562     $i = $_;
5563     last INSCOPE;
5564     } elsif ({
5565     table => 1, html => 1,
5566     }->{$node->[1]}) {
5567     last INSCOPE;
5568     }
5569     } # INSCOPE
5570     unless (defined $i) { ## the scan above found no |tr| in table scope
5571 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name}); ## same spelling of the error type as every other site
5572 wakaba 1.1 ## Ignore the token: the implied </tr> cannot be applied
5573     $token = $self->_get_next_token;
5574     redo B;
5575     }
5576    
5577     ## Clear back to table row context
5578     while (not {
5579     tr => 1, html => 1,
5580 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5581     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5582     pop @{$self->{open_elements}};
5583 wakaba 1.1 }
5584    
5585 wakaba 1.3 pop @{$self->{open_elements}}; # tr
5586     $self->{insertion_mode} = 'in table body';
5587 wakaba 1.1 ## reprocess
5588     redo B;
5589     } elsif ($token->{tag_name} eq 'table') {
5590     ## NOTE: This is a code clone of "table in table"
5591 wakaba 1.3 $self->{parse_error}-> (type => 'not closed:table');
5592 wakaba 1.1
5593     ## As if </table>
5594     ## have a table element in table scope
5595     my $i;
5596 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5597     my $node = $self->{open_elements}->[$_];
5598 wakaba 1.1 if ($node->[1] eq 'table') {
5599     $i = $_;
5600     last INSCOPE;
5601     } elsif ({
5602     table => 1, html => 1,
5603     }->{$node->[1]}) {
5604     last INSCOPE;
5605     }
5606     } # INSCOPE
5607     unless (defined $i) {
5608 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:table');
5609 wakaba 1.1 ## Ignore tokens </table><table>
5610     $token = $self->_get_next_token;
5611     redo B;
5612     }
5613    
5614     ## generate implied end tags
5615     if ({
5616     dd => 1, dt => 1, li => 1, p => 1,
5617     td => 1, th => 1, tr => 1,
5618 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5619 wakaba 1.1 unshift @{$self->{token}}, $token; # <table>
5620     $token = {type => 'end tag', tag_name => 'table'};
5621     unshift @{$self->{token}}, $token;
5622     $token = {type => 'end tag',
5623 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
5624 wakaba 1.1 redo B;
5625     }
5626    
5627 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
5628     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5629 wakaba 1.1 }
5630    
5631 wakaba 1.3 splice @{$self->{open_elements}}, $i;
5632 wakaba 1.1
5633 wakaba 1.3 $self->_reset_insertion_mode;
5634 wakaba 1.1
5635     ## reprocess
5636     redo B;
5637     } else {
5638     #
5639     }
5640     } elsif ($token->{type} eq 'end tag') {
5641     if ($token->{tag_name} eq 'tr') {
5642     ## have an element in table scope
5643     my $i;
5644 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5645     my $node = $self->{open_elements}->[$_];
5646 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
5647     $i = $_;
5648     last INSCOPE;
5649     } elsif ({
5650     table => 1, html => 1,
5651     }->{$node->[1]}) {
5652     last INSCOPE;
5653     }
5654     } # INSCOPE
5655     unless (defined $i) {
5656 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5657 wakaba 1.1 ## Ignore the token
5658     $token = $self->_get_next_token;
5659     redo B;
5660     }
5661    
5662     ## Clear back to table row context
5663     while (not {
5664     tr => 1, html => 1,
5665 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5666     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5667     pop @{$self->{open_elements}};
5668 wakaba 1.1 }
5669    
5670 wakaba 1.3 pop @{$self->{open_elements}}; # tr
5671     $self->{insertion_mode} = 'in table body';
5672 wakaba 1.1 $token = $self->_get_next_token;
5673     redo B;
5674     } elsif ($token->{tag_name} eq 'table') {
5675     ## As if </tr>
5676     ## have an element in table scope
5677     my $i;
5678 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5679     my $node = $self->{open_elements}->[$_];
5680 wakaba 1.1 if ($node->[1] eq 'tr') {
5681     $i = $_;
5682     last INSCOPE;
5683     } elsif ({
5684     table => 1, html => 1,
5685     }->{$node->[1]}) {
5686     last INSCOPE;
5687     }
5688     } # INSCOPE
5689     unless (defined $i) { ## the scan above found no |tr| in table scope
5690 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name}); ## report the tag name, not the token type
5691 wakaba 1.1 ## Ignore the token
5692     $token = $self->_get_next_token;
5693     redo B;
5694     }
5695    
5696     ## Clear back to table row context
5697     while (not {
5698     tr => 1, html => 1,
5699 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5700     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5701     pop @{$self->{open_elements}};
5702 wakaba 1.1 }
5703    
5704 wakaba 1.3 pop @{$self->{open_elements}}; # tr
5705     $self->{insertion_mode} = 'in table body';
5706 wakaba 1.1 ## reprocess
5707     redo B;
5708     } elsif ({
5709     tbody => 1, tfoot => 1, thead => 1,
5710     }->{$token->{tag_name}}) {
5711     ## have an element in table scope
5712     my $i;
5713 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5714     my $node = $self->{open_elements}->[$_];
5715 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
5716     $i = $_;
5717     last INSCOPE;
5718     } elsif ({
5719     table => 1, html => 1,
5720     }->{$node->[1]}) {
5721     last INSCOPE;
5722     }
5723     } # INSCOPE
5724     unless (defined $i) {
5725 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5726 wakaba 1.1 ## Ignore the token
5727     $token = $self->_get_next_token;
5728     redo B;
5729     }
5730    
5731     ## As if </tr>
5732     ## have an element in table scope
5733     undef $i; ## reset the index declared for the first scan rather than redeclaring it in the same scope
5734 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5735     my $node = $self->{open_elements}->[$_];
5736 wakaba 1.1 if ($node->[1] eq 'tr') {
5737     $i = $_; ## index of the |tr| found in table scope
5738     last INSCOPE;
5739     } elsif ({
5740     table => 1, html => 1,
5741     }->{$node->[1]}) {
5742     last INSCOPE; ## scope boundary reached; $i stays undefined
5743     }
5744     } # INSCOPE
5745     unless (defined $i) {
5746 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:tr');
5747 wakaba 1.1 ## Ignore the token
5748     $token = $self->_get_next_token;
5749     redo B;
5750     }
5751    
5752     ## Clear back to table row context
5753     while (not {
5754     tr => 1, html => 1,
5755 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5756     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5757     pop @{$self->{open_elements}};
5758 wakaba 1.1 }
5759    
5760 wakaba 1.3 pop @{$self->{open_elements}}; # tr
5761     $self->{insertion_mode} = 'in table body';
5762 wakaba 1.1 ## reprocess
5763     redo B;
5764     } elsif ({
5765     body => 1, caption => 1, col => 1,
5766     colgroup => 1, html => 1, td => 1, th => 1,
5767     }->{$token->{tag_name}}) {
5768 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5769 wakaba 1.1 ## Ignore the token
5770     $token = $self->_get_next_token;
5771     redo B;
5772     } else {
5773     #
5774     }
5775     } else {
5776     #
5777     }
5778    
5779     ## As if in table
5780 wakaba 1.3 $self->{parse_error}-> (type => 'in table:'.$token->{tag_name});
5781 wakaba 1.1 $in_body->($insert_to_foster);
5782     redo B;
5783 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in cell') {
5784 wakaba 1.1 if ($token->{type} eq 'character') {
5785     ## NOTE: This is a code clone of "character in body".
5786     $reconstruct_active_formatting_elements->($insert_to_current);
5787    
5788 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
5789 wakaba 1.1
5790     $token = $self->_get_next_token;
5791     redo B;
5792     } elsif ($token->{type} eq 'comment') {
5793     ## NOTE: This is a code clone of "comment in body".
5794     my $comment = $self->{document}->create_comment ($token->{data});
5795 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
5796 wakaba 1.1 $token = $self->_get_next_token;
5797     redo B;
5798     } elsif ($token->{type} eq 'start tag') {
5799     if ({
5800     caption => 1, col => 1, colgroup => 1,
5801     tbody => 1, td => 1, tfoot => 1, th => 1,
5802     thead => 1, tr => 1,
5803     }->{$token->{tag_name}}) {
5804     ## have an element in table scope
5805     my $tn;
5806 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5807     my $node = $self->{open_elements}->[$_];
5808 wakaba 1.1 if ($node->[1] eq 'td' or $node->[1] eq 'th') {
5809     $tn = $node->[1];
5810     last INSCOPE;
5811     } elsif ({
5812     table => 1, html => 1,
5813     }->{$node->[1]}) {
5814     last INSCOPE;
5815     }
5816     } # INSCOPE
5817     unless (defined $tn) {
5818 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5819 wakaba 1.1 ## Ignore the token
5820     $token = $self->_get_next_token;
5821     redo B;
5822     }
5823    
5824     ## Close the cell
5825     unshift @{$self->{token}}, $token; # <?>
5826     $token = {type => 'end tag', tag_name => $tn};
5827     redo B;
5828     } else {
5829     #
5830     }
5831     } elsif ($token->{type} eq 'end tag') {
5832     if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
5833     ## have an element in table scope
5834     my $i;
5835 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5836     my $node = $self->{open_elements}->[$_];
5837 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
5838     $i = $_;
5839     last INSCOPE;
5840     } elsif ({
5841     table => 1, html => 1,
5842     }->{$node->[1]}) {
5843     last INSCOPE;
5844     }
5845     } # INSCOPE
5846     unless (defined $i) {
5847 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5848 wakaba 1.1 ## Ignore the token
5849     $token = $self->_get_next_token;
5850     redo B;
5851     }
5852    
5853     ## generate implied end tags
5854     if ({
5855     dd => 1, dt => 1, li => 1, p => 1,
5856     td => ($token->{tag_name} eq 'th'),
5857     th => ($token->{tag_name} eq 'td'),
5858     tr => 1,
5859 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
5860 wakaba 1.1 unshift @{$self->{token}}, $token;
5861     $token = {type => 'end tag',
5862 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
5863 wakaba 1.1 redo B;
5864     }
5865    
5866 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
5867     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
5868 wakaba 1.1 }
5869    
5870 wakaba 1.3 splice @{$self->{open_elements}}, $i;
5871 wakaba 1.1
5872     $clear_up_to_marker->();
5873    
5874 wakaba 1.3 $self->{insertion_mode} = 'in row';
5875 wakaba 1.1
5876     $token = $self->_get_next_token;
5877     redo B;
5878     } elsif ({
5879     body => 1, caption => 1, col => 1,
5880     colgroup => 1, html => 1,
5881     }->{$token->{tag_name}}) {
5882 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5883 wakaba 1.1 ## Ignore the token
5884     $token = $self->_get_next_token;
5885     redo B;
5886     } elsif ({
5887     table => 1, tbody => 1, tfoot => 1,
5888     thead => 1, tr => 1,
5889     }->{$token->{tag_name}}) {
5890     ## have an element in table scope
5891     my $i;
5892     my $tn;
5893 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5894     my $node = $self->{open_elements}->[$_];
5895 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
5896     $i = $_;
5897     last INSCOPE;
5898     } elsif ($node->[1] eq 'td' or $node->[1] eq 'th') {
5899     $tn = $node->[1];
5900     ## NOTE: There is exactly one |td| or |th| element
5901     ## in scope in the stack of open elements by definition.
5902     } elsif ({
5903     table => 1, html => 1,
5904     }->{$node->[1]}) {
5905     last INSCOPE;
5906     }
5907     } # INSCOPE
5908     unless (defined $i) {
5909 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5910 wakaba 1.1 ## Ignore the token
5911     $token = $self->_get_next_token;
5912     redo B;
5913     }
5914    
5915     ## Close the cell
5916     unshift @{$self->{token}}, $token; # </?>
5917     $token = {type => 'end tag', tag_name => $tn};
5918     redo B;
5919     } else {
5920     #
5921     }
5922     } else {
5923     #
5924     }
5925    
5926     $in_body->($insert_to_current);
5927     redo B;
5928 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in select') {
5929 wakaba 1.1 if ($token->{type} eq 'character') {
5930 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
5931 wakaba 1.1 $token = $self->_get_next_token;
5932     redo B;
5933     } elsif ($token->{type} eq 'comment') {
5934     my $comment = $self->{document}->create_comment ($token->{data});
5935 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
5936 wakaba 1.1 $token = $self->_get_next_token;
5937     redo B;
5938     } elsif ($token->{type} eq 'start tag') {
5939     if ($token->{tag_name} eq 'option') {
5940 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
5941 wakaba 1.1 ## As if </option>
5942 wakaba 1.3 pop @{$self->{open_elements}};
5943 wakaba 1.1 }
5944    
5945    
5946     {
5947     my $el;
5948    
5949     $el = $self->{document}->create_element_ns
5950     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
5951    
5952     for my $attr_name (keys %{ $token->{attributes}}) {
5953     $el->set_attribute_ns (undef, [undef, $attr_name],
5954     $token->{attributes} ->{$attr_name}->{value});
5955     }
5956    
5957 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
5958     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
5959 wakaba 1.1 }
5960    
5961     $token = $self->_get_next_token;
5962     redo B;
5963     } elsif ($token->{tag_name} eq 'optgroup') {
5964 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
5965 wakaba 1.1 ## As if </option>
5966 wakaba 1.3 pop @{$self->{open_elements}};
5967 wakaba 1.1 }
5968    
5969 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
5970 wakaba 1.1 ## As if </optgroup>
5971 wakaba 1.3 pop @{$self->{open_elements}};
5972 wakaba 1.1 }
5973    
5974    
5975     {
5976     my $el;
5977    
5978     $el = $self->{document}->create_element_ns
5979     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
5980    
5981     for my $attr_name (keys %{ $token->{attributes}}) {
5982     $el->set_attribute_ns (undef, [undef, $attr_name],
5983     $token->{attributes} ->{$attr_name}->{value});
5984     }
5985    
5986 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
5987     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
5988 wakaba 1.1 }
5989    
5990     $token = $self->_get_next_token;
5991     redo B;
5992     } elsif ($token->{tag_name} eq 'select') {
5993 wakaba 1.3 $self->{parse_error}-> (type => 'not closed:select');
5994 wakaba 1.1 ## As if </select> instead
5995     ## have an element in table scope
5996     my $i;
5997 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5998     my $node = $self->{open_elements}->[$_];
5999 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
6000     $i = $_;
6001     last INSCOPE;
6002     } elsif ({
6003     table => 1, html => 1,
6004     }->{$node->[1]}) {
6005     last INSCOPE;
6006     }
6007     } # INSCOPE
6008     unless (defined $i) {
6009 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:select');
6010 wakaba 1.1 ## Ignore the token
6011     $token = $self->_get_next_token;
6012     redo B;
6013     }
6014    
6015 wakaba 1.3 splice @{$self->{open_elements}}, $i;
6016 wakaba 1.1
6017 wakaba 1.3 $self->_reset_insertion_mode;
6018 wakaba 1.1
6019     $token = $self->_get_next_token;
6020     redo B;
6021     } else {
6022     #
6023     }
6024     } elsif ($token->{type} eq 'end tag') {
6025     if ($token->{tag_name} eq 'optgroup') {
6026 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option' and
6027     $self->{open_elements}->[-2]->[1] eq 'optgroup') {
6028 wakaba 1.1 ## As if </option>
6029 wakaba 1.3 splice @{$self->{open_elements}}, -2;
6030     } elsif ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
6031     pop @{$self->{open_elements}};
6032 wakaba 1.1 } else {
6033 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
6034 wakaba 1.1 ## Ignore the token
6035     }
6036     $token = $self->_get_next_token;
6037     redo B;
6038     } elsif ($token->{tag_name} eq 'option') {
6039 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
6040     pop @{$self->{open_elements}};
6041 wakaba 1.1 } else {
6042 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
6043 wakaba 1.1 ## Ignore the token
6044     }
6045     $token = $self->_get_next_token;
6046     redo B;
6047     } elsif ($token->{tag_name} eq 'select') {
6048     ## have an element in table scope
6049     my $i;
6050 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6051     my $node = $self->{open_elements}->[$_];
6052 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
6053     $i = $_;
6054     last INSCOPE;
6055     } elsif ({
6056     table => 1, html => 1,
6057     }->{$node->[1]}) {
6058     last INSCOPE;
6059     }
6060     } # INSCOPE
6061     unless (defined $i) {
6062 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
6063 wakaba 1.1 ## Ignore the token
6064     $token = $self->_get_next_token;
6065     redo B;
6066     }
6067    
6068 wakaba 1.3 splice @{$self->{open_elements}}, $i;
6069 wakaba 1.1
6070 wakaba 1.3 $self->_reset_insertion_mode;
6071 wakaba 1.1
6072     $token = $self->_get_next_token;
6073     redo B;
6074     } elsif ({
6075     caption => 1, table => 1, tbody => 1,
6076     tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
6077     }->{$token->{tag_name}}) {
6078 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
6079 wakaba 1.1
6080     ## have an element in table scope
6081     my $i;
6082 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6083     my $node = $self->{open_elements}->[$_];
6084 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
6085     $i = $_;
6086     last INSCOPE;
6087     } elsif ({
6088     table => 1, html => 1,
6089     }->{$node->[1]}) {
6090     last INSCOPE;
6091     }
6092     } # INSCOPE
6093     unless (defined $i) {
6094     ## Ignore the token
6095     $token = $self->_get_next_token;
6096     redo B;
6097     }
6098    
6099     ## As if </select>
6100     ## have an element in table scope
6101     undef $i;
6102 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6103     my $node = $self->{open_elements}->[$_];
6104 wakaba 1.1 if ($node->[1] eq 'select') {
6105     $i = $_;
6106     last INSCOPE;
6107     } elsif ({
6108     table => 1, html => 1,
6109     }->{$node->[1]}) {
6110     last INSCOPE;
6111     }
6112     } # INSCOPE
6113     unless (defined $i) {
6114 wakaba 1.3 $self->{parse_error}-> (type => 'unmatched end tag:select');
6115 wakaba 1.1 ## Ignore the </select> token
6116     $token = $self->_get_next_token; ## TODO: ok?
6117     redo B;
6118     }
6119    
6120 wakaba 1.3 splice @{$self->{open_elements}}, $i;
6121 wakaba 1.1
6122 wakaba 1.3 $self->_reset_insertion_mode;
6123 wakaba 1.1
6124     ## reprocess
6125     redo B;
6126     } else {
6127     #
6128     }
6129     } else {
6130     #
6131     }
6132    
6133 wakaba 1.3 $self->{parse_error}-> (type => 'in select:'.$token->{tag_name});
6134 wakaba 1.1 ## Ignore the token
6135     $token = $self->_get_next_token;
6136     redo B;
6137 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after body') {
6138 wakaba 1.1 if ($token->{type} eq 'character') {
6139     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
         my $leading_space = $1; ## the substitution removed this run from $token->{data}; keep a copy before calling out
6140     ## As if in body
6141     $reconstruct_active_formatting_elements->($insert_to_current);
6142    
6143 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($leading_space); ## append only the whitespace, not the remainder
6144 wakaba 1.1
6145     unless (length $token->{data}) { ## token was whitespace only: nothing left to process
6146     $token = $self->_get_next_token;
6147     redo B;
6148     }
6149     }
6150    
6151     #
6152 wakaba 1.3 $self->{parse_error}-> (type => 'after body:#'.$token->{type});
6153 wakaba 1.1 } elsif ($token->{type} eq 'comment') {
6154     my $comment = $self->{document}->create_comment ($token->{data});
6155 wakaba 1.3 $self->{open_elements}->[0]->[0]->append_child ($comment);
6156 wakaba 1.1 $token = $self->_get_next_token;
6157     redo B;
6158 wakaba 1.3 } elsif ($token->{type} eq 'start tag') {
6159     $self->{parse_error}-> (type => 'after body:'.$token->{tag_name});
6160     #
6161 wakaba 1.1 } elsif ($token->{type} eq 'end tag') {
6162     if ($token->{tag_name} eq 'html') {
6163 wakaba 1.3 if (defined $self->{inner_html_node}) {
6164     $self->{parse_error}-> (type => 'unmatched end tag:html');
6165     ## Ignore the token
6166     $token = $self->_get_next_token;
6167     redo B;
6168     } else {
6169     $phase = 'trailing end';
6170     $token = $self->_get_next_token;
6171     redo B;
6172     }
6173 wakaba 1.1 } else {
6174 wakaba 1.3 $self->{parse_error}-> (type => 'after body:/'.$token->{tag_name});
6175 wakaba 1.1 }
6176     } else {
6177 wakaba 1.3 $self->{parse_error}-> (type => 'after body:#'.$token->{type});
6178 wakaba 1.1 }
6179    
6180 wakaba 1.3 $self->{insertion_mode} = 'in body';
6181 wakaba 1.1 ## reprocess
6182     redo B;
6183 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in frameset') {
6184 wakaba 1.1 if ($token->{type} eq 'character') {
6185     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
6186 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1); ## append the stripped whitespace; $token->{data} now holds only the remainder
6187 wakaba 1.1
6188     unless (length $token->{data}) { ## token was whitespace only: nothing left to process
6189     $token = $self->_get_next_token;
6190     redo B;
6191     }
6192     }
6193    
6194     #
6195     } elsif ($token->{type} eq 'comment') {
6196     my $comment = $self->{document}->create_comment ($token->{data});
6197 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
6198 wakaba 1.1 $token = $self->_get_next_token;
6199     redo B;
6200     } elsif ($token->{type} eq 'start tag') {
6201     if ($token->{tag_name} eq 'frameset') {
6202    
6203     {
6204     my $el;
6205    
6206     $el = $self->{document}->create_element_ns
6207     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
6208    
6209     for my $attr_name (keys %{ $token->{attributes}}) {
6210     $el->set_attribute_ns (undef, [undef, $attr_name],
6211     $token->{attributes} ->{$attr_name}->{value});
6212     }
6213    
6214 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
6215     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
6216 wakaba 1.1 }
6217    
6218     $token = $self->_get_next_token;
6219     redo B;
6220     } elsif ($token->{tag_name} eq 'frame') {
6221    
6222     {
6223     my $el;
6224    
6225     $el = $self->{document}->create_element_ns
6226     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
6227    
6228     for my $attr_name (keys %{ $token->{attributes}}) {
6229     $el->set_attribute_ns (undef, [undef, $attr_name],
6230     $token->{attributes} ->{$attr_name}->{value});
6231     }
6232    
6233 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
6234     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
6235 wakaba 1.1 }
6236    
6237 wakaba 1.3 pop @{$self->{open_elements}};
6238 wakaba 1.1 $token = $self->_get_next_token;
6239     redo B;
6240     } elsif ($token->{tag_name} eq 'noframes') {
6241     $in_body->($insert_to_current);
6242     redo B;
6243     } else {
6244     #
6245     }
6246     } elsif ($token->{type} eq 'end tag') {
6247     if ($token->{tag_name} eq 'frameset') {
6248 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html' and
6249     @{$self->{open_elements}} == 1) {
6250     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
6251 wakaba 1.1 ## Ignore the token
6252     $token = $self->_get_next_token;
6253     } else {
6254 wakaba 1.3 pop @{$self->{open_elements}};
6255 wakaba 1.1 $token = $self->_get_next_token;
6256     }
6257    
6258     ## if not inner_html and
6259 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'frameset') {
6260     $self->{insertion_mode} = 'after frameset';
6261 wakaba 1.1 }
6262     redo B;
6263     } else {
6264     #
6265     }
6266     } else {
6267     #
6268     }
6269    
6270 wakaba 1.3 if (defined $token->{tag_name}) {
6271     $self->{parse_error}-> (type => 'in frameset:'.$token->{tag_name});
6272     } else {
6273     $self->{parse_error}-> (type => 'in frameset:#'.$token->{type});
6274     }
6275 wakaba 1.1 ## Ignore the token
6276     $token = $self->_get_next_token;
6277     redo B;
6278 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after frameset') {
6279 wakaba 1.1 if ($token->{type} eq 'character') {
6280     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
6281 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
6282 wakaba 1.1
6283     unless (length $token->{data}) {
6284     $token = $self->_get_next_token;
6285     redo B;
6286     }
6287     }
6288    
6289     #
6290     } elsif ($token->{type} eq 'comment') {
6291     my $comment = $self->{document}->create_comment ($token->{data});
6292 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
6293 wakaba 1.1 $token = $self->_get_next_token;
6294     redo B;
6295     } elsif ($token->{type} eq 'start tag') {
6296     if ($token->{tag_name} eq 'noframes') {
6297     $in_body->($insert_to_current);
6298     redo B;
6299     } else {
6300     #
6301     }
6302     } elsif ($token->{type} eq 'end tag') {
6303     if ($token->{tag_name} eq 'html') {
6304     $phase = 'trailing end';
6305     $token = $self->_get_next_token;
6306     redo B;
6307     } else {
6308     #
6309     }
6310     } else {
6311     #
6312     }
6313    
6314 wakaba 1.3 if (defined $token->{tag_name}) {
6315     $self->{parse_error}-> (type => 'after frameset:'.$token->{tag_name});
6316     } else {
6317     $self->{parse_error}-> (type => 'after frameset:#'.$token->{type});
6318     }
6319 wakaba 1.1 ## Ignore the token
6320     $token = $self->_get_next_token;
6321     redo B;
6322    
6323     ## ISSUE: An issue in spec there
6324     } else {
6325 wakaba 1.3 die "$0: $self->{insertion_mode}: Unknown insertion mode";
6326 wakaba 1.1 }
6327     }
6328     } elsif ($phase eq 'trailing end') {
6329     ## states in the main stage is preserved yet # MUST
6330    
6331     if ($token->{type} eq 'DOCTYPE') {
6332 wakaba 1.3 $self->{parse_error}-> (type => 'after html:#DOCTYPE');
6333 wakaba 1.1 ## Ignore the token
6334     $token = $self->_get_next_token;
6335     redo B;
6336     } elsif ($token->{type} eq 'comment') {
6337     my $comment = $self->{document}->create_comment ($token->{data});
6338     $self->{document}->append_child ($comment);
6339     $token = $self->_get_next_token;
6340     redo B;
6341     } elsif ($token->{type} eq 'character') {
6342     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
6343     my $data = $1;
6344     ## As if in the main phase.
6345     ## NOTE: The insertion mode in the main phase
6346     ## just before the phase has been changed to the trailing
6347     ## end phase is either "after body" or "after frameset".
6348     $reconstruct_active_formatting_elements->($insert_to_current)
6349     if $phase eq 'main';
6350    
6351 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($data);
6352 wakaba 1.1
6353     unless (length $token->{data}) {
6354     $token = $self->_get_next_token;
6355     redo B;
6356     }
6357     }
6358    
6359 wakaba 1.3 $self->{parse_error}-> (type => 'after html:#character');
6360 wakaba 1.1 $phase = 'main';
6361     ## reprocess
6362     redo B;
6363     } elsif ($token->{type} eq 'start tag' or
6364     $token->{type} eq 'end tag') {
6365 wakaba 1.3 $self->{parse_error}-> (type => 'after html:'.$token->{tag_name});
6366 wakaba 1.1 $phase = 'main';
6367     ## reprocess
6368     redo B;
6369     } elsif ($token->{type} eq 'end-of-file') {
6370     ## Stop parsing
6371     last B;
6372     } else {
6373     die "$0: $token->{type}: Unknown token";
6374     }
6375     }
6376     } # B
6377    
6378     ## Stop parsing # MUST
6379    
6380     ## TODO: script stuffs
6381 wakaba 1.3 } # _tree_construct_main
6382    
## set_inner_html ($class, $node, $source, $onerror)
##
## Replaces the children of |$node| with the result of parsing the
## markup in |$source|.
##
##   $node    - A Document node (node type 9) or an Element node
##              (node type 1).  Any other node type is fatal.
##   $source  - The markup string.  It is read through a reference to
##              the caller's argument, so it is not copied.
##   $onerror - Optional code reference invoked for parse errors.  In
##              the Element case it receives the error's name/value
##              pairs followed by |line| and |column|; when omitted, a
##              default handler |warn|s.  In the Document case it is
##              passed to |parse_string| unchanged.
##
## The "Step" numbers in the comments are those of the innerHTML
## algorithm the original code refers to.
sub set_inner_html ($$$) {
  my $class = shift;
  my $node = shift;
  my $s = \$_[0];
  my $onerror = $_[1];

  my $nt = $node->node_type;
  if ($nt == 9) {
    # MUST

    ## Step 1 # MUST
    ## TODO: If the document has an active parser, ...
    ## ISSUE: There is an issue in the spec.

    ## Step 2 # MUST
    ## Snapshot the child list first, then detach every child.
    my @old_children = @{$node->child_nodes};
    $node->remove_child ($_) for @old_children;

    ## Step 3, 4, 5 # MUST
    $class->parse_string ($$s => $node, $onerror);
  } elsif ($nt == 1) {
    ## TODO: If non-html element

    ## NOTE: Most of this code is copied from |parse_string|

    ## Step 1 # MUST
    ## Parse into a scratch HTML document created by the
    ## implementation of the node's owner document.
    my $this_doc = $node->owner_document;
    my $doc = $this_doc->implementation->create_document;
    $doc->manakai_is_html (1);
    my $p = $class->new;
    $p->{document} = $doc;

    ## Step 9 # MUST
    ## Input stream state shared by the closures below.
    my $offset = 0;
    my $line = 1;
    my $column = 0;
    $p->{set_next_input_character} = sub {
      my $tokenizer = shift;

      ## Slide the three-entry history of previous input characters.
      pop @{$tokenizer->{prev_input_character}};
      unshift @{$tokenizer->{prev_input_character}},
          $tokenizer->{next_input_character};

      ## End of input is reported as -1.
      if ($offset >= length $$s) {
        $tokenizer->{next_input_character} = -1;
        return;
      }

      my $code = $tokenizer->{next_input_character}
          = ord (substr ($$s, $offset++, 1));
      $column++;

      if ($code == 0x000A) { # LF
        $line++;
        $column = 0;
      } elsif ($code == 0x000D) { # CR
        ## CR and CR LF are both delivered as a single LF.
        $offset++ if substr ($$s, $offset, 1) eq "\x0A";
        $tokenizer->{next_input_character} = 0x000A; # LF # MUST
        $line++;
        $column = 0;
      } elsif ($code > 0x10FFFF) {
        $tokenizer->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
      } elsif ($code == 0x0000) { # NULL
        $tokenizer->{parse_error}-> (type => 'NULL');
        $tokenizer->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
      }
    };
    $p->{prev_input_character} = [-1, -1, -1];
    $p->{next_input_character} = -1;

    ## Errors go to the caller's handler, or to a |warn|ing handler
    ## when none was given; the current position is appended.
    my $report = $onerror || sub {
      my (%opt) = @_;
      warn "Parse error ($opt{type}) at line $opt{line} column $opt{column}\n";
    };
    $p->{parse_error} = sub {
      $report->(@_, line => $line, column => $column);
    };

    $p->_initialize_tokenizer;
    $p->_initialize_tree_constructor;

    ## Step 2
    ## The initial content model flag depends on the context element.
    my $node_ln = $node->local_name;
    my %content_model_for = (
      title => 'RCDATA',
      textarea => 'RCDATA',
      style => 'CDATA',
      script => 'CDATA',
      xmp => 'CDATA',
      iframe => 'CDATA',
      noembed => 'CDATA',
      noframes => 'CDATA',
      noscript => 'CDATA',
      plaintext => 'PLAINTEXT',
    );
    $p->{content_model_flag} = $content_model_for{$node_ln} || 'PCDATA';
    ## ISSUE: What is "the name of the element"? local name?

    $p->{inner_html_node} = [$node, $node_ln];

    ## Step 4
    my $root = $doc->create_element_ns
        ('http://www.w3.org/1999/xhtml', [undef, 'html']);

    ## Step 5 # MUST
    $doc->append_child ($root);

    ## Step 6 # MUST
    push @{$p->{open_elements}}, [$root, 'html'];

    undef $p->{head_element};

    ## Step 7 # MUST
    $p->_reset_insertion_mode;

    ## Step 8 # MUST
    ## The nearest XHTML-namespace |form| among the node and its
    ## ancestors becomes the parser's form element.
    for (my $anc = $node; defined $anc; $anc = $anc->parent_node) {
      next unless $anc->node_type == 1;
      my $nsuri = $anc->namespace_uri;
      next unless defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml';
      next unless $anc->local_name eq 'form'; ## TODO: case?
      $p->{form_element} = $anc;
      last;
    }

    ## Step 3 # MUST
    ## Step 10 # MUST
    ## |$token| is the current-token variable shared with the tree
    ## construction code in the enclosing scope.
    $token = $p->_get_next_token;
    $p->_tree_construction_main;

    ## Step 11 # MUST
    my @old_children = @{$node->child_nodes};
    $node->remove_child ($_) for @old_children;
    ## ISSUE: mutation events? read-only?

    ## Step 12 # MUST
    ## Move every parsed child of the scratch root into |$node|.
    my @parsed_children = @{$root->child_nodes};
    for my $parsed (@parsed_children) {
      $this_doc->adopt_node ($parsed);
      $node->append_child ($parsed);
    }
    ## ISSUE: mutation events?

    $p->_terminate_tree_constructor;
  } else {
    die "$0: |set_inner_html| is not defined for node of type $nt";
  }
} # set_inner_html
6536    
6537     } # tree construction stage
6538 wakaba 1.1
6539     sub get_inner_html ($$$) {
6540 wakaba 1.3 my (undef, $node, $on_error) = @_;
6541 wakaba 1.1
6542     ## Step 1
6543     my $s = '';
6544    
6545     my $in_cdata;
6546     my $parent = $node;
6547     while (defined $parent) {
6548     if ($parent->node_type == 1 and
6549     $parent->namespace_uri eq 'http://www.w3.org/1999/xhtml' and
6550     {
6551     style => 1, script => 1, xmp => 1, iframe => 1,
6552     noembed => 1, noframes => 1, noscript => 1,
6553     }->{$parent->local_name}) { ## TODO: case thingy
6554     $in_cdata = 1;
6555     }
6556     $parent = $parent->parent_node;
6557     }
6558    
6559     ## Step 2
6560     my @node = @{$node->child_nodes};
6561     C: while (@node) {
6562     my $child = shift @node;
6563     unless (ref $child) {
6564     if ($child eq 'cdata-out') {
6565     $in_cdata = 0;
6566     } else {
6567     $s .= $child; # end tag
6568     }
6569     next C;
6570     }
6571    
6572     my $nt = $child->node_type;
6573     if ($nt == 1) { # Element
6574     my $tag_name = lc $child->tag_name; ## ISSUE: Definition of "lowercase"
6575     $s .= '<' . $tag_name;
6576    
6577     ## ISSUE: Non-html elements
6578    
6579     my @attrs = @{$child->attributes}; # sort order MUST be stable
6580     for my $attr (@attrs) { # order is implementation dependent
6581     my $attr_name = lc $attr->name; ## ISSUE: Definition of "lowercase"
6582     $s .= ' ' . $attr_name . '="';
6583     my $attr_value = $attr->value;
6584     ## escape
6585     $attr_value =~ s/&/&amp;/g;
6586     $attr_value =~ s/</&lt;/g;
6587     $attr_value =~ s/>/&gt;/g;
6588     $attr_value =~ s/"/&quot;/g;
6589     $s .= $attr_value . '"';
6590     }
6591     $s .= '>';
6592    
6593     next C if {
6594     area => 1, base => 1, basefont => 1, bgsound => 1,
6595     br => 1, col => 1, embed => 1, frame => 1, hr => 1,
6596     img => 1, input => 1, link => 1, meta => 1, param => 1,
6597     spacer => 1, wbr => 1,
6598     }->{$tag_name};
6599    
6600     if (not $in_cdata and {
6601     style => 1, script => 1, xmp => 1, iframe => 1,
6602     noembed => 1, noframes => 1, noscript => 1,
6603     }->{$tag_name}) {
6604     unshift @node, 'cdata-out';
6605     $in_cdata = 1;
6606     }
6607    
6608     unshift @node, @{$child->child_nodes}, '</' . $tag_name . '>';
6609     } elsif ($nt == 3 or $nt == 4) {
6610     if ($in_cdata) {
6611     $s .= $child->data;
6612     } else {
6613     my $value = $child->data;
6614     $value =~ s/&/&amp;/g;
6615     $value =~ s/</&lt;/g;
6616     $value =~ s/>/&gt;/g;
6617     $value =~ s/"/&quot;/g;
6618     $s .= $value;
6619     }
6620     } elsif ($nt == 8) {
6621     $s .= '<!--' . $child->data . '-->';
6622     } elsif ($nt == 10) {
6623     $s .= '<!DOCTYPE ' . $child->name . '>';
6624     } elsif ($nt == 5) { # entrefs
6625     push @node, @{$child->child_nodes};
6626     } else {
6627     $on_error->($child) if defined $on_error;
6628     }
6629     ## ISSUE: This code does not support PIs.
6630     } # C
6631    
6632     ## Step 3
6633     return \$s;
6634     } # get_inner_html
6635    
6636     1;
6637 wakaba 1.21 # $Date: 2007/06/23 14:25:05 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24