/[suikacvs]/markup/html/whatpm/What/HTML.pm.src
Suika

Contents of /markup/html/whatpm/What/HTML.pm.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (hide annotations) (download) (as text)
Mon Apr 30 09:59:35 2007 UTC (17 years, 7 months ago) by wakaba
Branch: MAIN
Changes since 1.2: +5 -4 lines
File MIME type: application/x-wais-source
++ whatpm/What/ChangeLog	30 Apr 2007 09:59:16 -0000
	* HTML.pm.src: Some tokenizer bugs are fixed.

2007-04-30  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/t/ChangeLog	30 Apr 2007 09:58:16 -0000
2007-04-30  Wakaba  <wakaba@suika.fam.cx>

	* .cvsignore: New file.

	* Makefile: New file.

	* HTML-tokenizer.t: New test.

package What::HTML;
use strict;

## Module version, derived from the CVS "Revision" keyword: the first
## number is used as is, every following number is zero-padded to two
## digits (revision "1.2" yields "1.02").
our $VERSION = do {
  my @number = (q$Revision: 1.2 $ =~ /\d+/g);
  sprintf "%d." . "%02d" x $#number, @number;
};
4 wakaba 1.1
5     ## This is a very, very early version of an HTML parser.
6    
## Tag names whose start tag may end with "/>" without the tokenizer
## raising a parse error (looked up when a "/" is found inside a tag).
my $permitted_slash_tag_name = {
  map { $_ => 1 } qw(
    base link meta hr br img embed param area col input
  )
};
20    
## Element names that make up the "special" category.  Each name maps
## to 1 so that membership can be tested with a plain hash lookup.
my $special_category = {
  map { $_ => 1 } qw(
    address area base basefont bgsound blockquote body br center col
    colgroup dd dir div dl dt embed fieldset form frame frameset
    h1 h2 h3 h4 h5 h6 head hr iframe image img input isindex li link
    listing menu meta noembed noframes noscript ol optgroup option p
    param plaintext pre script select spacer style tbody textarea
    tfoot thead title tr ul wbr
  )
};
## Element names that make up the "scoping" category.  Each name maps
## to 1 so that membership can be tested with a plain hash lookup.
my $scoping_category = {
  map { $_ => 1 } qw(button caption html marquee object table td th)
};
## Element names that make up the "formatting" category.  Each name maps
## to 1 so that membership can be tested with a plain hash lookup.
## (The list used to contain the misspelled key "strile", which left the
## "strike" element outside of this category.)
my $formatting_category = {
  a => 1, b => 1, big => 1, em => 1, font => 1, i => 1, nobr => 1,
  s => 1, small => 1, strike => 1, strong => 1, tt => 1, u => 1,
};
41     # $phrasing_category: all other elements
42    
## Creates a new, empty parser object blessed into the given class.
##
## Two replaceable callbacks are installed on the object:
##   ->{set_next_input_character}
##       Stores the next input character in ->{next_input_character}.
##       The default has no input source and always stores -1, the
##       value the tokenizer's data state reports as end-of-file.
##       (Presumably invoked by the "!!!next-input-character" macro;
##       the macro definition is not part of this file.)
##   ->{parse_error}
##       The default does nothing.  (Presumably invoked by the
##       "!!!parse-error" macro.)
##
## Returns the new object.
sub new ($) {
  my $class = shift;
  my $self = bless {}, $class;

  ## The default callback is stored inside the very object it updates.
  ## Closing over $self itself would create a reference cycle
  ## (object -> callback -> object) and the object would never be
  ## freed, so the callback holds a weakened copy of the reference.
  require Scalar::Util;
  my $weak_self = $self;
  Scalar::Util::weaken ($weak_self);
  $self->{set_next_input_character} = sub {
    $weak_self->{next_input_character} = -1;
  };
  $self->{parse_error} = sub {
    #
  };
  return $self;
} # new
54    
55     ## Implementations MUST act as if they used the state machine described in the spec.
56    
## Resets the tokenizer: "data" state, PCDATA content model flag, no
## token or attribute under construction, empty ->{char} list and empty
## queue of pending tokens (->{token}, which _get_next_token drains
## first).  The first input character is requested in between through
## the next-input-character macro.
sub _initialize_tokenizer ($) {
  my $self = shift;

  $self->{state} = 'data'; # MUST
  $self->{content_model_flag} = 'PCDATA'; # be

  ## Nothing is under construction and nothing has been emitted yet.
  ## (->{current_token}: start tag, end tag, comment, or DOCTYPE.)
  for my $slot (qw(current_token current_attribute
                   last_emitted_start_tag_name
                   last_attribute_value_state)) {
    undef $self->{$slot};
  }

  $self->{char} = [];
  # $self->{next_input_character}
  !!!next-input-character;
  $self->{token} = [];
} # _initialize_tokenizer
70    
71     ## A token has:
72     ## ->{type} eq 'DOCTYPE', 'start tag', 'end tag', 'comment',
73     ## 'character', or 'end-of-file'
74     ## ->{name} (DOCTYPE, start tag (tagname), end tag (tagname))
75     ## ISSUE: the spec need s/tagname/tag name/
76     ## ->{error} == 1 or 0 (DOCTYPE)
77     ## ->{attributes} isa HASH (start tag, end tag)
78     ## ->{data} (comment, character)
79    
80     ## Macros
81     ## Macros MUST be preceded by three EXCLAMATION MARKs.
82     ## emit ($token)
83     ## Emits the specified token.
84    
85     ## An emitted token MUST immediately be handled by the tree construction stage.
86    
87     ## Before each step, UA MAY check to see if either one of the scripts in
88     ## "list of scripts that will execute as soon as possible" or the first
89     ## script in the "list of scripts that will execute asynchronously",
90     ## has completed loading. If one has, then it MUST be executed
91     ## and removed from the list.
92    
93     sub _get_next_token ($) {
94     my $self = shift;
95     if (@{$self->{token}}) {
96     return shift @{$self->{token}};
97     }
98    
99     A: {
100     if ($self->{state} eq 'data') {
101     if ($self->{next_input_character} == 0x0026) { # &
102     if ($self->{content_model_flag} eq 'PCDATA' or
103     $self->{content_model_flag} eq 'RCDATA') {
104     $self->{state} = 'entity data';
105     !!!next-input-character;
106     redo A;
107     } else {
108     #
109     }
110     } elsif ($self->{next_input_character} == 0x003C) { # <
111     if ($self->{content_model_flag} ne 'PLAINTEXT') {
112     $self->{state} = 'tag open';
113     !!!next-input-character;
114     redo A;
115     } else {
116     #
117     }
118     } elsif ($self->{next_input_character} == -1) {
119     !!!emit ({type => 'end-of-file'});
120     last A; ## TODO: ok?
121     }
122     # Anything else
123     my $token = {type => 'character',
124     data => chr $self->{next_input_character}};
125     ## Stay in the data state
126     !!!next-input-character;
127    
128     !!!emit ($token);
129    
130     redo A;
131     } elsif ($self->{state} eq 'entity data') {
132     ## (cannot happen in CDATA state)
133    
134     my $token = $self->_tokenize_attempt_to_consume_an_entity;
135    
136     $self->{state} = 'data';
137     # next-input-character is already done
138    
139     unless (defined $token) {
140     !!!emit ({type => 'character', data => '&'});
141     } else {
142     !!!emit ($token);
143     }
144    
145     redo A;
146     } elsif ($self->{state} eq 'tag open') {
147     if ($self->{content_model_flag} eq 'RCDATA' or
148     $self->{content_model_flag} eq 'CDATA') {
149     if ($self->{next_input_character} == 0x002F) { # /
150     !!!next-input-character;
151     $self->{state} = 'close tag open';
152     redo A;
153     } else {
154     ## reconsume
155     $self->{state} = 'data';
156    
157     !!!emit (type => 'character', data => {'/'});
158    
159     redo A;
160     }
161     } elsif ($self->{content_model_flag} eq 'PCDATA') {
162     if ($self->{next_input_character} == 0x0021) { # !
163     $self->{state} = 'markup declaration open';
164     !!!next-input-character;
165     redo A;
166     } elsif ($self->{next_input_character} == 0x002F) { # /
167     $self->{state} = 'close tag open';
168     !!!next-input-character;
169     redo A;
170     } elsif (0x0041 <= $self->{next_input_character} and
171     $self->{next_input_character} <= 0x005A) { # A..Z
172     $self->{current_token}
173     = {type => 'start tag',
174     tag_name => chr ($self->{next_input_character} + 0x0020)};
175     $self->{state} = 'tag name';
176     !!!next-input-character;
177     redo A;
178     } elsif (0x0061 <= $self->{next_input_character} and
179     $self->{next_input_character} <= 0x007A) { # a..z
180     $self->{current_token} = {type => 'start tag',
181     tag_name => chr ($self->{next_input_character})};
182     $self->{state} = 'tag name';
183     !!!next-input-character;
184     redo A;
185     } elsif ($self->{next_input_character} == 0x003E) { # >
186     !!!parse-error;
187     $self->{state} = 'data';
188     !!!next-input-character;
189    
190 wakaba 1.3 !!!emit ({type => 'character', data => '<>'});
191 wakaba 1.1
192     redo A;
193     } elsif ($self->{next_input_character} == 0x003F) { # ?
194     !!!parse-error;
195     $self->{state} = 'bogus comment';
196     ## $self->{next_input_character} is intentionally left as is
197     redo A;
198     } else {
199     !!!parse-error;
200     $self->{state} = 'data';
201     ## reconsume
202    
203     !!!emit ({type => 'character', data => '<'});
204    
205     redo A;
206     }
207     } else {
208     die "$0: $self->{content_model_flag}: Unknown content model flag";
209     }
210     } elsif ($self->{state} eq 'close tag open') {
211     if ($self->{content_model_flag} eq 'RCDATA' or
212     $self->{content_model_flag} eq 'CDATA') {
213     my @next_char;
214     TAGNAME: for (my $i = 0; $i < length $self->{last_emitted_start_tag_name}; $i++) {
215     push @next_char, $self->{next_input_character};
216     my $c = ord substr ($self->{last_emitted_start_tag_name}, $i, 1);
217     my $C = 0x0061 <= $c && $c <= 0x007A ? $c - 0x0020 : $c;
218     if ($self->{next_input_character} == $c or $self->{next_input_character} == $C) {
219     !!!next-input-character;
220     next TAGNAME;
221     } else {
222     !!!parse-error;
223     $self->{next_input_character} = shift @next_char; # reconsume
224     !!!back-next-input-character (@next_char);
225     $self->{state} = 'data';
226    
227     !!!emit ({type => 'character', data => '</'});
228    
229     redo A;
230     }
231     }
232 wakaba 1.2 push @next_char, $self->{next_input_character};
233 wakaba 1.1
234 wakaba 1.2 unless ($self->{next_input_character} == 0x0009 or # HT
235     $self->{next_input_character} == 0x000A or # LF
236     $self->{next_input_character} == 0x000B or # VT
237     $self->{next_input_character} == 0x000C or # FF
238     $self->{next_input_character} == 0x0020 or # SP
239     $self->{next_input_character} == 0x003E or # >
240     $self->{next_input_character} == 0x002F or # /
241     $self->{next_input_character} == 0x003C or # <
242 wakaba 1.1 $self->{next_input_character} == -1) {
243     !!!parse-error;
244     $self->{next_input_character} = shift @next_char; # reconsume
245     !!!back-next-input-character (@next_char);
246     $self->{state} = 'data';
247    
248     !!!emit ({type => 'character', data => '</'});
249    
250     redo A;
251     } else {
252     $self->{next_input_character} = shift @next_char;
253     !!!back-next-input-character (@next_char);
254     # and consume...
255     }
256     }
257    
258     if (0x0041 <= $self->{next_input_character} and
259     $self->{next_input_character} <= 0x005A) { # A..Z
260     $self->{current_token} = {type => 'end tag',
261     tag_name => chr ($self->{next_input_character} + 0x0020)};
262     $self->{state} = 'tag name';
263     !!!next-input-character;
264     redo A;
265     } elsif (0x0061 <= $self->{next_input_character} and
266     $self->{next_input_character} <= 0x007A) { # a..z
267     $self->{current_token} = {type => 'end tag',
268     tag_name => chr ($self->{next_input_character})};
269     $self->{state} = 'tag name';
270     !!!next-input-character;
271     redo A;
272     } elsif ($self->{next_input_character} == 0x003E) { # >
273     !!!parse-error;
274     $self->{state} = 'data';
275     !!!next-input-character;
276     redo A;
277     } elsif ($self->{next_input_character} == -1) {
278     !!!parse-error;
279     $self->{state} = 'data';
280     # reconsume
281    
282     !!!emit ({type => 'character', data => '</'});
283    
284     redo A;
285     } else {
286     !!!parse-error;
287     $self->{state} = 'bogus comment';
288     ## $self->{next_input_character} is intentionally left as is
289     redo A;
290     }
291     } elsif ($self->{state} eq 'tag name') {
292     if ($self->{next_input_character} == 0x0009 or # HT
293     $self->{next_input_character} == 0x000A or # LF
294     $self->{next_input_character} == 0x000B or # VT
295     $self->{next_input_character} == 0x000C or # FF
296     $self->{next_input_character} == 0x0020) { # SP
297     $self->{state} = 'before attribute name';
298     !!!next-input-character;
299     redo A;
300     } elsif ($self->{next_input_character} == 0x003E) { # >
301     if ($self->{current_token}->{type} eq 'start tag') {
302     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
303     } elsif ($self->{current_token}->{type} eq 'end tag') {
304     $self->{content_model_flag} = 'PCDATA'; # MUST
305 wakaba 1.2 if ($self->{current_token}->{attributes}) {
306 wakaba 1.1 !!!parse-error;
307     }
308     } else {
309     die "$0: $self->{current_token}->{type}: Unknown token type";
310     }
311     $self->{state} = 'data';
312     !!!next-input-character;
313    
314     !!!emit ($self->{current_token}); # start tag or end tag
315     undef $self->{current_token};
316    
317     redo A;
318     } elsif (0x0041 <= $self->{next_input_character} and
319     $self->{next_input_character} <= 0x005A) { # A..Z
320     $self->{current_token}->{tag_name} .= chr ($self->{next_input_character} + 0x0020);
321     # start tag or end tag
322     ## Stay in this state
323     !!!next-input-character;
324     redo A;
325     } elsif ($self->{next_input_character} == 0x003C or # <
326     $self->{next_input_character} == -1) {
327     !!!parse-error;
328     if ($self->{current_token}->{type} eq 'start tag') {
329     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
330     } elsif ($self->{current_token}->{type} eq 'end tag') {
331     $self->{content_model_flag} = 'PCDATA'; # MUST
332 wakaba 1.2 if ($self->{current_token}->{attributes}) {
333 wakaba 1.1 !!!parse-error;
334     }
335     } else {
336     die "$0: $self->{current_token}->{type}: Unknown token type";
337     }
338     $self->{state} = 'data';
339     # reconsume
340    
341     !!!emit ($self->{current_token}); # start tag or end tag
342     undef $self->{current_token};
343    
344     redo A;
345     } elsif ($self->{next_input_character} == 0x002F) { # /
346     !!!next-input-character;
347     if ($self->{next_input_character} == 0x003E and # >
348     $self->{current_token}->{type} eq 'start tag' and
349     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
350     # permitted slash
351     #
352     } else {
353     !!!parse-error;
354     }
355     $self->{state} = 'before attribute name';
356     # next-input-character is already done
357     redo A;
358     } else {
359     $self->{current_token}->{tag_name} .= chr $self->{next_input_character};
360     # start tag or end tag
361     ## Stay in the state
362     !!!next-input-character;
363     redo A;
364     }
365     } elsif ($self->{state} eq 'before attribute name') {
366     if ($self->{next_input_character} == 0x0009 or # HT
367     $self->{next_input_character} == 0x000A or # LF
368     $self->{next_input_character} == 0x000B or # VT
369     $self->{next_input_character} == 0x000C or # FF
370     $self->{next_input_character} == 0x0020) { # SP
371     ## Stay in the state
372     !!!next-input-character;
373     redo A;
374     } elsif ($self->{next_input_character} == 0x003E) { # >
375     if ($self->{current_token}->{type} eq 'start tag') {
376     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
377     } elsif ($self->{current_token}->{type} eq 'end tag') {
378     $self->{content_model_flag} = 'PCDATA'; # MUST
379 wakaba 1.2 if ($self->{current_token}->{attributes}) {
380 wakaba 1.1 !!!parse-error;
381     }
382     } else {
383     die "$0: $self->{current_token}->{type}: Unknown token type";
384     }
385     $self->{state} = 'data';
386     !!!next-input-character;
387    
388     !!!emit ($self->{current_token}); # start tag or end tag
389     undef $self->{current_token};
390    
391     redo A;
392     } elsif (0x0041 <= $self->{next_input_character} and
393     $self->{next_input_character} <= 0x005A) { # A..Z
394     $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
395     value => ''};
396     $self->{state} = 'attribute name';
397     !!!next-input-character;
398     redo A;
399     } elsif ($self->{next_input_character} == 0x002F) { # /
400     !!!next-input-character;
401     if ($self->{next_input_character} == 0x003E and # >
402     $self->{current_token}->{type} eq 'start tag' and
403     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
404     # permitted slash
405     #
406     } else {
407     !!!parse-error;
408     }
409     ## Stay in the state
410     # next-input-character is already done
411     redo A;
412     } elsif ($self->{next_input_character} == 0x003C or # <
413     $self->{next_input_character} == -1) {
414     !!!parse-error;
415     if ($self->{current_token}->{type} eq 'start tag') {
416     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
417     } elsif ($self->{current_token}->{type} eq 'end tag') {
418     $self->{content_model_flag} = 'PCDATA'; # MUST
419 wakaba 1.2 if ($self->{current_token}->{attributes}) {
420 wakaba 1.1 !!!parse-error;
421     }
422     } else {
423     die "$0: $self->{current_token}->{type}: Unknown token type";
424     }
425     $self->{state} = 'data';
426     # reconsume
427    
428     !!!emit ($self->{current_token}); # start tag or end tag
429     undef $self->{current_token};
430    
431     redo A;
432     } else {
433     $self->{current_attribute} = {name => chr ($self->{next_input_character}),
434     value => ''};
435     $self->{state} = 'attribute name';
436     !!!next-input-character;
437     redo A;
438     }
439     } elsif ($self->{state} eq 'attribute name') {
440     my $before_leave = sub {
441 wakaba 1.2 if (exists $self->{current_token}->{attributes} # start tag or end tag
442 wakaba 1.1 ->{$self->{current_attribute}->{name}}) { # MUST
443     !!!parse-error;
444     ## Discard $self->{current_attribute} # MUST
445     } else {
446 wakaba 1.2 $self->{current_token}->{attributes}->{$self->{current_attribute}->{name}}
447 wakaba 1.1 = $self->{current_attribute};
448     }
449     }; # $before_leave
450    
451     if ($self->{next_input_character} == 0x0009 or # HT
452     $self->{next_input_character} == 0x000A or # LF
453     $self->{next_input_character} == 0x000B or # VT
454     $self->{next_input_character} == 0x000C or # FF
455     $self->{next_input_character} == 0x0020) { # SP
456     $before_leave->();
457     $self->{state} = 'after attribute name';
458     !!!next-input-character;
459     redo A;
460     } elsif ($self->{next_input_character} == 0x003D) { # =
461     $before_leave->();
462     $self->{state} = 'before attribute value';
463     !!!next-input-character;
464     redo A;
465     } elsif ($self->{next_input_character} == 0x003E) { # >
466     $before_leave->();
467     if ($self->{current_token}->{type} eq 'start tag') {
468     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
469     } elsif ($self->{current_token}->{type} eq 'end tag') {
470     $self->{content_model_flag} = 'PCDATA'; # MUST
471 wakaba 1.2 if ($self->{current_token}->{attributes}) {
472 wakaba 1.1 !!!parse-error;
473     }
474     } else {
475     die "$0: $self->{current_token}->{type}: Unknown token type";
476     }
477     $self->{state} = 'data';
478     !!!next-input-character;
479    
480     !!!emit ($self->{current_token}); # start tag or end tag
481     undef $self->{current_token};
482    
483     redo A;
484     } elsif (0x0041 <= $self->{next_input_character} and
485     $self->{next_input_character} <= 0x005A) { # A..Z
486     $self->{current_attribute}->{name} .= chr ($self->{next_input_character} + 0x0020);
487     ## Stay in the state
488     !!!next-input-character;
489     redo A;
490     } elsif ($self->{next_input_character} == 0x002F) { # /
491     $before_leave->();
492     !!!next-input-character;
493     if ($self->{next_input_character} == 0x003E and # >
494     $self->{current_token}->{type} eq 'start tag' and
495     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
496     # permitted slash
497     #
498     } else {
499     !!!parse-error;
500     }
501     $self->{state} = 'before attribute name';
502     # next-input-character is already done
503     redo A;
504     } elsif ($self->{next_input_character} == 0x003C or # <
505     $self->{next_input_character} == -1) {
506     !!!parse-error;
507     $before_leave->();
508     if ($self->{current_token}->{type} eq 'start tag') {
509     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
510     } elsif ($self->{current_token}->{type} eq 'end tag') {
511     $self->{content_model_flag} = 'PCDATA'; # MUST
512 wakaba 1.2 if ($self->{current_token}->{attributes}) {
513 wakaba 1.1 !!!parse-error;
514     }
515     } else {
516     die "$0: $self->{current_token}->{type}: Unknown token type";
517     }
518     $self->{state} = 'data';
519     # reconsume
520    
521     !!!emit ($self->{current_token}); # start tag or end tag
522     undef $self->{current_token};
523    
524     redo A;
525     } else {
526     $self->{current_attribute}->{name} .= chr ($self->{next_input_character});
527     ## Stay in the state
528     !!!next-input-character;
529     redo A;
530     }
531     } elsif ($self->{state} eq 'after attribute name') {
532     if ($self->{next_input_character} == 0x0009 or # HT
533     $self->{next_input_character} == 0x000A or # LF
534     $self->{next_input_character} == 0x000B or # VT
535     $self->{next_input_character} == 0x000C or # FF
536     $self->{next_input_character} == 0x0020) { # SP
537     ## Stay in the state
538     !!!next-input-character;
539     redo A;
540     } elsif ($self->{next_input_character} == 0x003D) { # =
541     $self->{state} = 'before attribute value';
542     !!!next-input-character;
543     redo A;
544     } elsif ($self->{next_input_character} == 0x003E) { # >
545     if ($self->{current_token}->{type} eq 'start tag') {
546     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
547     } elsif ($self->{current_token}->{type} eq 'end tag') {
548     $self->{content_model_flag} = 'PCDATA'; # MUST
549 wakaba 1.2 if ($self->{current_token}->{attributes}) {
550 wakaba 1.1 !!!parse-error;
551     }
552     } else {
553     die "$0: $self->{current_token}->{type}: Unknown token type";
554     }
555     $self->{state} = 'data';
556     !!!next-input-character;
557    
558     !!!emit ($self->{current_token}); # start tag or end tag
559     undef $self->{current_token};
560    
561     redo A;
562     } elsif (0x0041 <= $self->{next_input_character} and
563     $self->{next_input_character} <= 0x005A) { # A..Z
564     $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
565     value => ''};
566     $self->{state} = 'attribute name';
567     !!!next-input-character;
568     redo A;
569     } elsif ($self->{next_input_character} == 0x002F) { # /
570     !!!next-input-character;
571     if ($self->{next_input_character} == 0x003E and # >
572     $self->{current_token}->{type} eq 'start tag' and
573     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
574     # permitted slash
575     #
576     } else {
577     !!!parse-error;
578     }
579     $self->{state} = 'before attribute name';
580     # next-input-character is already done
581     redo A;
582     } elsif ($self->{next_input_character} == 0x003C or # <
583     $self->{next_input_character} == -1) {
584     !!!parse-error;
585     if ($self->{current_token}->{type} eq 'start tag') {
586     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
587     } elsif ($self->{current_token}->{type} eq 'end tag') {
588     $self->{content_model_flag} = 'PCDATA'; # MUST
589 wakaba 1.2 if ($self->{current_token}->{attributes}) {
590 wakaba 1.1 !!!parse-error;
591     }
592     } else {
593     die "$0: $self->{current_token}->{type}: Unknown token type";
594     }
595     $self->{state} = 'data';
596     # reconsume
597    
598     !!!emit ($self->{current_token}); # start tag or end tag
599     undef $self->{current_token};
600    
601     redo A;
602     } else {
603     $self->{current_attribute} = {name => chr ($self->{next_input_character}),
604     value => ''};
605     $self->{state} = 'attribute name';
606     !!!next-input-character;
607     redo A;
608     }
609     } elsif ($self->{state} eq 'before attribute value') {
610     if ($self->{next_input_character} == 0x0009 or # HT
611     $self->{next_input_character} == 0x000A or # LF
612     $self->{next_input_character} == 0x000B or # VT
613     $self->{next_input_character} == 0x000C or # FF
614     $self->{next_input_character} == 0x0020) { # SP
615     ## Stay in the state
616     !!!next-input-character;
617     redo A;
618     } elsif ($self->{next_input_character} == 0x0022) { # "
619     $self->{state} = 'attribute value (double-quoted)';
620     !!!next-input-character;
621     redo A;
622     } elsif ($self->{next_input_character} == 0x0026) { # &
623     $self->{state} = 'attribute value (unquoted)';
624     ## reconsume
625     redo A;
626     } elsif ($self->{next_input_character} == 0x0027) { # '
627     $self->{state} = 'attribute value (single-quoted)';
628     !!!next-input-character;
629     redo A;
630     } elsif ($self->{next_input_character} == 0x003E) { # >
631     if ($self->{current_token}->{type} eq 'start tag') {
632     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
633     } elsif ($self->{current_token}->{type} eq 'end tag') {
634     $self->{content_model_flag} = 'PCDATA'; # MUST
635 wakaba 1.2 if ($self->{current_token}->{attributes}) {
636 wakaba 1.1 !!!parse-error;
637     }
638     } else {
639     die "$0: $self->{current_token}->{type}: Unknown token type";
640     }
641     $self->{state} = 'data';
642     !!!next-input-character;
643    
644     !!!emit ($self->{current_token}); # start tag or end tag
645     undef $self->{current_token};
646    
647     redo A;
648     } elsif ($self->{next_input_character} == 0x003C or # <
649     $self->{next_input_character} == -1) {
650     !!!parse-error;
651     if ($self->{current_token}->{type} eq 'start tag') {
652     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
653     } elsif ($self->{current_token}->{type} eq 'end tag') {
654     $self->{content_model_flag} = 'PCDATA'; # MUST
655 wakaba 1.2 if ($self->{current_token}->{attributes}) {
656 wakaba 1.1 !!!parse-error;
657     }
658     } else {
659     die "$0: $self->{current_token}->{type}: Unknown token type";
660     }
661     $self->{state} = 'data';
662     ## reconsume
663    
664     !!!emit ($self->{current_token}); # start tag or end tag
665     undef $self->{current_token};
666    
667     redo A;
668     } else {
669     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
670     $self->{state} = 'attribute value (unquoted)';
671     !!!next-input-character;
672     redo A;
673     }
674     } elsif ($self->{state} eq 'attribute value (double-quoted)') {
675     if ($self->{next_input_character} == 0x0022) { # "
676     $self->{state} = 'before attribute name';
677     !!!next-input-character;
678     redo A;
679     } elsif ($self->{next_input_character} == 0x0026) { # &
680     $self->{last_attribute_value_state} = 'attribute value (double-quoted)';
681     $self->{state} = 'entity in attribute value';
682     !!!next-input-character;
683     redo A;
684     } elsif ($self->{next_input_character} == -1) {
685     !!!parse-error;
686     if ($self->{current_token}->{type} eq 'start tag') {
687     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
688     } elsif ($self->{current_token}->{type} eq 'end tag') {
689     $self->{content_model_flag} = 'PCDATA'; # MUST
690 wakaba 1.2 if ($self->{current_token}->{attributes}) {
691 wakaba 1.1 !!!parse-error;
692     }
693     } else {
694     die "$0: $self->{current_token}->{type}: Unknown token type";
695     }
696     $self->{state} = 'data';
697     ## reconsume
698    
699     !!!emit ($self->{current_token}); # start tag or end tag
700     undef $self->{current_token};
701    
702     redo A;
703     } else {
704     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
705     ## Stay in the state
706     !!!next-input-character;
707     redo A;
708     }
709     } elsif ($self->{state} eq 'attribute value (single-quoted)') {
710     if ($self->{next_input_character} == 0x0027) { # '
711     $self->{state} = 'before attribute name';
712     !!!next-input-character;
713     redo A;
714     } elsif ($self->{next_input_character} == 0x0026) { # &
715     $self->{last_attribute_value_state} = 'attribute value (single-quoted)';
716     $self->{state} = 'entity in attribute value';
717     !!!next-input-character;
718     redo A;
719     } elsif ($self->{next_input_character} == -1) {
720     !!!parse-error;
721     if ($self->{current_token}->{type} eq 'start tag') {
722     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
723     } elsif ($self->{current_token}->{type} eq 'end tag') {
724     $self->{content_model_flag} = 'PCDATA'; # MUST
725 wakaba 1.2 if ($self->{current_token}->{attributes}) {
726 wakaba 1.1 !!!parse-error;
727     }
728     } else {
729     die "$0: $self->{current_token}->{type}: Unknown token type";
730     }
731     $self->{state} = 'data';
732     ## reconsume
733    
734     !!!emit ($self->{current_token}); # start tag or end tag
735     undef $self->{current_token};
736    
737     redo A;
738     } else {
739     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
740     ## Stay in the state
741     !!!next-input-character;
742     redo A;
743     }
744     } elsif ($self->{state} eq 'attribute value (unquoted)') {
745     if ($self->{next_input_character} == 0x0009 or # HT
746     $self->{next_input_character} == 0x000A or # LF
747     $self->{next_input_character} == 0x000B or # HT
748     $self->{next_input_character} == 0x000C or # FF
749     $self->{next_input_character} == 0x0020) { # SP
750     $self->{state} = 'before attribute name';
751     !!!next-input-character;
752     redo A;
753     } elsif ($self->{next_input_character} == 0x0026) { # &
754     $self->{last_attribute_value_state} = 'attribute value (unquoted)';
755     $self->{state} = 'entity in attribute value';
756     !!!next-input-character;
757     redo A;
758     } elsif ($self->{next_input_character} == 0x003E) { # >
759     if ($self->{current_token}->{type} eq 'start tag') {
760     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
761     } elsif ($self->{current_token}->{type} eq 'end tag') {
762     $self->{content_model_flag} = 'PCDATA'; # MUST
763 wakaba 1.2 if ($self->{current_token}->{attributes}) {
764 wakaba 1.1 !!!parse-error;
765     }
766     } else {
767     die "$0: $self->{current_token}->{type}: Unknown token type";
768     }
769     $self->{state} = 'data';
770     !!!next-input-character;
771    
772     !!!emit ($self->{current_token}); # start tag or end tag
773     undef $self->{current_token};
774    
775     redo A;
776     } elsif ($self->{next_input_character} == 0x003C or # <
777     $self->{next_input_character} == -1) {
778     !!!parse-error;
779     if ($self->{current_token}->{type} eq 'start tag') {
780     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
781     } elsif ($self->{current_token}->{type} eq 'end tag') {
782     $self->{content_model_flag} = 'PCDATA'; # MUST
783 wakaba 1.2 if ($self->{current_token}->{attributes}) {
784 wakaba 1.1 !!!parse-error;
785     }
786     } else {
787     die "$0: $self->{current_token}->{type}: Unknown token type";
788     }
789     $self->{state} = 'data';
790     ## reconsume
791    
792     !!!emit ($self->{current_token}); # start tag or end tag
793     undef $self->{current_token};
794    
795     redo A;
796     } else {
797     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
798     ## Stay in the state
799     !!!next-input-character;
800     redo A;
801     }
802     } elsif ($self->{state} eq 'entity in attribute value') {
803     my $token = $self->_tokenize_attempt_to_consume_an_entity;
804    
805     unless (defined $token) {
806     $self->{current_attribute}->{value} .= '&';
807     } else {
808     $self->{current_attribute}->{value} .= $token->{data};
809     ## ISSUE: spec says "append the returned character token to the current attribute's value"
810     }
811    
812     $self->{state} = $self->{last_attribute_value_state};
813     # next-input-character is already done
814     redo A;
815     } elsif ($self->{state} eq 'bogus comment') {
816     ## (only happen if PCDATA state)
817    
818     my $token = {type => 'comment', data => ''};
819    
820     BC: {
821     if ($self->{next_input_character} == 0x003E) { # >
822     $self->{state} = 'data';
823     !!!next-input-character;
824    
825     !!!emit ($token);
826    
827     redo A;
828     } elsif ($self->{next_input_character} == -1) {
829     $self->{state} = 'data';
830     ## reconsume
831    
832     !!!emit ($token);
833    
834     redo A;
835     } else {
836     $token->{data} .= chr ($self->{next_input_character});
837     !!!next-input-character;
838     redo BC;
839     }
840     } # BC
841     } elsif ($self->{state} eq 'markup declaration open') {
842     ## (only happen if PCDATA state)
843    
844     my @next_char;
845     push @next_char, $self->{next_input_character};
846    
847     if ($self->{next_input_character} == 0x002D) { # -
848     !!!next-input-character;
849     push @next_char, $self->{next_input_character};
850     if ($self->{next_input_character} == 0x002D) { # -
851     $self->{current_token} = {type => 'comment', data => ''};
852     $self->{state} = 'comment';
853     !!!next-input-character;
854     redo A;
855     }
856     } elsif ($self->{next_input_character} == 0x0044 or # D
857     $self->{next_input_character} == 0x0064) { # d
858     !!!next-input-character;
859     push @next_char, $self->{next_input_character};
860     if ($self->{next_input_character} == 0x004F or # O
861     $self->{next_input_character} == 0x006F) { # o
862     !!!next-input-character;
863     push @next_char, $self->{next_input_character};
864     if ($self->{next_input_character} == 0x0043 or # C
865     $self->{next_input_character} == 0x0063) { # c
866     !!!next-input-character;
867     push @next_char, $self->{next_input_character};
868     if ($self->{next_input_character} == 0x0054 or # T
869     $self->{next_input_character} == 0x0074) { # t
870     !!!next-input-character;
871     push @next_char, $self->{next_input_character};
872     if ($self->{next_input_character} == 0x0059 or # Y
873     $self->{next_input_character} == 0x0079) { # y
874     !!!next-input-character;
875     push @next_char, $self->{next_input_character};
876     if ($self->{next_input_character} == 0x0050 or # P
877     $self->{next_input_character} == 0x0070) { # p
878     !!!next-input-character;
879     push @next_char, $self->{next_input_character};
880     if ($self->{next_input_character} == 0x0045 or # E
881     $self->{next_input_character} == 0x0065) { # e
882     ## ISSUE: What a stupid code this is!
883     $self->{state} = 'DOCTYPE';
884     !!!next-input-character;
885     redo A;
886     }
887     }
888     }
889     }
890     }
891     }
892     }
893    
894     !!!parse-error;
895     $self->{next_input_character} = shift @next_char;
896     !!!back-next-input-character (@next_char);
897     $self->{state} = 'bogus comment';
898     redo A;
899    
900     ## ISSUE: typos in spec: chacacters, is is a parse error
901     ## ISSUE: spec is somewhat unclear on "is the first character that will be in the comment"; what is "that will be in the comment" is what the algorithm defines, isn't it?
902     } elsif ($self->{state} eq 'comment') {
903     if ($self->{next_input_character} == 0x002D) { # -
904     $self->{state} = 'comment dash';
905     !!!next-input-character;
906     redo A;
907     } elsif ($self->{next_input_character} == -1) {
908     !!!parse-error;
909     $self->{state} = 'data';
910     ## reconsume
911    
912     !!!emit ($self->{current_token}); # comment
913     undef $self->{current_token};
914    
915     redo A;
916     } else {
917     $self->{current_token}->{data} .= chr ($self->{next_input_character}); # comment
918     ## Stay in the state
919     !!!next-input-character;
920     redo A;
921     }
922     } elsif ($self->{state} eq 'comment dash') {
923     if ($self->{next_input_character} == 0x002D) { # -
924     $self->{state} = 'comment end';
925     !!!next-input-character;
926     redo A;
927     } elsif ($self->{next_input_character} == -1) {
928     !!!parse-error;
929     $self->{state} = 'data';
930     ## reconsume
931    
932     !!!emit ($self->{current_token}); # comment
933     undef $self->{current_token};
934    
935     redo A;
936     } else {
937     $self->{current_token}->{data} .= '-' . chr ($self->{next_input_character}); # comment
938     $self->{state} = 'comment';
939     !!!next-input-character;
940     redo A;
941     }
942     } elsif ($self->{state} eq 'comment end') {
943     if ($self->{next_input_character} == 0x003E) { # >
944     $self->{state} = 'data';
945     !!!next-input-character;
946    
947     !!!emit ($self->{current_token}); # comment
948     undef $self->{current_token};
949    
950     redo A;
951     } elsif ($self->{next_input_character} == 0x002D) { # -
952     !!!parse-error;
953     $self->{current_token}->{data} .= '-'; # comment
954     ## Stay in the state
955     !!!next-input-character;
956     redo A;
957     } elsif ($self->{next_input_character} == -1) {
958     !!!parse-error;
959     $self->{state} = 'data';
960     ## reconsume
961    
962     !!!emit ($self->{current_token}); # comment
963     undef $self->{current_token};
964    
965     redo A;
966     } else {
967     !!!parse-error;
968     $self->{current_token}->{data} .= '--' . chr ($self->{next_input_character}); # comment
969     $self->{state} = 'comment';
970     !!!next-input-character;
971     redo A;
972     }
973     } elsif ($self->{state} eq 'DOCTYPE') {
974     if ($self->{next_input_character} == 0x0009 or # HT
975     $self->{next_input_character} == 0x000A or # LF
976     $self->{next_input_character} == 0x000B or # VT
977     $self->{next_input_character} == 0x000C or # FF
978     $self->{next_input_character} == 0x0020) { # SP
979     $self->{state} = 'before DOCTYPE name';
980     !!!next-input-character;
981     redo A;
982     } else {
983     !!!parse-error;
984     $self->{state} = 'before DOCTYPE name';
985     ## reconsume
986     redo A;
987     }
988     } elsif ($self->{state} eq 'before DOCTYPE name') {
989     if ($self->{next_input_character} == 0x0009 or # HT
990     $self->{next_input_character} == 0x000A or # LF
991     $self->{next_input_character} == 0x000B or # VT
992     $self->{next_input_character} == 0x000C or # FF
993     $self->{next_input_character} == 0x0020) { # SP
994     ## Stay in the state
995     !!!next-input-character;
996     redo A;
997     } elsif (0x0061 <= $self->{next_input_character} and
998     $self->{next_input_character} <= 0x007A) { # a..z
999     $self->{current_token} = {type => 'DOCTYPE',
1000     name => chr ($self->{next_input_character} - 0x0020),
1001     error => 1};
1002     $self->{state} = 'DOCTYPE name';
1003     !!!next-input-character;
1004     redo A;
1005     } elsif ($self->{next_input_character} == 0x003E) { # >
1006     !!!parse-error;
1007     $self->{state} = 'data';
1008     !!!next-input-character;
1009    
1010     !!!emit ({type => 'DOCTYPE', name => '', error => 1});
1011    
1012     redo A;
1013     } elsif ($self->{next_input_character} == -1) {
1014     !!!parse-error;
1015     $self->{state} = 'data';
1016     ## reconsume
1017    
1018     !!!emit ({type => 'DOCTYPE', name => '', error => 1});
1019    
1020     redo A;
1021     } else {
1022     $self->{current_token} = {type => 'DOCTYPE',
1023     name => chr ($self->{next_input_character}),
1024     error => 1};
1025     $self->{state} = 'DOCTYPE name';
1026     !!!next-input-character;
1027     redo A;
1028     }
1029     } elsif ($self->{state} eq 'DOCTYPE name') {
1030     if ($self->{next_input_character} == 0x0009 or # HT
1031     $self->{next_input_character} == 0x000A or # LF
1032     $self->{next_input_character} == 0x000B or # VT
1033     $self->{next_input_character} == 0x000C or # FF
1034     $self->{next_input_character} == 0x0020) { # SP
1035     $self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML'); # DOCTYPE
1036     $self->{state} = 'after DOCTYPE name';
1037     !!!next-input-character;
1038     redo A;
1039     } elsif ($self->{next_input_character} == 0x003E) { # >
1040     $self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML'); # DOCTYPE
1041     $self->{state} = 'data';
1042     !!!next-input-character;
1043    
1044     !!!emit ($self->{current_token}); # DOCTYPE
1045     undef $self->{current_token};
1046    
1047     redo A;
1048     } elsif (0x0061 <= $self->{next_input_character} and
1049     $self->{next_input_character} <= 0x007A) { # a..z
1050     $self->{current_token}->{name} .= chr ($self->{next_input_character} - 0x0020); # DOCTYPE
1051     #$self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML');
1052     ## Stay in the state
1053     !!!next-input-character;
1054     redo A;
1055     } elsif ($self->{next_input_character} == -1) {
1056     !!!parse-error;
1057     $self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML'); # DOCTYPE
1058     $self->{state} = 'data';
1059     ## reconsume
1060    
1061     !!!emit ($self->{current_token});
1062     undef $self->{current_token};
1063    
1064     redo A;
1065     } else {
1066 wakaba 1.3 $self->{current_token}->{name}
1067     .= chr ($self->{next_input_character}); # DOCTYPE
1068 wakaba 1.1 #$self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML');
1069     ## Stay in the state
1070     !!!next-input-character;
1071     redo A;
1072     }
1073     } elsif ($self->{state} eq 'after DOCTYPE name') {
1074     if ($self->{next_input_character} == 0x0009 or # HT
1075     $self->{next_input_character} == 0x000A or # LF
1076     $self->{next_input_character} == 0x000B or # VT
1077     $self->{next_input_character} == 0x000C or # FF
1078     $self->{next_input_character} == 0x0020) { # SP
1079     ## Stay in the state
1080     !!!next-input-character;
1081     redo A;
1082     } elsif ($self->{next_input_character} == 0x003E) { # >
1083     $self->{state} = 'data';
1084     !!!next-input-character;
1085    
1086     !!!emit ($self->{current_token}); # DOCTYPE
1087     undef $self->{current_token};
1088    
1089     redo A;
1090     } elsif ($self->{next_input_character} == -1) {
1091     !!!parse-error;
1092     $self->{state} = 'data';
1093     ## reconsume
1094    
1095     !!!emit ($self->{current_token}); # DOCTYPE
1096     undef $self->{current_token};
1097    
1098     redo A;
1099     } else {
1100     !!!parse-error;
1101     $self->{current_token}->{error} = 1; # DOCTYPE
1102     $self->{state} = 'bogus DOCTYPE';
1103     !!!next-input-character;
1104     redo A;
1105     }
1106     } elsif ($self->{state} eq 'bogus DOCTYPE') {
1107     if ($self->{next_input_character} == 0x003E) { # >
1108     $self->{state} = 'data';
1109     !!!next-input-character;
1110    
1111     !!!emit ($self->{current_token}); # DOCTYPE
1112     undef $self->{current_token};
1113    
1114     redo A;
1115     } elsif ($self->{next_input_character} == -1) {
1116     !!!parse-error;
1117     $self->{state} = 'data';
1118     ## reconsume
1119    
1120     !!!emit ($self->{current_token}); # DOCTYPE
1121     undef $self->{current_token};
1122    
1123     redo A;
1124     } else {
1125     ## Stay in the state
1126     !!!next-input-character;
1127     redo A;
1128     }
1129     } else {
1130     die "$0: $self->{state}: Unknown state";
1131     }
1132     } # A
1133    
1134     die "$0: _get_next_token: unexpected case";
1135     } # _get_next_token
1136    
## Attempt to consume a character reference at the tokenizer's current
## position.
##
## Reads |$self->{next_input_character}| and advances the input with the
## |!!!next-input-character| macro.
##
## Returns a token of the form |{type => 'character', data => $char}|
## when a numeric character reference is recognized (|#| followed by
## decimal digits, or |#x| / |#X| followed by hexadecimal digits), and
## |undef| otherwise.
##
##   - If no digit follows |#| (or |#x|), a parse error is reported,
##     every character read so far is pushed back in reading order and
##     the next input character is reset to |#|; nothing is returned.
##   - A missing terminating |;| is a parse error, but the token is
##     still returned.
##   - Code point 0 and code points above U+10FFFF are replaced by
##     U+FFFD REPLACEMENT CHARACTER.
sub _tokenize_attempt_to_consume_an_entity ($) {
  my $self = shift;
  my $r;

  if ($self->{next_input_character} == 0x0023) { # #
    !!!next-input-character;
    my $num;
    if ($self->{next_input_character} == 0x0078 or # x
        $self->{next_input_character} == 0x0058) { # X
      X: {
        ## On the first pass this is the |x| / |X| itself; on later
        ## passes it is the digit that has just been accumulated.
        my $x_char = $self->{next_input_character};
        !!!next-input-character;
        if (0x0030 <= $self->{next_input_character} and
            $self->{next_input_character} <= 0x0039) { # 0..9
          $num ||= 0;
          $num *= 0x10;
          $num += $self->{next_input_character} - 0x0030;
          redo X;
        } elsif (0x0061 <= $self->{next_input_character} and
                 $self->{next_input_character} <= 0x0066) { # a..f
          ## ISSUE: the spec says U+0078, which is apparently incorrect
          $num ||= 0;
          $num *= 0x10;
          $num += $self->{next_input_character} - 0x0060 + 9;
          redo X;
        } elsif (0x0041 <= $self->{next_input_character} and
                 $self->{next_input_character} <= 0x0046) { # A..F
          ## ISSUE: the spec says U+0058, which is apparently incorrect
          $num ||= 0;
          $num *= 0x10;
          $num += $self->{next_input_character} - 0x0040 + 9;
          redo X;
        } elsif (not defined $num) { # no hexadecimal digit
          !!!parse-error;
          ## Push back both the |x| and the character following it
          ## before overwriting the latter; otherwise the character
          ## after |x| would be silently dropped from the input.
          !!!back-next-input-character ($x_char, $self->{next_input_character});
          $self->{next_input_character} = 0x0023; # #
          last X; ## nothing is returned
        } elsif ($self->{next_input_character} == 0x003B) { # ;
          !!!next-input-character;
        } else {
          !!!parse-error;
        }

        ## TODO: check the definition for |a valid Unicode character|.
        if ($num > 1114111 or $num == 0) {
          $num = 0xFFFD; # REPLACEMENT CHARACTER
          ## ISSUE: Why this is not an error?
        }

        $r = {type => 'character', data => chr $num};
      } # X
    } else {
      D: {
        if (0x0030 <= $self->{next_input_character} and
            $self->{next_input_character} <= 0x0039) { # 0..9
          $num ||= 0;
          $num *= 10;
          $num += $self->{next_input_character} - 0x0030;
          !!!next-input-character;
          redo D;
        } elsif (not defined $num) { # no decimal digit
          ## Only a |#| with no digit at all is a failure.  Once at
          ## least one digit has been read, the first non-digit merely
          ## terminates the number and control falls through below.
          !!!parse-error;
          !!!back-next-input-character ($self->{next_input_character});
          $self->{next_input_character} = 0x0023; # #
          last D; ## nothing is returned
        }

        if ($self->{next_input_character} == 0x003B) { # ;
          !!!next-input-character;
        } else {
          !!!parse-error;
        }

        ## TODO: check the definition for |a valid Unicode character|.
        if ($num > 1114111 or $num == 0) {
          $num = 0xFFFD; # REPLACEMENT CHARACTER
          ## ISSUE: Why this is not an error?
        }

        $r = {type => 'character', data => chr $num};
      } # D
    }
    ## NOTE(review): the macro below sits inside the |#| branch and the
    ## brace on the same line closes that branch; presumably named
    ## entities are to be handled by the macro -- confirm against the
    ## macro definition before moving it.
    !!!consume-entity}
  return $r;
} # _tokenize_attempt_to_consume_an_entity
1221    
## Set up the state needed by the tree construction stage: load the
## NanoDOM implementation, create a fresh document object, store it in
## |$self->{document}| and switch its strict error checking off.
sub _initialize_tree_constructor ($) {
  my ($self) = @_;

  require What::NanoDOM;

  my $doc = What::NanoDOM::Document->new;
  $self->{document} = $doc;

  ## TODO: Turn mutation events off # MUST
  ## TODO: Turn loose Document option (manakai extension) on
  $doc->strict_error_checking (0);
} # _initialize_tree_constructor
1230    
## Finish the tree construction stage by switching strict error
## checking back on for the document held in |$self->{document}|.
sub _terminate_tree_constructor ($) {
  my ($self) = @_;
  my $doc = $self->{document};

  ## TODO: Turn mutation events on
  $doc->strict_error_checking (1);
} # _terminate_tree_constructor
1236    
1237     ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
1238    
1239     sub _construct_tree ($) {
1240     my ($self) = @_;
1241    
1242     ## When an interactive UA render the $self->{document} available
1243     ## to the user, or when it begin accepting user input, are
1244     ## not defined.
1245    
1246     ## Append a character: collect it and all subsequent consecutive
1247     ## characters and insert one Text node whose data is concatenation
1248     ## of all those characters. # MUST
1249    
1250     my $token;
1251     !!!next-token;
1252    
1253     my $phase = 'initial'; # MUST
1254    
1255     my $open_elements = [];
1256     my $active_formatting_elements = [];
1257     my $head_element;
1258     my $form_element;
1259     my $insertion_mode = 'before head';
1260    
## Reconstruct the active formatting elements.
##
## Walks back from the end of |@$active_formatting_elements| to the
## entry after the nearest marker or the nearest entry that is still in
## |@$open_elements|.  From there to the end of the list, it clones each
## entry's node, appends the clone to the current node, pushes it onto
## |@$open_elements| and replaces the list entry with the new one.
##
## Entries are |[node, tag_name]| array references; a marker is an entry
## whose first member is the string |#marker|.
##
## |$i| is kept as a non-negative index throughout, so that the
## termination test against |$#$active_formatting_elements| is
## meaningful.  (With a negative index the test never succeeded on the
## last entry, and the loop wrapped around to index 0.)
my $reconstruct_active_formatting_elements = sub { # MUST
  ## Step 1
  return unless @$active_formatting_elements;

  ## Step 3
  my $i = $#$active_formatting_elements;
  my $entry = $active_formatting_elements->[$i];

  ## Step 2
  return if $entry->[0] eq '#marker';
  for (@$open_elements) {
    if ($entry->[0] eq $_->[0]) {
      return;
    }
  }

  S4: {
    ## Step 4: no entry before |$entry| in the list
    last S4 if $i == 0;

    ## Step 5
    $i--;
    $entry = $active_formatting_elements->[$i];

    ## Step 6
    if ($entry->[0] eq '#marker') {
      #
    } else {
      my $in_open_elements;
      OE: for (@$open_elements) {
        if ($entry->[0] eq $_->[0]) {
          $in_open_elements = 1;
          last OE;
        }
      }
      if ($in_open_elements) {
        #
      } else {
        redo S4;
      }
    }

    ## Step 7
    $i++;
    $entry = $active_formatting_elements->[$i];
  } # S4

  S7: {
    ## Step 8
    my $clone = $entry->[0]->clone_node (0);

    ## Step 9
    $open_elements->[-1]->[0]->append_child ($clone);
    push @$open_elements, [$clone, $entry->[1]];

    ## Step 10
    $active_formatting_elements->[$i] = $open_elements->[-1];

    ## Step 11: stop once the last entry of the list has been replaced
    unless ($i == $#$active_formatting_elements) {
      ## Step 7'
      $i++;
      $entry = $active_formatting_elements->[$i];

      redo S7;
    }
  } # S7
}; # $reconstruct_active_formatting_elements
1328    
## Clear the list of active formatting elements up to the last marker:
## find the marker entry nearest to the end of the list and drop it
## together with every entry after it.  The list is left untouched
## when it contains no marker.
my $clear_up_to_marker = sub {
  my $idx = $#$active_formatting_elements;
  while ($idx >= 0) {
    my $candidate = $active_formatting_elements->[$idx];
    if ($candidate->[0] eq '#marker') {
      splice @$active_formatting_elements, $idx;
      return;
    }
    $idx--;
  }
}; # $clear_up_to_marker
1337    
## Reset the insertion mode appropriately.
##
## Examines |@$open_elements| from the current node towards the root
## and sets |$insertion_mode| according to the first node whose tag
## name determines a mode.  An |html| node selects |before head| or
## |after head| depending on whether |$head_element| is defined; if
## the bottom of the stack is reached without a match the mode becomes
## |in body|.
my $reset_insertion_mode = sub {
  ## Steps 4..13: tag name => insertion mode
  my %mode_for = (
    select => 'in select',
    td => 'in cell',
    th => 'in cell',
    tr => 'in row',
    tbody => 'in table body',
    thead => 'in table head',
    tfoot => 'in table foot',
    caption => 'in caption',
    colgroup => 'in column group',
    table => 'in table',
    head => 'in body', # not in head!
    body => 'in body',
    frameset => 'in frameset',
  );

  ## Step 1
  my $is_last;

  ## Step 2
  my $depth = -1;
  my $current = $open_elements->[$depth];

  while (1) {
    ## Step 3
    if ($open_elements->[0]->[0] eq $current->[0]) {
      $is_last = 1;
    }
    ## TODO: the element whose inner_html is set is neither td nor th, then $node = the element

    ## Steps 4..13
    my $mode = $mode_for{$current->[1]};
    if (defined $mode) {
      $insertion_mode = $mode;
      return;
    }

    ## Step 14
    if ($current->[1] eq 'html') {
      if (defined $head_element) {
        $insertion_mode = 'after head';
      } else {
        $insertion_mode = 'before head';
      }
      return;
    }

    ## Step 15
    if ($is_last) {
      $insertion_mode = 'in body';
      return;
    }

    ## Step 16
    $depth--;
    $current = $open_elements->[$depth];

    ## Step 17: continue with step 3
  }
}; # $reset_insertion_mode
1390    
## Process a |style| start tag; |$token| is expected to be that start
## tag token on entry.
##
## Creates the |style| element with the attributes of the start tag
## token (they were previously dropped) and appends it to
## |$head_element| when in the |in head| insertion mode with a head
## element defined, or to the current node otherwise.  The tokenizer is
## then switched to the CDATA content model, all consecutive character
## tokens are collected into one text node of the element, and the
## content model is switched back to PCDATA.  A token other than the
## |style| end tag at that point is a parse error.  In both cases the
## token is consumed.
my $style_start_tag = sub {
  my $style_el; !!!create-element ($style_el, 'style', $token->{attributes});
  ## $insertion_mode eq 'in head' and ... (always true)
  (($insertion_mode eq 'in head' and defined $head_element)
   ? $head_element : $open_elements->[-1]->[0])
      ->append_child ($style_el);
  $self->{content_model_flag} = 'CDATA';

  my $text = '';
  !!!next-token;
  while ($token->{type} eq 'character') {
    $text .= $token->{data};
    !!!next-token;
  } # stop if non-character token or tokenizer stops tokenising
  if (length $text) {
    $style_el->manakai_append_text ($text);
  }

  $self->{content_model_flag} = 'PCDATA';

  if ($token->{type} eq 'end tag' and $token->{tag_name} eq 'style') {
    ## Ignore the token
  } else {
    !!!parse-error;
    ## ISSUE: And ignore?
  }
  !!!next-token;
}; # $style_start_tag
1419    
## Process a |script| start tag; |$token| is expected to be that start
## tag token on entry.
##
## Creates the |script| element with the attributes of the start tag
## token (they were previously dropped), switches the tokenizer to the
## CDATA content model, collects all consecutive character tokens into
## one text node of the element and switches the content model back to
## PCDATA.  A token other than the |script| end tag at that point is a
## parse error.  The element is then appended to |$head_element| when in
## the |in head| insertion mode with a head element defined, or to the
## current node otherwise, and the token is consumed.
my $script_start_tag = sub {
  my $script_el; !!!create-element ($script_el, 'script', $token->{attributes});
  ## TODO: mark as "parser-inserted"

  $self->{content_model_flag} = 'CDATA';

  my $text = '';
  !!!next-token;
  while ($token->{type} eq 'character') {
    $text .= $token->{data};
    !!!next-token;
  } # stop if non-character token or tokenizer stops tokenising
  if (length $text) {
    $script_el->manakai_append_text ($text);
  }

  $self->{content_model_flag} = 'PCDATA';

  if ($token->{type} eq 'end tag' and
      $token->{tag_name} eq 'script') {
    ## Ignore the token
  } else {
    !!!parse-error;
    ## ISSUE: And ignore?
    ## TODO: mark as "already executed"
  }

  ## TODO: inner_html mode then mark as "already executed" and skip
  if (1) {
    ## TODO: $old_insertion_point = current insertion point
    ## TODO: insertion point = just before the next input character

    (($insertion_mode eq 'in head' and defined $head_element)
     ? $head_element : $open_elements->[-1]->[0])->append_child ($script_el);

    ## TODO: insertion point = $old_insertion_point (might be "undefined")

    ## TODO: if there is a script that will execute as soon as the parser resume, then...
  }

  !!!next-token;
}; # $script_start_tag
1462    
## Process an end tag token for a formatting element whose tag name is
## given as the only argument.
##
## Entries of |@$open_elements| and |@$active_formatting_elements| are
## |[node, tag_name]| array references, so DOM methods must be called
## on the first member of an entry, never on the entry itself.
##
## The block |FET| is repeated until no matching formatting element is
## left in the list of active formatting elements (after the last
## marker), at which point the token is consumed and the closure
## returns.  The numbered "Step" comments presumably follow the
## corresponding algorithm of the HTML5 draft the file implements.
my $formatting_end_tag = sub {
  my $tag_name = shift;

  FET: {
    ## Step 1
    my $formatting_element;
    my $formatting_element_i_in_active;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[1] eq $tag_name) {
        $formatting_element = $active_formatting_elements->[$_];
        $formatting_element_i_in_active = $_;
        last AFE;
      } elsif ($active_formatting_elements->[$_]->[0] eq '#marker') {
        last AFE;
      }
    } # AFE
    unless (defined $formatting_element) {
      !!!parse-error;
      ## Ignore the token
      !!!next-token;
      return;
    }
    ## has an element in scope
    my $in_scope = 1;
    my $formatting_element_i_in_open;
    INSCOPE: for (reverse 0..$#$open_elements) {
      my $node = $open_elements->[$_];
      if ($node->[0] eq $formatting_element->[0]) {
        if ($in_scope) {
          $formatting_element_i_in_open = $_;
          last INSCOPE;
        } else { # in open elements but not in scope
          !!!parse-error;
          ## Ignore the token
          !!!next-token;
          return;
        }
      } elsif ({
                table => 1, caption => 1, td => 1, th => 1,
                button => 1, marquee => 1, object => 1, html => 1,
               }->{$node->[1]}) {
        $in_scope = 0;
      }
    } # INSCOPE
    unless (defined $formatting_element_i_in_open) {
      !!!parse-error;
      ## Remove the formatting element itself from the list; it is
      ## not necessarily the last entry, so |pop| would be wrong.
      splice @$active_formatting_elements,
          $formatting_element_i_in_active, 1;
      !!!next-token; ## TODO: ok?
      return;
    }
    if (not $open_elements->[-1]->[0] eq $formatting_element->[0]) {
      !!!parse-error;
    }

    ## Step 2
    ## Scanning from the current node down to the formatting element
    ## leaves the qualifying node nearest to the formatting element.
    my $furthest_block;
    my $furthest_block_i_in_open;
    OE: for (reverse 0..$#$open_elements) {
      my $node = $open_elements->[$_];
      if (not $formatting_category->{$node->[1]} and
          #not $phrasing_category->{$node->[1]} and
          ($special_category->{$node->[1]} or
           $scoping_category->{$node->[1]})) {
        $furthest_block = $node;
        $furthest_block_i_in_open = $_;
      } elsif ($node->[0] eq $formatting_element->[0]) {
        last OE;
      }
    } # OE

    ## Step 3
    unless (defined $furthest_block) { # MUST
      splice @$open_elements, $formatting_element_i_in_open;
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      !!!next-token;
      return;
    }

    ## Step 4
    my $common_ancestor_node = $open_elements->[$formatting_element_i_in_open - 1];

    ## Step 5
    my $furthest_block_parent = $furthest_block->[0]->parent_node;
    if (defined $furthest_block_parent) {
      $furthest_block_parent->remove_child ($furthest_block->[0]);
    }

    ## Step 6
    ## NOTE(review): when the formatting element is the first entry of
    ## the list, the index below is -1 and refers to the last entry --
    ## confirm that this is intended.
    my $bookmark_prev_el
        = $active_formatting_elements->[$formatting_element_i_in_active - 1]
            ->[0];

    ## Step 7
    my $node = $furthest_block;
    my $node_i_in_open = $furthest_block_i_in_open;
    my $last_node = $furthest_block;
    S7: {
      ## Step 1
      $node_i_in_open--;
      $node = $open_elements->[$node_i_in_open];

      ## Step 2
      my $node_i_in_active;
      S7S2: {
        for (reverse 0..$#$active_formatting_elements) {
          if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
            $node_i_in_active = $_;
            last S7S2;
          }
        }
        ## Not an active formatting element: drop it from the stack
        ## of open elements and examine the previous node.
        splice @$open_elements, $node_i_in_open, 1;
        redo S7;
      } # S7S2

      ## Step 3
      last S7 if $node->[0] eq $formatting_element->[0];

      ## Step 4
      if ($last_node->[0] eq $furthest_block->[0]) {
        $bookmark_prev_el = $node->[0];
      }

      ## Step 5
      if ($node->[0]->has_child_nodes ()) {
        my $clone = [$node->[0]->clone_node (0), $node->[1]];
        $active_formatting_elements->[$node_i_in_active] = $clone;
        $open_elements->[$node_i_in_open] = $clone;
        $node = $clone;
      }

      ## Step 6
      ## |$node| and |$last_node| are entries, not DOM nodes.
      $node->[0]->append_child ($last_node->[0]);

      ## Step 7
      $last_node = $node;

      ## Step 8
      redo S7;
    } # S7

    ## Step 8
    ## |$common_ancestor_node| and |$last_node| are entries, not DOM
    ## nodes.
    $common_ancestor_node->[0]->append_child ($last_node->[0]);

    ## Step 9
    my $clone = [$formatting_element->[0]->clone_node (0),
                 $formatting_element->[1]];

    ## Step 10
    ## The child list is copied first since moving children modifies
    ## the list being iterated.
    my @cn = @{$furthest_block->[0]->child_nodes};
    $clone->[0]->append_child ($_) for @cn;

    ## Step 11
    $furthest_block->[0]->append_child ($clone->[0]);

    ## Step 12
    my $i;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
        splice @$active_formatting_elements, $_, 1;
        $i-- and last AFE if defined $i;
      } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
        $i = $_;
      }
    } # AFE
    splice @$active_formatting_elements, $i + 1, 0, $clone;

    ## Step 13
    undef $i;
    OE: for (reverse 0..$#$open_elements) {
      if ($open_elements->[$_]->[0] eq $formatting_element->[0]) {
        splice @$open_elements, $_, 1;
        $i-- and last OE if defined $i;
      } elsif ($open_elements->[$_]->[0] eq $furthest_block->[0]) {
        $i = $_;
      }
    } # OE
    ## Insert the clone just after the furthest block without
    ## replacing whatever entry currently follows it.
    splice @$open_elements, $i + 1, 0, $clone;

    ## Step 14
    redo FET;
  } # FET
}; # $formatting_end_tag
1645    
1646     my $in_body = sub {
1647     my $insert = shift;
1648     if ($token->{type} eq 'start tag') {
1649     if ($token->{tag_name} eq 'script') {
1650     $script_start_tag->();
1651     return;
1652     } elsif ($token->{tag_name} eq 'style') {
1653     $style_start_tag->();
1654     return;
1655     } elsif ({
1656     base => 1, link => 1, meta => 1, title => 1,
1657     }->{$token->{tag_name}}) {
1658     !!!parse-error;
1659     ## NOTE: This is an "as if in head" code clone
1660     my $el;
1661     !!!create-element ($el, $token->{tag_name}, $token->{attributes});
1662     if (defined $head_element) {
1663     $head_element->append_child ($el);
1664     } else {
1665     $insert->($el);
1666     }
1667    
1668     ## ISSUE: Issue on magical <base> in the spec
1669    
1670     !!!next-token;
1671     return;
1672     } elsif ($token->{tag_name} eq 'body') {
1673     !!!parse-error;
1674    
1675     if (@$open_elements == 1 or
1676     $open_elements->[1]->[1] ne 'body') {
1677     ## Ignore the token
1678     } else {
1679     my $body_el = $open_elements->[1]->[0];
1680     for my $attr_name (keys %{$token->{attributes}}) {
1681     unless ($body_el->has_attribute_ns (undef, $attr_name)) {
1682     $body_el->set_attribute_ns
1683     (undef, [undef, $attr_name],
1684     $token->{attributes}->{$attr_name}->{value});
1685     }
1686     }
1687     }
1688     !!!next-token;
1689     return;
1690     } elsif ({
1691     address => 1, blockquote => 1, center => 1, dir => 1,
1692     div => 1, dl => 1, fieldset => 1, listing => 1,
1693     menu => 1, ol => 1, p => 1, ul => 1,
1694     pre => 1,
1695     }->{$token->{tag_name}}) {
1696     ## has a p element in scope
1697     INSCOPE: for (reverse @$open_elements) {
1698     if ($_->[1] eq 'p') {
1699     !!!back-token;
1700     $token = {type => 'end tag', tag_name => 'p'};
1701     return;
1702     } elsif ({
1703     table => 1, caption => 1, td => 1, th => 1,
1704     button => 1, marquee => 1, object => 1, html => 1,
1705     }->{$_->[1]}) {
1706     last INSCOPE;
1707     }
1708     } # INSCOPE
1709    
1710     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
1711     if ($token->{tag_name} eq 'pre') {
1712     !!!next-token;
1713     if ($token->{type} eq 'character') {
1714     $token->{data} =~ s/^\x0A//;
1715     unless (length $token->{data}) {
1716     !!!next-token;
1717     }
1718     }
1719     } else {
1720     !!!next-token;
1721     }
1722     return;
1723     } elsif ($token->{tag_name} eq 'form') {
1724     if (defined $form_element) {
1725     !!!parse-error;
1726     ## Ignore the token
1727     } else {
1728     ## has a p element in scope
1729     INSCOPE: for (reverse @$open_elements) {
1730     if ($_->[1] eq 'p') {
1731     !!!back-token;
1732     $token = {type => 'end tag', tag_name => 'p'};
1733     return;
1734     } elsif ({
1735     table => 1, caption => 1, td => 1, th => 1,
1736     button => 1, marquee => 1, object => 1, html => 1,
1737     }->{$_->[1]}) {
1738     last INSCOPE;
1739     }
1740     } # INSCOPE
1741    
1742     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
1743     $form_element = $open_elements->[-1]->[0];
1744     !!!next-token;
1745     return;
1746     }
1747     } elsif ($token->{tag_name} eq 'li') {
1748     ## has a p element in scope
1749     INSCOPE: for (reverse @$open_elements) {
1750     if ($_->[1] eq 'p') {
1751     !!!back-token;
1752     $token = {type => 'end tag', tag_name => 'p'};
1753     return;
1754     } elsif ({
1755     table => 1, caption => 1, td => 1, th => 1,
1756     button => 1, marquee => 1, object => 1, html => 1,
1757     }->{$_->[1]}) {
1758     last INSCOPE;
1759     }
1760     } # INSCOPE
1761    
1762     ## Step 1
1763     my $i = -1;
1764     my $node = $open_elements->[$i];
1765     LI: { ## Scan the stack of open elements, starting at the current node ($i = -1)
1766     ## Step 2: an open li element was reached; remove it and every entry above it
1767     if ($node->[1] eq 'li') {
1768     splice @$open_elements, $i;
1769     last LI;
1770     }
1771    
1772     ## Step 3: stop at a special/scoping element that is neither formatting, address nor div
1773     if (not $formatting_category->{$node->[1]} and
1774     #not $phrasing_category->{$node->[1]} and
1775     ($special_category->{$node->[1]} or
1776     $scoping_category->{$node->[1]}) and
1777     $node->[1] ne 'address' and $node->[1] ne 'div') {
1778     last LI;
1779     }
1780    
1781     ## Step 4: move to the previous (older) entry; $i is a negative index, so it must decrease
1782     $i--;
1783     $node = $open_elements->[$i];
1784     redo LI;
1785     } # LI
1786    
1787     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
1788     !!!next-token;
1789     return;
1790     } elsif ($token->{tag_name} eq 'dd' or $token->{tag_name} eq 'dt') {
1791     ## has a p element in scope
1792     INSCOPE: for (reverse @$open_elements) {
1793     if ($_->[1] eq 'p') {
1794     !!!back-token;
1795     $token = {type => 'end tag', tag_name => 'p'};
1796     return;
1797     } elsif ({
1798     table => 1, caption => 1, td => 1, th => 1,
1799     button => 1, marquee => 1, object => 1, html => 1,
1800     }->{$_->[1]}) {
1801     last INSCOPE;
1802     }
1803     } # INSCOPE
1804    
1805     ## Step 1
1806     my $i = -1;
1807     my $node = $open_elements->[$i];
1808     LI: { ## Scan the stack of open elements, starting at the current node ($i = -1)
1809     ## Step 2: an open dt or dd element was reached; remove it and every entry above it
1810     if ($node->[1] eq 'dt' or $node->[1] eq 'dd') {
1811     splice @$open_elements, $i;
1812     last LI;
1813     }
1814    
1815     ## Step 3: stop at a special/scoping element that is neither formatting, address nor div
1816     if (not $formatting_category->{$node->[1]} and
1817     #not $phrasing_category->{$node->[1]} and
1818     ($special_category->{$node->[1]} or
1819     $scoping_category->{$node->[1]}) and
1820     $node->[1] ne 'address' and $node->[1] ne 'div') {
1821     last LI;
1822     }
1823    
1824     ## Step 4: move to the previous (older) entry; $i is a negative index, so it must decrease
1825     $i--;
1826     $node = $open_elements->[$i];
1827     redo LI;
1828     } # LI
1829    
1830     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
1831     !!!next-token;
1832     return;
1833     } elsif ($token->{tag_name} eq 'plaintext') {
1834     ## has a p element in scope
1835     INSCOPE: for (reverse @$open_elements) {
1836     if ($_->[1] eq 'p') {
1837     !!!back-token;
1838     $token = {type => 'end tag', tag_name => 'p'};
1839     return;
1840     } elsif ({
1841     table => 1, caption => 1, td => 1, th => 1,
1842     button => 1, marquee => 1, object => 1, html => 1,
1843     }->{$_->[1]}) {
1844     last INSCOPE;
1845     }
1846     } # INSCOPE
1847    
1848     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
1849    
1850     $self->{content_model_flag} = 'PLAINTEXT';
1851    
1852     !!!next-token;
1853     return;
1854     } elsif ({
1855     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
1856     }->{$token->{tag_name}}) {
1857     ## has a p element in scope
1858     INSCOPE: for (reverse 0..$#$open_elements) {
1859     my $node = $open_elements->[$_];
1860     if ($node->[1] eq 'p') {
1861     !!!back-token;
1862     $token = {type => 'end tag', tag_name => 'p'};
1863     return;
1864     } elsif ({
1865     table => 1, caption => 1, td => 1, th => 1,
1866     button => 1, marquee => 1, object => 1, html => 1,
1867     }->{$node->[1]}) {
1868     last INSCOPE;
1869     }
1870     } # INSCOPE
1871    
1872     ## has an element in scope
1873     my $i;
1874     INSCOPE: for (reverse 0..$#$open_elements) {
1875     my $node = $open_elements->[$_];
1876     if ({
1877     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
1878     }->{$node->[1]}) {
1879     $i = $_;
1880     last INSCOPE;
1881     } elsif ({
1882     table => 1, caption => 1, td => 1, th => 1,
1883     button => 1, marquee => 1, object => 1, html => 1,
1884     }->{$node->[1]}) {
1885     last INSCOPE;
1886     }
1887     } # INSCOPE
1888    
1889     if (defined $i) {
1890     !!!parse-error;
1891     splice @$open_elements, $i;
1892     }
1893    
1894     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
1895    
1896     !!!next-token;
1897     return;
1898     } elsif ($token->{tag_name} eq 'a') {
1899     AFE: for my $i (reverse 0..$#$active_formatting_elements) {
1900     my $node = $active_formatting_elements->[$i];
1901     if ($node->[1] eq 'a') {
1902     !!!parse-error;
1903    
1904     !!!back-token;
1905     $token = {type => 'end tag', tag_name => 'a'};
1906     $formatting_end_tag->($token->{tag_name});
1907    
1908     splice @$active_formatting_elements, $i;
1909     OE: for (reverse 0..$#$open_elements) {
1910     if ($open_elements->[$_]->[0] eq $node->[0]) {
1911     splice @$open_elements, $_;
1912     last OE;
1913     }
1914     } # OE
1915     last AFE;
1916     } elsif ($node->[0] eq '#marker') {
1917     last AFE;
1918     }
1919     } # AFE
1920    
1921     $reconstruct_active_formatting_elements->();
1922    
1923     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
1924     push @$active_formatting_elements, $open_elements->[-1];
1925    
1926     !!!next-token;
1927     return;
1928     } elsif ({ ## Start tag of a formatting element other than a
1929     b => 1, big => 1, em => 1, font => 1, i => 1,
1930     nobr => 1, s => 1, small => 1, strike => 1,
1931     strong => 1, tt => 1, u => 1,
1932     }->{$token->{tag_name}}) {
1933     $reconstruct_active_formatting_elements->();
1934    
1935     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
1936     push @$active_formatting_elements, $open_elements->[-1]; ## record the current node (presumably the element just inserted)
1937    
1938     !!!next-token;
1939     return;
1940     } elsif ($token->{tag_name} eq 'button') {
1941     ## has a button element in scope
1942     INSCOPE: for (reverse 0..$#$open_elements) {
1943     my $node = $open_elements->[$_];
1944     if ($node->[1] eq 'button') {
1945     !!!parse-error;
1946     !!!back-token;
1947     $token = {type => 'end tag', tag_name => 'button'};
1948     return;
1949     } elsif ({
1950     table => 1, caption => 1, td => 1, th => 1,
1951     button => 1, marquee => 1, object => 1, html => 1,
1952     }->{$node->[1]}) {
1953     last INSCOPE;
1954     }
1955     } # INSCOPE
1956    
1957     $reconstruct_active_formatting_elements->();
1958    
1959     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
1960     push @$active_formatting_elements, ['#marker', ''];
1961    
1962     !!!next-token;
1963     return;
1964     } elsif ($token->{tag_name} eq 'marquee' or
1965     $token->{tag_name} eq 'object') {
1966     $reconstruct_active_formatting_elements->();
1967    
1968     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
1969     push @$active_formatting_elements, ['#marker', ''];
1970    
1971     !!!next-token;
1972     return;
1973     } elsif ($token->{tag_name} eq 'xmp') {
1974     $reconstruct_active_formatting_elements->();
1975    
1976     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
1977    
1978     $self->{content_model_flag} = 'CDATA';
1979    
1980     !!!next-token;
1981     return;
1982     } elsif ($token->{tag_name} eq 'table') { ## Start tag table: close an open p, insert, switch to 'in table'
1983     ## has a p element in scope
1984     INSCOPE: for (reverse @$open_elements) {
1985     if ($_->[1] eq 'p') {
1986     !!!back-token; ## keep the table start tag for later and process an implied </p> first
1987     $token = {type => 'end tag', tag_name => 'p'};
1988     return;
1989     } elsif ({
1990     table => 1, caption => 1, td => 1, th => 1,
1991     button => 1, marquee => 1, object => 1, html => 1,
1992     }->{$_->[1]}) {
1993     last INSCOPE; ## scope boundary reached without finding a p
1994     }
1995     } # INSCOPE
1996    
1997     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
1998    
1999     $insertion_mode = 'in table';
2000    
2001     !!!next-token;
2002     return;
2003     } elsif ({
2004     area => 1, basefont => 1, bgsound => 1, br => 1,
2005     embed => 1, img => 1, param => 1, spacer => 1, wbr => 1,
2006     image => 1,
2007     }->{$token->{tag_name}}) {
2008     if ($token->{tag_name} eq 'image') {
2009     !!!parse-error;
2010     $token->{tag_name} = 'img';
2011     }
2012    
2013     $reconstruct_active_formatting_elements->();
2014    
2015     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2016     pop @$open_elements;
2017    
2018     !!!next-token;
2019     return;
2020     } elsif ($token->{tag_name} eq 'hr') {
2021     ## has a p element in scope
2022     INSCOPE: for (reverse @$open_elements) {
2023     if ($_->[1] eq 'p') {
2024     !!!back-token;
2025     $token = {type => 'end tag', tag_name => 'p'};
2026     return;
2027     } elsif ({
2028     table => 1, caption => 1, td => 1, th => 1,
2029     button => 1, marquee => 1, object => 1, html => 1,
2030     }->{$_->[1]}) {
2031     last INSCOPE;
2032     }
2033     } # INSCOPE
2034    
2035     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2036     pop @$open_elements;
2037    
2038     !!!next-token;
2039     return;
2040     } elsif ($token->{tag_name} eq 'input') {
2041     $reconstruct_active_formatting_elements->();
2042    
2043     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2044     ## TODO: associate with $form_element if defined
2045     pop @$open_elements;
2046    
2047     !!!next-token;
2048     return;
2049     } elsif ($token->{tag_name} eq 'isindex') {
2050     !!!parse-error;
2051    
2052     if (defined $form_element) {
2053     ## Ignore the token
2054     !!!next-token;
2055     return;
2056     } else {
2057     my $at = $token->{attributes};
2058     $at->{name} = {name => 'name', value => 'isindex'};
2059     my @tokens = (
2060     {type => 'start tag', tag_name => 'form'},
2061     {type => 'start tag', tag_name => 'hr'},
2062     {type => 'start tag', tag_name => 'p'},
2063     {type => 'start tag', tag_name => 'label'},
2064     {type => 'character',
2065     data => 'This is a searchable index. Insert your search keywords here: '}, # SHOULD
2066     ## TODO: make this configurable
2067     {type => 'start tag', tag_name => 'input', attributes => $at},
2068     #{type => 'character', data => ''}, # SHOULD
2069     {type => 'end tag', tag_name => 'label'},
2070     {type => 'end tag', tag_name => 'p'},
2071     {type => 'start tag', tag_name => 'hr'},
2072     {type => 'end tag', tag_name => 'form'},
2073     );
2074     $token = shift @tokens;
2075     !!!back-token (@tokens);
2076     return;
2077     }
2078     } elsif ({
2079     textarea => 1,
2080     noembed => 1,
2081     noframes => 1,
2082     noscript => 0, ## TODO: 1 if scripting is enabled
2083     }->{$token->{tag_name}}) {
2084     my $tag_name = $token->{tag_name};
2085     my $el;
2086     !!!create-element ($el, $token->{tag_name}, $token->{attributes});
2087    
2088     if ($token->{tag_name} eq 'textarea') {
2089     ## TODO: form_element if defined
2090     $self->{content_model_flag} = 'RCDATA';
2091     } else {
2092     $self->{content_model_flag} = 'CDATA';
2093     }
2094    
2095     $insert->($el);
2096    
2097     my $text = '';
2098     !!!next-token;
2099     while ($token->{type} eq 'character') {
2100     $text .= $token->{data};
2101     !!!next-token;
2102     }
2103     if (length $text) {
2104     $el->manakai_append_text ($text);
2105     }
2106    
2107     $self->{content_model_flag} = 'PCDATA';
2108    
2109     if ($token->{type} eq 'end tag' and
2110     $token->{tag_name} eq $tag_name) {
2111     ## Ignore the token
2112     } else {
2113     !!!parse-error;
2114     ## ISSUE: And ignore?
2115     }
2116     !!!next-token;
2117     return;
2118     } elsif ($token->{tag_name} eq 'select') { ## Start tag select (this chain dispatches on tag_name; type is already 'start tag')
2119     $reconstruct_active_formatting_elements->();
2120    
2121     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2122    
2123     $insertion_mode = 'in select';
2124     !!!next-token;
2125     return;
2126     } elsif ({
2127     caption => 1, col => 1, colgroup => 1, frame => 1,
2128     frameset => 1, head => 1, option => 1, optgroup => 1,
2129     tbody => 1, td => 1, tfoot => 1, th => 1,
2130     thead => 1, tr => 1,
2131     }->{$token->{tag_name}}) {
2132     !!!parse-error;
2133     ## Ignore the token
2134     !!!next-token;
2135     return;
2136    
2137     ## ISSUE: An issue on HTML5 new elements in the spec.
2138     } else {
2139     $reconstruct_active_formatting_elements->();
2140    
2141     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2142    
2143     !!!next-token;
2144     return;
2145     }
2146     } elsif ($token->{type} eq 'end tag') {
2147     if ($token->{tag_name} eq 'body') {
2148     if (@$open_elements > 1 and $open_elements->[1]->[1] eq 'body') {
2149     ## ISSUE: There is an issue in the spec.
2150     if ($open_elements->[-1]->[1] ne 'body') {
2151     !!!parse-error;
2152     }
2153     $insertion_mode = 'after body';
2154     !!!next-token;
2155     return;
2156     } else {
2157     !!!parse-error;
2158     ## Ignore the token
2159     !!!next-token;
2160     return;
2161     }
2162     } elsif ($token->{tag_name} eq 'html') {
2163     if (@$open_elements > 1 and $open_elements->[1]->[1] eq 'body') {
2164     ## ISSUE: There is an issue in the spec.
2165     if ($open_elements->[-1]->[1] ne 'body') {
2166     !!!parse-error;
2167     }
2168     $insertion_mode = 'after body';
2169     ## reprocess
2170     return;
2171     } else {
2172     !!!parse-error;
2173     ## Ignore the token
2174     !!!next-token;
2175     return;
2176     }
2177     } elsif ({
2178     address => 1, blockquote => 1, center => 1, dir => 1,
2179     div => 1, dl => 1, fieldset => 1, listing => 1,
2180     menu => 1, ol => 1, pre => 1, ul => 1,
2181     form => 1,
2182     p => 1,
2183     dd => 1, dt => 1, li => 1,
2184     button => 1, marquee => 1, object => 1,
2185     }->{$token->{tag_name}}) {
2186     ## has an element in scope
2187     my $i;
2188     INSCOPE: for (reverse 0..$#$open_elements) {
2189     my $node = $open_elements->[$_];
2190     if ($node->[1] eq $token->{tag_name}) {
2191     ## generate implied end tags
2192     if ({
2193     dd => ($token->{tag_name} ne 'dd'),
2194     dt => ($token->{tag_name} ne 'dt'),
2195     li => ($token->{tag_name} ne 'li'),
2196     p => ($token->{tag_name} ne 'p'),
2197     td => 1, th => 1, tr => 1,
2198     }->{$open_elements->[-1]->[1]}) {
2199     !!!back-token;
2200     $token = {type => 'end tag',
2201     tag_name => $open_elements->[-1]->[1]}; # MUST
2202     return;
2203     }
2204     $i = $_;
2205     last INSCOPE unless $token->{tag_name} eq 'p';
2206     } elsif ({
2207     table => 1, caption => 1, td => 1, th => 1,
2208     button => 1, marquee => 1, object => 1, html => 1,
2209     }->{$node->[1]}) {
2210     last INSCOPE;
2211     }
2212     } # INSCOPE
2213    
2214     if ($open_elements->[-1]->[1] ne $token->{tag_name}) {
2215     !!!parse-error;
2216     }
2217    
2218     splice @$open_elements, $i if defined $i;
2219     undef $form_element if $token->{tag_name} eq 'form';
2220     $clear_up_to_marker->()
2221     if {
2222     button => 1, marquee => 1, object => 1,
2223     }->{$token->{tag_name}};
2224     !!!next-token;
2225     return;
2226     } elsif ({
2227     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2228     }->{$token->{tag_name}}) {
2229     ## has an element in scope
2230     my $i;
2231     INSCOPE: for (reverse 0..$#$open_elements) {
2232     my $node = $open_elements->[$_];
2233     if ({
2234     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2235     }->{$node->[1]}) {
2236     ## generate implied end tags
2237     if ({
2238     dd => 1, dt => 1, li => 1, p => 1,
2239     td => 1, th => 1, tr => 1,
2240     }->{$open_elements->[-1]->[1]}) {
2241     !!!back-token;
2242     $token = {type => 'end tag',
2243     tag_name => $open_elements->[-1]->[1]}; # MUST
2244     return;
2245     }
2246     $i = $_;
2247     last INSCOPE;
2248     } elsif ({
2249     table => 1, caption => 1, td => 1, th => 1,
2250     button => 1, marquee => 1, object => 1, html => 1,
2251     }->{$node->[1]}) {
2252     last INSCOPE;
2253     }
2254     } # INSCOPE
2255    
2256     if ($open_elements->[-1]->[1] ne $token->{tag_name}) {
2257     !!!parse-error;
2258     }
2259    
2260     splice @$open_elements, $i if defined $i;
2261     !!!next-token;
2262     return;
2263     } elsif ({ ## End tag of a formatting element: delegate to $formatting_end_tag
2264     a => 1,
2265     b => 1, big => 1, em => 1, font => 1, i => 1,
2266     nobr => 1, s => 1, small => 1, strike => 1,
2267     strong => 1, tt => 1, u => 1,
2268     }->{$token->{tag_name}}) {
2269     $formatting_end_tag->($token->{tag_name});
2270     return;
2271     } elsif ({
2272     caption => 1, col => 1, colgroup => 1, frame => 1,
2273     frameset => 1, head => 1, option => 1, optgroup => 1,
2274     tbody => 1, td => 1, tfoot => 1, th => 1,
2275     thead => 1, tr => 1,
2276     area => 1, basefont => 1, bgsound => 1, br => 1,
2277     embed => 1, hr => 1, iframe => 1, image => 1,
2278     img => 1, input => 1, isindex=> 1, noembed => 1,
2279     noframes => 1, param => 1, select => 1, spacer => 1,
2280     table => 1, textarea => 1, wbr => 1,
2281     noscript => 0, ## TODO: if scripting is enabled
2282     }->{$token->{tag_name}}) {
2283     !!!parse-error;
2284     ## Ignore the token
2285     !!!next-token;
2286     return;
2287    
2288     ## ISSUE: Issue on HTML5 new elements in spec
2289    
2290     } else {
2291     ## Step 1
2292     my $node_i = -1;
2293     my $node = $open_elements->[$node_i];
2294    
2295     ## Step 2
2296     S2: { ## Any other end tag: look down the stack from the current node for a matching element
2297     if ($node->[1] eq $token->{tag_name}) {
2298     ## Step 1
2299     ## generate implied end tags
2300     if ({
2301     dd => 1, dt => 1, li => 1, p => 1,
2302     td => 1, th => 1, tr => 1,
2303     }->{$open_elements->[-1]->[1]}) {
2304     !!!back-token; ## keep the real end tag for later and process the implied one first
2305     $token = {type => 'end tag',
2306     tag_name => $open_elements->[-1]->[1]}; # MUST
2307     return;
2308     }
2309    
2310     ## Step 2
2311     if ($token->{tag_name} ne $open_elements->[-1]->[1]) {
2312     !!!parse-error;
2313     }
2314    
2315     ## Step 3: pop up to and including the matched node, then consume the end tag so it is not processed again
2316     splice @$open_elements, $node_i; !!!next-token;
2317     last S2;
2318     } else {
2319     ## Step 3
2320     if (not $formatting_category->{$node->[1]} and
2321     #not $phrasing_category->{$node->[1]} and
2322     ($special_category->{$node->[1]} or
2323     $scoping_category->{$node->[1]})) {
2324     !!!parse-error;
2325     ## Ignore the token
2326     !!!next-token;
2327     last S2;
2328     }
2329     }
2330    
2331     ## Step 4: move to the previous (older) entry; $node_i is a negative index
2332     $node_i--;
2333     $node = $open_elements->[$node_i];
2334    
2335     ## Step 5;
2336     redo S2;
2337     } # S2
2338     }
2339     }
2340     }; # $in_body
2341    
2342     B: {
2343     if ($phase eq 'initial') {
2344     if ($token->{type} eq 'DOCTYPE') {
2345     if ($token->{error}) {
2346     ## ISSUE: Spec currently left this case undefined.
2347     }
2348     my $doctype = $self->{document}->create_document_type_definition
2349     ($token->{name});
2350     $self->{document}->append_child ($doctype);
2351     $phase = 'root element';
2352     !!!next-token;
2353     redo B;
2354     } elsif ({
2355     comment => 1,
2356     'start tag' => 1,
2357     'end tag' => 1,
2358     'end-of-file' => 1,
2359     }->{$token->{type}}) {
2360     ## ISSUE: Spec currently left this case undefined.
2361     $phase = 'root element';
2362     ## reprocess
2363     redo B;
2364     } elsif ($token->{type} eq 'character') {
2365     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
2366     $self->{document}->manakai_append_text ($1);
2367     ## ISSUE: DOM3 Core does not allow Document > Text
2368     unless (length $token->{data}) {
2369     ## Stay in the phase
2370     !!!next-token;
2371     redo B;
2372     }
2373     }
2374     ## ISSUE: Spec currently left this case undefined.
2375     $phase = 'root element';
2376     ## reprocess
2377     redo B;
2378     } else {
2379     die "$0: $token->{type}: Unknown token";
2380     }
2381     } elsif ($phase eq 'root element') {
2382     if ($token->{type} eq 'DOCTYPE') {
2383     !!!parse-error;
2384     ## Ignore the token
2385     ## Stay in the phase
2386     !!!next-token;
2387     redo B;
2388     } elsif ($token->{type} eq 'comment') {
2389     my $comment = $self->{document}->create_comment ($token->{data});
2390     $self->{document}->append_child ($comment);
2391     ## Stay in the phase
2392     !!!next-token;
2393     redo B;
2394     } elsif ($token->{type} eq 'character') {
2395     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
2396     $self->{document}->manakai_append_text ($1);
2397     ## ISSUE: DOM3 Core does not allow Document > Text
2398     unless (length $token->{data}) {
2399     ## Stay in the phase
2400     !!!next-token;
2401     redo B;
2402     }
2403     }
2404     #
2405     } elsif ({
2406     'start tag' => 1,
2407     'end tag' => 1,
2408     'end-of-file' => 1,
2409     }->{$token->{type}}) {
2410     ## ISSUE: There is an issue in the spec
2411     #
2412     } else {
2413     die "$0: $token->{type}: Unknown token";
2414     }
2415     my $root_element; !!!create-element ($root_element, 'html');
2416     $self->{document}->append_child ($root_element);
2417     $open_elements = [[$root_element, 'html']];
2418     $phase = 'main';
2419     ## reprocess
2420     redo B;
2421     } elsif ($phase eq 'main') {
2422     if ($token->{type} eq 'DOCTYPE') {
2423     !!!parse-error;
2424     ## Ignore the token
2425     ## Stay in the phase
2426     !!!next-token;
2427     redo B;
2428     } elsif ($token->{type} eq 'start tag' and
2429     $token->{tag_name} eq 'html') {
2430     ## TODO: unless it is the first start tag token, parse-error
2431     my $top_el = $open_elements->[0]->[0];
2432     for my $attr_name (keys %{$token->{attributes}}) { ## copy attributes the root element does not have yet
2433     unless ($top_el->has_attribute_ns (undef, $attr_name)) {
2434     $top_el->set_attribute_ns (undef, [undef, $attr_name],
2435     $token->{attributes}->{$attr_name}->{value}); ## attributes are keyed by name; each entry holds {name, value}
2436     }
2437     }
2438     !!!next-token;
2439     redo B;
2440     } elsif ($token->{type} eq 'end-of-file') {
2441     ## Generate implied end tags
2442     if ({
2443     dd => 1, dt => 1, li => 1, p => 1, td => 1, th => 1, tr => 1,
2444     }->{$open_elements->[-1]->[1]}) {
2445     !!!back-token;
2446     $token = {type => 'end tag', tag_name => $open_elements->[-1]->[1]};
2447     redo B;
2448     }
2449    
2450     if (@$open_elements > 2 or
2451     (@$open_elements == 2 and $open_elements->[1]->[1] ne 'body')) {
2452     !!!parse-error;
2453     } else {
2454     ## TODO: inner_html parser and @$open_elements > 1 and $open_elements->[1] ne 'body', then parse-error
2455     }
2456    
2457     ## Stop parsing
2458     last B;
2459    
2460     ## ISSUE: There is an issue in the spec.
2461     } else {
2462     if ($insertion_mode eq 'before head') {
2463     if ($token->{type} eq 'character') {
2464     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
2465     $open_elements->[-1]->[0]->manakai_append_text ($1);
2466     unless (length $token->{data}) {
2467     !!!next-token;
2468     redo B;
2469     }
2470     }
2471     ## As if <head>
2472     !!!create-element ($head_element, 'head');
2473     $open_elements->[-1]->[0]->append_child ($head_element);
2474     push @$open_elements, [$head_element, 'head'];
2475     $insertion_mode = 'in head';
2476     ## reprocess
2477     redo B;
2478     } elsif ($token->{type} eq 'comment') {
2479     my $comment = $self->{document}->create_comment ($token->{data});
2480     $open_elements->[-1]->[0]->append_child ($comment);
2481     !!!next-token;
2482     redo B;
2483     } elsif ($token->{type} eq 'start tag') {
2484     my $attr = $token->{tag_name} eq 'head' ? $token->{attributes} : {};
2485     !!!create-element ($head_element, 'head', $attr);
2486     $open_elements->[-1]->[0]->append_child ($head_element);
2487     push @$open_elements, [$head_element, 'head'];
2488     $insertion_mode = 'in head';
2489     if ($token->{tag_name} eq 'head') {
2490     !!!next-token;
2491     #} elsif ({
2492     # base => 1, link => 1, meta => 1,
2493     # script => 1, style => 1, title => 1,
2494     # }->{$token->{tag_name}}) {
2495     # ## reprocess
2496     } else {
2497     ## reprocess
2498     }
2499     redo B;
2500     } elsif ($token->{type} eq 'end tag') {
2501     if ($token->{tag_name} eq 'html') { ## </html> before any head: create the head implicitly, then reprocess
2502     ## As if <head>
2503     !!!create-element ($head_element, 'head');
2504     $open_elements->[-1]->[0]->append_child ($head_element);
2505     push @$open_elements, [$head_element, 'head'];
2506     $insertion_mode = 'in head';
2507     ## reprocess
2508     redo B;
2509     } else {
2510     !!!parse-error;
2511     !!!next-token; ## Ignore the token: advance first, otherwise redo B would see the same end tag again
2512     redo B;
2513     }
2514     } else {
2515     die "$0: $token->{type}: Unknown type";
2516     }
2517     } elsif ($insertion_mode eq 'in head') {
2518     if ($token->{type} eq 'character') {
2519     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
2520     $open_elements->[-1]->[0]->manakai_append_text ($1);
2521     unless (length $token->{data}) {
2522     !!!next-token;
2523     redo B;
2524     }
2525     }
2526    
2527     #
2528     } elsif ($token->{type} eq 'comment') {
2529     my $comment = $self->{document}->create_comment ($token->{data});
2530     $open_elements->[-1]->[0]->append_child ($comment);
2531     !!!next-token;
2532     redo B;
2533     } elsif ($token->{type} eq 'start tag') {
2534     if ($token->{tag_name} eq 'title') {
2535     my $title_el; !!!create-element ($title_el, 'title');
2536     (defined $head_element ? $head_element : $open_elements->[-1]->[0])
2537     ->append_child ($title_el);
2538     $self->{content_model_flag} = 'RCDATA';
2539    
2540     my $text = '';
2541     !!!next-token;
2542     while ($token->{type} eq 'character') {
2543     $text .= $token->{data};
2544     !!!next-token;
2545     }
2546     if (length $text) {
2547     $title_el->manakai_append_text ($text);
2548     }
2549    
2550     $self->{content_model_flag} = 'PCDATA';
2551    
2552     if ($token->{type} eq 'end tag' and
2553     $token->{tag_name} eq 'title') {
2554     ## Ignore the token
2555     } else {
2556     !!!parse-error;
2557     ## ISSUE: And ignore?
2558     }
2559     !!!next-token;
2560     redo B;
2561     } elsif ($token->{tag_name} eq 'style') {
2562     $style_start_tag->();
2563     redo B;
2564     } elsif ($token->{tag_name} eq 'script') {
2565     $script_start_tag->();
2566     redo B;
2567     } elsif ({base => 1, link => 1, meta => 1}->{$token->{tag_name}}) {
2568     ## NOTE: There are "as if in head" code clones
2569     my $el;
2570     !!!create-element ($el, $token->{tag_name}, $token->{attributes});
2571     (defined $head_element ? $head_element : $open_elements->[-1]->[0])
2572     ->append_child ($el);
2573    
2574     ## ISSUE: Issue on magical <base> in the spec
2575    
2576     !!!next-token;
2577     redo B;
2578     } elsif ($token->{tag_name} eq 'head') {
2579     !!!parse-error;
2580     ## Ignore the token
2581     !!!next-token;
2582     redo B;
2583     } else {
2584     #
2585     }
2586     } elsif ($token->{type} eq 'end tag') {
2587     if ($token->{tag_name} eq 'head') {
2588     if ($open_elements->[-1]->[1] eq 'head') {
2589     pop @$open_elements;
2590     } else {
2591     !!!parse-error;
2592     }
2593     $insertion_mode = 'after head';
2594     !!!next-token;
2595     redo B;
2596     } elsif ($token->{tag_name} eq 'html') {
2597     #
2598     } else {
2599     !!!parse-error;
2600     ## Ignore the token
2601     !!!next-token;
2602     redo B;
2603     }
2604     } else {
2605     #
2606     }
2607    
2608     if ($open_elements->[-1]->[1] eq 'head') {
2609     ## As if </head>
2610     pop @$open_elements;
2611     }
2612     $insertion_mode = 'after head';
2613     ## reprocess
2614     redo B;
2615    
2616     ## ISSUE: An issue in the spec.
2617     } elsif ($insertion_mode eq 'after head') {
2618     if ($token->{type} eq 'character') {
2619     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
2620     $open_elements->[-1]->[0]->manakai_append_text ($1);
2621     unless (length $token->{data}) {
2622     !!!next-token;
2623     redo B;
2624     }
2625     }
2626    
2627     #
2628     } elsif ($token->{type} eq 'comment') {
2629     my $comment = $self->{document}->create_comment ($token->{data});
2630     $open_elements->[-1]->[0]->append_child ($comment);
2631     !!!next-token;
2632     redo B;
2633     } elsif ($token->{type} eq 'start tag') {
2634     if ($token->{tag_name} eq 'body') {
2635     !!!insert-element ('body', $token->{attributes});
2636     $insertion_mode = 'in body';
2637     !!!next-token;
2638     redo B;
2639     } elsif ($token->{tag_name} eq 'frameset') {
2640     !!!insert-element ('frameset', $token->{attributes});
2641     $insertion_mode = 'in frameset';
2642     !!!next-token;
2643     redo B;
2644     } elsif ({
2645     base => 1, link => 1, meta => 1,
2646     script=> 1, style => 1, title => 1,
2647     }->{$token->{tag_name}}) {
2648     !!!parse-error;
2649     $insertion_mode = 'in head';
2650     ## reprocess
2651     redo B;
2652     } else {
2653     #
2654     }
2655     } else {
2656     #
2657     }
2658    
2659     ## As if <body>
2660     !!!insert-element ('body');
2661     $insertion_mode = 'in body';
2662     ## reprocess
2663     redo B;
2664     } elsif ($insertion_mode eq 'in body') {
2665     if ($token->{type} eq 'character') {
2666     ## NOTE: There is a code clone of "character in body".
2667     $reconstruct_active_formatting_elements->();
2668    
2669     $open_elements->[-1]->[0]->manakai_append_text ($token->{data});
2670    
2671     !!!next-token;
2672     redo B;
2673     } elsif ($token->{type} eq 'comment') {
2674     ## NOTE: There is a code clone of "comment in body".
2675     my $comment = $self->{document}->create_comment ($token->{data});
2676     $open_elements->[-1]->[0]->append_child ($comment);
2677     !!!next-token;
2678     redo B;
2679     } else {
2680     $in_body->(sub {
2681     $open_elements->[-1]->[0]->append_child (shift);
2682     });
2683     redo B;
2684     }
2685     } elsif ($insertion_mode eq 'in table') {
2686     if ($token->{type} eq 'character') {
2687     $reconstruct_active_formatting_elements->();
2688    
2689     $open_elements->[-1]->[0]->manakai_append_text ($token->{data});
2690    
2691     !!!next-token;
2692     redo B;
2693     } elsif ($token->{type} eq 'comment') {
2694     my $comment = $self->{document}->create_comment ($token->{data});
2695     $open_elements->[-1]->[0]->append_child ($comment);
2696     !!!next-token;
2697     redo B;
2698     } elsif ($token->{type} eq 'start tag') {
2699     if ({
2700     caption => 1,
2701     colgroup => 1,
2702     tbody => 1, tfoot => 1, thead => 1,
2703     }->{$token->{tag_name}}) {
2704     ## Clear back to table context
2705     while ($open_elements->[-1]->[1] ne 'table' and
2706     $open_elements->[-1]->[1] ne 'html') {
2707     !!!parse-error;
2708     pop @$open_elements;
2709     }
2710    
2711     push @$active_formatting_elements, ['#marker', '']
2712     if $token->{tag_name} eq 'caption';
2713    
2714     !!!insert-element ($token->{tag_name}, $token->{attributes});
2715     $insertion_mode = {
2716     caption => 'in caption',
2717     colgroup => 'in column group',
2718     tbody => 'in table body',
2719     tfoot => 'in table body',
2720     thead => 'in table body',
2721     }->{$token->{tag_name}};
2722     !!!next-token;
2723     redo B;
2724     } elsif ({
2725     col => 1,
2726     td => 1, th => 1, tr => 1,
2727     }->{$token->{tag_name}}) {
2728     ## Clear back to table context
2729     while ($open_elements->[-1]->[1] ne 'table' and
2730     $open_elements->[-1]->[1] ne 'html') {
2731     !!!parse-error;
2732     pop @$open_elements;
2733     }
2734    
2735     !!!insert-element ($token->{tag_name} eq 'col' ? 'colgroup' : 'tbody');
2736     $insertion_mode = $token->{tag_name} eq 'col'
2737     ? 'in column group' : 'in table body';
2738     ## reprocess
2739     redo B;
2740     } elsif ($token->{tag_name} eq 'table') {
2741     ## NOTE: There are code clones for this "table in table"
2742     !!!parse-error;
2743    
2744     ## As if </table>
2745     ## have a table element in table scope
2746     my $i;
2747     INSCOPE: for (reverse 0..$#$open_elements) {
2748     my $node = $open_elements->[$_];
2749     if ($node->[1] eq 'table') {
2750     $i = $_;
2751     last INSCOPE;
2752     } elsif ({
2753     table => 1, html => 1,
2754     }->{$node->[1]}) {
2755     last INSCOPE;
2756     }
2757     } # INSCOPE
2758     unless (defined $i) {
2759     !!!parse-error;
2760     ## Ignore tokens </table><table>
2761     !!!next-token;
2762     redo B;
2763     }
2764    
2765     ## generate implied end tags
2766     if ({
2767     dd => 1, dt => 1, li => 1, p => 1,
2768     td => 1, th => 1, tr => 1,
2769     }->{$open_elements->[-1]->[1]}) {
2770     !!!back-token; # <table>
2771     $token = {type => 'end tag', tag_name => 'table'};
2772     !!!back-token;
2773     $token = {type => 'end tag',
2774     tag_name => $open_elements->[-1]->[1]}; # MUST
2775     redo B;
2776     }
2777    
2778     if ($open_elements->[-1]->[1] ne 'table') {
2779     !!!parse-error;
2780     }
2781    
2782     splice @$open_elements, $i;
2783    
2784     $reset_insertion_mode->();
2785    
2786     ## reprocess
2787     redo B;
2788     } else {
2789     #
2790     }
2791     } elsif ($token->{type} eq 'end tag') {
2792     if ($token->{tag_name} eq 'table') {
2793     ## have a table element in table scope
2794     my $i;
2795     INSCOPE: for (reverse 0..$#$open_elements) {
2796     my $node = $open_elements->[$_];
2797     if ($node->[1] eq $token->{tag_name}) {
2798     $i = $_;
2799     last INSCOPE;
2800     } elsif ({
2801     table => 1, html => 1,
2802     }->{$node->[1]}) {
2803     last INSCOPE;
2804     }
2805     } # INSCOPE
2806     unless (defined $i) {
2807     !!!parse-error;
2808     ## Ignore the token
2809     !!!next-token;
2810     redo B;
2811     }
2812    
2813     ## generate implied end tags
2814     if ({
2815     dd => 1, dt => 1, li => 1, p => 1,
2816     td => 1, th => 1, tr => 1,
2817     }->{$open_elements->[-1]->[1]}) {
2818     !!!back-token;
2819     $token = {type => 'end tag',
2820     tag_name => $open_elements->[-1]->[1]}; # MUST
2821     redo B;
2822     }
2823    
2824     if ($open_elements->[-1]->[1] ne 'table') {
2825     !!!parse-error;
2826     }
2827    
2828     splice @$open_elements, $i;
2829    
2830     $reset_insertion_mode->();
2831    
2832     !!!next-token;
2833     redo B;
2834     } elsif ({
2835     body => 1, caption => 1, col => 1, colgroup => 1,
2836     html => 1, tbody => 1, td => 1, tfoot => 1, th => 1,
2837     thead => 1, tr => 1,
2838     }->{$token->{tag_name}}) {
2839     !!!parse-error;
2840     ## Ignore the token
2841     !!!next-token;
2842     redo B;
2843     } else {
2844     #
2845     }
2846     } else {
2847     #
2848     }
2849    
2850     ## NOTE: There are code clones of "misc in table".
2851     !!!parse-error;
2852     $in_body->(sub {
2853     my $child = shift;
2854     if ({
2855     table => 1, tbody => 1, tfoot => 1,
2856     thead => 1, tr => 1,
2857     }->{$open_elements->[-1]->[1]}) {
2858     # MUST
2859     my $foster_parent_element;
2860     my $next_sibling;
2861     OE: for (reverse 0..$#$open_elements) {
2862     if ($open_elements->[$_]->[1] eq 'table') {
2863     my $parent = $open_elements->[$_]->[0]->parent_node;
2864     if (defined $parent and $parent->node_type == 1) {
2865     $foster_parent_element = $parent;
2866     $next_sibling = $open_elements->[$_]->[0];
2867     } else {
2868     $foster_parent_element
2869     = $open_elements->[$_ - 1]->[0];
2870     }
2871     last OE;
2872     }
2873     } # OE
2874     $foster_parent_element = $open_elements->[0]->[0]
2875     unless defined $foster_parent_element;
2876     $foster_parent_element->insert_before
2877     ($child, $next_sibling);
2878     } else {
2879     $open_elements->[-1]->[0]->append_child ($child);
2880     }
2881     });
2882     redo B;
2883     } elsif ($insertion_mode eq 'in caption') {
2884     if ($token->{type} eq 'start tag') {
2885     if ({
2886     caption => 1, col => 1, colgroup => 1, tbody => 1,
2887     td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
2888     }->{$token->{tag_name}}) {
2889     !!!parse-error;
2890    
2891     ## As if </caption>
2892     ## have a table element in table scope
2893     my $i;
2894     INSCOPE: for (reverse 0..$#$open_elements) {
2895     my $node = $open_elements->[$_];
2896     if ($node->[1] eq 'caption') {
2897     $i = $_;
2898     last INSCOPE;
2899     } elsif ({
2900     table => 1, html => 1,
2901     }->{$node->[1]}) {
2902     last INSCOPE;
2903     }
2904     } # INSCOPE
2905     unless (defined $i) {
2906     !!!parse-error;
2907     ## Ignore the token
2908     !!!next-token;
2909     redo B;
2910     }
2911    
2912     ## generate implied end tags
2913     if ({
2914     dd => 1, dt => 1, li => 1, p => 1,
2915     td => 1, th => 1, tr => 1,
2916     }->{$open_elements->[-1]->[1]}) {
2917     !!!back-token; # <?>
2918     $token = {type => 'end tag', tag_name => 'caption'};
2919     !!!back-token;
2920     $token = {type => 'end tag',
2921     tag_name => $open_elements->[-1]->[1]}; # MUST
2922     redo B;
2923     }
2924    
2925     if ($open_elements->[-1]->[1] ne 'caption') {
2926     !!!parse-error;
2927     }
2928    
2929     splice @$open_elements, $i;
2930    
2931     $clear_up_to_marker->();
2932    
2933     $insertion_mode = 'in table';
2934    
2935     ## reprocess
2936     redo B;
2937     } else {
2938     #
2939     }
2940     } elsif ($token->{type} eq 'end tag') {
2941     if ($token->{tag_name} eq 'caption') {
2942     ## have a table element in table scope
2943     my $i;
2944     INSCOPE: for (reverse 0..$#$open_elements) {
2945     my $node = $open_elements->[$_];
2946     if ($node->[1] eq $token->{tag_name}) {
2947     $i = $_;
2948     last INSCOPE;
2949     } elsif ({
2950     table => 1, html => 1,
2951     }->{$node->[1]}) {
2952     last INSCOPE;
2953     }
2954     } # INSCOPE
2955     unless (defined $i) {
2956     !!!parse-error;
2957     ## Ignore the token
2958     !!!next-token;
2959     redo B;
2960     }
2961    
2962     ## generate implied end tags
2963     if ({
2964     dd => 1, dt => 1, li => 1, p => 1,
2965     td => 1, th => 1, tr => 1,
2966     }->{$open_elements->[-1]->[1]}) {
2967     !!!back-token;
2968     $token = {type => 'end tag',
2969     tag_name => $open_elements->[-1]->[1]}; # MUST
2970     redo B;
2971     }
2972    
2973     if ($open_elements->[-1]->[1] ne 'caption') {
2974     !!!parse-error;
2975     }
2976    
2977     splice @$open_elements, $i;
2978    
2979     $clear_up_to_marker->();
2980    
2981     $insertion_mode = 'in table';
2982    
2983     !!!next-token;
2984     redo B;
2985     } elsif ($token->{tag_name} eq 'table') {
2986     !!!parse-error;
2987    
2988     ## As if </caption>
2989     ## have a table element in table scope
2990     my $i;
2991     INSCOPE: for (reverse 0..$#$open_elements) {
2992     my $node = $open_elements->[$_];
2993     if ($node->[1] eq 'caption') {
2994     $i = $_;
2995     last INSCOPE;
2996     } elsif ({
2997     table => 1, html => 1,
2998     }->{$node->[1]}) {
2999     last INSCOPE;
3000     }
3001     } # INSCOPE
3002     unless (defined $i) {
3003     !!!parse-error;
3004     ## Ignore the token
3005     !!!next-token;
3006     redo B;
3007     }
3008    
3009     ## generate implied end tags
3010     if ({
3011     dd => 1, dt => 1, li => 1, p => 1,
3012     td => 1, th => 1, tr => 1,
3013     }->{$open_elements->[-1]->[1]}) {
3014     !!!back-token; # </table>
3015     $token = {type => 'end tag', tag_name => 'caption'};
3016     !!!back-token;
3017     $token = {type => 'end tag',
3018     tag_name => $open_elements->[-1]->[1]}; # MUST
3019     redo B;
3020     }
3021    
3022     if ($open_elements->[-1]->[1] ne 'caption') {
3023     !!!parse-error;
3024     }
3025    
3026     splice @$open_elements, $i;
3027    
3028     $clear_up_to_marker->();
3029    
3030     $insertion_mode = 'in table';
3031    
3032     ## reprocess
3033     redo B;
3034     } elsif ({
3035     body => 1, col => 1, colgroup => 1,
3036     html => 1, tbody => 1, td => 1, tfoot => 1,
3037     th => 1, thead => 1, tr => 1,
3038     }->{$token->{tag_name}}) {
3039     !!!parse-error;
3040     !!!next-token; ## Ignore the token: it must be consumed, or |redo B| would reprocess it forever
3041     redo B;
3042     } else {
3043     #
3044     }
3045     } else {
3046     #
3047     }
3048    
3049     $in_body->(sub {
3050     $open_elements->[-1]->[0]->append_child (shift);
3051     });
3052     redo B;
3053     } elsif ($insertion_mode eq 'in column group') {
3054     if ($token->{type} eq 'character') {
3055     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3056     $open_elements->[-1]->[0]->manakai_append_text ($1);
3057     unless (length $token->{data}) {
3058     !!!next-token;
3059     redo B;
3060     }
3061     }
3062    
3063     #
3064     } elsif ($token->{type} eq 'comment') {
3065     my $comment = $self->{document}->create_comment ($token->{data});
3066     $open_elements->[-1]->[0]->append_child ($comment);
3067     !!!next-token;
3068     redo B;
3069     } elsif ($token->{type} eq 'start tag') {
3070     if ($token->{tag_name} eq 'col') {
3071     !!!insert-element ($token->{tag_name}, $token->{attributes});
3072     pop @$open_elements;
3073     !!!next-token;
3074     redo B;
3075     } else {
3076     #
3077     }
3078     } elsif ($token->{type} eq 'end tag') {
3079     if ($token->{tag_name} eq 'colgroup') {
3080     if ($open_elements->[-1]->[1] eq 'html') {
3081     !!!parse-error;
3082     ## Ignore the token
3083     !!!next-token;
3084     redo B;
3085     } else {
3086     pop @$open_elements; # colgroup
3087     $insertion_mode = 'in table';
3088     !!!next-token;
3089     redo B;
3090     }
3091     } elsif ($token->{tag_name} eq 'col') {
3092     !!!parse-error;
3093     ## Ignore the token
3094     !!!next-token;
3095     redo B;
3096     } else {
3097     #
3098     }
3099     } else {
3100     #
3101     }
3102    
3103     ## As if </colgroup>
3104     if ($open_elements->[-1]->[1] eq 'html') {
3105     !!!parse-error;
3106     ## Ignore the token
3107     !!!next-token;
3108     redo B;
3109     } else {
3110     pop @$open_elements; # colgroup
3111     $insertion_mode = 'in table';
3112     ## reprocess
3113     redo B;
3114     }
3115     } elsif ($insertion_mode eq 'in table body') {
3116     if ($token->{type} eq 'character') {
3117     ## Copied from 'in table'
3118     $reconstruct_active_formatting_elements->();
3119    
3120     $open_elements->[-1]->[0]->manakai_append_text ($token->{data});
3121    
3122     !!!next-token;
3123     redo B;
3124     } elsif ($token->{type} eq 'comment') {
3125     ## Copied from 'in table'
3126     my $comment = $self->{document}->create_comment ($token->{data});
3127     $open_elements->[-1]->[0]->append_child ($comment);
3128     !!!next-token;
3129     redo B;
3130     } elsif ($token->{type} eq 'start tag') {
3131     if ({
3132     tr => 1,
3133     th => 1, td => 1,
3134     }->{$token->{tag_name}}) {
3135     ## Clear back to table body context
3136     while (not {
3137     tbody => 1, tfoot => 1, thead => 1, html => 1,
3138     }->{$open_elements->[-1]->[1]}) {
3139     !!!parse-error;
3140     pop @$open_elements;
3141     }
3142    
3143     $insertion_mode = 'in row';
3144     if ($token->{tag_name} eq 'tr') {
3145     !!!insert-element ($token->{tag_name}, $token->{attributes});
3146     !!!next-token;
3147     } else {
3148     !!!insert-element ('tr');
3149     ## reprocess
3150     }
3151     redo B;
3152     } elsif ({
3153     caption => 1, col => 1, colgroup => 1,
3154     tbody => 1, tfoot => 1, thead => 1,
3155     }->{$token->{tag_name}}) {
3156     ## have an element in table scope
3157     my $i;
3158     INSCOPE: for (reverse 0..$#$open_elements) {
3159     my $node = $open_elements->[$_];
3160     if ({
3161     tbody => 1, thead => 1, tfoot => 1,
3162     }->{$node->[1]}) {
3163     $i = $_;
3164     last INSCOPE;
3165     } elsif ({
3166     table => 1, html => 1,
3167     }->{$node->[1]}) {
3168     last INSCOPE;
3169     }
3170     } # INSCOPE
3171     unless (defined $i) {
3172     !!!parse-error;
3173     ## Ignore the token
3174     !!!next-token;
3175     redo B;
3176     }
3177    
3178     ## Clear back to table body context
3179     while (not {
3180     tbody => 1, tfoot => 1, thead => 1, html => 1,
3181     }->{$open_elements->[-1]->[1]}) {
3182     !!!parse-error;
3183     pop @$open_elements;
3184     }
3185    
3186     ## As if <{current node}>
3187     ## have an element in table scope
3188     ## true by definition
3189    
3190     ## Clear back to table body context
3191     ## nop by definition
3192    
3193     pop @$open_elements;
3194     $insertion_mode = 'in table';
3195     ## reprocess
3196     redo B;
3197     } elsif ($token->{tag_name} eq 'table') {
3198     ## NOTE: This is a code clone of "table in table"
3199     !!!parse-error;
3200    
3201     ## As if </table>
3202     ## have a table element in table scope
3203     my $i;
3204     INSCOPE: for (reverse 0..$#$open_elements) {
3205     my $node = $open_elements->[$_];
3206     if ($node->[1] eq 'table') {
3207     $i = $_;
3208     last INSCOPE;
3209     } elsif ({
3210     table => 1, html => 1,
3211     }->{$node->[1]}) {
3212     last INSCOPE;
3213     }
3214     } # INSCOPE
3215     unless (defined $i) {
3216     !!!parse-error;
3217     ## Ignore tokens </table><table>
3218     !!!next-token;
3219     redo B;
3220     }
3221    
3222     ## generate implied end tags
3223     if ({
3224     dd => 1, dt => 1, li => 1, p => 1,
3225     td => 1, th => 1, tr => 1,
3226     }->{$open_elements->[-1]->[1]}) {
3227     !!!back-token; # <table>
3228     $token = {type => 'end tag', tag_name => 'table'};
3229     !!!back-token;
3230     $token = {type => 'end tag',
3231     tag_name => $open_elements->[-1]->[1]}; # MUST
3232     redo B;
3233     }
3234    
3235     if ($open_elements->[-1]->[1] ne 'table') {
3236     !!!parse-error;
3237     }
3238    
3239     splice @$open_elements, $i;
3240    
3241     $reset_insertion_mode->();
3242    
3243     ## reprocess
3244     redo B;
3245     } else {
3246     #
3247     }
3248     } elsif ($token->{type} eq 'end tag') {
3249     if ({
3250     tbody => 1, tfoot => 1, thead => 1,
3251     }->{$token->{tag_name}}) {
3252     ## have an element in table scope
3253     my $i;
3254     INSCOPE: for (reverse 0..$#$open_elements) {
3255     my $node = $open_elements->[$_];
3256     if ($node->[1] eq $token->{tag_name}) {
3257     $i = $_;
3258     last INSCOPE;
3259     } elsif ({
3260     table => 1, html => 1,
3261     }->{$node->[1]}) {
3262     last INSCOPE;
3263     }
3264     } # INSCOPE
3265     unless (defined $i) {
3266     !!!parse-error;
3267     ## Ignore the token
3268     !!!next-token;
3269     redo B;
3270     }
3271    
3272     ## Clear back to table body context
3273     while (not {
3274     tbody => 1, tfoot => 1, thead => 1, html => 1,
3275     }->{$open_elements->[-1]->[1]}) {
3276     !!!parse-error;
3277     pop @$open_elements;
3278     }
3279    
3280     pop @$open_elements;
3281     $insertion_mode = 'in table';
3282     !!!next-token;
3283     redo B;
3284     } elsif ($token->{tag_name} eq 'table') {
3285     ## have an element in table scope
3286     my $i;
3287     INSCOPE: for (reverse 0..$#$open_elements) {
3288     my $node = $open_elements->[$_];
3289     if ({
3290     tbody => 1, thead => 1, tfoot => 1,
3291     }->{$node->[1]}) {
3292     $i = $_;
3293     last INSCOPE;
3294     } elsif ({
3295     table => 1, html => 1,
3296     }->{$node->[1]}) {
3297     last INSCOPE;
3298     }
3299     } # INSCOPE
3300     unless (defined $i) {
3301     !!!parse-error;
3302     ## Ignore the token
3303     !!!next-token;
3304     redo B;
3305     }
3306    
3307     ## Clear back to table body context
3308     while (not {
3309     tbody => 1, tfoot => 1, thead => 1, html => 1,
3310     }->{$open_elements->[-1]->[1]}) {
3311     !!!parse-error;
3312     pop @$open_elements;
3313     }
3314    
3315     ## As if <{current node}>
3316     ## have an element in table scope
3317     ## true by definition
3318    
3319     ## Clear back to table body context
3320     ## nop by definition
3321    
3322     pop @$open_elements;
3323     $insertion_mode = 'in table';
3324     ## reprocess
3325     redo B;
3326     } elsif ({
3327     body => 1, caption => 1, col => 1, colgroup => 1,
3328     html => 1, td => 1, th => 1, tr => 1,
3329     }->{$token->{tag_name}}) {
3330     !!!parse-error;
3331     ## Ignore the token
3332     !!!next-token;
3333     redo B;
3334     } else {
3335     #
3336     }
3337     } else {
3338     #
3339     }
3340    
3341     ## As if in table
3342     ## NOTE: This is a code clone of "misc in table".
3343     !!!parse-error;
3344     $in_body->(sub {
3345     my $child = shift;
3346     if ({
3347     table => 1, tbody => 1, tfoot => 1,
3348     thead => 1, tr => 1,
3349     }->{$open_elements->[-1]->[1]}) {
3350     # MUST
3351     my $foster_parent_element;
3352     my $next_sibling;
3353     OE: for (reverse 0..$#$open_elements) {
3354     if ($open_elements->[$_]->[1] eq 'table') {
3355     my $parent = $open_elements->[$_]->[0]->parent_node;
3356     if (defined $parent and $parent->node_type == 1) {
3357     $foster_parent_element = $parent;
3358     $next_sibling = $open_elements->[$_]->[0];
3359     } else {
3360     $foster_parent_element
3361     = $open_elements->[$_ - 1]->[0];
3362     }
3363     last OE;
3364     }
3365     } # OE
3366     $foster_parent_element = $open_elements->[0]->[0]
3367     unless defined $foster_parent_element;
3368     $foster_parent_element->insert_before
3369     ($child, $next_sibling);
3370     } else {
3371     $open_elements->[-1]->[0]->append_child ($child);
3372     }
3373     });
3374     redo B;
3375     } elsif ($insertion_mode eq 'in row') {
3376     if ($token->{type} eq 'character') {
3377     ## Copied from 'in table'
3378     $reconstruct_active_formatting_elements->();
3379    
3380     $open_elements->[-1]->[0]->manakai_append_text ($token->{data});
3381    
3382     !!!next-token;
3383     redo B;
3384     } elsif ($token->{type} eq 'comment') {
3385     ## Copied from 'in table'
3386     my $comment = $self->{document}->create_comment ($token->{data});
3387     $open_elements->[-1]->[0]->append_child ($comment);
3388     !!!next-token;
3389     redo B;
3390     } elsif ($token->{type} eq 'start tag') {
3391     if ($token->{tag_name} eq 'th' or
3392     $token->{tag_name} eq 'td') {
3393     ## Clear back to table row context: stop at |tr| (or the root |html|) so the new cell lands in the row
3394     while (not {
3395     tr => 1, html => 1,
3396     }->{$open_elements->[-1]->[1]}) {
3397     !!!parse-error;
3398     pop @$open_elements;
3399     }
3400    
3401     !!!insert-element ($token->{tag_name}, $token->{attributes});
3402     $insertion_mode = 'in cell';
3403    
3404     push @$active_formatting_elements, ['#marker', ''];
3405    
3406     !!!next-token;
3407     redo B;
3408     } elsif ({
3409     caption => 1, col => 1, colgroup => 1,
3410     tbody => 1, tfoot => 1, thead => 1, tr => 1,
3411     }->{$token->{tag_name}}) {
3412     ## As if </tr>
3413     ## have an element in table scope
3414     my $i;
3415     INSCOPE: for (reverse 0..$#$open_elements) {
3416     my $node = $open_elements->[$_];
3417     if ($node->[1] eq 'tr') {
3418     $i = $_;
3419     last INSCOPE;
3420     } elsif ({
3421     table => 1, html => 1,
3422     }->{$node->[1]}) {
3423     last INSCOPE;
3424     }
3425     } # INSCOPE
3426     unless (defined $i) {
3427     !!!parse-error;
3428     ## Ignore the token
3429     !!!next-token;
3430     redo B;
3431     }
3432    
3433     ## Clear back to table row context
3434     while (not {
3435     tr => 1, html => 1,
3436     }->{$open_elements->[-1]->[1]}) {
3437     !!!parse-error;
3438     pop @$open_elements;
3439     }
3440    
3441     pop @$open_elements; # tr
3442     $insertion_mode = 'in table body';
3443     ## reprocess
3444     redo B;
3445     } elsif ($token->{tag_name} eq 'table') {
3446     ## NOTE: This is a code clone of "table in table"
3447     !!!parse-error;
3448    
3449     ## As if </table>
3450     ## have a table element in table scope
3451     my $i;
3452     INSCOPE: for (reverse 0..$#$open_elements) {
3453     my $node = $open_elements->[$_];
3454     if ($node->[1] eq 'table') {
3455     $i = $_;
3456     last INSCOPE;
3457     } elsif ({
3458     table => 1, html => 1,
3459     }->{$node->[1]}) {
3460     last INSCOPE;
3461     }
3462     } # INSCOPE
3463     unless (defined $i) {
3464     !!!parse-error;
3465     ## Ignore tokens </table><table>
3466     !!!next-token;
3467     redo B;
3468     }
3469    
3470     ## generate implied end tags
3471     if ({
3472     dd => 1, dt => 1, li => 1, p => 1,
3473     td => 1, th => 1, tr => 1,
3474     }->{$open_elements->[-1]->[1]}) {
3475     !!!back-token; # <table>
3476     $token = {type => 'end tag', tag_name => 'table'};
3477     !!!back-token;
3478     $token = {type => 'end tag',
3479     tag_name => $open_elements->[-1]->[1]}; # MUST
3480     redo B;
3481     }
3482    
3483     if ($open_elements->[-1]->[1] ne 'table') {
3484     !!!parse-error;
3485     }
3486    
3487     splice @$open_elements, $i;
3488    
3489     $reset_insertion_mode->();
3490    
3491     ## reprocess
3492     redo B;
3493     } else {
3494     #
3495     }
3496     } elsif ($token->{type} eq 'end tag') {
3497     if ($token->{tag_name} eq 'tr') {
3498     ## have an element in table scope
3499     my $i;
3500     INSCOPE: for (reverse 0..$#$open_elements) {
3501     my $node = $open_elements->[$_];
3502     if ($node->[1] eq $token->{tag_name}) {
3503     $i = $_;
3504     last INSCOPE;
3505     } elsif ({
3506     table => 1, html => 1,
3507     }->{$node->[1]}) {
3508     last INSCOPE;
3509     }
3510     } # INSCOPE
3511     unless (defined $i) {
3512     !!!parse-error;
3513     ## Ignore the token
3514     !!!next-token;
3515     redo B;
3516     }
3517    
3518     ## Clear back to table row context
3519     while (not {
3520     tr => 1, html => 1,
3521     }->{$open_elements->[-1]->[1]}) {
3522     !!!parse-error;
3523     pop @$open_elements;
3524     }
3525    
3526     pop @$open_elements; # tr
3527     $insertion_mode = 'in table body';
3528     !!!next-token;
3529     redo B;
3530     } elsif ($token->{tag_name} eq 'table') {
3531     ## As if </tr>
3532     ## have an element in table scope
3533     my $i;
3534     INSCOPE: for (reverse 0..$#$open_elements) {
3535     my $node = $open_elements->[$_];
3536     if ($node->[1] eq 'tr') {
3537     $i = $_;
3538     last INSCOPE;
3539     } elsif ({
3540     table => 1, html => 1,
3541     }->{$node->[1]}) {
3542     last INSCOPE;
3543     }
3544     } # INSCOPE
3545     unless (defined $i) {
3546     !!!parse-error;
3547     ## Ignore the token
3548     !!!next-token;
3549     redo B;
3550     }
3551    
3552     ## Clear back to table row context
3553     while (not {
3554     tr => 1, html => 1,
3555     }->{$open_elements->[-1]->[1]}) {
3556     !!!parse-error;
3557     pop @$open_elements;
3558     }
3559    
3560     pop @$open_elements; # tr
3561     $insertion_mode = 'in table body';
3562     ## reprocess
3563     redo B;
3564     } elsif ({
3565     tbody => 1, tfoot => 1, thead => 1,
3566     }->{$token->{tag_name}}) {
3567     ## have an element in table scope
3568     my $i;
3569     INSCOPE: for (reverse 0..$#$open_elements) {
3570     my $node = $open_elements->[$_];
3571     if ($node->[1] eq $token->{tag_name}) {
3572     $i = $_;
3573     last INSCOPE;
3574     } elsif ({
3575     table => 1, html => 1,
3576     }->{$node->[1]}) {
3577     last INSCOPE;
3578     }
3579     } # INSCOPE
3580     unless (defined $i) {
3581     !!!parse-error;
3582     ## Ignore the token
3583     !!!next-token;
3584     redo B;
3585     }
3586    
3587     ## As if </tr>
3588     ## have an element in table scope
3589     undef $i; ## reuse the index declared above; a second |my $i| would mask it in the same scope
3590     INSCOPE: for (reverse 0..$#$open_elements) {
3591     my $node = $open_elements->[$_];
3592     if ($node->[1] eq 'tr') {
3593     $i = $_;
3594     last INSCOPE;
3595     } elsif ({
3596     table => 1, html => 1,
3597     }->{$node->[1]}) {
3598     last INSCOPE;
3599     }
3600     } # INSCOPE
3601     unless (defined $i) {
3602     !!!parse-error;
3603     ## Ignore the token
3604     !!!next-token;
3605     redo B;
3606     }
3607    
3608     ## Clear back to table row context
3609     while (not {
3610     tr => 1, html => 1,
3611     }->{$open_elements->[-1]->[1]}) {
3612     !!!parse-error;
3613     pop @$open_elements;
3614     }
3615    
3616     pop @$open_elements; # tr
3617     $insertion_mode = 'in table body';
3618     ## reprocess
3619     redo B;
3620     } elsif ({
3621     body => 1, caption => 1, col => 1,
3622     colgroup => 1, html => 1, td => 1, th => 1,
3623     }->{$token->{tag_name}}) {
3624     !!!parse-error;
3625     ## Ignore the token
3626     !!!next-token;
3627     redo B;
3628     } else {
3629     #
3630     }
3631     } else {
3632     #
3633     }
3634    
3635     ## As if in table
3636     ## NOTE: This is a code clone of "misc in table".
3637     !!!parse-error;
3638     $in_body->(sub {
3639     my $child = shift;
3640     if ({
3641     table => 1, tbody => 1, tfoot => 1,
3642     thead => 1, tr => 1,
3643     }->{$open_elements->[-1]->[1]}) {
3644     # MUST
3645     my $foster_parent_element;
3646     my $next_sibling;
3647     OE: for (reverse 0..$#$open_elements) {
3648     if ($open_elements->[$_]->[1] eq 'table') {
3649     my $parent = $open_elements->[$_]->[0]->parent_node;
3650     if (defined $parent and $parent->node_type == 1) {
3651     $foster_parent_element = $parent;
3652     $next_sibling = $open_elements->[$_]->[0];
3653     } else {
3654     $foster_parent_element
3655     = $open_elements->[$_ - 1]->[0];
3656     }
3657     last OE;
3658     }
3659     } # OE
3660     $foster_parent_element = $open_elements->[0]->[0]
3661     unless defined $foster_parent_element;
3662     $foster_parent_element->insert_before
3663     ($child, $next_sibling);
3664     } else {
3665     $open_elements->[-1]->[0]->append_child ($child);
3666     }
3667     });
3668     redo B;
3669     } elsif ($insertion_mode eq 'in cell') {
3670     if ($token->{type} eq 'character') {
3671     ## NOTE: This is a code clone of "character in body".
3672     $reconstruct_active_formatting_elements->();
3673    
3674     $open_elements->[-1]->[0]->manakai_append_text ($token->{data});
3675    
3676     !!!next-token;
3677     redo B;
3678     } elsif ($token->{type} eq 'comment') {
3679     ## NOTE: This is a code clone of "comment in body".
3680     my $comment = $self->{document}->create_comment ($token->{data});
3681     $open_elements->[-1]->[0]->append_child ($comment);
3682     !!!next-token;
3683     redo B;
3684     } elsif ($token->{type} eq 'start tag') {
3685     if ({
3686     caption => 1, col => 1, colgroup => 1,
3687     tbody => 1, td => 1, tfoot => 1, th => 1,
3688     thead => 1, tr => 1,
3689     }->{$token->{tag_name}}) {
3690     ## have an element in table scope
3691     my $tn;
3692     INSCOPE: for (reverse 0..$#$open_elements) {
3693     my $node = $open_elements->[$_];
3694     if ($node->[1] eq 'td' or $node->[1] eq 'th') {
3695     $tn = $node->[1];
3696     last INSCOPE;
3697     } elsif ({
3698     table => 1, html => 1,
3699     }->{$node->[1]}) {
3700     last INSCOPE;
3701     }
3702     } # INSCOPE
3703     unless (defined $tn) {
3704     !!!parse-error;
3705     ## Ignore the token
3706     !!!next-token;
3707     redo B;
3708     }
3709    
3710     ## Close the cell
3711     !!!back-token; # <?>
3712     $token = {type => 'end tag', tag_name => $tn};
3713     redo B;
3714     } else {
3715     #
3716     }
3717     } elsif ($token->{type} eq 'end tag') {
3718     if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') { ## </td> or </th>: close the cell
3719     ## have an element in table scope
3720     my $i;
3721     INSCOPE: for (reverse 0..$#$open_elements) {
3722     my $node = $open_elements->[$_];
3723     if ($node->[1] eq $token->{tag_name}) {
3724     $i = $_;
3725     last INSCOPE;
3726     } elsif ({
3727     table => 1, html => 1,
3728     }->{$node->[1]}) {
3729     last INSCOPE;
3730     }
3731     } # INSCOPE
3732     unless (defined $i) {
3733     !!!parse-error;
3734     ## Ignore the token
3735     !!!next-token;
3736     redo B;
3737     }
3738    
3739     ## generate implied end tags (except for the cell type being closed)
3740     if ({
3741     dd => 1, dt => 1, li => 1, p => 1,
3742     td => ($token->{tag_name} eq 'th'),
3743     th => ($token->{tag_name} eq 'td'),
3744     tr => 1,
3745     }->{$open_elements->[-1]->[1]}) {
3746     !!!back-token;
3747     $token = {type => 'end tag',
3748     tag_name => $open_elements->[-1]->[1]}; # MUST
3749     redo B;
3750     }
3751    
3752     if ($open_elements->[-1]->[1] ne $token->{tag_name}) {
3753     !!!parse-error;
3754     }
3755    
3756     splice @$open_elements, $i; ## pop the cell and everything above it
3757    
3758     $clear_up_to_marker->();
3759    
3760     $insertion_mode = 'in row';
3761    
3762     !!!next-token;
3763     redo B;
3764     } elsif ({
3765     body => 1, caption => 1, col => 1,
3766     colgroup => 1, html => 1,
3767     }->{$token->{tag_name}}) {
3768     !!!parse-error;
3769     ## Ignore the token
3770     !!!next-token;
3771     redo B;
3772     } elsif ({
3773     table => 1, tbody => 1, tfoot => 1,
3774     thead => 1, tr => 1,
3775     }->{$token->{tag_name}}) {
3776     ## have an element in table scope
3777     my $i; ## index of the element named by the end tag, if in table scope
3778     my $tn; ## name of the open cell (|td| or |th|) that has to be closed first
3779     INSCOPE: for (reverse 0..$#$open_elements) {
3780     my $node = $open_elements->[$_];
3781     if ($node->[1] eq $token->{tag_name}) {
3782     $i = $_;
3783     ## $tn is left alone here: it must keep the |td|/|th| name found above
3784     last INSCOPE;
3785     } elsif ($node->[1] eq 'td' or $node->[1] eq 'th') {
3786     $tn = $node->[1];
3787     ## NOTE: There is exactly one |td| or |th| element
3788     ## in scope in the stack of open elements by definition.
3789     } elsif ({
3790     table => 1, html => 1,
3791     }->{$node->[1]}) {
3792     last INSCOPE;
3793     }
3794     } # INSCOPE
3795     unless (defined $i) {
3796     !!!parse-error;
3797     ## Ignore the token
3798     !!!next-token;
3799     redo B;
3800     }
3801    
3802     ## Close the cell: act as if </td> or </th> had been seen, then reprocess the original end tag
3803     !!!back-token; # </?>
3804     $token = {type => 'end tag', tag_name => $tn};
3805     redo B;
3806     } else {
3807     #
3808     }
3809     } else {
3810     #
3811     }
3812    
3813     $in_body->(sub {
3814     $open_elements->[-1]->[0]->append_child (shift);
3815     });
3816     redo B;
3817     } elsif ($insertion_mode eq 'in select') {
3818     if ($token->{type} eq 'character') {
3819     $open_elements->[-1]->[0]->manakai_append_text ($token->{data});
3820     !!!next-token;
3821     redo B;
3822     } elsif ($token->{type} eq 'comment') {
3823     my $comment = $self->{document}->create_comment ($token->{data});
3824     $open_elements->[-1]->[0]->append_child ($comment);
3825     !!!next-token;
3826     redo B;
3827     } elsif ($token->{type} eq 'start tag') {
3828     if ($token->{tag_name} eq 'option') {
3829     if ($open_elements->[-1]->[1] eq 'option') {
3830     ## As if </option>
3831     pop @$open_elements;
3832     }
3833    
3834     !!!insert-element ($token->{tag_name}, $token->{attributes});
3835     !!!next-token;
3836     redo B;
3837     } elsif ($token->{tag_name} eq 'optgroup') {
3838     if ($open_elements->[-1]->[1] eq 'option') {
3839     ## As if </option>
3840     pop @$open_elements;
3841     }
3842    
3843     if ($open_elements->[-1]->[1] eq 'optgroup') {
3844     ## As if </optgroup>
3845     pop @$open_elements;
3846     }
3847    
3848     !!!insert-element ($token->{tag_name}, $token->{attributes});
3849     !!!next-token;
3850     redo B;
3851     } elsif ($token->{tag_name} eq 'select') {
3852     !!!parse-error;
3853     ## As if </select> instead
3854     ## have an element in table scope
3855     my $i;
3856     INSCOPE: for (reverse 0..$#$open_elements) {
3857     my $node = $open_elements->[$_];
3858     if ($node->[1] eq $token->{tag_name}) {
3859     $i = $_;
3860     last INSCOPE;
3861     } elsif ({
3862     table => 1, html => 1,
3863     }->{$node->[1]}) {
3864     last INSCOPE;
3865     }
3866     } # INSCOPE
3867     unless (defined $i) {
3868     !!!parse-error;
3869     ## Ignore the token
3870     !!!next-token;
3871     redo B;
3872     }
3873    
3874     splice @$open_elements, $i;
3875    
3876     $reset_insertion_mode->();
3877    
3878     !!!next-token;
3879     redo B;
3880     } else {
3881     #
3882     }
3883     } elsif ($token->{type} eq 'end tag') {
3884     if ($token->{tag_name} eq 'optgroup') {
3885     if ($open_elements->[-1]->[1] eq 'option' and
3886     $open_elements->[-2]->[1] eq 'optgroup') {
3887     ## As if </option>
3888     splice @$open_elements, -2;
3889     } elsif ($open_elements->[-1]->[1] eq 'optgroup') {
3890     pop @$open_elements;
3891     } else {
3892     !!!parse-error;
3893     ## Ignore the token
3894     }
3895     !!!next-token;
3896     redo B;
3897     } elsif ($token->{tag_name} eq 'option') {
3898     if ($open_elements->[-1]->[1] eq 'option') {
3899     pop @$open_elements;
3900     } else {
3901     !!!parse-error;
3902     ## Ignore the token
3903     }
3904     !!!next-token;
3905     redo B;
3906     } elsif ($token->{tag_name} eq 'select') {
3907     ## have an element in table scope
3908     my $i;
3909     INSCOPE: for (reverse 0..$#$open_elements) {
3910     my $node = $open_elements->[$_];
3911     if ($node->[1] eq $token->{tag_name}) {
3912     $i = $_;
3913     last INSCOPE;
3914     } elsif ({
3915     table => 1, html => 1,
3916     }->{$node->[1]}) {
3917     last INSCOPE;
3918     }
3919     } # INSCOPE
3920     unless (defined $i) {
3921     !!!parse-error;
3922     ## Ignore the token
3923     !!!next-token;
3924     redo B;
3925     }
3926    
3927     splice @$open_elements, $i;
3928    
3929     $reset_insertion_mode->();
3930    
3931     !!!next-token;
3932     redo B;
3933     } elsif ({
3934     caption => 1, table => 1, tbody => 1,
3935     tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
3936     }->{$token->{tag_name}}) {
3937     !!!parse-error;
3938    
3939     ## have an element in table scope
3940     my $i;
3941     INSCOPE: for (reverse 0..$#$open_elements) {
3942     my $node = $open_elements->[$_];
3943     if ($node->[1] eq $token->{tag_name}) {
3944     $i = $_;
3945     last INSCOPE;
3946     } elsif ({
3947     table => 1, html => 1,
3948     }->{$node->[1]}) {
3949     last INSCOPE;
3950     }
3951     } # INSCOPE
3952     unless (defined $i) {
3953     ## Ignore the token
3954     !!!next-token;
3955     redo B;
3956     }
3957    
3958     ## As if </select>
3959     ## have an element in table scope
3960     undef $i;
3961     INSCOPE: for (reverse 0..$#$open_elements) {
3962     my $node = $open_elements->[$_];
3963     if ($node->[1] eq 'select') {
3964     $i = $_;
3965     last INSCOPE;
3966     } elsif ({
3967     table => 1, html => 1,
3968     }->{$node->[1]}) {
3969     last INSCOPE;
3970     }
3971     } # INSCOPE
3972     unless (defined $i) {
3973     !!!parse-error;
3974     ## Ignore the </select> token
3975     !!!next-token; ## TODO: ok?
3976     redo B;
3977     }
3978    
3979     splice @$open_elements, $i;
3980    
3981     $reset_insertion_mode->();
3982    
3983     ## reprocess
3984     redo B;
3985     } else {
3986     #
3987     }
3988     } else {
3989     #
3990     }
3991    
3992     !!!parse-error;
3993     ## Ignore the token
3994     redo B;
3995     } elsif ($insertion_mode eq 'after body') {
3996     if ($token->{type} eq 'character') {
3997     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3998     ## As if in body
3999     $reconstruct_active_formatting_elements->();
4000    
4001     $open_elements->[-1]->[0]->manakai_append_text ($token->{data});
4002    
4003     unless (length $token->{data}) {
4004     !!!next-token;
4005     redo B;
4006     }
4007     }
4008    
4009     #
4010     } elsif ($token->{type} eq 'comment') {
4011     my $comment = $self->{document}->create_comment ($token->{data});
4012     $open_elements->[0]->[0]->append_child ($comment);
4013     !!!next-token;
4014     redo B;
4015     } elsif ($token->{type} eq 'end tag') {
4016     if ($token->{type} eq 'html') {
4017     ## TODO: if inner_html, parse-error, ignore the token; otherwise,
4018    
4019     $phase = 'trailing end';
4020     !!!next-token;
4021     redo B;
4022     } else {
4023     #
4024     }
4025     } else {
4026     #
4027     }
4028    
4029     !!!parse-error;
4030     $insertion_mode = 'in body';
4031     ## reprocess
4032     redo B;
4033     } elsif ($insertion_mode eq 'in frameset') {
4034     if ($token->{type} eq 'character') {
4035     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4036     $open_elements->[-1]->[0]->manakai_append_text ($token->{data});
4037    
4038     unless (length $token->{data}) {
4039     !!!next-token;
4040     redo B;
4041     }
4042     }
4043    
4044     #
4045     } elsif ($token->{type} eq 'comment') {
4046     my $comment = $self->{document}->create_comment ($token->{data});
4047     $open_elements->[-1]->[0]->append_child ($comment);
4048     !!!next-token;
4049     redo B;
4050     } elsif ($token->{type} eq 'start tag') {
4051     if ($token->{tag_name} eq 'frameset') {
4052     !!!insert-element ($token->{tag_name}, $token->{attributes});
4053     !!!next-token;
4054     redo B;
4055     } elsif ($token->{tag_name} eq 'frame') {
4056     !!!insert-element ($token->{tag_name}, $token->{attributes});
4057     pop @$open_elements;
4058     !!!next-token;
4059     redo B;
4060     } elsif ($token->{tag_name} eq 'noframes') {
4061     $in_body->(sub {
4062     $open_elements->[-1]->[0]->append_child (shift);
4063     });
4064     redo B;
4065     } else {
4066     #
4067     }
4068     } elsif ($token->{type} eq 'end tag') {
4069     if ($token->{tag_name} eq 'frameset') {
4070     if ($open_elements->[-1]->[1] eq 'html' and
4071     @$open_elements == 1) {
4072     !!!parse-error;
4073     ## Ignore the token
4074     !!!next-token;
4075     } else {
4076     pop @$open_elements;
4077     !!!next-token;
4078     }
4079    
4080     ## if not inner_html and
4081     if ($open_elements->[-1]->[1] ne 'frameset') {
4082     $insertion_mode = 'after frameset';
4083     }
4084     redo B;
4085     } else {
4086     #
4087     }
4088     } else {
4089     #
4090     }
4091    
4092     !!!parse-error;
4093     ## Ignore the token
4094     !!!next-token;
4095     redo B;
4096     } elsif ($insertion_mode eq 'after frameset') {
4097     if ($token->{type} eq 'character') {
4098     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4099     $open_elements->[-1]->[0]->manakai_append_text ($token->{data});
4100    
4101     unless (length $token->{data}) {
4102     !!!next-token;
4103     redo B;
4104     }
4105     }
4106    
4107     #
4108     } elsif ($token->{type} eq 'comment') {
4109     my $comment = $self->{document}->create_comment ($token->{data});
4110     $open_elements->[-1]->[0]->append_child ($comment);
4111     !!!next-token;
4112     redo B;
4113     } elsif ($token->{type} eq 'start tag') {
4114     if ($token->{tag_name} eq 'noframes') {
4115     $in_body->(sub {
4116     $open_elements->[-1]->[0]->append_child (shift);
4117     });
4118     redo B;
4119     } else {
4120     #
4121     }
4122     } elsif ($token->{type} eq 'end tag') {
4123     if ($token->{tag_name} eq 'html') {
4124     $phase = 'trailing end';
4125     !!!next-token;
4126     redo B;
4127     } else {
4128     #
4129     }
4130     } else {
4131     #
4132     }
4133    
4134     !!!parse-error;
4135     ## Ignore the token
4136     !!!next-token;
4137     redo B;
4138    
4139     ## ISSUE: An issue in spec there
4140     } else {
4141     die "$0: $insertion_mode: Unknown insertion mode";
4142     }
4143     }
4144     } elsif ($phase eq 'trailing end') {
4145     ## states in the main stage is preserved yet # MUST
4146    
4147     if ($token->{type} eq 'DOCTYPE') {
4148     !!!parse-error;
4149     ## Ignore the token
4150     !!!next-token;
4151     redo B;
4152     } elsif ($token->{type} eq 'comment') {
4153     my $comment = $self->{document}->create_comment ($token->{data});
4154     $self->{document}->append_child ($comment);
4155     !!!next-token;
4156     redo B;
4157     } elsif ($token->{type} eq 'character') {
4158     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4159     ## As if in the main phase.
4160     ## NOTE: The insertion mode in the main phase
4161     ## just before the phase has been changed to the trailing
4162     ## end phase is either "after body" or "after frameset".
4163     $reconstruct_active_formatting_elements->()
4164     if $phase eq 'main';
4165    
4166     $open_elements->[-1]->[0]->manakai_append_text ($token->{data});
4167    
4168     unless (length $token->{data}) {
4169     !!!next-token;
4170     redo B;
4171     }
4172     }
4173    
4174     !!!parse-error;
4175     $phase = 'main';
4176     ## reprocess
4177     redo B;
4178     } elsif ($token->{type} eq 'start tag' or
4179     $token->{type} eq 'end tag') {
4180     !!!parse-error;
4181     $phase = 'main';
4182     ## reprocess
4183     redo B;
4184     } elsif ($token->{type} eq 'end-of-file') {
4185     ## Stop parsing
4186     last B;
4187     } else {
4188     die "$0: $token->{type}: Unknown token";
4189     }
4190     }
4191     } # B
4192    
4193     ## Stop parsing # MUST
4194    
4195     ## TODO: script stuffs
4196     } # _construct_tree
4197    
## Serialize the children of |$node| into an HTML fragment string
## (the |innerHTML| getter algorithm).
##
## Arguments:
##   $class    - The invocant (class name).  Not used by the serializer.
##   $node     - The node whose child nodes are serialized.  The node
##               itself is not emitted; it and its ancestors are only
##               inspected to decide whether text is escaped.
##   $on_error - A code reference.  It is called with the offending
##               node for every node whose type cannot be serialized.
##
## Returns a reference to the resulting string.
sub inner_html ($$$) {
  my ($class, $node, $on_error) = @_;

  ## Elements whose text content is emitted verbatim (not escaped).
  my $cdata_element = {
    style => 1, script => 1, xmp => 1, iframe => 1,
    noembed => 1, noframes => 1, noscript => 1,
  };

  ## Elements that are serialized as a start tag only (no children,
  ## no end tag).
  my $void_element = {
    area => 1, base => 1, basefont => 1, bgsound => 1,
    br => 1, col => 1, embed => 1, frame => 1, hr => 1,
    img => 1, input => 1, link => 1, meta => 1, param => 1,
    spacer => 1, wbr => 1,
  };

  ## Step 1
  my $s = '';

  ## Text is in "CDATA mode" if |$node| or any of its ancestors is
  ## an XHTML-namespace element listed in |$cdata_element|.
  my $in_cdata;
  my $parent = $node;
  while (defined $parent) {
    if ($parent->node_type == 1) {
      my $ns = $parent->namespace_uri;
      if (defined $ns and
          $ns eq 'http://www.w3.org/1999/xhtml' and
          $cdata_element->{$parent->local_name}) { ## TODO: case thingy
        $in_cdata = 1;
      }
    }
    $parent = $parent->parent_node;
  }

  ## Step 2
  ## |@node| is a work list processed from the front.  Besides node
  ## objects it holds plain strings: an end tag to be appended
  ## literally, or the marker |cdata-out|, which leaves CDATA mode.
  my @node = @{$node->child_nodes};
  C: while (@node) {
    my $child = shift @node;
    unless (ref $child) {
      if ($child eq 'cdata-out') {
        $in_cdata = 0;
      } else {
        $s .= $child; # end tag
      }
      next C;
    }

    my $nt = $child->node_type;
    if ($nt == 1) { # Element
      my $tag_name = lc $child->tag_name; ## ISSUE: Definition of "lowercase"
      $s .= '<' . $tag_name;

      ## ISSUE: Non-html elements

      my @attrs = @{$child->attributes}; # sort order MUST be stable
      for my $attr (@attrs) { # order is implementation dependent
        my $attr_name = lc $attr->name; ## ISSUE: Definition of "lowercase"
        $s .= ' ' . $attr_name . '="';
        my $attr_value = $attr->value;
        ## escape (|&| first, so the other references stay intact)
        $attr_value =~ s/&/&amp;/g;
        $attr_value =~ s/</&lt;/g;
        $attr_value =~ s/>/&gt;/g;
        $attr_value =~ s/"/&quot;/g;
        $s .= $attr_value . '"';
      }
      $s .= '>';

      next C if $void_element->{$tag_name};

      ## Entering a CDATA element from escaped mode: schedule the
      ## marker so that it is reached right after the end tag that is
      ## queued below.
      if (not $in_cdata and $cdata_element->{$tag_name}) {
        unshift @node, 'cdata-out';
        $in_cdata = 1;
      }

      unshift @node, @{$child->child_nodes}, '</' . $tag_name . '>';
    } elsif ($nt == 3 or $nt == 4) { # Text or CDATASection
      if ($in_cdata) {
        $s .= $child->data;
      } else {
        my $value = $child->data;
        $value =~ s/&/&amp;/g;
        $value =~ s/</&lt;/g;
        $value =~ s/>/&gt;/g;
        $value =~ s/"/&quot;/g;
        $s .= $value;
      }
    } elsif ($nt == 8) { # Comment
      $s .= '<!--' . $child->data . '-->';
    } elsif ($nt == 10) { # DocumentType
      $s .= '<!DOCTYPE ' . $child->name . '>';
    } elsif ($nt == 5) { # entrefs
      ## The children replace the entity reference in place, so they
      ## go to the FRONT of the work list.  Appending them to the end
      ## would emit them after the following siblings and after the
      ## end tags of the enclosing elements.
      unshift @node, @{$child->child_nodes};
    } else {
      $on_error->($child);
    }
  } # C

  ## Step 3
  return \$s;
} # inner_html
4293    
4294 wakaba 1.1 1;
4295 wakaba 1.3 # $Date: 2007/04/30 07:41:50 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24