/[suikacvs]/markup/html/whatpm/What/HTML.pm.src
Suika

Contents of /markup/html/whatpm/What/HTML.pm.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (hide annotations) (download) (as text)
Mon Apr 30 07:41:50 2007 UTC (17 years, 7 months ago) by wakaba
Branch: MAIN
Changes since 1.1: +3121 -27 lines
File MIME type: application/x-wais-source
++ whatpm/What/ChangeLog	30 Apr 2007 07:41:35 -0000
2007-04-30  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm.src: The tree construction stage is implemented.

	* mkhtmlparser.pl: New macros are added.

1 wakaba 1.1 package What::HTML;
2     use strict;
3 wakaba 1.2 our $VERSION=do{my @r=(q$Revision: 1.1 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.1
5     ## This is a very, very early version of an HTML parser.
6    
## Start tag names for which a "/" immediately before the closing ">"
## is accepted without a parse error (the "permitted slash" check made
## by the tokenizer).
my $permitted_slash_tag_name = {
  map { $_ => 1 } qw(
    base link meta hr br img embed param area col input
  )
};
20    
## Element names belonging to the "special" category.  Each name maps
## to 1 so membership can be tested with a plain hash lookup.
my $special_category = {
  map { $_ => 1 } qw(
    address area base basefont bgsound blockquote body br center col
    colgroup dd dir div dl dt embed fieldset form frame frameset
    h1 h2 h3 h4 h5 h6 head hr iframe image img input isindex li link
    listing menu meta noembed noframes noscript ol optgroup option p
    param plaintext pre script select spacer style tbody textarea
    tfoot thead title tr ul wbr
  )
};
## Element names belonging to the "scoping" category.
my $scoping_category = {
  map { $_ => 1 } qw(button caption html marquee object table td th)
};
## Element names belonging to the "formatting" category.  Each name
## maps to 1 so membership can be tested with a plain hash lookup.
## (The misspelt key "strile" has been corrected to "strike"; there is
## no HTML element named "strile".)
my $formatting_category = {
  a => 1, b => 1, big => 1, em => 1, font => 1, i => 1, nobr => 1,
  s => 1, small => 1, strike => 1, strong => 1, tt => 1, u => 1,
};
41     # $phrasing_category: all other elements
42    
## new ($class)
##
## Constructor.  Returns a new object blessed into $class with two
## default callbacks installed:
##
##   ->{set_next_input_character}
##       Sets ->{next_input_character} to -1, the value the tokenizer
##       treats as end of input.
##   ->{parse_error}
##       Does nothing, i.e. parse errors are ignored by default.
##
## NOTE(review): callers presumably replace these callbacks with real
## ones before parsing -- confirm against the code that drives the
## parser.
sub new ($) {
  my $class = shift;
  my $self = bless {}, $class;

  ## The default callback has to reach the object, and the object owns
  ## the callback.  Capturing $self directly would create a reference
  ## cycle that keeps every parser object alive forever, so the closure
  ## captures a weakened copy instead.
  require Scalar::Util;
  my $weak_self = $self;
  Scalar::Util::weaken ($weak_self);

  $self->{set_next_input_character} = sub {
    $weak_self->{next_input_character} = -1;
  };
  $self->{parse_error} = sub {
    ## Intentionally empty.
  };
  return $self;
} # new
54    
55     ## Implementations MUST act as if they used the state machine in the spec
56    
## _initialize_tokenizer ($self)
##
## Puts the tokenizer fields of $self into their starting
## configuration: the 'data' state with the PCDATA content model flag,
## no token or attribute under construction, and empty ->{char} and
## ->{token} lists.  (->{token} is the list of pending tokens that
## |_get_next_token| hands out first.)
sub _initialize_tokenizer ($) {
  my $self = shift @_;

  ## Both starting values are marked as MUST in the original notes.
  $self->{state} = 'data';
  $self->{content_model_flag} = 'PCDATA';

  ## ->{current_token} is a start tag, end tag, comment, or DOCTYPE
  ## while one is being built.  All four fields start out undefined.
  @$self{qw(
    current_token
    current_attribute
    last_emitted_start_tag_name
    last_attribute_value_state
  )} = ();

  $self->{char} = [];

  ## Preprocessor macro; presumably this is what gives
  ## ->{next_input_character} its first value -- confirm against the
  ## macro definition in mkhtmlparser.pl.
  !!!next-input-character;

  $self->{token} = [];
} # _initialize_tokenizer
70    
71     ## A token has:
72     ## ->{type} eq 'DOCTYPE', 'start tag', 'end tag', 'comment',
73     ## 'character', or 'end-of-file'
74     ## ->{name} (DOCTYPE), ->{tag_name} (start tag, end tag)
75     ## ISSUE: the spec needs s/tagname/tag name/
76     ## ->{error} == 1 or 0 (DOCTYPE)
77     ## ->{attributes} isa HASH (start tag, end tag)
78     ## ->{data} (comment, character)
79    
80     ## Macros
81     ## Macros MUST be preceded by three EXCLAMATION MARKs.
82     ## emit ($token)
83     ## Emits the specified token.
84    
85     ## Emitted token MUST immediately be handled by the tree construction stage.
86    
87     ## Before each step, UA MAY check to see if either one of the scripts in
88     ## "list of scripts that will execute as soon as possible" or the first
89     ## script in the "list of scripts that will execute asynchronously",
90     ## has completed loading. If one has, then it MUST be executed
91     ## and removed from the list.
92    
93     sub _get_next_token ($) {
94     my $self = shift;
95     if (@{$self->{token}}) {
96     return shift @{$self->{token}};
97     }
98    
99     A: {
100     if ($self->{state} eq 'data') {
101     if ($self->{next_input_character} == 0x0026) { # &
102     if ($self->{content_model_flag} eq 'PCDATA' or
103     $self->{content_model_flag} eq 'RCDATA') {
104     $self->{state} = 'entity data';
105     !!!next-input-character;
106     redo A;
107     } else {
108     #
109     }
110     } elsif ($self->{next_input_character} == 0x003C) { # <
111     if ($self->{content_model_flag} ne 'PLAINTEXT') {
112     $self->{state} = 'tag open';
113     !!!next-input-character;
114     redo A;
115     } else {
116     #
117     }
118     } elsif ($self->{next_input_character} == -1) {
119     !!!emit ({type => 'end-of-file'});
120     last A; ## TODO: ok?
121     }
122     # Anything else
123     my $token = {type => 'character',
124     data => chr $self->{next_input_character}};
125     ## Stay in the data state
126     !!!next-input-character;
127    
128     !!!emit ($token);
129    
130     redo A;
131     } elsif ($self->{state} eq 'entity data') {
132     ## (cannot happen in CDATA state)
133    
134     my $token = $self->_tokenize_attempt_to_consume_an_entity;
135    
136     $self->{state} = 'data';
137     # next-input-character is already done
138    
139     unless (defined $token) {
140     !!!emit ({type => 'character', data => '&'});
141     } else {
142     !!!emit ($token);
143     }
144    
145     redo A;
146     } elsif ($self->{state} eq 'tag open') {
147     if ($self->{content_model_flag} eq 'RCDATA' or
148     $self->{content_model_flag} eq 'CDATA') {
149     if ($self->{next_input_character} == 0x002F) { # /
150     !!!next-input-character;
151     $self->{state} = 'close tag open';
152     redo A;
153     } else {
154     ## reconsume
155     $self->{state} = 'data';
156    
157     !!!emit (type => 'character', data => {'/'});
158    
159     redo A;
160     }
161     } elsif ($self->{content_model_flag} eq 'PCDATA') {
162     if ($self->{next_input_character} == 0x0021) { # !
163     $self->{state} = 'markup declaration open';
164     !!!next-input-character;
165     redo A;
166     } elsif ($self->{next_input_character} == 0x002F) { # /
167     $self->{state} = 'close tag open';
168     !!!next-input-character;
169     redo A;
170     } elsif (0x0041 <= $self->{next_input_character} and
171     $self->{next_input_character} <= 0x005A) { # A..Z
172     $self->{current_token}
173     = {type => 'start tag',
174     tag_name => chr ($self->{next_input_character} + 0x0020)};
175     $self->{state} = 'tag name';
176     !!!next-input-character;
177     redo A;
178     } elsif (0x0061 <= $self->{next_input_character} and
179     $self->{next_input_character} <= 0x007A) { # a..z
180     $self->{current_token} = {type => 'start tag',
181     tag_name => chr ($self->{next_input_character})};
182     $self->{state} = 'tag name';
183     !!!next-input-character;
184     redo A;
185     } elsif ($self->{next_input_character} == 0x003E) { # >
186     !!!parse-error;
187     $self->{state} = 'data';
188     !!!next-input-character;
189    
190     !!!emit ({type => 'character', data => '>'});
191    
192     redo A;
193     } elsif ($self->{next_input_character} == 0x003F) { # ?
194     !!!parse-error;
195     $self->{state} = 'bogus comment';
196     ## $self->{next_input_character} is intentionally left as is
197     redo A;
198     } else {
199     !!!parse-error;
200     $self->{state} = 'data';
201     ## reconsume
202    
203     !!!emit ({type => 'character', data => '<'});
204    
205     redo A;
206     }
207     } else {
208     die "$0: $self->{content_model_flag}: Unknown content model flag";
209     }
210     } elsif ($self->{state} eq 'close tag open') {
211     if ($self->{content_model_flag} eq 'RCDATA' or
212     $self->{content_model_flag} eq 'CDATA') {
213     my @next_char;
214     TAGNAME: for (my $i = 0; $i < length $self->{last_emitted_start_tag_name}; $i++) {
215     push @next_char, $self->{next_input_character};
216     my $c = ord substr ($self->{last_emitted_start_tag_name}, $i, 1);
217     my $C = 0x0061 <= $c && $c <= 0x007A ? $c - 0x0020 : $c;
218     if ($self->{next_input_character} == $c or $self->{next_input_character} == $C) {
219     !!!next-input-character;
220     next TAGNAME;
221     } else {
222     !!!parse-error;
223     $self->{next_input_character} = shift @next_char; # reconsume
224     !!!back-next-input-character (@next_char);
225     $self->{state} = 'data';
226    
227     !!!emit ({type => 'character', data => '</'});
228    
229     redo A;
230     }
231     }
232 wakaba 1.2 push @next_char, $self->{next_input_character};
233 wakaba 1.1
234 wakaba 1.2 unless ($self->{next_input_character} == 0x0009 or # HT
235     $self->{next_input_character} == 0x000A or # LF
236     $self->{next_input_character} == 0x000B or # VT
237     $self->{next_input_character} == 0x000C or # FF
238     $self->{next_input_character} == 0x0020 or # SP
239     $self->{next_input_character} == 0x003E or # >
240     $self->{next_input_character} == 0x002F or # /
241     $self->{next_input_character} == 0x003C or # <
242 wakaba 1.1 $self->{next_input_character} == -1) {
243     !!!parse-error;
244     $self->{next_input_character} = shift @next_char; # reconsume
245     !!!back-next-input-character (@next_char);
246     $self->{state} = 'data';
247    
248     !!!emit ({type => 'character', data => '</'});
249    
250     redo A;
251     } else {
252     $self->{next_input_character} = shift @next_char;
253     !!!back-next-input-character (@next_char);
254     # and consume...
255     }
256     }
257    
258     if (0x0041 <= $self->{next_input_character} and
259     $self->{next_input_character} <= 0x005A) { # A..Z
260     $self->{current_token} = {type => 'end tag',
261     tag_name => chr ($self->{next_input_character} + 0x0020)};
262     $self->{state} = 'tag name';
263     !!!next-input-character;
264     redo A;
265     } elsif (0x0061 <= $self->{next_input_character} and
266     $self->{next_input_character} <= 0x007A) { # a..z
267     $self->{current_token} = {type => 'end tag',
268     tag_name => chr ($self->{next_input_character})};
269     $self->{state} = 'tag name';
270     !!!next-input-character;
271     redo A;
272     } elsif ($self->{next_input_character} == 0x003E) { # >
273     !!!parse-error;
274     $self->{state} = 'data';
275     !!!next-input-character;
276     redo A;
277     } elsif ($self->{next_input_character} == -1) {
278     !!!parse-error;
279     $self->{state} = 'data';
280     # reconsume
281    
282     !!!emit ({type => 'character', data => '</'});
283    
284     redo A;
285     } else {
286     !!!parse-error;
287     $self->{state} = 'bogus comment';
288     ## $self->{next_input_character} is intentionally left as is
289     redo A;
290     }
291     } elsif ($self->{state} eq 'tag name') {
292     if ($self->{next_input_character} == 0x0009 or # HT
293     $self->{next_input_character} == 0x000A or # LF
294     $self->{next_input_character} == 0x000B or # VT
295     $self->{next_input_character} == 0x000C or # FF
296     $self->{next_input_character} == 0x0020) { # SP
297     $self->{state} = 'before attribute name';
298     !!!next-input-character;
299     redo A;
300     } elsif ($self->{next_input_character} == 0x003E) { # >
301     if ($self->{current_token}->{type} eq 'start tag') {
302     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
303     } elsif ($self->{current_token}->{type} eq 'end tag') {
304     $self->{content_model_flag} = 'PCDATA'; # MUST
305 wakaba 1.2 if ($self->{current_token}->{attributes}) {
306 wakaba 1.1 !!!parse-error;
307     }
308     } else {
309     die "$0: $self->{current_token}->{type}: Unknown token type";
310     }
311     $self->{state} = 'data';
312     !!!next-input-character;
313    
314     !!!emit ($self->{current_token}); # start tag or end tag
315     undef $self->{current_token};
316    
317     redo A;
318     } elsif (0x0041 <= $self->{next_input_character} and
319     $self->{next_input_character} <= 0x005A) { # A..Z
320     $self->{current_token}->{tag_name} .= chr ($self->{next_input_character} + 0x0020);
321     # start tag or end tag
322     ## Stay in this state
323     !!!next-input-character;
324     redo A;
325     } elsif ($self->{next_input_character} == 0x003C or # <
326     $self->{next_input_character} == -1) {
327     !!!parse-error;
328     if ($self->{current_token}->{type} eq 'start tag') {
329     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
330     } elsif ($self->{current_token}->{type} eq 'end tag') {
331     $self->{content_model_flag} = 'PCDATA'; # MUST
332 wakaba 1.2 if ($self->{current_token}->{attributes}) {
333 wakaba 1.1 !!!parse-error;
334     }
335     } else {
336     die "$0: $self->{current_token}->{type}: Unknown token type";
337     }
338     $self->{state} = 'data';
339     # reconsume
340    
341     !!!emit ($self->{current_token}); # start tag or end tag
342     undef $self->{current_token};
343    
344     redo A;
345     } elsif ($self->{next_input_character} == 0x002F) { # /
346     !!!next-input-character;
347     if ($self->{next_input_character} == 0x003E and # >
348     $self->{current_token}->{type} eq 'start tag' and
349     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
350     # permitted slash
351     #
352     } else {
353     !!!parse-error;
354     }
355     $self->{state} = 'before attribute name';
356     # next-input-character is already done
357     redo A;
358     } else {
359     $self->{current_token}->{tag_name} .= chr $self->{next_input_character};
360     # start tag or end tag
361     ## Stay in the state
362     !!!next-input-character;
363     redo A;
364     }
365     } elsif ($self->{state} eq 'before attribute name') {
366     if ($self->{next_input_character} == 0x0009 or # HT
367     $self->{next_input_character} == 0x000A or # LF
368     $self->{next_input_character} == 0x000B or # VT
369     $self->{next_input_character} == 0x000C or # FF
370     $self->{next_input_character} == 0x0020) { # SP
371     ## Stay in the state
372     !!!next-input-character;
373     redo A;
374     } elsif ($self->{next_input_character} == 0x003E) { # >
375     if ($self->{current_token}->{type} eq 'start tag') {
376     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
377     } elsif ($self->{current_token}->{type} eq 'end tag') {
378     $self->{content_model_flag} = 'PCDATA'; # MUST
379 wakaba 1.2 if ($self->{current_token}->{attributes}) {
380 wakaba 1.1 !!!parse-error;
381     }
382     } else {
383     die "$0: $self->{current_token}->{type}: Unknown token type";
384     }
385     $self->{state} = 'data';
386     !!!next-input-character;
387    
388     !!!emit ($self->{current_token}); # start tag or end tag
389     undef $self->{current_token};
390    
391     redo A;
392     } elsif (0x0041 <= $self->{next_input_character} and
393     $self->{next_input_character} <= 0x005A) { # A..Z
394     $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
395     value => ''};
396     $self->{state} = 'attribute name';
397     !!!next-input-character;
398     redo A;
399     } elsif ($self->{next_input_character} == 0x002F) { # /
400     !!!next-input-character;
401     if ($self->{next_input_character} == 0x003E and # >
402     $self->{current_token}->{type} eq 'start tag' and
403     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
404     # permitted slash
405     #
406     } else {
407     !!!parse-error;
408     }
409     ## Stay in the state
410     # next-input-character is already done
411     redo A;
412     } elsif ($self->{next_input_character} == 0x003C or # <
413     $self->{next_input_character} == -1) {
414     !!!parse-error;
415     if ($self->{current_token}->{type} eq 'start tag') {
416     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
417     } elsif ($self->{current_token}->{type} eq 'end tag') {
418     $self->{content_model_flag} = 'PCDATA'; # MUST
419 wakaba 1.2 if ($self->{current_token}->{attributes}) {
420 wakaba 1.1 !!!parse-error;
421     }
422     } else {
423     die "$0: $self->{current_token}->{type}: Unknown token type";
424     }
425     $self->{state} = 'data';
426     # reconsume
427    
428     !!!emit ($self->{current_token}); # start tag or end tag
429     undef $self->{current_token};
430    
431     redo A;
432     } else {
433     $self->{current_attribute} = {name => chr ($self->{next_input_character}),
434     value => ''};
435     $self->{state} = 'attribute name';
436     !!!next-input-character;
437     redo A;
438     }
439     } elsif ($self->{state} eq 'attribute name') {
440     my $before_leave = sub {
441 wakaba 1.2 if (exists $self->{current_token}->{attributes} # start tag or end tag
442 wakaba 1.1 ->{$self->{current_attribute}->{name}}) { # MUST
443     !!!parse-error;
444     ## Discard $self->{current_attribute} # MUST
445     } else {
446 wakaba 1.2 $self->{current_token}->{attributes}->{$self->{current_attribute}->{name}}
447 wakaba 1.1 = $self->{current_attribute};
448     }
449     }; # $before_leave
450    
451     if ($self->{next_input_character} == 0x0009 or # HT
452     $self->{next_input_character} == 0x000A or # LF
453     $self->{next_input_character} == 0x000B or # VT
454     $self->{next_input_character} == 0x000C or # FF
455     $self->{next_input_character} == 0x0020) { # SP
456     $before_leave->();
457     $self->{state} = 'after attribute name';
458     !!!next-input-character;
459     redo A;
460     } elsif ($self->{next_input_character} == 0x003D) { # =
461     $before_leave->();
462     $self->{state} = 'before attribute value';
463     !!!next-input-character;
464     redo A;
465     } elsif ($self->{next_input_character} == 0x003E) { # >
466     $before_leave->();
467     if ($self->{current_token}->{type} eq 'start tag') {
468     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
469     } elsif ($self->{current_token}->{type} eq 'end tag') {
470     $self->{content_model_flag} = 'PCDATA'; # MUST
471 wakaba 1.2 if ($self->{current_token}->{attributes}) {
472 wakaba 1.1 !!!parse-error;
473     }
474     } else {
475     die "$0: $self->{current_token}->{type}: Unknown token type";
476     }
477     $self->{state} = 'data';
478     !!!next-input-character;
479    
480     !!!emit ($self->{current_token}); # start tag or end tag
481     undef $self->{current_token};
482    
483     redo A;
484     } elsif (0x0041 <= $self->{next_input_character} and
485     $self->{next_input_character} <= 0x005A) { # A..Z
486     $self->{current_attribute}->{name} .= chr ($self->{next_input_character} + 0x0020);
487     ## Stay in the state
488     !!!next-input-character;
489     redo A;
490     } elsif ($self->{next_input_character} == 0x002F) { # /
491     $before_leave->();
492     !!!next-input-character;
493     if ($self->{next_input_character} == 0x003E and # >
494     $self->{current_token}->{type} eq 'start tag' and
495     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
496     # permitted slash
497     #
498     } else {
499     !!!parse-error;
500     }
501     $self->{state} = 'before attribute name';
502     # next-input-character is already done
503     redo A;
504     } elsif ($self->{next_input_character} == 0x003C or # <
505     $self->{next_input_character} == -1) {
506     !!!parse-error;
507     $before_leave->();
508     if ($self->{current_token}->{type} eq 'start tag') {
509     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
510     } elsif ($self->{current_token}->{type} eq 'end tag') {
511     $self->{content_model_flag} = 'PCDATA'; # MUST
512 wakaba 1.2 if ($self->{current_token}->{attributes}) {
513 wakaba 1.1 !!!parse-error;
514     }
515     } else {
516     die "$0: $self->{current_token}->{type}: Unknown token type";
517     }
518     $self->{state} = 'data';
519     # reconsume
520    
521     !!!emit ($self->{current_token}); # start tag or end tag
522     undef $self->{current_token};
523    
524     redo A;
525     } else {
526     $self->{current_attribute}->{name} .= chr ($self->{next_input_character});
527     ## Stay in the state
528     !!!next-input-character;
529     redo A;
530     }
531     } elsif ($self->{state} eq 'after attribute name') {
532     if ($self->{next_input_character} == 0x0009 or # HT
533     $self->{next_input_character} == 0x000A or # LF
534     $self->{next_input_character} == 0x000B or # VT
535     $self->{next_input_character} == 0x000C or # FF
536     $self->{next_input_character} == 0x0020) { # SP
537     ## Stay in the state
538     !!!next-input-character;
539     redo A;
540     } elsif ($self->{next_input_character} == 0x003D) { # =
541     $self->{state} = 'before attribute value';
542     !!!next-input-character;
543     redo A;
544     } elsif ($self->{next_input_character} == 0x003E) { # >
545     if ($self->{current_token}->{type} eq 'start tag') {
546     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
547     } elsif ($self->{current_token}->{type} eq 'end tag') {
548     $self->{content_model_flag} = 'PCDATA'; # MUST
549 wakaba 1.2 if ($self->{current_token}->{attributes}) {
550 wakaba 1.1 !!!parse-error;
551     }
552     } else {
553     die "$0: $self->{current_token}->{type}: Unknown token type";
554     }
555     $self->{state} = 'data';
556     !!!next-input-character;
557    
558     !!!emit ($self->{current_token}); # start tag or end tag
559     undef $self->{current_token};
560    
561     redo A;
562     } elsif (0x0041 <= $self->{next_input_character} and
563     $self->{next_input_character} <= 0x005A) { # A..Z
564     $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
565     value => ''};
566     $self->{state} = 'attribute name';
567     !!!next-input-character;
568     redo A;
569     } elsif ($self->{next_input_character} == 0x002F) { # /
570     !!!next-input-character;
571     if ($self->{next_input_character} == 0x003E and # >
572     $self->{current_token}->{type} eq 'start tag' and
573     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
574     # permitted slash
575     #
576     } else {
577     !!!parse-error;
578     }
579     $self->{state} = 'before attribute name';
580     # next-input-character is already done
581     redo A;
582     } elsif ($self->{next_input_character} == 0x003C or # <
583     $self->{next_input_character} == -1) {
584     !!!parse-error;
585     if ($self->{current_token}->{type} eq 'start tag') {
586     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
587     } elsif ($self->{current_token}->{type} eq 'end tag') {
588     $self->{content_model_flag} = 'PCDATA'; # MUST
589 wakaba 1.2 if ($self->{current_token}->{attributes}) {
590 wakaba 1.1 !!!parse-error;
591     }
592     } else {
593     die "$0: $self->{current_token}->{type}: Unknown token type";
594     }
595     $self->{state} = 'data';
596     # reconsume
597    
598     !!!emit ($self->{current_token}); # start tag or end tag
599     undef $self->{current_token};
600    
601     redo A;
602     } else {
603     $self->{current_attribute} = {name => chr ($self->{next_input_character}),
604     value => ''};
605     $self->{state} = 'attribute name';
606     !!!next-input-character;
607     redo A;
608     }
609     } elsif ($self->{state} eq 'before attribute value') {
610     if ($self->{next_input_character} == 0x0009 or # HT
611     $self->{next_input_character} == 0x000A or # LF
612     $self->{next_input_character} == 0x000B or # VT
613     $self->{next_input_character} == 0x000C or # FF
614     $self->{next_input_character} == 0x0020) { # SP
615     ## Stay in the state
616     !!!next-input-character;
617     redo A;
618     } elsif ($self->{next_input_character} == 0x0022) { # "
619     $self->{state} = 'attribute value (double-quoted)';
620     !!!next-input-character;
621     redo A;
622     } elsif ($self->{next_input_character} == 0x0026) { # &
623     $self->{state} = 'attribute value (unquoted)';
624     ## reconsume
625     redo A;
626     } elsif ($self->{next_input_character} == 0x0027) { # '
627     $self->{state} = 'attribute value (single-quoted)';
628     !!!next-input-character;
629     redo A;
630     } elsif ($self->{next_input_character} == 0x003E) { # >
631     if ($self->{current_token}->{type} eq 'start tag') {
632     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
633     } elsif ($self->{current_token}->{type} eq 'end tag') {
634     $self->{content_model_flag} = 'PCDATA'; # MUST
635 wakaba 1.2 if ($self->{current_token}->{attributes}) {
636 wakaba 1.1 !!!parse-error;
637     }
638     } else {
639     die "$0: $self->{current_token}->{type}: Unknown token type";
640     }
641     $self->{state} = 'data';
642     !!!next-input-character;
643    
644     !!!emit ($self->{current_token}); # start tag or end tag
645     undef $self->{current_token};
646    
647     redo A;
648     } elsif ($self->{next_input_character} == 0x003C or # <
649     $self->{next_input_character} == -1) {
650     !!!parse-error;
651     if ($self->{current_token}->{type} eq 'start tag') {
652     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
653     } elsif ($self->{current_token}->{type} eq 'end tag') {
654     $self->{content_model_flag} = 'PCDATA'; # MUST
655 wakaba 1.2 if ($self->{current_token}->{attributes}) {
656 wakaba 1.1 !!!parse-error;
657     }
658     } else {
659     die "$0: $self->{current_token}->{type}: Unknown token type";
660     }
661     $self->{state} = 'data';
662     ## reconsume
663    
664     !!!emit ($self->{current_token}); # start tag or end tag
665     undef $self->{current_token};
666    
667     redo A;
668     } else {
669     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
670     $self->{state} = 'attribute value (unquoted)';
671     !!!next-input-character;
672     redo A;
673     }
674     } elsif ($self->{state} eq 'attribute value (double-quoted)') {
675     if ($self->{next_input_character} == 0x0022) { # "
676     $self->{state} = 'before attribute name';
677     !!!next-input-character;
678     redo A;
679     } elsif ($self->{next_input_character} == 0x0026) { # &
680     $self->{last_attribute_value_state} = 'attribute value (double-quoted)';
681     $self->{state} = 'entity in attribute value';
682     !!!next-input-character;
683     redo A;
684     } elsif ($self->{next_input_character} == -1) {
685     !!!parse-error;
686     if ($self->{current_token}->{type} eq 'start tag') {
687     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
688     } elsif ($self->{current_token}->{type} eq 'end tag') {
689     $self->{content_model_flag} = 'PCDATA'; # MUST
690 wakaba 1.2 if ($self->{current_token}->{attributes}) {
691 wakaba 1.1 !!!parse-error;
692     }
693     } else {
694     die "$0: $self->{current_token}->{type}: Unknown token type";
695     }
696     $self->{state} = 'data';
697     ## reconsume
698    
699     !!!emit ($self->{current_token}); # start tag or end tag
700     undef $self->{current_token};
701    
702     redo A;
703     } else {
704     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
705     ## Stay in the state
706     !!!next-input-character;
707     redo A;
708     }
709     } elsif ($self->{state} eq 'attribute value (single-quoted)') {
710     if ($self->{next_input_character} == 0x0027) { # '
711     $self->{state} = 'before attribute name';
712     !!!next-input-character;
713     redo A;
714     } elsif ($self->{next_input_character} == 0x0026) { # &
715     $self->{last_attribute_value_state} = 'attribute value (single-quoted)';
716     $self->{state} = 'entity in attribute value';
717     !!!next-input-character;
718     redo A;
719     } elsif ($self->{next_input_character} == -1) {
720     !!!parse-error;
721     if ($self->{current_token}->{type} eq 'start tag') {
722     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
723     } elsif ($self->{current_token}->{type} eq 'end tag') {
724     $self->{content_model_flag} = 'PCDATA'; # MUST
725 wakaba 1.2 if ($self->{current_token}->{attributes}) {
726 wakaba 1.1 !!!parse-error;
727     }
728     } else {
729     die "$0: $self->{current_token}->{type}: Unknown token type";
730     }
731     $self->{state} = 'data';
732     ## reconsume
733    
734     !!!emit ($self->{current_token}); # start tag or end tag
735     undef $self->{current_token};
736    
737     redo A;
738     } else {
739     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
740     ## Stay in the state
741     !!!next-input-character;
742     redo A;
743     }
744     } elsif ($self->{state} eq 'attribute value (unquoted)') {
745     if ($self->{next_input_character} == 0x0009 or # HT
746     $self->{next_input_character} == 0x000A or # LF
747     $self->{next_input_character} == 0x000B or # HT
748     $self->{next_input_character} == 0x000C or # FF
749     $self->{next_input_character} == 0x0020) { # SP
750     $self->{state} = 'before attribute name';
751     !!!next-input-character;
752     redo A;
753     } elsif ($self->{next_input_character} == 0x0026) { # &
754     $self->{last_attribute_value_state} = 'attribute value (unquoted)';
755     $self->{state} = 'entity in attribute value';
756     !!!next-input-character;
757     redo A;
758     } elsif ($self->{next_input_character} == 0x003E) { # >
759     if ($self->{current_token}->{type} eq 'start tag') {
760     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
761     } elsif ($self->{current_token}->{type} eq 'end tag') {
762     $self->{content_model_flag} = 'PCDATA'; # MUST
763 wakaba 1.2 if ($self->{current_token}->{attributes}) {
764 wakaba 1.1 !!!parse-error;
765     }
766     } else {
767     die "$0: $self->{current_token}->{type}: Unknown token type";
768     }
769     $self->{state} = 'data';
770     !!!next-input-character;
771    
772     !!!emit ($self->{current_token}); # start tag or end tag
773     undef $self->{current_token};
774    
775     redo A;
776     } elsif ($self->{next_input_character} == 0x003C or # <
777     $self->{next_input_character} == -1) {
778     !!!parse-error;
779     if ($self->{current_token}->{type} eq 'start tag') {
780     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
781     } elsif ($self->{current_token}->{type} eq 'end tag') {
782     $self->{content_model_flag} = 'PCDATA'; # MUST
783 wakaba 1.2 if ($self->{current_token}->{attributes}) {
784 wakaba 1.1 !!!parse-error;
785     }
786     } else {
787     die "$0: $self->{current_token}->{type}: Unknown token type";
788     }
789     $self->{state} = 'data';
790     ## reconsume
791    
792     !!!emit ($self->{current_token}); # start tag or end tag
793     undef $self->{current_token};
794    
795     redo A;
796     } else {
797     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
798     ## Stay in the state
799     !!!next-input-character;
800     redo A;
801     }
802     } elsif ($self->{state} eq 'entity in attribute value') {
803     my $token = $self->_tokenize_attempt_to_consume_an_entity;
804    
805     unless (defined $token) {
806     $self->{current_attribute}->{value} .= '&';
807     } else {
808     $self->{current_attribute}->{value} .= $token->{data};
809     ## ISSUE: spec says "append the returned character token to the current attribute's value"
810     }
811    
812     $self->{state} = $self->{last_attribute_value_state};
813     # next-input-character is already done
814     redo A;
815     } elsif ($self->{state} eq 'bogus comment') {
816     ## (only happen if PCDATA state)
817    
818     my $token = {type => 'comment', data => ''};
819    
820     BC: {
821     if ($self->{next_input_character} == 0x003E) { # >
822     $self->{state} = 'data';
823     !!!next-input-character;
824    
825     !!!emit ($token);
826    
827     redo A;
828     } elsif ($self->{next_input_character} == -1) {
829     $self->{state} = 'data';
830     ## reconsume
831    
832     !!!emit ($token);
833    
834     redo A;
835     } else {
836     $token->{data} .= chr ($self->{next_input_character});
837     !!!next-input-character;
838     redo BC;
839     }
840     } # BC
841     } elsif ($self->{state} eq 'markup declaration open') {
842     ## (only happen if PCDATA state)
843    
844     my @next_char;
845     push @next_char, $self->{next_input_character};
846    
847     if ($self->{next_input_character} == 0x002D) { # -
848     !!!next-input-character;
849     push @next_char, $self->{next_input_character};
850     if ($self->{next_input_character} == 0x002D) { # -
851     $self->{current_token} = {type => 'comment', data => ''};
852     $self->{state} = 'comment';
853     !!!next-input-character;
854     redo A;
855     }
856     } elsif ($self->{next_input_character} == 0x0044 or # D
857     $self->{next_input_character} == 0x0064) { # d
858     !!!next-input-character;
859     push @next_char, $self->{next_input_character};
860     if ($self->{next_input_character} == 0x004F or # O
861     $self->{next_input_character} == 0x006F) { # o
862     !!!next-input-character;
863     push @next_char, $self->{next_input_character};
864     if ($self->{next_input_character} == 0x0043 or # C
865     $self->{next_input_character} == 0x0063) { # c
866     !!!next-input-character;
867     push @next_char, $self->{next_input_character};
868     if ($self->{next_input_character} == 0x0054 or # T
869     $self->{next_input_character} == 0x0074) { # t
870     !!!next-input-character;
871     push @next_char, $self->{next_input_character};
872     if ($self->{next_input_character} == 0x0059 or # Y
873     $self->{next_input_character} == 0x0079) { # y
874     !!!next-input-character;
875     push @next_char, $self->{next_input_character};
876     if ($self->{next_input_character} == 0x0050 or # P
877     $self->{next_input_character} == 0x0070) { # p
878     !!!next-input-character;
879     push @next_char, $self->{next_input_character};
880     if ($self->{next_input_character} == 0x0045 or # E
881     $self->{next_input_character} == 0x0065) { # e
882     ## ISSUE: What a stupid code this is!
883     $self->{state} = 'DOCTYPE';
884     !!!next-input-character;
885     redo A;
886     }
887     }
888     }
889     }
890     }
891     }
892     }
893    
894     !!!parse-error;
895     $self->{next_input_character} = shift @next_char;
896     !!!back-next-input-character (@next_char);
897     $self->{state} = 'bogus comment';
898     redo A;
899    
900     ## ISSUE: typos in spec: chacacters, is is a parse error
901     ## ISSUE: spec is somewhat unclear on "is the first character that will be in the comment"; what is "that will be in the comment" is what the algorithm defines, isn't it?
902     } elsif ($self->{state} eq 'comment') {
903     if ($self->{next_input_character} == 0x002D) { # -
904     $self->{state} = 'comment dash';
905     !!!next-input-character;
906     redo A;
907     } elsif ($self->{next_input_character} == -1) {
908     !!!parse-error;
909     $self->{state} = 'data';
910     ## reconsume
911    
912     !!!emit ($self->{current_token}); # comment
913     undef $self->{current_token};
914    
915     redo A;
916     } else {
917     $self->{current_token}->{data} .= chr ($self->{next_input_character}); # comment
918     ## Stay in the state
919     !!!next-input-character;
920     redo A;
921     }
922     } elsif ($self->{state} eq 'comment dash') {
923     if ($self->{next_input_character} == 0x002D) { # -
924     $self->{state} = 'comment end';
925     !!!next-input-character;
926     redo A;
927     } elsif ($self->{next_input_character} == -1) {
928     !!!parse-error;
929     $self->{state} = 'data';
930     ## reconsume
931    
932     !!!emit ($self->{current_token}); # comment
933     undef $self->{current_token};
934    
935     redo A;
936     } else {
937     $self->{current_token}->{data} .= '-' . chr ($self->{next_input_character}); # comment
938     $self->{state} = 'comment';
939     !!!next-input-character;
940     redo A;
941     }
942     } elsif ($self->{state} eq 'comment end') {
943     if ($self->{next_input_character} == 0x003E) { # >
944     $self->{state} = 'data';
945     !!!next-input-character;
946    
947     !!!emit ($self->{current_token}); # comment
948     undef $self->{current_token};
949    
950     redo A;
951     } elsif ($self->{next_input_character} == 0x002D) { # -
952     !!!parse-error;
953     $self->{current_token}->{data} .= '-'; # comment
954     ## Stay in the state
955     !!!next-input-character;
956     redo A;
957     } elsif ($self->{next_input_character} == -1) {
958     !!!parse-error;
959     $self->{state} = 'data';
960     ## reconsume
961    
962     !!!emit ($self->{current_token}); # comment
963     undef $self->{current_token};
964    
965     redo A;
966     } else {
967     !!!parse-error;
968     $self->{current_token}->{data} .= '--' . chr ($self->{next_input_character}); # comment
969     $self->{state} = 'comment';
970     !!!next-input-character;
971     redo A;
972     }
973     } elsif ($self->{state} eq 'DOCTYPE') {
974     if ($self->{next_input_character} == 0x0009 or # HT
975     $self->{next_input_character} == 0x000A or # LF
976     $self->{next_input_character} == 0x000B or # VT
977     $self->{next_input_character} == 0x000C or # FF
978     $self->{next_input_character} == 0x0020) { # SP
979     $self->{state} = 'before DOCTYPE name';
980     !!!next-input-character;
981     redo A;
982     } else {
983     !!!parse-error;
984     $self->{state} = 'before DOCTYPE name';
985     ## reconsume
986     redo A;
987     }
988     } elsif ($self->{state} eq 'before DOCTYPE name') {
989     if ($self->{next_input_character} == 0x0009 or # HT
990     $self->{next_input_character} == 0x000A or # LF
991     $self->{next_input_character} == 0x000B or # VT
992     $self->{next_input_character} == 0x000C or # FF
993     $self->{next_input_character} == 0x0020) { # SP
994     ## Stay in the state
995     !!!next-input-character;
996     redo A;
997     } elsif (0x0061 <= $self->{next_input_character} and
998     $self->{next_input_character} <= 0x007A) { # a..z
999     $self->{current_token} = {type => 'DOCTYPE',
1000     name => chr ($self->{next_input_character} - 0x0020),
1001     error => 1};
1002     $self->{state} = 'DOCTYPE name';
1003     !!!next-input-character;
1004     redo A;
1005     } elsif ($self->{next_input_character} == 0x003E) { # >
1006     !!!parse-error;
1007     $self->{state} = 'data';
1008     !!!next-input-character;
1009    
1010     !!!emit ({type => 'DOCTYPE', name => '', error => 1});
1011    
1012     redo A;
1013     } elsif ($self->{next_input_character} == -1) {
1014     !!!parse-error;
1015     $self->{state} = 'data';
1016     ## reconsume
1017    
1018     !!!emit ({type => 'DOCTYPE', name => '', error => 1});
1019    
1020     redo A;
1021     } else {
1022     $self->{current_token} = {type => 'DOCTYPE',
1023     name => chr ($self->{next_input_character}),
1024     error => 1};
1025     $self->{state} = 'DOCTYPE name';
1026     !!!next-input-character;
1027     redo A;
1028     }
1029     } elsif ($self->{state} eq 'DOCTYPE name') {
1030     if ($self->{next_input_character} == 0x0009 or # HT
1031     $self->{next_input_character} == 0x000A or # LF
1032     $self->{next_input_character} == 0x000B or # VT
1033     $self->{next_input_character} == 0x000C or # FF
1034     $self->{next_input_character} == 0x0020) { # SP
1035     $self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML'); # DOCTYPE
1036     $self->{state} = 'after DOCTYPE name';
1037     !!!next-input-character;
1038     redo A;
1039     } elsif ($self->{next_input_character} == 0x003E) { # >
1040     $self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML'); # DOCTYPE
1041     $self->{state} = 'data';
1042     !!!next-input-character;
1043    
1044     !!!emit ($self->{current_token}); # DOCTYPE
1045     undef $self->{current_token};
1046    
1047     redo A;
1048     } elsif (0x0061 <= $self->{next_input_character} and
1049     $self->{next_input_character} <= 0x007A) { # a..z
1050     $self->{current_token}->{name} .= chr ($self->{next_input_character} - 0x0020); # DOCTYPE
1051     #$self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML');
1052     ## Stay in the state
1053     !!!next-input-character;
1054     redo A;
1055     } elsif ($self->{next_input_character} == -1) {
1056     !!!parse-error;
1057     $self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML'); # DOCTYPE
1058     $self->{state} = 'data';
1059     ## reconsume
1060    
1061     !!!emit ($self->{current_token});
1062     undef $self->{current_token};
1063    
1064     redo A;
1065     } else {
1066     $self->{current_token}->{name} .= chr ($self->{next_input_character} - 0x0020); # DOCTYPE
1067     #$self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML');
1068     ## Stay in the state
1069     !!!next-input-character;
1070     redo A;
1071     }
1072     } elsif ($self->{state} eq 'after DOCTYPE name') {
1073     if ($self->{next_input_character} == 0x0009 or # HT
1074     $self->{next_input_character} == 0x000A or # LF
1075     $self->{next_input_character} == 0x000B or # VT
1076     $self->{next_input_character} == 0x000C or # FF
1077     $self->{next_input_character} == 0x0020) { # SP
1078     ## Stay in the state
1079     !!!next-input-character;
1080     redo A;
1081     } elsif ($self->{next_input_character} == 0x003E) { # >
1082     $self->{state} = 'data';
1083     !!!next-input-character;
1084    
1085     !!!emit ($self->{current_token}); # DOCTYPE
1086     undef $self->{current_token};
1087    
1088     redo A;
1089     } elsif ($self->{next_input_character} == -1) {
1090     !!!parse-error;
1091     $self->{state} = 'data';
1092     ## reconsume
1093    
1094     !!!emit ($self->{current_token}); # DOCTYPE
1095     undef $self->{current_token};
1096    
1097     redo A;
1098     } else {
1099     !!!parse-error;
1100     $self->{current_token}->{error} = 1; # DOCTYPE
1101     $self->{state} = 'bogus DOCTYPE';
1102     !!!next-input-character;
1103     redo A;
1104     }
1105     } elsif ($self->{state} eq 'bogus DOCTYPE') {
1106     if ($self->{next_input_character} == 0x003E) { # >
1107     $self->{state} = 'data';
1108     !!!next-input-character;
1109    
1110     !!!emit ($self->{current_token}); # DOCTYPE
1111     undef $self->{current_token};
1112    
1113     redo A;
1114     } elsif ($self->{next_input_character} == -1) {
1115     !!!parse-error;
1116     $self->{state} = 'data';
1117     ## reconsume
1118    
1119     !!!emit ($self->{current_token}); # DOCTYPE
1120     undef $self->{current_token};
1121    
1122     redo A;
1123     } else {
1124     ## Stay in the state
1125     !!!next-input-character;
1126     redo A;
1127     }
1128     } else {
1129     die "$0: $self->{state}: Unknown state";
1130     }
1131     } # A
1132    
1133     die "$0: _get_next_token: unexpected case";
1134     } # _get_next_token
1135    
## Attempt to consume a character reference.  The caller has already
## consumed the "&"; on entry |$self->{next_input_character}| is the
## character that follows it.
##
## Numeric references ("&#123;" and "&#x7B;") are decoded here.  Input
## that does not start with "#" is handled by the code generated from
## the |!!!consume-entity| macro (defined in the preprocessor, not
## visible in this file section).
##
## For the numeric branches: returns a token of the form
## |{type => 'character', data => $char}| when at least one digit was
## found, or |undef| otherwise.  When |undef| is returned, all
## characters that were looked at are pushed back and
## |next_input_character| is reset to "#", so that the tokenizer
## resumes right after the "&".
sub _tokenize_attempt_to_consume_an_entity ($) {
  my $self = shift;
  my $r;

  if ($self->{next_input_character} == 0x0023) { # #
    !!!next-input-character;
    my $num;
    if ($self->{next_input_character} == 0x0078 or # x
        $self->{next_input_character} == 0x0058) { # X
      X: {
        ## On the first pass this is the "x" / "X" itself; it is only
        ## needed when no hexadecimal digit follows.
        my $x_char = $self->{next_input_character};
        !!!next-input-character;
        if (0x0030 <= $self->{next_input_character} and
            $self->{next_input_character} <= 0x0039) { # 0..9
          $num ||= 0;
          $num *= 0x10;
          $num += $self->{next_input_character} - 0x0030;
          redo X;
        } elsif (0x0061 <= $self->{next_input_character} and
                 $self->{next_input_character} <= 0x0066) { # a..f
          ## ISSUE: the spec says U+0078, which is apparently incorrect
          $num ||= 0;
          $num *= 0x10;
          $num += $self->{next_input_character} - 0x0060 + 9;
          redo X;
        } elsif (0x0041 <= $self->{next_input_character} and
                 $self->{next_input_character} <= 0x0046) { # A..F
          ## ISSUE: the spec says U+0058, which is apparently incorrect
          $num ||= 0;
          $num *= 0x10;
          $num += $self->{next_input_character} - 0x0040 + 9;
          redo X;
        } elsif (not defined $num) { # no hexadecimal digit
          !!!parse-error;
          ## Push back both the "x" and the character that follows it;
          ## otherwise the latter would silently be dropped from the
          ## input stream.
          !!!back-next-input-character ($x_char, $self->{next_input_character});
          $self->{next_input_character} = 0x0023; # #
          last X; ## nothing is returned
        } elsif ($self->{next_input_character} == 0x003B) { # ;
          !!!next-input-character;
        } else {
          !!!parse-error;
        }

        ## TODO: check the definition for |a valid Unicode character|.
        if ($num > 1114111 or $num == 0) {
          $num = 0xFFFD; # REPLACEMENT CHARACTER
          ## ISSUE: Why this is not an error?
        }

        $r = {type => 'character', data => chr $num};
      } # X
    } else {
      D: {
        if (0x0030 <= $self->{next_input_character} and
            $self->{next_input_character} <= 0x0039) { # 0..9
          $num ||= 0;
          $num *= 10;
          $num += $self->{next_input_character} - 0x0030;
          !!!next-input-character;
          redo D;
        } elsif (not defined $num) { # no decimal digit
          ## Only bail out when not a single digit has been seen.  An
          ## unconditional |else| here would make the code below
          ## unreachable and reject every decimal reference.
          !!!parse-error;
          !!!back-next-input-character ($self->{next_input_character});
          $self->{next_input_character} = 0x0023; # #
          last D; ## nothing is returned
        }

        if ($self->{next_input_character} == 0x003B) { # ;
          !!!next-input-character;
        } else {
          !!!parse-error;
        }

        ## TODO: check the definition for |a valid Unicode character|.
        if ($num > 1114111 or $num == 0) {
          $num = 0xFFFD; # REPLACEMENT CHARACTER
          ## ISSUE: Why this is not an error?
        }

        $r = {type => 'character', data => chr $num};
      } # D
    }
  !!!consume-entity}
  return $r;
} # _tokenize_attempt_to_consume_an_entity
1220    
## Prepare the tree construction stage: load the DOM implementation,
## create the result document, store it in |$self->{document}| and
## switch its strict error checking off.
sub _initialize_tree_constructor ($) {
  my ($self) = @_;
  require What::NanoDOM;
  my $doc = What::NanoDOM::Document->new;
  $self->{document} = $doc;
  ## TODO: Turn mutation events off # MUST
  ## TODO: Turn loose Document option (manakai extension) on
  $doc->strict_error_checking (0);
} # _initialize_tree_constructor
1229    
## Finish the tree construction stage: switch strict error checking of
## the document stored in |$self->{document}| back on.
sub _terminate_tree_constructor ($) {
  my ($self) = @_;
  my $doc = $self->{document};
  ## TODO: Turn mutation events on
  $doc->strict_error_checking (1);
} # _terminate_tree_constructor
1235    
1236     ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
1237    
1238     sub _construct_tree ($) {
1239     my ($self) = @_;
1240    
1241     ## When an interactive UA render the $self->{document} available
1242     ## to the user, or when it begin accepting user input, are
1243     ## not defined.
1244    
1245     ## Append a character: collect it and all subsequent consecutive
1246     ## characters and insert one Text node whose data is concatenation
1247     ## of all those characters. # MUST
1248    
1249     my $token;
1250     !!!next-token;
1251    
1252     my $phase = 'initial'; # MUST
1253    
1254     my $open_elements = [];
1255     my $active_formatting_elements = [];
1256     my $head_element;
1257     my $form_element;
1258     my $insertion_mode = 'before head';
1259    
## Reconstruct the active formatting elements.
##
## Entries of both |$active_formatting_elements| and |$open_elements|
## are |[node, tag_name]| pairs (a marker entry has the string
## '#marker' as its first item).  Starting from the last entry of the
## list, this walks back to the first entry that is either a marker or
## is in the stack of open elements, and then walks forward again,
## replacing every entry after it by a shallow clone that is appended to
## the current node and pushed onto the stack of open elements.
##
## |$i| is always a *negative* index (-1 is the last entry).
my $reconstruct_active_formatting_elements = sub { # MUST
  ## Step 1
  return unless @$active_formatting_elements;

  ## Step 3
  my $i = -1;
  my $entry = $active_formatting_elements->[$i];

  ## Step 2
  return if $entry->[0] eq '#marker';
  for (@$open_elements) {
    if ($entry->[0] eq $_->[0]) {
      return;
    }
  }

  ## Step 4
  S4: {
    last S4 if $active_formatting_elements->[0]->[0] eq $entry->[0];

    ## Step 5
    $i--;
    $entry = $active_formatting_elements->[$i];

    ## Step 6
    if ($entry->[0] eq '#marker') {
      #
    } else {
      my $in_open_elements;
      OE: for (@$open_elements) {
        if ($entry->[0] eq $_->[0]) {
          $in_open_elements = 1;
          last OE;
        }
      }
      if ($in_open_elements) {
        #
      } else {
        redo S4;
      }
    }

    ## Step 7
    $i++;
    $entry = $active_formatting_elements->[$i];
  } # S4

  S7: {
    ## Step 8
    my $clone = $entry->[0]->clone_node (0);

    ## Step 9
    $open_elements->[-1]->[0]->append_child ($clone);
    push @$open_elements, [$clone, $entry->[1]];

    ## Step 10
    $active_formatting_elements->[$i] = $open_elements->[-1];

    ## Step 11: stop once the last entry of the list has been
    ## processed.  Since |$i| counts from the end of the list, the last
    ## entry is -1; comparing with |$#$active_formatting_elements|
    ## would never match and |$i| would wrap around to the first entry.
    unless ($i == -1) {
      ## Step 7'
      $i++;
      $entry = $active_formatting_elements->[$i];

      redo S7;
    }
  } # S7
}; # $reconstruct_active_formatting_elements
1327    
## Clear the list of active formatting elements up to the last marker:
## the last '#marker' entry and everything after it are removed.  The
## list is left untouched when it contains no marker.
my $clear_up_to_marker = sub {
  my $idx = $#$active_formatting_elements;
  while ($idx >= 0) {
    if ($active_formatting_elements->[$idx]->[0] eq '#marker') {
      splice @$active_formatting_elements, $idx;
      return;
    }
    $idx--;
  }
}; # $clear_up_to_marker
1336    
## Reset the insertion mode appropriately: walk the stack of open
## elements from the current node upwards and set |$insertion_mode|
## according to the first element type that determines it.
my $reset_insertion_mode = sub {
  ## Element type -> insertion mode (Steps 4..13)
  my %mode_of = (
    select => 'in select',
    td => 'in cell',
    th => 'in cell',
    tr => 'in row',
    tbody => 'in table body',
    thead => 'in table head',
    tfoot => 'in table foot',
    caption => 'in caption',
    colgroup => 'in column group',
    table => 'in table',
    head => 'in body', # not in head!
    body => 'in body',
    frameset => 'in frameset',
  );

  ## Step 1
  my $is_last;

  ## Step 2
  my $depth = -1;
  my $current = $open_elements->[$depth];

  while (1) {
    ## Step 3
    if ($open_elements->[0]->[0] eq $current->[0]) {
      $is_last = 1;
    }
    ## TODO: the element whose inner_html is set is neither td nor th, then $node = the element

    ## Step 4..13
    my $mapped = $mode_of{$current->[1]};
    if (defined $mapped) {
      $insertion_mode = $mapped;
      return;
    }

    ## Step 14
    if ($current->[1] eq 'html') {
      $insertion_mode = defined $head_element ? 'after head' : 'before head';
      return;
    }

    ## Step 15
    if ($is_last) {
      $insertion_mode = 'in body';
      return;
    }

    ## Step 16
    $depth--;
    $current = $open_elements->[$depth];

    ## Step 17: continue with Step 3
  }
}; # $reset_insertion_mode
1389    
## Process a |style| start tag: create the element, attach it, collect
## the following character tokens as its text content (with the
## content model flag temporarily set to CDATA) and then consume the
## token that ended the run of character tokens.
my $style_start_tag = sub {
  my $style_el; !!!create-element ($style_el, 'style');
  ## $insertion_mode eq 'in head' and ... (always true)
  my $parent = ($insertion_mode eq 'in head' and defined $head_element)
      ? $head_element : $open_elements->[-1]->[0];
  $parent->append_child ($style_el);
  $self->{content_model_flag} = 'CDATA';

  my @chunks;
  !!!next-token;
  until ($token->{type} ne 'character') {
    push @chunks, $token->{data};
    !!!next-token;
  } # stop if non-character token or tokenizer stops tokenising
  my $text = join '', @chunks;
  $style_el->manakai_append_text ($text) if length $text;

  $self->{content_model_flag} = 'PCDATA';

  unless ($token->{type} eq 'end tag' and $token->{tag_name} eq 'style') {
    !!!parse-error;
    ## ISSUE: And ignore?
  } ## Otherwise, the end tag token is simply ignored
  !!!next-token;
}; # $style_start_tag
1418    
## Process a |script| start tag: create the element, collect the
## following character tokens as its text content (with the content
## model flag temporarily set to CDATA), attach the element and then
## consume the token that ended the run of character tokens.
my $script_start_tag = sub {
  my $script_el; !!!create-element ($script_el, 'script');
  ## TODO: mark as "parser-inserted"

  $self->{content_model_flag} = 'CDATA';

  my @chunks;
  !!!next-token;
  until ($token->{type} ne 'character') {
    push @chunks, $token->{data};
    !!!next-token;
  } # stop if non-character token or tokenizer stops tokenising
  my $text = join '', @chunks;
  $script_el->manakai_append_text ($text) if length $text;

  $self->{content_model_flag} = 'PCDATA';

  unless ($token->{type} eq 'end tag' and
          $token->{tag_name} eq 'script') {
    !!!parse-error;
    ## ISSUE: And ignore?
    ## TODO: mark as "already executed"
  } ## Otherwise, the end tag token is simply ignored

  ## TODO: inner_html mode then mark as "already executed" and skip
  {
    ## TODO: $old_insertion_point = current insertion point
    ## TODO: insertion point = just before the next input character

    my $parent = ($insertion_mode eq 'in head' and defined $head_element)
        ? $head_element : $open_elements->[-1]->[0];
    $parent->append_child ($script_el);

    ## TODO: insertion point = $old_insertion_point (might be "undefined")

    ## TODO: if there is a script that will execute as soon as the parser resume, then...
  }

  !!!next-token;
}; # $script_start_tag
1461    
## Handle an end tag of a formatting element (the "adoption agency"
## steps).
##
## Argument: the tag name of the end tag being processed.
##
## Entries of |$open_elements| and |$active_formatting_elements| are
## |[node, tag_name]| pairs; DOM methods must therefore be invoked on
## |$entry->[0]|, never on the pair itself.
##
## The closure either returns after consuming the token (the early
## exits of Steps 1 and 3) or loops back to Step 1 after having
## restructured the tree (Step 14).
my $formatting_end_tag = sub {
  my $tag_name = shift;

  FET: {
    ## Step 1
    my $formatting_element;
    my $formatting_element_i_in_active;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[1] eq $tag_name) {
        $formatting_element = $active_formatting_elements->[$_];
        $formatting_element_i_in_active = $_;
        last AFE;
      } elsif ($active_formatting_elements->[$_]->[0] eq '#marker') {
        last AFE;
      }
    } # AFE
    unless (defined $formatting_element) {
      !!!parse-error;
      ## Ignore the token
      !!!next-token;
      return;
    }
    ## has an element in scope
    my $in_scope = 1;
    my $formatting_element_i_in_open;
    INSCOPE: for (reverse 0..$#$open_elements) {
      my $node = $open_elements->[$_];
      if ($node->[0] eq $formatting_element->[0]) {
        if ($in_scope) {
          $formatting_element_i_in_open = $_;
          last INSCOPE;
        } else { # in open elements but not in scope
          !!!parse-error;
          ## Ignore the token
          !!!next-token;
          return;
        }
      } elsif ({
                table => 1, caption => 1, td => 1, th => 1,
                button => 1, marquee => 1, object => 1, html => 1,
               }->{$node->[1]}) {
        $in_scope = 0;
      }
    } # INSCOPE
    unless (defined $formatting_element_i_in_open) {
      !!!parse-error;
      ## Remove the formatting element itself.  It is not necessarily
      ## the last entry of the list, so |pop| would be wrong here.
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      !!!next-token; ## TODO: ok?
      return;
    }
    if (not $open_elements->[-1]->[0] eq $formatting_element->[0]) {
      !!!parse-error;
    }

    ## Step 2
    ## Scanning from the current node towards the formatting element,
    ## the last match kept is the one closest to the formatting
    ## element.
    my $furthest_block;
    my $furthest_block_i_in_open;
    OE: for (reverse 0..$#$open_elements) {
      my $node = $open_elements->[$_];
      if (not $formatting_category->{$node->[1]} and
          #not $phrasing_category->{$node->[1]} and
          ($special_category->{$node->[1]} or
           $scoping_category->{$node->[1]})) {
        $furthest_block = $node;
        $furthest_block_i_in_open = $_;
      } elsif ($node->[0] eq $formatting_element->[0]) {
        last OE;
      }
    } # OE

    ## Step 3
    unless (defined $furthest_block) { # MUST
      splice @$open_elements, $formatting_element_i_in_open;
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      !!!next-token;
      return;
    }

    ## Step 4
    my $common_ancestor_node = $open_elements->[$formatting_element_i_in_open - 1];

    ## Step 5
    my $furthest_block_parent = $furthest_block->[0]->parent_node;
    if (defined $furthest_block_parent) {
      $furthest_block_parent->remove_child ($furthest_block->[0]);
    }

    ## Step 6
    my $bookmark_prev_el
        = $active_formatting_elements->[$formatting_element_i_in_active - 1]
            ->[0];

    ## Step 7
    my $node = $furthest_block;
    my $node_i_in_open = $furthest_block_i_in_open;
    my $last_node = $furthest_block;
    S7: {
      ## Step 1
      $node_i_in_open--;
      $node = $open_elements->[$node_i_in_open];

      ## Step 2
      my $node_i_in_active;
      S7S2: {
        for (reverse 0..$#$active_formatting_elements) {
          if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
            $node_i_in_active = $_;
            last S7S2;
          }
        }
        ## Not an active formatting element: drop it from the stack
        ## and look at the next node up.
        splice @$open_elements, $node_i_in_open, 1;
        redo S7;
      } # S7S2

      ## Step 3
      last S7 if $node->[0] eq $formatting_element->[0];

      ## Step 4
      if ($last_node->[0] eq $furthest_block->[0]) {
        $bookmark_prev_el = $node->[0];
      }

      ## Step 5
      if ($node->[0]->has_child_nodes ()) {
        my $clone = [$node->[0]->clone_node (0), $node->[1]];
        $active_formatting_elements->[$node_i_in_active] = $clone;
        $open_elements->[$node_i_in_open] = $clone;
        $node = $clone;
      }

      ## Step 6
      ## |$node| and |$last_node| are [node, tag_name] pairs; the DOM
      ## operation has to be applied to the nodes themselves.
      $node->[0]->append_child ($last_node->[0]);

      ## Step 7
      $last_node = $node;

      ## Step 8
      redo S7;
    } # S7

    ## Step 8
    $common_ancestor_node->[0]->append_child ($last_node->[0]);

    ## Step 9
    my $clone = [$formatting_element->[0]->clone_node (0),
                 $formatting_element->[1]];

    ## Step 10
    ## Take a snapshot of the child list first, since moving children
    ## changes the list being iterated.
    my @cn = @{$furthest_block->[0]->child_nodes};
    $clone->[0]->append_child ($_) for @cn;

    ## Step 11
    $furthest_block->[0]->append_child ($clone->[0]);

    ## Step 12
    my $i;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
        splice @$active_formatting_elements, $_, 1;
        $i-- and last AFE if defined $i;
      } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
        $i = $_;
      }
    } # AFE
    splice @$active_formatting_elements, $i + 1, 0, $clone;

    ## Step 13
    undef $i;
    OE: for (reverse 0..$#$open_elements) {
      if ($open_elements->[$_]->[0] eq $formatting_element->[0]) {
        splice @$open_elements, $_, 1;
        $i-- and last OE if defined $i;
      } elsif ($open_elements->[$_]->[0] eq $furthest_block->[0]) {
        $i = $_;
      }
    } # OE
    ## Insert (length 0) the clone right after the furthest block; a
    ## replacement length of 1 would drop the element that follows the
    ## furthest block in the stack.
    splice @$open_elements, $i + 1, 0, $clone;

    ## Step 14
    redo FET;
  } # FET
}; # $formatting_end_tag
1644    
1645     my $in_body = sub {
1646     my $insert = shift;
1647     if ($token->{type} eq 'start tag') {
1648     if ($token->{tag_name} eq 'script') {
1649     $script_start_tag->();
1650     return;
1651     } elsif ($token->{tag_name} eq 'style') {
1652     $style_start_tag->();
1653     return;
1654     } elsif ({
1655     base => 1, link => 1, meta => 1, title => 1,
1656     }->{$token->{tag_name}}) {
1657     !!!parse-error;
1658     ## NOTE: This is an "as if in head" code clone
1659     my $el;
1660     !!!create-element ($el, $token->{tag_name}, $token->{attributes});
1661     if (defined $head_element) {
1662     $head_element->append_child ($el);
1663     } else {
1664     $insert->($el);
1665     }
1666    
1667     ## ISSUE: Issue on magical <base> in the spec
1668    
1669     !!!next-token;
1670     return;
1671     } elsif ($token->{tag_name} eq 'body') {
1672     !!!parse-error;
1673    
1674     if (@$open_elements == 1 or
1675     $open_elements->[1]->[1] ne 'body') {
1676     ## Ignore the token
1677     } else {
1678     my $body_el = $open_elements->[1]->[0];
1679     for my $attr_name (keys %{$token->{attributes}}) {
1680     unless ($body_el->has_attribute_ns (undef, $attr_name)) {
1681     $body_el->set_attribute_ns
1682     (undef, [undef, $attr_name],
1683     $token->{attributes}->{$attr_name}->{value});
1684     }
1685     }
1686     }
1687     !!!next-token;
1688     return;
1689     } elsif ({
1690     address => 1, blockquote => 1, center => 1, dir => 1,
1691     div => 1, dl => 1, fieldset => 1, listing => 1,
1692     menu => 1, ol => 1, p => 1, ul => 1,
1693     pre => 1,
1694     }->{$token->{tag_name}}) {
1695     ## has a p element in scope
1696     INSCOPE: for (reverse @$open_elements) {
1697     if ($_->[1] eq 'p') {
1698     !!!back-token;
1699     $token = {type => 'end tag', tag_name => 'p'};
1700     return;
1701     } elsif ({
1702     table => 1, caption => 1, td => 1, th => 1,
1703     button => 1, marquee => 1, object => 1, html => 1,
1704     }->{$_->[1]}) {
1705     last INSCOPE;
1706     }
1707     } # INSCOPE
1708    
1709     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
1710     if ($token->{tag_name} eq 'pre') {
1711     !!!next-token;
1712     if ($token->{type} eq 'character') {
1713     $token->{data} =~ s/^\x0A//;
1714     unless (length $token->{data}) {
1715     !!!next-token;
1716     }
1717     }
1718     } else {
1719     !!!next-token;
1720     }
1721     return;
1722     } elsif ($token->{tag_name} eq 'form') {
1723     if (defined $form_element) {
1724     !!!parse-error;
1725     ## Ignore the token
1726     } else {
1727     ## has a p element in scope
1728     INSCOPE: for (reverse @$open_elements) {
1729     if ($_->[1] eq 'p') {
1730     !!!back-token;
1731     $token = {type => 'end tag', tag_name => 'p'};
1732     return;
1733     } elsif ({
1734     table => 1, caption => 1, td => 1, th => 1,
1735     button => 1, marquee => 1, object => 1, html => 1,
1736     }->{$_->[1]}) {
1737     last INSCOPE;
1738     }
1739     } # INSCOPE
1740    
1741     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
1742     $form_element = $open_elements->[-1]->[0];
1743     !!!next-token;
1744     return;
1745     }
1746     } elsif ($token->{tag_name} eq 'li') {
1747     ## has a p element in scope
1748     INSCOPE: for (reverse @$open_elements) {
1749     if ($_->[1] eq 'p') {
1750     !!!back-token;
1751     $token = {type => 'end tag', tag_name => 'p'};
1752     return;
1753     } elsif ({
1754     table => 1, caption => 1, td => 1, th => 1,
1755     button => 1, marquee => 1, object => 1, html => 1,
1756     }->{$_->[1]}) {
1757     last INSCOPE;
1758     }
1759     } # INSCOPE
1760    
1761     ## Step 1: start at the current node, i.e. the last entry of @$open_elements.
1762     my $i = -1;
1763     my $node = $open_elements->[$i];
1764     LI: {
1765     ## Step 2: an open li is closed by dropping it and every entry after it.
1766     if ($node->[1] eq 'li') {
1767     splice @$open_elements, $i;
1768     last LI;
1769     }
1770    
1771     ## Step 3: stop at a non-formatting special/scoping element other than address and div.
1772     if (not $formatting_category->{$node->[1]} and
1773     #not $phrasing_category->{$node->[1]} and
1774     ($special_category->{$node->[1]} or
1775     $scoping_category->{$node->[1]}) and
1776     $node->[1] ne 'address' and $node->[1] ne 'div') {
1777     last LI;
1778     }
1779    
1780     ## Step 4: move to the previous entry (toward index 0); $i++ would jump from -1 to index 0.
1781     $i--;
1782     $node = $open_elements->[$i];
1783     redo LI;
1784     } # LI
1785    
1786     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
1787     !!!next-token;
1788     return;
1789     } elsif ($token->{tag_name} eq 'dd' or $token->{tag_name} eq 'dt') {
1790     ## has a p element in scope
1791     INSCOPE: for (reverse @$open_elements) {
1792     if ($_->[1] eq 'p') {
1793     !!!back-token;
1794     $token = {type => 'end tag', tag_name => 'p'};
1795     return;
1796     } elsif ({
1797     table => 1, caption => 1, td => 1, th => 1,
1798     button => 1, marquee => 1, object => 1, html => 1,
1799     }->{$_->[1]}) {
1800     last INSCOPE;
1801     }
1802     } # INSCOPE
1803    
1804     ## Step 1: start at the current node, i.e. the last entry of @$open_elements.
1805     my $i = -1;
1806     my $node = $open_elements->[$i];
1807     LI: {
1808     ## Step 2: an open dt or dd is closed by dropping it and every entry after it.
1809     if ($node->[1] eq 'dt' or $node->[1] eq 'dd') {
1810     splice @$open_elements, $i;
1811     last LI;
1812     }
1813    
1814     ## Step 3: stop at a non-formatting special/scoping element other than address and div.
1815     if (not $formatting_category->{$node->[1]} and
1816     #not $phrasing_category->{$node->[1]} and
1817     ($special_category->{$node->[1]} or
1818     $scoping_category->{$node->[1]}) and
1819     $node->[1] ne 'address' and $node->[1] ne 'div') {
1820     last LI;
1821     }
1822    
1823     ## Step 4: move to the previous entry (toward index 0); $i++ would jump from -1 to index 0.
1824     $i--;
1825     $node = $open_elements->[$i];
1826     redo LI;
1827     } # LI
1828    
1829     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
1830     !!!next-token;
1831     return;
1832     } elsif ($token->{tag_name} eq 'plaintext') {
1833     ## has a p element in scope
1834     INSCOPE: for (reverse @$open_elements) {
1835     if ($_->[1] eq 'p') {
1836     !!!back-token;
1837     $token = {type => 'end tag', tag_name => 'p'};
1838     return;
1839     } elsif ({
1840     table => 1, caption => 1, td => 1, th => 1,
1841     button => 1, marquee => 1, object => 1, html => 1,
1842     }->{$_->[1]}) {
1843     last INSCOPE;
1844     }
1845     } # INSCOPE
1846    
1847     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
1848    
1849     $self->{content_model_flag} = 'PLAINTEXT';
1850    
1851     !!!next-token;
1852     return;
1853     } elsif ({
1854     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
1855     }->{$token->{tag_name}}) {
1856     ## has a p element in scope
1857     INSCOPE: for (reverse 0..$#$open_elements) {
1858     my $node = $open_elements->[$_];
1859     if ($node->[1] eq 'p') {
1860     !!!back-token;
1861     $token = {type => 'end tag', tag_name => 'p'};
1862     return;
1863     } elsif ({
1864     table => 1, caption => 1, td => 1, th => 1,
1865     button => 1, marquee => 1, object => 1, html => 1,
1866     }->{$node->[1]}) {
1867     last INSCOPE;
1868     }
1869     } # INSCOPE
1870    
1871     ## has an element in scope
1872     my $i;
1873     INSCOPE: for (reverse 0..$#$open_elements) {
1874     my $node = $open_elements->[$_];
1875     if ({
1876     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
1877     }->{$node->[1]}) {
1878     $i = $_;
1879     last INSCOPE;
1880     } elsif ({
1881     table => 1, caption => 1, td => 1, th => 1,
1882     button => 1, marquee => 1, object => 1, html => 1,
1883     }->{$node->[1]}) {
1884     last INSCOPE;
1885     }
1886     } # INSCOPE
1887    
1888     if (defined $i) {
1889     !!!parse-error;
1890     splice @$open_elements, $i;
1891     }
1892    
1893     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
1894    
1895     !!!next-token;
1896     return;
1897     } elsif ($token->{tag_name} eq 'a') {
1898     AFE: for my $i (reverse 0..$#$active_formatting_elements) {
1899     my $node = $active_formatting_elements->[$i];
1900     if ($node->[1] eq 'a') {
1901     !!!parse-error;
1902    
1903     !!!back-token;
1904     $token = {type => 'end tag', tag_name => 'a'};
1905     $formatting_end_tag->($token->{tag_name});
1906    
1907     splice @$active_formatting_elements, $i;
1908     OE: for (reverse 0..$#$open_elements) {
1909     if ($open_elements->[$_]->[0] eq $node->[0]) {
1910     splice @$open_elements, $_;
1911     last OE;
1912     }
1913     } # OE
1914     last AFE;
1915     } elsif ($node->[0] eq '#marker') {
1916     last AFE;
1917     }
1918     } # AFE
1919    
1920     $reconstruct_active_formatting_elements->();
1921    
1922     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
1923     push @$active_formatting_elements, $open_elements->[-1];
1924    
1925     !!!next-token;
1926     return;
1927     } elsif ({
1928     b => 1, big => 1, em => 1, font => 1, i => 1,
1929     nobr => 1, s => 1, small => 1, strike => 1,
1930     strong => 1, tt => 1, u => 1,
1931     }->{$token->{tag_name}}) {
1932     $reconstruct_active_formatting_elements->();
1933     ## Insert the element, then record the new current node as an active formatting element.
1934     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
1935     push @$active_formatting_elements, $open_elements->[-1];
1936    
1937     !!!next-token;
1938     return;
1939     } elsif ($token->{tag_name} eq 'button') {
1940     ## has a button element in scope
1941     INSCOPE: for (reverse 0..$#$open_elements) {
1942     my $node = $open_elements->[$_];
1943     if ($node->[1] eq 'button') {
1944     !!!parse-error;
1945     !!!back-token;
1946     $token = {type => 'end tag', tag_name => 'button'};
1947     return;
1948     } elsif ({
1949     table => 1, caption => 1, td => 1, th => 1,
1950     button => 1, marquee => 1, object => 1, html => 1,
1951     }->{$node->[1]}) {
1952     last INSCOPE;
1953     }
1954     } # INSCOPE
1955    
1956     $reconstruct_active_formatting_elements->();
1957    
1958     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
1959     push @$active_formatting_elements, ['#marker', ''];
1960    
1961     !!!next-token;
1962     return;
1963     } elsif ($token->{tag_name} eq 'marquee' or
1964     $token->{tag_name} eq 'object') {
1965     $reconstruct_active_formatting_elements->();
1966    
1967     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
1968     push @$active_formatting_elements, ['#marker', ''];
1969    
1970     !!!next-token;
1971     return;
1972     } elsif ($token->{tag_name} eq 'xmp') {
1973     $reconstruct_active_formatting_elements->();
1974    
1975     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
1976    
1977     $self->{content_model_flag} = 'CDATA';
1978    
1979     !!!next-token;
1980     return;
1981     } elsif ($token->{tag_name} eq 'table') {
1982     ## has a p element in scope: if so, handle an implied </p> first
1983     INSCOPE: for (reverse @$open_elements) {
1984     if ($_->[1] eq 'p') {
1985     !!!back-token;
1986     $token = {type => 'end tag', tag_name => 'p'};
1987     return;
1988     } elsif ({
1989     table => 1, caption => 1, td => 1, th => 1,
1990     button => 1, marquee => 1, object => 1, html => 1,
1991     }->{$_->[1]}) {
1992     last INSCOPE;
1993     }
1994     } # INSCOPE
1995     ## Insert the table element and switch to the 'in table' insertion mode.
1996     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
1997    
1998     $insertion_mode = 'in table';
1999    
2000     !!!next-token;
2001     return;
2002     } elsif ({
2003     area => 1, basefont => 1, bgsound => 1, br => 1,
2004     embed => 1, img => 1, param => 1, spacer => 1, wbr => 1,
2005     image => 1,
2006     }->{$token->{tag_name}}) {
2007     if ($token->{tag_name} eq 'image') {
2008     !!!parse-error;
2009     $token->{tag_name} = 'img';
2010     }
2011    
2012     $reconstruct_active_formatting_elements->();
2013    
2014     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2015     pop @$open_elements;
2016    
2017     !!!next-token;
2018     return;
2019     } elsif ($token->{tag_name} eq 'hr') {
2020     ## has a p element in scope
2021     INSCOPE: for (reverse @$open_elements) {
2022     if ($_->[1] eq 'p') {
2023     !!!back-token;
2024     $token = {type => 'end tag', tag_name => 'p'};
2025     return;
2026     } elsif ({
2027     table => 1, caption => 1, td => 1, th => 1,
2028     button => 1, marquee => 1, object => 1, html => 1,
2029     }->{$_->[1]}) {
2030     last INSCOPE;
2031     }
2032     } # INSCOPE
2033    
2034     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2035     pop @$open_elements;
2036    
2037     !!!next-token;
2038     return;
2039     } elsif ($token->{tag_name} eq 'input') {
2040     $reconstruct_active_formatting_elements->();
2041    
2042     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2043     ## TODO: associate with $form_element if defined
2044     pop @$open_elements;
2045    
2046     !!!next-token;
2047     return;
2048     } elsif ($token->{tag_name} eq 'isindex') {
2049     !!!parse-error;
2050    
2051     if (defined $form_element) {
2052     ## Ignore the token
2053     !!!next-token;
2054     return;
2055     } else {
2056     my $at = $token->{attributes};
2057     $at->{name} = {name => 'name', value => 'isindex'};
2058     my @tokens = (
2059     {type => 'start tag', tag_name => 'form'},
2060     {type => 'start tag', tag_name => 'hr'},
2061     {type => 'start tag', tag_name => 'p'},
2062     {type => 'start tag', tag_name => 'label'},
2063     {type => 'character',
2064     data => 'This is a searchable index. Insert your search keywords here: '}, # SHOULD
2065     ## TODO: make this configurable
2066     {type => 'start tag', tag_name => 'input', attributes => $at},
2067     #{type => 'character', data => ''}, # SHOULD
2068     {type => 'end tag', tag_name => 'label'},
2069     {type => 'end tag', tag_name => 'p'},
2070     {type => 'start tag', tag_name => 'hr'},
2071     {type => 'end tag', tag_name => 'form'},
2072     );
2073     $token = shift @tokens;
2074     !!!back-token (@tokens);
2075     return;
2076     }
2077     } elsif ({
2078     textarea => 1,
2079     noembed => 1,
2080     noframes => 1,
2081     noscript => 0, ## TODO: 1 if scripting is enabled
2082     }->{$token->{tag_name}}) {
2083     my $tag_name = $token->{tag_name};
2084     my $el;
2085     !!!create-element ($el, $token->{tag_name}, $token->{attributes});
2086    
2087     if ($token->{tag_name} eq 'textarea') {
2088     ## TODO: form_element if defined
2089     $self->{content_model_flag} = 'RCDATA';
2090     } else {
2091     $self->{content_model_flag} = 'CDATA';
2092     }
2093    
2094     $insert->($el);
2095    
2096     my $text = '';
2097     !!!next-token;
2098     while ($token->{type} eq 'character') {
2099     $text .= $token->{data};
2100     !!!next-token;
2101     }
2102     if (length $text) {
2103     $el->manakai_append_text ($text);
2104     }
2105    
2106     $self->{content_model_flag} = 'PCDATA';
2107    
2108     if ($token->{type} eq 'end tag' and
2109     $token->{tag_name} eq $tag_name) {
2110     ## Ignore the token
2111     } else {
2112     !!!parse-error;
2113     ## ISSUE: And ignore?
2114     }
2115     !!!next-token;
2116     return;
2117     } elsif ($token->{tag_name} eq 'select') {
2118     $reconstruct_active_formatting_elements->();
2119     ## Insert the select element and switch to the 'in select' insertion mode.
2120     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2121    
2122     $insertion_mode = 'in select';
2123     !!!next-token;
2124     return;
2125     } elsif ({
2126     caption => 1, col => 1, colgroup => 1, frame => 1,
2127     frameset => 1, head => 1, option => 1, optgroup => 1,
2128     tbody => 1, td => 1, tfoot => 1, th => 1,
2129     thead => 1, tr => 1,
2130     }->{$token->{tag_name}}) {
2131     !!!parse-error;
2132     ## Ignore the token
2133     !!!next-token;
2134     return;
2135    
2136     ## ISSUE: An issue on HTML5 new elements in the spec.
2137     } else {
2138     $reconstruct_active_formatting_elements->();
2139    
2140     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2141    
2142     !!!next-token;
2143     return;
2144     }
2145     } elsif ($token->{type} eq 'end tag') {
2146     if ($token->{tag_name} eq 'body') {
2147     if (@$open_elements > 1 and $open_elements->[1]->[1] eq 'body') {
2148     ## ISSUE: There is an issue in the spec.
2149     if ($open_elements->[-1]->[1] ne 'body') {
2150     !!!parse-error;
2151     }
2152     $insertion_mode = 'after body';
2153     !!!next-token;
2154     return;
2155     } else {
2156     !!!parse-error;
2157     ## Ignore the token
2158     !!!next-token;
2159     return;
2160     }
2161     } elsif ($token->{tag_name} eq 'html') {
2162     if (@$open_elements > 1 and $open_elements->[1]->[1] eq 'body') {
2163     ## ISSUE: There is an issue in the spec.
2164     if ($open_elements->[-1]->[1] ne 'body') {
2165     !!!parse-error;
2166     }
2167     $insertion_mode = 'after body';
2168     ## reprocess
2169     return;
2170     } else {
2171     !!!parse-error;
2172     ## Ignore the token
2173     !!!next-token;
2174     return;
2175     }
2176     } elsif ({
2177     address => 1, blockquote => 1, center => 1, dir => 1,
2178     div => 1, dl => 1, fieldset => 1, listing => 1,
2179     menu => 1, ol => 1, pre => 1, ul => 1,
2180     form => 1,
2181     p => 1,
2182     dd => 1, dt => 1, li => 1,
2183     button => 1, marquee => 1, object => 1,
2184     }->{$token->{tag_name}}) {
2185     ## has an element in scope
2186     my $i;
2187     INSCOPE: for (reverse 0..$#$open_elements) {
2188     my $node = $open_elements->[$_];
2189     if ($node->[1] eq $token->{tag_name}) {
2190     ## generate implied end tags
2191     if ({
2192     dd => ($token->{tag_name} ne 'dd'),
2193     dt => ($token->{tag_name} ne 'dt'),
2194     li => ($token->{tag_name} ne 'li'),
2195     p => ($token->{tag_name} ne 'p'),
2196     td => 1, th => 1, tr => 1,
2197     }->{$open_elements->[-1]->[1]}) {
2198     !!!back-token;
2199     $token = {type => 'end tag',
2200     tag_name => $open_elements->[-1]->[1]}; # MUST
2201     return;
2202     }
2203     $i = $_;
2204     last INSCOPE unless $token->{tag_name} eq 'p';
2205     } elsif ({
2206     table => 1, caption => 1, td => 1, th => 1,
2207     button => 1, marquee => 1, object => 1, html => 1,
2208     }->{$node->[1]}) {
2209     last INSCOPE;
2210     }
2211     } # INSCOPE
2212    
2213     if ($open_elements->[-1]->[1] ne $token->{tag_name}) {
2214     !!!parse-error;
2215     }
2216    
2217     splice @$open_elements, $i if defined $i;
2218     undef $form_element if $token->{tag_name} eq 'form';
2219     $clear_up_to_marker->()
2220     if {
2221     button => 1, marquee => 1, object => 1,
2222     }->{$token->{tag_name}};
2223     !!!next-token;
2224     return;
2225     } elsif ({
2226     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2227     }->{$token->{tag_name}}) {
2228     ## has an element in scope
2229     my $i;
2230     INSCOPE: for (reverse 0..$#$open_elements) {
2231     my $node = $open_elements->[$_];
2232     if ({
2233     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2234     }->{$node->[1]}) {
2235     ## generate implied end tags
2236     if ({
2237     dd => 1, dt => 1, li => 1, p => 1,
2238     td => 1, th => 1, tr => 1,
2239     }->{$open_elements->[-1]->[1]}) {
2240     !!!back-token;
2241     $token = {type => 'end tag',
2242     tag_name => $open_elements->[-1]->[1]}; # MUST
2243     return;
2244     }
2245     $i = $_;
2246     last INSCOPE;
2247     } elsif ({
2248     table => 1, caption => 1, td => 1, th => 1,
2249     button => 1, marquee => 1, object => 1, html => 1,
2250     }->{$node->[1]}) {
2251     last INSCOPE;
2252     }
2253     } # INSCOPE
2254    
2255     if ($open_elements->[-1]->[1] ne $token->{tag_name}) {
2256     !!!parse-error;
2257     }
2258    
2259     splice @$open_elements, $i if defined $i;
2260     !!!next-token;
2261     return;
2262     } elsif ({
2263     a => 1,
2264     b => 1, big => 1, em => 1, font => 1, i => 1,
2265     nobr => 1, s => 1, small => 1, strike => 1,
2266     strong => 1, tt => 1, u => 1,
2267     }->{$token->{tag_name}}) {
2268     $formatting_end_tag->($token->{tag_name}); ## end tags of formatting elements are delegated
2269     return;
2270     } elsif ({
2271     caption => 1, col => 1, colgroup => 1, frame => 1,
2272     frameset => 1, head => 1, option => 1, optgroup => 1,
2273     tbody => 1, td => 1, tfoot => 1, th => 1,
2274     thead => 1, tr => 1,
2275     area => 1, basefont => 1, bgsound => 1, br => 1,
2276     embed => 1, hr => 1, iframe => 1, image => 1,
2277     img => 1, input => 1, isindex=> 1, noembed => 1,
2278     noframes => 1, param => 1, select => 1, spacer => 1,
2279     table => 1, textarea => 1, wbr => 1,
2280     noscript => 0, ## TODO: if scripting is enabled
2281     }->{$token->{tag_name}}) {
2282     !!!parse-error;
2283     ## Ignore the token
2284     !!!next-token;
2285     return;
2286    
2287     ## ISSUE: Issue on HTML5 new elements in spec
2288    
2289     } else {
2290     ## Step 1: start at the current node (last entry of @$open_elements)
2291     my $node_i = -1;
2292     my $node = $open_elements->[$node_i];
2293    
2294     ## Step 2: walk the stack toward index 0 looking for an element named like the end tag
2295     S2: {
2296     if ($node->[1] eq $token->{tag_name}) {
2297     ## Step 1
2298     ## generate implied end tags
2299     if ({
2300     dd => 1, dt => 1, li => 1, p => 1,
2301     td => 1, th => 1, tr => 1,
2302     }->{$open_elements->[-1]->[1]}) {
2303     !!!back-token;
2304     $token = {type => 'end tag',
2305     tag_name => $open_elements->[-1]->[1]}; # MUST
2306     return;
2307     }
2308    
2309     ## Step 2
2310     if ($token->{tag_name} ne $open_elements->[-1]->[1]) {
2311     !!!parse-error;
2312     }
2313    
2314     ## Step 3: pop up to and including the matched node, then consume the end tag
2315     splice @$open_elements, $node_i; !!!next-token; # without this the same end tag is processed again
2316     last S2;
2317     } else {
2318     ## Step 3
2319     if (not $formatting_category->{$node->[1]} and
2320     #not $phrasing_category->{$node->[1]} and
2321     ($special_category->{$node->[1]} or
2322     $scoping_category->{$node->[1]})) {
2323     !!!parse-error;
2324     ## Ignore the token
2325     !!!next-token;
2326     last S2;
2327     }
2328     }
2329    
2330     ## Step 4
2331     $node_i--;
2332     $node = $open_elements->[$node_i];
2333    
2334     ## Step 5;
2335     redo S2;
2336     } # S2
2337     }
2338     }
2339     }; # $in_body
2340    
2341     B: {
2342     if ($phase eq 'initial') {
2343     if ($token->{type} eq 'DOCTYPE') {
2344     if ($token->{error}) {
2345     ## ISSUE: Spec currently left this case undefined.
2346     }
2347     my $doctype = $self->{document}->create_document_type_definition
2348     ($token->{name});
2349     $self->{document}->append_child ($doctype);
2350     $phase = 'root element';
2351     !!!next-token;
2352     redo B;
2353     } elsif ({
2354     comment => 1,
2355     'start tag' => 1,
2356     'end tag' => 1,
2357     'end-of-file' => 1,
2358     }->{$token->{type}}) {
2359     ## ISSUE: Spec currently left this case undefined.
2360     $phase = 'root element';
2361     ## reprocess
2362     redo B;
2363     } elsif ($token->{type} eq 'character') {
2364     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
2365     $self->{document}->manakai_append_text ($1);
2366     ## ISSUE: DOM3 Core does not allow Document > Text
2367     unless (length $token->{data}) {
2368     ## Stay in the phase
2369     !!!next-token;
2370     redo B;
2371     }
2372     }
2373     ## ISSUE: Spec currently left this case undefined.
2374     $phase = 'root element';
2375     ## reprocess
2376     redo B;
2377     } else {
2378     die "$0: $token->{type}: Unknown token";
2379     }
2380     } elsif ($phase eq 'root element') {
2381     if ($token->{type} eq 'DOCTYPE') {
2382     !!!parse-error;
2383     ## Ignore the token
2384     ## Stay in the phase
2385     !!!next-token;
2386     redo B;
2387     } elsif ($token->{type} eq 'comment') {
2388     my $comment = $self->{document}->create_comment ($token->{data});
2389     $self->{document}->append_child ($comment);
2390     ## Stay in the phase
2391     !!!next-token;
2392     redo B;
2393     } elsif ($token->{type} eq 'character') {
2394     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
2395     $self->{document}->manakai_append_text ($1);
2396     ## ISSUE: DOM3 Core does not allow Document > Text
2397     unless (length $token->{data}) {
2398     ## Stay in the phase
2399     !!!next-token;
2400     redo B;
2401     }
2402     }
2403     #
2404     } elsif ({
2405     'start tag' => 1,
2406     'end tag' => 1,
2407     'end-of-file' => 1,
2408     }->{$token->{type}}) {
2409     ## ISSUE: There is an issue in the spec
2410     #
2411     } else {
2412     die "$0: $token->{type}: Unknown token";
2413     }
2414     my $root_element; !!!create-element ($root_element, 'html');
2415     $self->{document}->append_child ($root_element);
2416     $open_elements = [[$root_element, 'html']];
2417     $phase = 'main';
2418     ## reprocess
2419     redo B;
2420     } elsif ($phase eq 'main') {
2421     if ($token->{type} eq 'DOCTYPE') {
2422     !!!parse-error;
2423     ## Ignore the token
2424     ## Stay in the phase
2425     !!!next-token;
2426     redo B;
2427     } elsif ($token->{type} eq 'start tag' and
2428     $token->{tag_name} eq 'html') {
2429     ## TODO: unless it is the first start tag token, parse-error
2430     my $top_el = $open_elements->[0]->[0];
2431     for my $attr_name (keys %{$token->{attributes}}) {
2432     unless ($top_el->has_attribute_ns (undef, $attr_name)) {
2433     $top_el->set_attribute_ns (undef, [undef, $attr_name],
2434     $token->{attributes}->{$attr_name}->{value}); # each entry is a {name, value} hash
2435     }
2436     }
2437     !!!next-token;
2438     redo B;
2439     } elsif ($token->{type} eq 'end-of-file') {
2440     ## Generate implied end tags
2441     if ({
2442     dd => 1, dt => 1, li => 1, p => 1, td => 1, th => 1, tr => 1,
2443     }->{$open_elements->[-1]->[1]}) {
2444     !!!back-token;
2445     $token = {type => 'end tag', tag_name => $open_elements->[-1]->[1]};
2446     redo B;
2447     }
2448    
2449     if (@$open_elements > 2 or
2450     (@$open_elements == 2 and $open_elements->[1]->[1] ne 'body')) {
2451     !!!parse-error;
2452     } else {
2453     ## TODO: inner_html parser and @$open_elements > 1 and $open_elements->[1] ne 'body', then parse-error
2454     }
2455    
2456     ## Stop parsing
2457     last B;
2458    
2459     ## ISSUE: There is an issue in the spec.
2460     } else {
2461     if ($insertion_mode eq 'before head') {
2462     if ($token->{type} eq 'character') {
2463     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
2464     $open_elements->[-1]->[0]->manakai_append_text ($1);
2465     unless (length $token->{data}) {
2466     !!!next-token;
2467     redo B;
2468     }
2469     }
2470     ## As if <head>
2471     !!!create-element ($head_element, 'head');
2472     $open_elements->[-1]->[0]->append_child ($head_element);
2473     push @$open_elements, [$head_element, 'head'];
2474     $insertion_mode = 'in head';
2475     ## reprocess
2476     redo B;
2477     } elsif ($token->{type} eq 'comment') {
2478     my $comment = $self->{document}->create_comment ($token->{data});
2479     $open_elements->[-1]->[0]->append_child ($comment);
2480     !!!next-token;
2481     redo B;
2482     } elsif ($token->{type} eq 'start tag') {
2483     my $attr = $token->{tag_name} eq 'head' ? $token->{attributes} : {};
2484     !!!create-element ($head_element, 'head', $attr);
2485     $open_elements->[-1]->[0]->append_child ($head_element);
2486     push @$open_elements, [$head_element, 'head'];
2487     $insertion_mode = 'in head';
2488     if ($token->{tag_name} eq 'head') {
2489     !!!next-token;
2490     #} elsif ({
2491     # base => 1, link => 1, meta => 1,
2492     # script => 1, style => 1, title => 1,
2493     # }->{$token->{tag_name}}) {
2494     # ## reprocess
2495     } else {
2496     ## reprocess
2497     }
2498     redo B;
2499     } elsif ($token->{type} eq 'end tag') {
2500     if ($token->{tag_name} eq 'html') {
2501     ## As if <head>
2502     !!!create-element ($head_element, 'head');
2503     $open_elements->[-1]->[0]->append_child ($head_element);
2504     push @$open_elements, [$head_element, 'head'];
2505     $insertion_mode = 'in head';
2506     ## reprocess
2507     redo B;
2508     } else {
2509     !!!parse-error;
2510     !!!next-token; ## Ignore the token: advance first, or redo B sees the same end tag forever
2511     redo B;
2512     }
2513     } else {
2514     die "$0: $token->{type}: Unknown type";
2515     }
2516     } elsif ($insertion_mode eq 'in head') {
2517     if ($token->{type} eq 'character') {
2518     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
2519     $open_elements->[-1]->[0]->manakai_append_text ($1);
2520     unless (length $token->{data}) {
2521     !!!next-token;
2522     redo B;
2523     }
2524     }
2525    
2526     #
2527     } elsif ($token->{type} eq 'comment') {
2528     my $comment = $self->{document}->create_comment ($token->{data});
2529     $open_elements->[-1]->[0]->append_child ($comment);
2530     !!!next-token;
2531     redo B;
2532     } elsif ($token->{type} eq 'start tag') {
2533     if ($token->{tag_name} eq 'title') {
2534     my $title_el; !!!create-element ($title_el, 'title');
2535     (defined $head_element ? $head_element : $open_elements->[-1]->[0])
2536     ->append_child ($title_el);
2537     $self->{content_model_flag} = 'RCDATA';
2538    
2539     my $text = '';
2540     !!!next-token;
2541     while ($token->{type} eq 'character') {
2542     $text .= $token->{data};
2543     !!!next-token;
2544     }
2545     if (length $text) {
2546     $title_el->manakai_append_text ($text);
2547     }
2548    
2549     $self->{content_model_flag} = 'PCDATA';
2550    
2551     if ($token->{type} eq 'end tag' and
2552     $token->{tag_name} eq 'title') {
2553     ## Ignore the token
2554     } else {
2555     !!!parse-error;
2556     ## ISSUE: And ignore?
2557     }
2558     !!!next-token;
2559     redo B;
2560     } elsif ($token->{tag_name} eq 'style') {
2561     $style_start_tag->();
2562     redo B;
2563     } elsif ($token->{tag_name} eq 'script') {
2564     $script_start_tag->();
2565     redo B;
2566     } elsif ({base => 1, link => 1, meta => 1}->{$token->{tag_name}}) {
2567     ## NOTE: There are "as if in head" code clones
2568     my $el;
2569     !!!create-element ($el, $token->{tag_name}, $token->{attributes});
2570     (defined $head_element ? $head_element : $open_elements->[-1]->[0])
2571     ->append_child ($el);
2572    
2573     ## ISSUE: Issue on magical <base> in the spec
2574    
2575     !!!next-token;
2576     redo B;
2577     } elsif ($token->{tag_name} eq 'head') {
2578     !!!parse-error;
2579     ## Ignore the token
2580     !!!next-token;
2581     redo B;
2582     } else {
2583     #
2584     }
2585     } elsif ($token->{type} eq 'end tag') {
2586     if ($token->{tag_name} eq 'head') {
2587     if ($open_elements->[-1]->[1] eq 'head') {
2588     pop @$open_elements;
2589     } else {
2590     !!!parse-error;
2591     }
2592     $insertion_mode = 'after head';
2593     !!!next-token;
2594     redo B;
2595     } elsif ($token->{tag_name} eq 'html') {
2596     #
2597     } else {
2598     !!!parse-error;
2599     ## Ignore the token
2600     !!!next-token;
2601     redo B;
2602     }
2603     } else {
2604     #
2605     }
2606    
2607     if ($open_elements->[-1]->[1] eq 'head') {
2608     ## As if </head>
2609     pop @$open_elements;
2610     }
2611     $insertion_mode = 'after head';
2612     ## reprocess
2613     redo B;
2614    
2615     ## ISSUE: An issue in the spec.
2616     } elsif ($insertion_mode eq 'after head') {
2617     if ($token->{type} eq 'character') {
2618     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
2619     $open_elements->[-1]->[0]->manakai_append_text ($1);
2620     unless (length $token->{data}) {
2621     !!!next-token;
2622     redo B;
2623     }
2624     }
2625    
2626     #
2627     } elsif ($token->{type} eq 'comment') {
2628     my $comment = $self->{document}->create_comment ($token->{data});
2629     $open_elements->[-1]->[0]->append_child ($comment);
2630     !!!next-token;
2631     redo B;
2632     } elsif ($token->{type} eq 'start tag') {
2633     if ($token->{tag_name} eq 'body') {
2634     !!!insert-element ('body', $token->{attributes});
2635     $insertion_mode = 'in body';
2636     !!!next-token;
2637     redo B;
2638     } elsif ($token->{tag_name} eq 'frameset') {
2639     !!!insert-element ('frameset', $token->{attributes});
2640     $insertion_mode = 'in frameset';
2641     !!!next-token;
2642     redo B;
2643     } elsif ({
2644     base => 1, link => 1, meta => 1,
2645     script=> 1, style => 1, title => 1,
2646     }->{$token->{tag_name}}) {
2647     !!!parse-error;
2648     $insertion_mode = 'in head';
2649     ## reprocess
2650     redo B;
2651     } else {
2652     #
2653     }
2654     } else {
2655     #
2656     }
2657    
2658     ## As if <body>
2659     !!!insert-element ('body');
2660     $insertion_mode = 'in body';
2661     ## reprocess
2662     redo B;
2663     } elsif ($insertion_mode eq 'in body') {
2664     if ($token->{type} eq 'character') {
2665     ## NOTE: There is a code clone of "character in body".
2666     $reconstruct_active_formatting_elements->();
2667    
2668     $open_elements->[-1]->[0]->manakai_append_text ($token->{data});
2669    
2670     !!!next-token;
2671     redo B;
2672     } elsif ($token->{type} eq 'comment') {
2673     ## NOTE: There is a code clone of "comment in body".
2674     my $comment = $self->{document}->create_comment ($token->{data});
2675     $open_elements->[-1]->[0]->append_child ($comment);
2676     !!!next-token;
2677     redo B;
2678     } else {
2679     $in_body->(sub {
2680     $open_elements->[-1]->[0]->append_child (shift);
2681     });
2682     redo B;
2683     }
2684     } elsif ($insertion_mode eq 'in table') {
2685     if ($token->{type} eq 'character') {
2686     $reconstruct_active_formatting_elements->();
2687    
2688     $open_elements->[-1]->[0]->manakai_append_text ($token->{data});
2689    
2690     !!!next-token;
2691     redo B;
2692     } elsif ($token->{type} eq 'comment') {
2693     my $comment = $self->{document}->create_comment ($token->{data});
2694     $open_elements->[-1]->[0]->append_child ($comment);
2695     !!!next-token;
2696     redo B;
2697     } elsif ($token->{type} eq 'start tag') {
2698     if ({
2699     caption => 1,
2700     colgroup => 1,
2701     tbody => 1, tfoot => 1, thead => 1,
2702     }->{$token->{tag_name}}) {
2703     ## Clear back to table context
2704     while ($open_elements->[-1]->[1] ne 'table' and
2705     $open_elements->[-1]->[1] ne 'html') {
2706     !!!parse-error;
2707     pop @$open_elements;
2708     }
2709    
2710     push @$active_formatting_elements, ['#marker', '']
2711     if $token->{tag_name} eq 'caption';
2712    
2713     !!!insert-element ($token->{tag_name}, $token->{attributes});
2714     $insertion_mode = {
2715     caption => 'in caption',
2716     colgroup => 'in column group',
2717     tbody => 'in table body',
2718     tfoot => 'in table body',
2719     thead => 'in table body',
2720     }->{$token->{tag_name}};
2721     !!!next-token;
2722     redo B;
2723     } elsif ({
2724     col => 1,
2725     td => 1, th => 1, tr => 1,
2726     }->{$token->{tag_name}}) {
2727     ## Clear back to table context
2728     while ($open_elements->[-1]->[1] ne 'table' and
2729     $open_elements->[-1]->[1] ne 'html') {
2730     !!!parse-error;
2731     pop @$open_elements;
2732     }
2733    
2734     !!!insert-element ($token->{tag_name} eq 'col' ? 'colgroup' : 'tbody');
2735     $insertion_mode = $token->{tag_name} eq 'col'
2736     ? 'in column group' : 'in table body';
2737     ## reprocess
2738     redo B;
2739     } elsif ($token->{tag_name} eq 'table') {
2740     ## NOTE: There are code clones for this "table in table"
2741     !!!parse-error;
2742    
2743     ## As if </table>
2744     ## have a table element in table scope
2745     my $i;
2746     INSCOPE: for (reverse 0..$#$open_elements) {
2747     my $node = $open_elements->[$_];
2748     if ($node->[1] eq 'table') {
2749     $i = $_;
2750     last INSCOPE;
2751     } elsif ({
2752     table => 1, html => 1,
2753     }->{$node->[1]}) {
2754     last INSCOPE;
2755     }
2756     } # INSCOPE
2757     unless (defined $i) {
2758     !!!parse-error;
2759     ## Ignore tokens </table><table>
2760     !!!next-token;
2761     redo B;
2762     }
2763    
2764     ## generate implied end tags
2765     if ({
2766     dd => 1, dt => 1, li => 1, p => 1,
2767     td => 1, th => 1, tr => 1,
2768     }->{$open_elements->[-1]->[1]}) {
2769     !!!back-token; # <table>
2770     $token = {type => 'end tag', tag_name => 'table'};
2771     !!!back-token;
2772     $token = {type => 'end tag',
2773     tag_name => $open_elements->[-1]->[1]}; # MUST
2774     redo B;
2775     }
2776    
2777     if ($open_elements->[-1]->[1] ne 'table') {
2778     !!!parse-error;
2779     }
2780    
2781     splice @$open_elements, $i;
2782    
2783     $reset_insertion_mode->();
2784    
2785     ## reprocess
2786     redo B;
2787     } else {
2788     #
2789     }
2790     } elsif ($token->{type} eq 'end tag') {
2791     if ($token->{tag_name} eq 'table') {
2792     ## have a table element in table scope
2793     my $i;
2794     INSCOPE: for (reverse 0..$#$open_elements) {
2795     my $node = $open_elements->[$_];
2796     if ($node->[1] eq $token->{tag_name}) {
2797     $i = $_;
2798     last INSCOPE;
2799     } elsif ({
2800     table => 1, html => 1,
2801     }->{$node->[1]}) {
2802     last INSCOPE;
2803     }
2804     } # INSCOPE
2805     unless (defined $i) {
2806     !!!parse-error;
2807     ## Ignore the token
2808     !!!next-token;
2809     redo B;
2810     }
2811    
2812     ## generate implied end tags
2813     if ({
2814     dd => 1, dt => 1, li => 1, p => 1,
2815     td => 1, th => 1, tr => 1,
2816     }->{$open_elements->[-1]->[1]}) {
2817     !!!back-token;
2818     $token = {type => 'end tag',
2819     tag_name => $open_elements->[-1]->[1]}; # MUST
2820     redo B;
2821     }
2822    
2823     if ($open_elements->[-1]->[1] ne 'table') {
2824     !!!parse-error;
2825     }
2826    
2827     splice @$open_elements, $i;
2828    
2829     $reset_insertion_mode->();
2830    
2831     !!!next-token;
2832     redo B;
2833     } elsif ({
2834     body => 1, caption => 1, col => 1, colgroup => 1,
2835     html => 1, tbody => 1, td => 1, tfoot => 1, th => 1,
2836     thead => 1, tr => 1,
2837     }->{$token->{tag_name}}) {
2838     !!!parse-error;
2839     ## Ignore the token
2840     !!!next-token;
2841     redo B;
2842     } else {
2843     #
2844     }
2845     } else {
2846     #
2847     }
2848    
2849     ## NOTE: There are code clones of "misc in table".
2850     !!!parse-error;
2851     $in_body->(sub {
2852     my $child = shift;
2853     if ({
2854     table => 1, tbody => 1, tfoot => 1,
2855     thead => 1, tr => 1,
2856     }->{$open_elements->[-1]->[1]}) {
2857     # MUST
2858     my $foster_parent_element;
2859     my $next_sibling;
2860     OE: for (reverse 0..$#$open_elements) {
2861     if ($open_elements->[$_]->[1] eq 'table') {
2862     my $parent = $open_elements->[$_]->[0]->parent_node;
2863     if (defined $parent and $parent->node_type == 1) {
2864     $foster_parent_element = $parent;
2865     $next_sibling = $open_elements->[$_]->[0];
2866     } else {
2867     $foster_parent_element
2868     = $open_elements->[$_ - 1]->[0];
2869     }
2870     last OE;
2871     }
2872     } # OE
2873     $foster_parent_element = $open_elements->[0]->[0]
2874     unless defined $foster_parent_element;
2875     $foster_parent_element->insert_before
2876     ($child, $next_sibling);
2877     } else {
2878     $open_elements->[-1]->[0]->append_child ($child);
2879     }
2880     });
2881     redo B;
2882     } elsif ($insertion_mode eq 'in caption') {
2883     if ($token->{type} eq 'start tag') {
2884     if ({
2885     caption => 1, col => 1, colgroup => 1, tbody => 1,
2886     td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
2887     }->{$token->{tag_name}}) {
2888     !!!parse-error;
2889    
2890     ## As if </caption>
2891     ## have a table element in table scope
2892     my $i;
2893     INSCOPE: for (reverse 0..$#$open_elements) {
2894     my $node = $open_elements->[$_];
2895     if ($node->[1] eq 'caption') {
2896     $i = $_;
2897     last INSCOPE;
2898     } elsif ({
2899     table => 1, html => 1,
2900     }->{$node->[1]}) {
2901     last INSCOPE;
2902     }
2903     } # INSCOPE
2904     unless (defined $i) {
2905     !!!parse-error;
2906     ## Ignore the token
2907     !!!next-token;
2908     redo B;
2909     }
2910    
2911     ## generate implied end tags
2912     if ({
2913     dd => 1, dt => 1, li => 1, p => 1,
2914     td => 1, th => 1, tr => 1,
2915     }->{$open_elements->[-1]->[1]}) {
2916     !!!back-token; # <?>
2917     $token = {type => 'end tag', tag_name => 'caption'};
2918     !!!back-token;
2919     $token = {type => 'end tag',
2920     tag_name => $open_elements->[-1]->[1]}; # MUST
2921     redo B;
2922     }
2923    
2924     if ($open_elements->[-1]->[1] ne 'caption') {
2925     !!!parse-error;
2926     }
2927    
2928     splice @$open_elements, $i;
2929    
2930     $clear_up_to_marker->();
2931    
2932     $insertion_mode = 'in table';
2933    
2934     ## reprocess
2935     redo B;
2936     } else {
2937     #
2938     }
2939     } elsif ($token->{type} eq 'end tag') {
2940     if ($token->{tag_name} eq 'caption') {
2941     ## have a table element in table scope
2942     my $i;
2943     INSCOPE: for (reverse 0..$#$open_elements) {
2944     my $node = $open_elements->[$_];
2945     if ($node->[1] eq $token->{tag_name}) {
2946     $i = $_;
2947     last INSCOPE;
2948     } elsif ({
2949     table => 1, html => 1,
2950     }->{$node->[1]}) {
2951     last INSCOPE;
2952     }
2953     } # INSCOPE
2954     unless (defined $i) {
2955     !!!parse-error;
2956     ## Ignore the token
2957     !!!next-token;
2958     redo B;
2959     }
2960    
2961     ## generate implied end tags
2962     if ({
2963     dd => 1, dt => 1, li => 1, p => 1,
2964     td => 1, th => 1, tr => 1,
2965     }->{$open_elements->[-1]->[1]}) {
2966     !!!back-token;
2967     $token = {type => 'end tag',
2968     tag_name => $open_elements->[-1]->[1]}; # MUST
2969     redo B;
2970     }
2971    
2972     if ($open_elements->[-1]->[1] ne 'caption') {
2973     !!!parse-error;
2974     }
2975    
2976     splice @$open_elements, $i;
2977    
2978     $clear_up_to_marker->();
2979    
2980     $insertion_mode = 'in table';
2981    
2982     !!!next-token;
2983     redo B;
2984     } elsif ($token->{tag_name} eq 'table') {
2985     !!!parse-error;
2986    
2987     ## As if </caption>
2988     ## have a table element in table scope
2989     my $i;
2990     INSCOPE: for (reverse 0..$#$open_elements) {
2991     my $node = $open_elements->[$_];
2992     if ($node->[1] eq 'caption') {
2993     $i = $_;
2994     last INSCOPE;
2995     } elsif ({
2996     table => 1, html => 1,
2997     }->{$node->[1]}) {
2998     last INSCOPE;
2999     }
3000     } # INSCOPE
3001     unless (defined $i) {
3002     !!!parse-error;
3003     ## Ignore the token
3004     !!!next-token;
3005     redo B;
3006     }
3007    
3008     ## generate implied end tags
3009     if ({
3010     dd => 1, dt => 1, li => 1, p => 1,
3011     td => 1, th => 1, tr => 1,
3012     }->{$open_elements->[-1]->[1]}) {
3013     !!!back-token; # </table>
3014     $token = {type => 'end tag', tag_name => 'caption'};
3015     !!!back-token;
3016     $token = {type => 'end tag',
3017     tag_name => $open_elements->[-1]->[1]}; # MUST
3018     redo B;
3019     }
3020    
3021     if ($open_elements->[-1]->[1] ne 'caption') {
3022     !!!parse-error;
3023     }
3024    
3025     splice @$open_elements, $i;
3026    
3027     $clear_up_to_marker->();
3028    
3029     $insertion_mode = 'in table';
3030    
3031     ## reprocess
3032     redo B;
3033     } elsif ({
3034     body => 1, col => 1, colgroup => 1,
3035     html => 1, tbody => 1, td => 1, tfoot => 1,
3036     th => 1, thead => 1, tr => 1,
3037     }->{$token->{tag_name}}) {
3038     !!!parse-error;
3039     !!!next-token; ## Ignore the token: it must be consumed, otherwise |redo B| reprocesses it forever
3040     redo B;
3041     } else {
3042     #
3043     }
3044     } else {
3045     #
3046     }
3047    
3048     $in_body->(sub {
3049     $open_elements->[-1]->[0]->append_child (shift);
3050     });
3051     redo B;
3052     } elsif ($insertion_mode eq 'in column group') {
3053     if ($token->{type} eq 'character') {
3054     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3055     $open_elements->[-1]->[0]->manakai_append_text ($1);
3056     unless (length $token->{data}) {
3057     !!!next-token;
3058     redo B;
3059     }
3060     }
3061    
3062     #
3063     } elsif ($token->{type} eq 'comment') {
3064     my $comment = $self->{document}->create_comment ($token->{data});
3065     $open_elements->[-1]->[0]->append_child ($comment);
3066     !!!next-token;
3067     redo B;
3068     } elsif ($token->{type} eq 'start tag') {
3069     if ($token->{tag_name} eq 'col') {
3070     !!!insert-element ($token->{tag_name}, $token->{attributes});
3071     pop @$open_elements;
3072     !!!next-token;
3073     redo B;
3074     } else {
3075     #
3076     }
3077     } elsif ($token->{type} eq 'end tag') {
3078     if ($token->{tag_name} eq 'colgroup') {
3079     if ($open_elements->[-1]->[1] eq 'html') {
3080     !!!parse-error;
3081     ## Ignore the token
3082     !!!next-token;
3083     redo B;
3084     } else {
3085     pop @$open_elements; # colgroup
3086     $insertion_mode = 'in table';
3087     !!!next-token;
3088     redo B;
3089     }
3090     } elsif ($token->{tag_name} eq 'col') {
3091     !!!parse-error;
3092     ## Ignore the token
3093     !!!next-token;
3094     redo B;
3095     } else {
3096     #
3097     }
3098     } else {
3099     #
3100     }
3101    
3102     ## As if </colgroup>
3103     if ($open_elements->[-1]->[1] eq 'html') {
3104     !!!parse-error;
3105     ## Ignore the token
3106     !!!next-token;
3107     redo B;
3108     } else {
3109     pop @$open_elements; # colgroup
3110     $insertion_mode = 'in table';
3111     ## reprocess
3112     redo B;
3113     }
3114     } elsif ($insertion_mode eq 'in table body') {
3115     if ($token->{type} eq 'character') {
3116     ## Copied from 'in table'
3117     $reconstruct_active_formatting_elements->();
3118    
3119     $open_elements->[-1]->[0]->manakai_append_text ($token->{data});
3120    
3121     !!!next-token;
3122     redo B;
3123     } elsif ($token->{type} eq 'comment') {
3124     ## Copied from 'in table'
3125     my $comment = $self->{document}->create_comment ($token->{data});
3126     $open_elements->[-1]->[0]->append_child ($comment);
3127     !!!next-token;
3128     redo B;
3129     } elsif ($token->{type} eq 'start tag') {
3130     if ({
3131     tr => 1,
3132     th => 1, td => 1,
3133     }->{$token->{tag_name}}) {
3134     ## Clear back to table body context
3135     while (not {
3136     tbody => 1, tfoot => 1, thead => 1, html => 1,
3137     }->{$open_elements->[-1]->[1]}) {
3138     !!!parse-error;
3139     pop @$open_elements;
3140     }
3141    
3142     $insertion_mode = 'in row';
3143     if ($token->{tag_name} eq 'tr') {
3144     !!!insert-element ($token->{tag_name}, $token->{attributes});
3145     !!!next-token;
3146     } else {
3147     !!!insert-element ('tr');
3148     ## reprocess
3149     }
3150     redo B;
3151     } elsif ({
3152     caption => 1, col => 1, colgroup => 1,
3153     tbody => 1, tfoot => 1, thead => 1,
3154     }->{$token->{tag_name}}) {
3155     ## have an element in table scope
3156     my $i;
3157     INSCOPE: for (reverse 0..$#$open_elements) {
3158     my $node = $open_elements->[$_];
3159     if ({
3160     tbody => 1, thead => 1, tfoot => 1,
3161     }->{$node->[1]}) {
3162     $i = $_;
3163     last INSCOPE;
3164     } elsif ({
3165     table => 1, html => 1,
3166     }->{$node->[1]}) {
3167     last INSCOPE;
3168     }
3169     } # INSCOPE
3170     unless (defined $i) {
3171     !!!parse-error;
3172     ## Ignore the token
3173     !!!next-token;
3174     redo B;
3175     }
3176    
3177     ## Clear back to table body context
3178     while (not {
3179     tbody => 1, tfoot => 1, thead => 1, html => 1,
3180     }->{$open_elements->[-1]->[1]}) {
3181     !!!parse-error;
3182     pop @$open_elements;
3183     }
3184    
3185     ## As if <{current node}>
3186     ## have an element in table scope
3187     ## true by definition
3188    
3189     ## Clear back to table body context
3190     ## nop by definition
3191    
3192     pop @$open_elements;
3193     $insertion_mode = 'in table';
3194     ## reprocess
3195     redo B;
3196     } elsif ($token->{tag_name} eq 'table') {
3197     ## NOTE: This is a code clone of "table in table"
3198     !!!parse-error;
3199    
3200     ## As if </table>
3201     ## have a table element in table scope
3202     my $i;
3203     INSCOPE: for (reverse 0..$#$open_elements) {
3204     my $node = $open_elements->[$_];
3205     if ($node->[1] eq 'table') {
3206     $i = $_;
3207     last INSCOPE;
3208     } elsif ({
3209     table => 1, html => 1,
3210     }->{$node->[1]}) {
3211     last INSCOPE;
3212     }
3213     } # INSCOPE
3214     unless (defined $i) {
3215     !!!parse-error;
3216     ## Ignore tokens </table><table>
3217     !!!next-token;
3218     redo B;
3219     }
3220    
3221     ## generate implied end tags
3222     if ({
3223     dd => 1, dt => 1, li => 1, p => 1,
3224     td => 1, th => 1, tr => 1,
3225     }->{$open_elements->[-1]->[1]}) {
3226     !!!back-token; # <table>
3227     $token = {type => 'end tag', tag_name => 'table'};
3228     !!!back-token;
3229     $token = {type => 'end tag',
3230     tag_name => $open_elements->[-1]->[1]}; # MUST
3231     redo B;
3232     }
3233    
3234     if ($open_elements->[-1]->[1] ne 'table') {
3235     !!!parse-error;
3236     }
3237    
3238     splice @$open_elements, $i;
3239    
3240     $reset_insertion_mode->();
3241    
3242     ## reprocess
3243     redo B;
3244     } else {
3245     #
3246     }
3247     } elsif ($token->{type} eq 'end tag') {
3248     if ({
3249     tbody => 1, tfoot => 1, thead => 1,
3250     }->{$token->{tag_name}}) {
3251     ## have an element in table scope
3252     my $i;
3253     INSCOPE: for (reverse 0..$#$open_elements) {
3254     my $node = $open_elements->[$_];
3255     if ($node->[1] eq $token->{tag_name}) {
3256     $i = $_;
3257     last INSCOPE;
3258     } elsif ({
3259     table => 1, html => 1,
3260     }->{$node->[1]}) {
3261     last INSCOPE;
3262     }
3263     } # INSCOPE
3264     unless (defined $i) {
3265     !!!parse-error;
3266     ## Ignore the token
3267     !!!next-token;
3268     redo B;
3269     }
3270    
3271     ## Clear back to table body context
3272     while (not {
3273     tbody => 1, tfoot => 1, thead => 1, html => 1,
3274     }->{$open_elements->[-1]->[1]}) {
3275     !!!parse-error;
3276     pop @$open_elements;
3277     }
3278    
3279     pop @$open_elements;
3280     $insertion_mode = 'in table';
3281     !!!next-token;
3282     redo B;
3283     } elsif ($token->{tag_name} eq 'table') {
3284     ## have an element in table scope
3285     my $i;
3286     INSCOPE: for (reverse 0..$#$open_elements) {
3287     my $node = $open_elements->[$_];
3288     if ({
3289     tbody => 1, thead => 1, tfoot => 1,
3290     }->{$node->[1]}) {
3291     $i = $_;
3292     last INSCOPE;
3293     } elsif ({
3294     table => 1, html => 1,
3295     }->{$node->[1]}) {
3296     last INSCOPE;
3297     }
3298     } # INSCOPE
3299     unless (defined $i) {
3300     !!!parse-error;
3301     ## Ignore the token
3302     !!!next-token;
3303     redo B;
3304     }
3305    
3306     ## Clear back to table body context
3307     while (not {
3308     tbody => 1, tfoot => 1, thead => 1, html => 1,
3309     }->{$open_elements->[-1]->[1]}) {
3310     !!!parse-error;
3311     pop @$open_elements;
3312     }
3313    
3314     ## As if <{current node}>
3315     ## have an element in table scope
3316     ## true by definition
3317    
3318     ## Clear back to table body context
3319     ## nop by definition
3320    
3321     pop @$open_elements;
3322     $insertion_mode = 'in table';
3323     ## reprocess
3324     redo B;
3325     } elsif ({
3326     body => 1, caption => 1, col => 1, colgroup => 1,
3327     html => 1, td => 1, th => 1, tr => 1,
3328     }->{$token->{tag_name}}) {
3329     !!!parse-error;
3330     ## Ignore the token
3331     !!!next-token;
3332     redo B;
3333     } else {
3334     #
3335     }
3336     } else {
3337     #
3338     }
3339    
3340     ## As if in table
3341     ## NOTE: This is a code clone of "misc in table".
3342     !!!parse-error;
3343     $in_body->(sub {
3344     my $child = shift;
3345     if ({
3346     table => 1, tbody => 1, tfoot => 1,
3347     thead => 1, tr => 1,
3348     }->{$open_elements->[-1]->[1]}) {
3349     # MUST
3350     my $foster_parent_element;
3351     my $next_sibling;
3352     OE: for (reverse 0..$#$open_elements) {
3353     if ($open_elements->[$_]->[1] eq 'table') {
3354     my $parent = $open_elements->[$_]->[0]->parent_node;
3355     if (defined $parent and $parent->node_type == 1) {
3356     $foster_parent_element = $parent;
3357     $next_sibling = $open_elements->[$_]->[0];
3358     } else {
3359     $foster_parent_element
3360     = $open_elements->[$_ - 1]->[0];
3361     }
3362     last OE;
3363     }
3364     } # OE
3365     $foster_parent_element = $open_elements->[0]->[0]
3366     unless defined $foster_parent_element;
3367     $foster_parent_element->insert_before
3368     ($child, $next_sibling);
3369     } else {
3370     $open_elements->[-1]->[0]->append_child ($child);
3371     }
3372     });
3373     redo B;
3374     } elsif ($insertion_mode eq 'in row') {
3375     if ($token->{type} eq 'character') {
3376     ## Copied from 'in table'
3377     $reconstruct_active_formatting_elements->();
3378    
3379     $open_elements->[-1]->[0]->manakai_append_text ($token->{data});
3380    
3381     !!!next-token;
3382     redo B;
3383     } elsif ($token->{type} eq 'comment') {
3384     ## Copied from 'in table'
3385     my $comment = $self->{document}->create_comment ($token->{data});
3386     $open_elements->[-1]->[0]->append_child ($comment);
3387     !!!next-token;
3388     redo B;
3389     } elsif ($token->{type} eq 'start tag') {
3390     if ($token->{tag_name} eq 'th' or
3391     $token->{tag_name} eq 'td') {
3392     ## Clear back to table row context: pop until the current node is |tr| (or |html|)
3393     while (not {
3394     tr => 1, html => 1, # was th/td, which popped the |tr| itself and everything below it
3395     }->{$open_elements->[-1]->[1]}) {
3396     !!!parse-error;
3397     pop @$open_elements;
3398     }
3399    
3400     !!!insert-element ($token->{tag_name}, $token->{attributes});
3401     $insertion_mode = 'in cell';
3402    
3403     push @$active_formatting_elements, ['#marker', ''];
3404    
3405     !!!next-token;
3406     redo B;
3407     } elsif ({
3408     caption => 1, col => 1, colgroup => 1,
3409     tbody => 1, tfoot => 1, thead => 1, tr => 1,
3410     }->{$token->{tag_name}}) {
3411     ## As if </tr>
3412     ## have an element in table scope
3413     my $i;
3414     INSCOPE: for (reverse 0..$#$open_elements) {
3415     my $node = $open_elements->[$_];
3416     if ($node->[1] eq 'tr') {
3417     $i = $_;
3418     last INSCOPE;
3419     } elsif ({
3420     table => 1, html => 1,
3421     }->{$node->[1]}) {
3422     last INSCOPE;
3423     }
3424     } # INSCOPE
3425     unless (defined $i) {
3426     !!!parse-error;
3427     ## Ignore the token
3428     !!!next-token;
3429     redo B;
3430     }
3431    
3432     ## Clear back to table row context
3433     while (not {
3434     tr => 1, html => 1,
3435     }->{$open_elements->[-1]->[1]}) {
3436     !!!parse-error;
3437     pop @$open_elements;
3438     }
3439    
3440     pop @$open_elements; # tr
3441     $insertion_mode = 'in table body';
3442     ## reprocess
3443     redo B;
3444     } elsif ($token->{tag_name} eq 'table') {
3445     ## NOTE: This is a code clone of "table in table"
3446     !!!parse-error;
3447    
3448     ## As if </table>
3449     ## have a table element in table scope
3450     my $i;
3451     INSCOPE: for (reverse 0..$#$open_elements) {
3452     my $node = $open_elements->[$_];
3453     if ($node->[1] eq 'table') {
3454     $i = $_;
3455     last INSCOPE;
3456     } elsif ({
3457     table => 1, html => 1,
3458     }->{$node->[1]}) {
3459     last INSCOPE;
3460     }
3461     } # INSCOPE
3462     unless (defined $i) {
3463     !!!parse-error;
3464     ## Ignore tokens </table><table>
3465     !!!next-token;
3466     redo B;
3467     }
3468    
3469     ## generate implied end tags
3470     if ({
3471     dd => 1, dt => 1, li => 1, p => 1,
3472     td => 1, th => 1, tr => 1,
3473     }->{$open_elements->[-1]->[1]}) {
3474     !!!back-token; # <table>
3475     $token = {type => 'end tag', tag_name => 'table'};
3476     !!!back-token;
3477     $token = {type => 'end tag',
3478     tag_name => $open_elements->[-1]->[1]}; # MUST
3479     redo B;
3480     }
3481    
3482     if ($open_elements->[-1]->[1] ne 'table') {
3483     !!!parse-error;
3484     }
3485    
3486     splice @$open_elements, $i;
3487    
3488     $reset_insertion_mode->();
3489    
3490     ## reprocess
3491     redo B;
3492     } else {
3493     #
3494     }
3495     } elsif ($token->{type} eq 'end tag') {
3496     if ($token->{tag_name} eq 'tr') {
3497     ## have an element in table scope
3498     my $i;
3499     INSCOPE: for (reverse 0..$#$open_elements) {
3500     my $node = $open_elements->[$_];
3501     if ($node->[1] eq $token->{tag_name}) {
3502     $i = $_;
3503     last INSCOPE;
3504     } elsif ({
3505     table => 1, html => 1,
3506     }->{$node->[1]}) {
3507     last INSCOPE;
3508     }
3509     } # INSCOPE
3510     unless (defined $i) {
3511     !!!parse-error;
3512     ## Ignore the token
3513     !!!next-token;
3514     redo B;
3515     }
3516    
3517     ## Clear back to table row context
3518     while (not {
3519     tr => 1, html => 1,
3520     }->{$open_elements->[-1]->[1]}) {
3521     !!!parse-error;
3522     pop @$open_elements;
3523     }
3524    
3525     pop @$open_elements; # tr
3526     $insertion_mode = 'in table body';
3527     !!!next-token;
3528     redo B;
3529     } elsif ($token->{tag_name} eq 'table') {
3530     ## As if </tr>
3531     ## have an element in table scope
3532     my $i;
3533     INSCOPE: for (reverse 0..$#$open_elements) {
3534     my $node = $open_elements->[$_];
3535     if ($node->[1] eq 'tr') {
3536     $i = $_;
3537     last INSCOPE;
3538     } elsif ({
3539     table => 1, html => 1,
3540     }->{$node->[1]}) {
3541     last INSCOPE;
3542     }
3543     } # INSCOPE
3544     unless (defined $i) {
3545     !!!parse-error;
3546     ## Ignore the token
3547     !!!next-token;
3548     redo B;
3549     }
3550    
3551     ## Clear back to table row context
3552     while (not {
3553     tr => 1, html => 1,
3554     }->{$open_elements->[-1]->[1]}) {
3555     !!!parse-error;
3556     pop @$open_elements;
3557     }
3558    
3559     pop @$open_elements; # tr
3560     $insertion_mode = 'in table body';
3561     ## reprocess
3562     redo B;
3563     } elsif ({
3564     tbody => 1, tfoot => 1, thead => 1,
3565     }->{$token->{tag_name}}) {
3566     ## have an element in table scope
3567     my $i;
3568     INSCOPE: for (reverse 0..$#$open_elements) {
3569     my $node = $open_elements->[$_];
3570     if ($node->[1] eq $token->{tag_name}) {
3571     $i = $_;
3572     last INSCOPE;
3573     } elsif ({
3574     table => 1, html => 1,
3575     }->{$node->[1]}) {
3576     last INSCOPE;
3577     }
3578     } # INSCOPE
3579     unless (defined $i) {
3580     !!!parse-error;
3581     ## Ignore the token
3582     !!!next-token;
3583     redo B;
3584     }
3585    
3586     ## As if </tr>
3587     ## have an element in table scope (this time looking for |tr|)
3588     $i = undef; # reset the index declared above instead of re-declaring |my $i| in the same scope
3589     INSCOPE: for (reverse 0..$#$open_elements) {
3590     my $node = $open_elements->[$_];
3591     if ($node->[1] eq 'tr') {
3592     $i = $_;
3593     last INSCOPE;
3594     } elsif ({
3595     table => 1, html => 1,
3596     }->{$node->[1]}) {
3597     last INSCOPE;
3598     }
3599     } # INSCOPE
3600     unless (defined $i) {
3601     !!!parse-error;
3602     ## Ignore the token
3603     !!!next-token;
3604     redo B;
3605     }
3606    
3607     ## Clear back to table row context
3608     while (not {
3609     tr => 1, html => 1,
3610     }->{$open_elements->[-1]->[1]}) {
3611     !!!parse-error;
3612     pop @$open_elements;
3613     }
3614    
3615     pop @$open_elements; # tr
3616     $insertion_mode = 'in table body';
3617     ## reprocess
3618     redo B;
3619     } elsif ({
3620     body => 1, caption => 1, col => 1,
3621     colgroup => 1, html => 1, td => 1, th => 1,
3622     }->{$token->{tag_name}}) {
3623     !!!parse-error;
3624     ## Ignore the token
3625     !!!next-token;
3626     redo B;
3627     } else {
3628     #
3629     }
3630     } else {
3631     #
3632     }
3633    
3634     ## As if in table
3635     ## NOTE: This is a code clone of "misc in table".
3636     !!!parse-error;
3637     $in_body->(sub {
3638     my $child = shift;
3639     if ({
3640     table => 1, tbody => 1, tfoot => 1,
3641     thead => 1, tr => 1,
3642     }->{$open_elements->[-1]->[1]}) {
3643     # MUST
3644     my $foster_parent_element;
3645     my $next_sibling;
3646     OE: for (reverse 0..$#$open_elements) {
3647     if ($open_elements->[$_]->[1] eq 'table') {
3648     my $parent = $open_elements->[$_]->[0]->parent_node;
3649     if (defined $parent and $parent->node_type == 1) {
3650     $foster_parent_element = $parent;
3651     $next_sibling = $open_elements->[$_]->[0];
3652     } else {
3653     $foster_parent_element
3654     = $open_elements->[$_ - 1]->[0];
3655     }
3656     last OE;
3657     }
3658     } # OE
3659     $foster_parent_element = $open_elements->[0]->[0]
3660     unless defined $foster_parent_element;
3661     $foster_parent_element->insert_before
3662     ($child, $next_sibling);
3663     } else {
3664     $open_elements->[-1]->[0]->append_child ($child);
3665     }
3666     });
3667     redo B;
3668     } elsif ($insertion_mode eq 'in cell') {
3669     if ($token->{type} eq 'character') {
3670     ## NOTE: This is a code clone of "character in body".
3671     $reconstruct_active_formatting_elements->();
3672    
3673     $open_elements->[-1]->[0]->manakai_append_text ($token->{data});
3674    
3675     !!!next-token;
3676     redo B;
3677     } elsif ($token->{type} eq 'comment') {
3678     ## NOTE: This is a code clone of "comment in body".
3679     my $comment = $self->{document}->create_comment ($token->{data});
3680     $open_elements->[-1]->[0]->append_child ($comment);
3681     !!!next-token;
3682     redo B;
3683     } elsif ($token->{type} eq 'start tag') {
3684     if ({
3685     caption => 1, col => 1, colgroup => 1,
3686     tbody => 1, td => 1, tfoot => 1, th => 1,
3687     thead => 1, tr => 1,
3688     }->{$token->{tag_name}}) {
3689     ## have an element in table scope
3690     my $tn;
3691     INSCOPE: for (reverse 0..$#$open_elements) {
3692     my $node = $open_elements->[$_];
3693     if ($node->[1] eq 'td' or $node->[1] eq 'th') {
3694     $tn = $node->[1];
3695     last INSCOPE;
3696     } elsif ({
3697     table => 1, html => 1,
3698     }->{$node->[1]}) {
3699     last INSCOPE;
3700     }
3701     } # INSCOPE
3702     unless (defined $tn) {
3703     !!!parse-error;
3704     ## Ignore the token
3705     !!!next-token;
3706     redo B;
3707     }
3708    
3709     ## Close the cell
3710     !!!back-token; # <?>
3711     $token = {type => 'end tag', tag_name => $tn};
3712     redo B;
3713     } else {
3714     #
3715     }
3716     } elsif ($token->{type} eq 'end tag') {
3717     if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') { # was {type}, which is always 'end tag' here
3718     ## have an element in table scope
3719     my $i; # stack index of the matching cell, defined only if it is in table scope
3720     INSCOPE: for (reverse 0..$#$open_elements) {
3721     my $node = $open_elements->[$_];
3722     if ($node->[1] eq $token->{tag_name}) {
3723     $i = $_;
3724     last INSCOPE;
3725     } elsif ({
3726     table => 1, html => 1,
3727     }->{$node->[1]}) {
3728     last INSCOPE;
3729     }
3730     } # INSCOPE
3731     unless (defined $i) {
3732     !!!parse-error;
3733     ## Ignore the token
3734     !!!next-token;
3735     redo B;
3736     }
3737    
3738     ## generate implied end tags (except for the kind of cell being closed)
3739     if ({
3740     dd => 1, dt => 1, li => 1, p => 1,
3741     td => ($token->{tag_name} eq 'th'),
3742     th => ($token->{tag_name} eq 'td'),
3743     tr => 1,
3744     }->{$open_elements->[-1]->[1]}) {
3745     !!!back-token; # keep the cell end tag for after the implied one
3746     $token = {type => 'end tag',
3747     tag_name => $open_elements->[-1]->[1]}; # MUST
3748     redo B;
3749     }
3750    
3751     if ($open_elements->[-1]->[1] ne $token->{tag_name}) {
3752     !!!parse-error;
3753     }
3754    
3755     splice @$open_elements, $i; # pop up to and including the cell
3756    
3757     $clear_up_to_marker->();
3758    
3759     $insertion_mode = 'in row';
3760    
3761     !!!next-token;
3762     redo B;
3763     } elsif ({
3764     body => 1, caption => 1, col => 1,
3765     colgroup => 1, html => 1,
3766     }->{$token->{tag_name}}) {
3767     !!!parse-error;
3768     ## Ignore the token
3769     !!!next-token;
3770     redo B;
3771     } elsif ({
3772     table => 1, tbody => 1, tfoot => 1,
3773     thead => 1, tr => 1,
3774     }->{$token->{tag_name}}) {
3775     ## have an element in table scope; also remember which cell (|td|/|th|) is open
3776     my $i; # stack index of the element named by the end tag, if in table scope
3777     my $tn; # tag name of the open cell that has to be closed first
3778     INSCOPE: for (reverse 0..$#$open_elements) {
3779     my $node = $open_elements->[$_];
3780     if ($node->[1] eq $token->{tag_name}) {
3781     $i = $_;
3782     ## NOTE: |$tn| is left alone here; overwriting it made the synthesized end tag re-enter this branch.
3783     last INSCOPE;
3784     } elsif ($node->[1] eq 'td' or $node->[1] eq 'th') {
3785     $tn = $node->[1];
3786     ## NOTE: There is exactly one |td| or |th| element
3787     ## in scope in the stack of open elements by definition.
3788     } elsif ({
3789     table => 1, html => 1,
3790     }->{$node->[1]}) {
3791     last INSCOPE;
3792     }
3793     } # INSCOPE
3794     unless (defined $i) {
3795     !!!parse-error;
3796     ## Ignore the token
3797     !!!next-token;
3798     redo B;
3799     }
3800    
3801     ## Close the cell
3802     !!!back-token; # </?>
3803     $token = {type => 'end tag', tag_name => $tn};
3804     redo B;
3805     } else {
3806     #
3807     }
3808     } else {
3809     #
3810     }
3811    
3812     $in_body->(sub {
3813     $open_elements->[-1]->[0]->append_child (shift);
3814     });
3815     redo B;
3816     } elsif ($insertion_mode eq 'in select') {
3817     if ($token->{type} eq 'character') {
3818     $open_elements->[-1]->[0]->manakai_append_text ($token->{data});
3819     !!!next-token;
3820     redo B;
3821     } elsif ($token->{type} eq 'comment') {
3822     my $comment = $self->{document}->create_comment ($token->{data});
3823     $open_elements->[-1]->[0]->append_child ($comment);
3824     !!!next-token;
3825     redo B;
3826     } elsif ($token->{type} eq 'start tag') {
3827     if ($token->{tag_name} eq 'option') {
3828     if ($open_elements->[-1]->[1] eq 'option') {
3829     ## As if </option>
3830     pop @$open_elements;
3831     }
3832    
3833     !!!insert-element ($token->{tag_name}, $token->{attributes});
3834     !!!next-token;
3835     redo B;
3836     } elsif ($token->{tag_name} eq 'optgroup') {
3837     if ($open_elements->[-1]->[1] eq 'option') {
3838     ## As if </option>
3839     pop @$open_elements;
3840     }
3841    
3842     if ($open_elements->[-1]->[1] eq 'optgroup') {
3843     ## As if </optgroup>
3844     pop @$open_elements;
3845     }
3846    
3847     !!!insert-element ($token->{tag_name}, $token->{attributes});
3848     !!!next-token;
3849     redo B;
3850     } elsif ($token->{tag_name} eq 'select') {
3851     !!!parse-error;
3852     ## As if </select> instead
3853     ## have an element in table scope
3854     my $i;
3855     INSCOPE: for (reverse 0..$#$open_elements) {
3856     my $node = $open_elements->[$_];
3857     if ($node->[1] eq $token->{tag_name}) {
3858     $i = $_;
3859     last INSCOPE;
3860     } elsif ({
3861     table => 1, html => 1,
3862     }->{$node->[1]}) {
3863     last INSCOPE;
3864     }
3865     } # INSCOPE
3866     unless (defined $i) {
3867     !!!parse-error;
3868     ## Ignore the token
3869     !!!next-token;
3870     redo B;
3871     }
3872    
3873     splice @$open_elements, $i;
3874    
3875     $reset_insertion_mode->();
3876    
3877     !!!next-token;
3878     redo B;
3879     } else {
3880     #
3881     }
3882     } elsif ($token->{type} eq 'end tag') {
3883     if ($token->{tag_name} eq 'optgroup') {
3884     if ($open_elements->[-1]->[1] eq 'option' and
3885     $open_elements->[-2]->[1] eq 'optgroup') {
3886     ## As if </option>
3887     splice @$open_elements, -2;
3888     } elsif ($open_elements->[-1]->[1] eq 'optgroup') {
3889     pop @$open_elements;
3890     } else {
3891     !!!parse-error;
3892     ## Ignore the token
3893     }
3894     !!!next-token;
3895     redo B;
3896     } elsif ($token->{tag_name} eq 'option') {
3897     if ($open_elements->[-1]->[1] eq 'option') {
3898     pop @$open_elements;
3899     } else {
3900     !!!parse-error;
3901     ## Ignore the token
3902     }
3903     !!!next-token;
3904     redo B;
3905     } elsif ($token->{tag_name} eq 'select') {
3906     ## have an element in table scope
3907     my $i;
3908     INSCOPE: for (reverse 0..$#$open_elements) {
3909     my $node = $open_elements->[$_];
3910     if ($node->[1] eq $token->{tag_name}) {
3911     $i = $_;
3912     last INSCOPE;
3913     } elsif ({
3914     table => 1, html => 1,
3915     }->{$node->[1]}) {
3916     last INSCOPE;
3917     }
3918     } # INSCOPE
3919     unless (defined $i) {
3920     !!!parse-error;
3921     ## Ignore the token
3922     !!!next-token;
3923     redo B;
3924     }
3925    
3926     splice @$open_elements, $i;
3927    
3928     $reset_insertion_mode->();
3929    
3930     !!!next-token;
3931     redo B;
3932     } elsif ({
3933     caption => 1, table => 1, tbody => 1,
3934     tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
3935     }->{$token->{tag_name}}) {
3936     !!!parse-error;
3937    
3938     ## have an element in table scope
3939     my $i;
3940     INSCOPE: for (reverse 0..$#$open_elements) {
3941     my $node = $open_elements->[$_];
3942     if ($node->[1] eq $token->{tag_name}) {
3943     $i = $_;
3944     last INSCOPE;
3945     } elsif ({
3946     table => 1, html => 1,
3947     }->{$node->[1]}) {
3948     last INSCOPE;
3949     }
3950     } # INSCOPE
3951     unless (defined $i) {
3952     ## Ignore the token
3953     !!!next-token;
3954     redo B;
3955     }
3956    
3957     ## As if </select>
3958     ## have an element in table scope
3959     undef $i;
3960     INSCOPE: for (reverse 0..$#$open_elements) {
3961     my $node = $open_elements->[$_];
3962     if ($node->[1] eq 'select') {
3963     $i = $_;
3964     last INSCOPE;
3965     } elsif ({
3966     table => 1, html => 1,
3967     }->{$node->[1]}) {
3968     last INSCOPE;
3969     }
3970     } # INSCOPE
3971     unless (defined $i) {
3972     !!!parse-error;
3973     ## Ignore the </select> token
3974     !!!next-token; ## TODO: ok?
3975     redo B;
3976     }
3977    
3978     splice @$open_elements, $i;
3979    
3980     $reset_insertion_mode->();
3981    
3982     ## reprocess
3983     redo B;
3984     } else {
3985     #
3986     }
3987     } else {
3988     #
3989     }
3990    
3991     !!!parse-error;
3992     ## Ignore the token
3993     redo B;
3994     } elsif ($insertion_mode eq 'after body') {
3995     if ($token->{type} eq 'character') {
3996     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3997     ## As if in body
3998     $reconstruct_active_formatting_elements->();
3999    
4000     $open_elements->[-1]->[0]->manakai_append_text ($token->{data});
4001    
4002     unless (length $token->{data}) {
4003     !!!next-token;
4004     redo B;
4005     }
4006     }
4007    
4008     #
4009     } elsif ($token->{type} eq 'comment') {
4010     my $comment = $self->{document}->create_comment ($token->{data});
4011     $open_elements->[0]->[0]->append_child ($comment);
4012     !!!next-token;
4013     redo B;
4014     } elsif ($token->{type} eq 'end tag') {
4015     if ($token->{type} eq 'html') {
4016     ## TODO: if inner_html, parse-error, ignore the token; otherwise,
4017    
4018     $phase = 'trailing end';
4019     !!!next-token;
4020     redo B;
4021     } else {
4022     #
4023     }
4024     } else {
4025     #
4026     }
4027    
4028     !!!parse-error;
4029     $insertion_mode = 'in body';
4030     ## reprocess
4031     redo B;
4032     } elsif ($insertion_mode eq 'in frameset') {
4033     if ($token->{type} eq 'character') {
4034     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4035     $open_elements->[-1]->[0]->manakai_append_text ($token->{data});
4036    
4037     unless (length $token->{data}) {
4038     !!!next-token;
4039     redo B;
4040     }
4041     }
4042    
4043     #
4044     } elsif ($token->{type} eq 'comment') {
4045     my $comment = $self->{document}->create_comment ($token->{data});
4046     $open_elements->[-1]->[0]->append_child ($comment);
4047     !!!next-token;
4048     redo B;
4049     } elsif ($token->{type} eq 'start tag') {
4050     if ($token->{tag_name} eq 'frameset') {
4051     !!!insert-element ($token->{tag_name}, $token->{attributes});
4052     !!!next-token;
4053     redo B;
4054     } elsif ($token->{tag_name} eq 'frame') {
4055     !!!insert-element ($token->{tag_name}, $token->{attributes});
4056     pop @$open_elements;
4057     !!!next-token;
4058     redo B;
4059     } elsif ($token->{tag_name} eq 'noframes') {
4060     $in_body->(sub {
4061     $open_elements->[-1]->[0]->append_child (shift);
4062     });
4063     redo B;
4064     } else {
4065     #
4066     }
4067     } elsif ($token->{type} eq 'end tag') {
4068     if ($token->{tag_name} eq 'frameset') {
4069     if ($open_elements->[-1]->[1] eq 'html' and
4070     @$open_elements == 1) {
4071     !!!parse-error;
4072     ## Ignore the token
4073     !!!next-token;
4074     } else {
4075     pop @$open_elements;
4076     !!!next-token;
4077     }
4078    
4079     ## if not inner_html and
4080     if ($open_elements->[-1]->[1] ne 'frameset') {
4081     $insertion_mode = 'after frameset';
4082     }
4083     redo B;
4084     } else {
4085     #
4086     }
4087     } else {
4088     #
4089     }
4090    
4091     !!!parse-error;
4092     ## Ignore the token
4093     !!!next-token;
4094     redo B;
4095     } elsif ($insertion_mode eq 'after frameset') {
4096     if ($token->{type} eq 'character') {
4097     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4098     $open_elements->[-1]->[0]->manakai_append_text ($token->{data});
4099    
4100     unless (length $token->{data}) {
4101     !!!next-token;
4102     redo B;
4103     }
4104     }
4105    
4106     #
4107     } elsif ($token->{type} eq 'comment') {
4108     my $comment = $self->{document}->create_comment ($token->{data});
4109     $open_elements->[-1]->[0]->append_child ($comment);
4110     !!!next-token;
4111     redo B;
4112     } elsif ($token->{type} eq 'start tag') {
4113     if ($token->{tag_name} eq 'noframes') {
4114     $in_body->(sub {
4115     $open_elements->[-1]->[0]->append_child (shift);
4116     });
4117     redo B;
4118     } else {
4119     #
4120     }
4121     } elsif ($token->{type} eq 'end tag') {
4122     if ($token->{tag_name} eq 'html') {
4123     $phase = 'trailing end';
4124     !!!next-token;
4125     redo B;
4126     } else {
4127     #
4128     }
4129     } else {
4130     #
4131     }
4132    
4133     !!!parse-error;
4134     ## Ignore the token
4135     !!!next-token;
4136     redo B;
4137    
4138     ## ISSUE: An issue in spec there
4139     } else {
4140     die "$0: $insertion_mode: Unknown insertion mode";
4141     }
4142     }
4143     } elsif ($phase eq 'trailing end') {
4144     ## states in the main stage is preserved yet # MUST
4145    
4146     if ($token->{type} eq 'DOCTYPE') {
4147     !!!parse-error;
4148     ## Ignore the token
4149     !!!next-token;
4150     redo B;
4151     } elsif ($token->{type} eq 'comment') {
4152     my $comment = $self->{document}->create_comment ($token->{data});
4153     $self->{document}->append_child ($comment);
4154     !!!next-token;
4155     redo B;
4156     } elsif ($token->{type} eq 'character') {
4157     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4158     ## As if in the main phase.
4159     ## NOTE: The insertion mode in the main phase
4160     ## just before the phase has been changed to the trailing
4161     ## end phase is either "after body" or "after frameset".
4162     $reconstruct_active_formatting_elements->()
4163     if $phase eq 'main';
4164    
4165     $open_elements->[-1]->[0]->manakai_append_text ($token->{data});
4166    
4167     unless (length $token->{data}) {
4168     !!!next-token;
4169     redo B;
4170     }
4171     }
4172    
4173     !!!parse-error;
4174     $phase = 'main';
4175     ## reprocess
4176     redo B;
4177     } elsif ($token->{type} eq 'start tag' or
4178     $token->{type} eq 'end tag') {
4179     !!!parse-error;
4180     $phase = 'main';
4181     ## reprocess
4182     redo B;
4183     } elsif ($token->{type} eq 'end-of-file') {
4184     ## Stop parsing
4185     last B;
4186     } else {
4187     die "$0: $token->{type}: Unknown token";
4188     }
4189     }
4190     } # B
4191    
4192     ## Stop parsing # MUST
4193    
4194     ## TODO: script stuffs
4195     } # _construct_tree
4196    
## Serializes the child nodes of |$node| as an HTML fragment.
##
## Arguments:
##   $class    - Invocant; not used by the body.
##   $node     - The node whose child nodes are serialized.  |$node|
##               itself is never emitted; it and its ancestors are only
##               inspected to decide whether text starts out unescaped.
##   $on_error - Code reference.  It is invoked with each node whose
##               |node_type| is not one of those handled below
##               (1, 3, 4, 5, 8, 10).
##
## Returns a reference to the serialized string.
sub inner_html ($$$) {
  my ($class, $node, $on_error) = @_;

  ## Elements whose text descendants are emitted without escaping.
  my $cdata_element = {
    style => 1, script => 1, xmp => 1, iframe => 1,
    noembed => 1, noframes => 1, noscript => 1,
  };

  ## Elements emitted as a start tag only: their children are not
  ## visited and no end tag is written.
  my $no_end_tag = {
    area => 1, base => 1, basefont => 1, bgsound => 1,
    br => 1, col => 1, embed => 1, frame => 1, hr => 1,
    img => 1, input => 1, link => 1, meta => 1, param => 1,
    spacer => 1, wbr => 1,
  };

  ## Escapes text for use in element content or in a double-quoted
  ## attribute value.  |&| has to be replaced first so that the
  ## ampersands introduced by the other replacements stay intact.
  my $escape = sub {
    my $value = shift;
    $value =~ s/&/&amp;/g;
    $value =~ s/</&lt;/g;
    $value =~ s/>/&gt;/g;
    $value =~ s/"/&quot;/g;
    return $value;
  };

  ## Step 1
  my $s = '';

  ## Text is emitted unescaped from the start if |$node| or any of its
  ## ancestors is one of the elements listed in |$cdata_element|.
  my $in_cdata;
  my $parent = $node;
  while (defined $parent) {
    if ($parent->node_type == 1) {
      ## |namespace_uri| is compared only when defined, so that an
      ## element without a namespace is simply not matched.
      my $ns = $parent->namespace_uri;
      if (defined $ns and $ns eq 'http://www.w3.org/1999/xhtml' and
          $cdata_element->{$parent->local_name}) { ## TODO: case thingy
        $in_cdata = 1;
      }
    }
    $parent = $parent->parent_node;
  }

  ## Step 2
  ## |@node| is a work list processed from the front.  Besides nodes it
  ## holds plain strings: a pending end tag to append to the result, or
  ## the marker 'cdata-out', which switches escaping back on.
  my @node = @{$node->child_nodes};
  C: while (@node) {
    my $child = shift @node;
    unless (ref $child) {
      if ($child eq 'cdata-out') {
        $in_cdata = 0;
      } else {
        $s .= $child; # end tag
      }
      next C;
    }

    my $nt = $child->node_type;
    if ($nt == 1) { # Element
      my $tag_name = lc $child->tag_name; ## ISSUE: Definition of "lowercase"
      $s .= '<' . $tag_name;

      ## ISSUE: Non-html elements

      my @attrs = @{$child->attributes}; # sort order MUST be stable
      for my $attr (@attrs) { # order is implementation dependent
        my $attr_name = lc $attr->name; ## ISSUE: Definition of "lowercase"
        $s .= ' ' . $attr_name . '="' . $escape->($attr->value) . '"';
      }
      $s .= '>';

      next C if $no_end_tag->{$tag_name};

      ## Only the outermost such element queues the marker, so escaping
      ## stays off until its own end tag has been written.
      if (not $in_cdata and $cdata_element->{$tag_name}) {
        unshift @node, 'cdata-out';
        $in_cdata = 1;
      }

      ## Children first, then the end tag (and then the marker queued
      ## above, if any).
      unshift @node, @{$child->child_nodes}, '</' . $tag_name . '>';
    } elsif ($nt == 3 or $nt == 4) { # Text or CDATA section
      $s .= $in_cdata ? $child->data : $escape->($child->data);
    } elsif ($nt == 8) { # Comment
      $s .= '<!--' . $child->data . '-->';
    } elsif ($nt == 10) { # Document type
      $s .= '<!DOCTYPE ' . $child->name . '>';
    } elsif ($nt == 5) { # entrefs
      ## The children take the place of the entity reference.  They are
      ## put at the FRONT of the work list: appending them at the end
      ## would emit them after all following siblings and after the end
      ## tags of the enclosing elements.
      unshift @node, @{$child->child_nodes};
    } else {
      $on_error->($child);
    }
  } # C

  ## Step 3
  return \$s;
} # inner_html
4292    
4293 wakaba 1.1 1;
4294 wakaba 1.2 # $Date: 2007/04/28 14:29:01 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24