/[suikacvs]/markup/html/whatpm/What/HTML.pm.src
Suika

Contents of /markup/html/whatpm/What/HTML.pm.src

Parent Directory | Revision Log


Revision 1.4 - (hide annotations) (download) (as text)
Mon Apr 30 11:45:24 2007 UTC (17 years, 7 months ago) by wakaba
Branch: MAIN
Changes since 1.3: +30 -29 lines
File MIME type: application/x-wais-source
++ whatpm/What/ChangeLog	30 Apr 2007 11:40:16 -0000
	* HTML.pm.src: The tokenizer's handling of hexadecimal
	numeric entities has been rewritten.

2007-04-30  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/t/ChangeLog	30 Apr 2007 11:45:21 -0000
	* HTML-tokenizer.t: |Data::Dumper::Useqq| is replaced
	by customized ones so that utf8 vs byte string handling
	causes no error.  Set |$JSON::UTF8| flag on so
	that the |JSON| module turns the utf8 flag on.
	Support for the |contentModelFlags| and |lastStartTag|
	test props.  Show the input as well as description
	when a test fails.

2007-04-30  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package What::HTML;
2     use strict;
3 wakaba 1.4 our $VERSION=do{my @r=(q$Revision: 1.3 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.1
5     ## This is a very, very early version of an HTML parser.
6    
## Names of the start tags on which a "/" immediately before ">" is
## accepted without a parse error (the "permitted slash").  Keys are
## lowercase tag names; every value is 1.
my $permitted_slash_tag_name = {
  map { ($_ => 1) } qw(
    base link meta hr br img embed param area col input
  )
};
20    
## Set of element names belonging to the "special" category.  Keys are
## lowercase element names; every value is 1.
## NOTE(review): no consumer of this table is visible in this part of
## the file -- presumably the tree construction code; confirm.
my $special_category = {
  map { ($_ => 1) } qw(
    address area base basefont bgsound
    blockquote body br center col colgroup
    dd dir div dl dt embed fieldset
    form frame frameset h1 h2 h3
    h4 h5 h6 head hr iframe image
    img input isindex li link listing
    menu meta noembed noframes noscript
    ol optgroup option p param plaintext
    pre script select spacer style tbody
    textarea tfoot thead title tr ul wbr
  )
};
## Set of element names belonging to the "scoping" category.  Keys are
## lowercase element names; every value is 1.
my $scoping_category = {
  map { ($_ => 1) } qw(
    button caption html marquee object table td th
  )
};
## Set of element names belonging to the "formatting" category.  Keys
## are lowercase element names; every value is 1.
## The key for the |strike| element was previously misspelled
## "strile", so |strike| was never recognized as a formatting element.
my $formatting_category = {
  a => 1, b => 1, big => 1, em => 1, font => 1, i => 1, nobr => 1,
  s => 1, small => 1, strike => 1, strong => 1, tt => 1, u => 1,
};
41     # $phrasing_category: all other elements
42    
## Constructor.  Returns a new object blessed into the invocant class
## with two default callbacks installed:
##   {set_next_input_character} - sets {next_input_character} to -1,
##       the value the tokenizer treats as end of input;
##   {parse_error} - does nothing.
## NOTE(review): callers presumably replace these callbacks before
## parsing real input -- confirm against the code driving the tokenizer.
sub new ($) {
  my $class = shift;
  my $self = bless {}, $class;

  ## The callback is stored inside the object itself, so it must not
  ## hold a strong reference to $self: such a cycle would prevent the
  ## object from ever being freed.
  require Scalar::Util;
  my $weak_self = $self;
  Scalar::Util::weaken ($weak_self);

  $self->{set_next_input_character} = sub {
    $weak_self->{next_input_character} = -1;
  };
  $self->{parse_error} = sub {
    #
  };
  return $self;
} # new
54    
55     ## Implementations MUST act as if they used the state machine in the spec
56    
## Resets the tokenizer: "data" state, PCDATA content model flag, no
## token or attribute in progress, and empty {char} and {token} arrays.
## The |!!!next-input-character| macro is invoked after {char} is reset
## and before {token} is reset; that order is kept as is.
sub _initialize_tokenizer ($) {
  my $self = shift;

  $self->{state} = 'data'; # MUST
  $self->{content_model_flag} = 'PCDATA'; # be

  ## current_token: start tag, end tag, comment, or DOCTYPE
  $self->{$_} = undef for qw(
    current_token
    current_attribute
    last_emitted_start_tag_name
    last_attribute_value_state
  );

  $self->{char} = [];
  # $self->{next_input_character}
  !!!next-input-character;
  $self->{token} = [];
} # _initialize_tokenizer
70    
71     ## A token has:
72     ## ->{type} eq 'DOCTYPE', 'start tag', 'end tag', 'comment',
73     ## 'character', or 'end-of-file'
74     ## ->{name} (DOCTYPE, start tag (tagname), end tag (tagname))
75     ## ISSUE: the spec needs s/tagname/tag name/
76     ## ->{error} == 1 or 0 (DOCTYPE)
77     ## ->{attributes} isa HASH (start tag, end tag)
78     ## ->{data} (comment, character)
79    
80     ## Macros
81     ## Macros MUST be preceded by three EXCLAMATION MARKs.
82     ## emit ($token)
83     ## Emits the specified token.
84    
85     ## Emitted token MUST immediately be handled by the tree construction state.
86    
87     ## Before each step, UA MAY check to see if either one of the scripts in
88     ## "list of scripts that will execute as soon as possible" or the first
89     ## script in the "list of scripts that will execute asynchronously",
90     ## has completed loading. If one has, then it MUST be executed
91     ## and removed from the list.
92    
93     sub _get_next_token ($) {
94     my $self = shift;
95     if (@{$self->{token}}) {
96     return shift @{$self->{token}};
97     }
98    
99     A: {
100     if ($self->{state} eq 'data') {
101     if ($self->{next_input_character} == 0x0026) { # &
102     if ($self->{content_model_flag} eq 'PCDATA' or
103     $self->{content_model_flag} eq 'RCDATA') {
104     $self->{state} = 'entity data';
105     !!!next-input-character;
106     redo A;
107     } else {
108     #
109     }
110     } elsif ($self->{next_input_character} == 0x003C) { # <
111     if ($self->{content_model_flag} ne 'PLAINTEXT') {
112     $self->{state} = 'tag open';
113     !!!next-input-character;
114     redo A;
115     } else {
116     #
117     }
118     } elsif ($self->{next_input_character} == -1) {
119     !!!emit ({type => 'end-of-file'});
120     last A; ## TODO: ok?
121     }
122     # Anything else
123     my $token = {type => 'character',
124     data => chr $self->{next_input_character}};
125     ## Stay in the data state
126     !!!next-input-character;
127    
128     !!!emit ($token);
129    
130     redo A;
131     } elsif ($self->{state} eq 'entity data') {
132     ## (cannot happen in CDATA state)
133    
134     my $token = $self->_tokenize_attempt_to_consume_an_entity;
135    
136     $self->{state} = 'data';
137     # next-input-character is already done
138    
139     unless (defined $token) {
140     !!!emit ({type => 'character', data => '&'});
141     } else {
142     !!!emit ($token);
143     }
144    
145     redo A;
146     } elsif ($self->{state} eq 'tag open') {
147     if ($self->{content_model_flag} eq 'RCDATA' or
148     $self->{content_model_flag} eq 'CDATA') {
149     if ($self->{next_input_character} == 0x002F) { # /
150     !!!next-input-character;
151     $self->{state} = 'close tag open';
152     redo A;
153     } else {
154     ## reconsume
155     $self->{state} = 'data';
156    
157     !!!emit (type => 'character', data => {'/'});
158    
159     redo A;
160     }
161     } elsif ($self->{content_model_flag} eq 'PCDATA') {
162     if ($self->{next_input_character} == 0x0021) { # !
163     $self->{state} = 'markup declaration open';
164     !!!next-input-character;
165     redo A;
166     } elsif ($self->{next_input_character} == 0x002F) { # /
167     $self->{state} = 'close tag open';
168     !!!next-input-character;
169     redo A;
170     } elsif (0x0041 <= $self->{next_input_character} and
171     $self->{next_input_character} <= 0x005A) { # A..Z
172     $self->{current_token}
173     = {type => 'start tag',
174     tag_name => chr ($self->{next_input_character} + 0x0020)};
175     $self->{state} = 'tag name';
176     !!!next-input-character;
177     redo A;
178     } elsif (0x0061 <= $self->{next_input_character} and
179     $self->{next_input_character} <= 0x007A) { # a..z
180     $self->{current_token} = {type => 'start tag',
181     tag_name => chr ($self->{next_input_character})};
182     $self->{state} = 'tag name';
183     !!!next-input-character;
184     redo A;
185     } elsif ($self->{next_input_character} == 0x003E) { # >
186     !!!parse-error;
187     $self->{state} = 'data';
188     !!!next-input-character;
189    
190 wakaba 1.3 !!!emit ({type => 'character', data => '<>'});
191 wakaba 1.1
192     redo A;
193     } elsif ($self->{next_input_character} == 0x003F) { # ?
194     !!!parse-error;
195     $self->{state} = 'bogus comment';
196     ## $self->{next_input_character} is intentionally left as is
197     redo A;
198     } else {
199     !!!parse-error;
200     $self->{state} = 'data';
201     ## reconsume
202    
203     !!!emit ({type => 'character', data => '<'});
204    
205     redo A;
206     }
207     } else {
208     die "$0: $self->{content_model_flag}: Unknown content model flag";
209     }
210     } elsif ($self->{state} eq 'close tag open') {
211     if ($self->{content_model_flag} eq 'RCDATA' or
212     $self->{content_model_flag} eq 'CDATA') {
213     my @next_char;
214     TAGNAME: for (my $i = 0; $i < length $self->{last_emitted_start_tag_name}; $i++) {
215     push @next_char, $self->{next_input_character};
216     my $c = ord substr ($self->{last_emitted_start_tag_name}, $i, 1);
217     my $C = 0x0061 <= $c && $c <= 0x007A ? $c - 0x0020 : $c;
218     if ($self->{next_input_character} == $c or $self->{next_input_character} == $C) {
219     !!!next-input-character;
220     next TAGNAME;
221     } else {
222     !!!parse-error;
223     $self->{next_input_character} = shift @next_char; # reconsume
224     !!!back-next-input-character (@next_char);
225     $self->{state} = 'data';
226    
227     !!!emit ({type => 'character', data => '</'});
228    
229     redo A;
230     }
231     }
232 wakaba 1.2 push @next_char, $self->{next_input_character};
233 wakaba 1.1
234 wakaba 1.2 unless ($self->{next_input_character} == 0x0009 or # HT
235     $self->{next_input_character} == 0x000A or # LF
236     $self->{next_input_character} == 0x000B or # VT
237     $self->{next_input_character} == 0x000C or # FF
238     $self->{next_input_character} == 0x0020 or # SP
239     $self->{next_input_character} == 0x003E or # >
240     $self->{next_input_character} == 0x002F or # /
241     $self->{next_input_character} == 0x003C or # <
242 wakaba 1.1 $self->{next_input_character} == -1) {
243     !!!parse-error;
244     $self->{next_input_character} = shift @next_char; # reconsume
245     !!!back-next-input-character (@next_char);
246     $self->{state} = 'data';
247    
248     !!!emit ({type => 'character', data => '</'});
249    
250     redo A;
251     } else {
252     $self->{next_input_character} = shift @next_char;
253     !!!back-next-input-character (@next_char);
254     # and consume...
255     }
256     }
257    
258     if (0x0041 <= $self->{next_input_character} and
259     $self->{next_input_character} <= 0x005A) { # A..Z
260     $self->{current_token} = {type => 'end tag',
261     tag_name => chr ($self->{next_input_character} + 0x0020)};
262     $self->{state} = 'tag name';
263     !!!next-input-character;
264     redo A;
265     } elsif (0x0061 <= $self->{next_input_character} and
266     $self->{next_input_character} <= 0x007A) { # a..z
267     $self->{current_token} = {type => 'end tag',
268     tag_name => chr ($self->{next_input_character})};
269     $self->{state} = 'tag name';
270     !!!next-input-character;
271     redo A;
272     } elsif ($self->{next_input_character} == 0x003E) { # >
273     !!!parse-error;
274     $self->{state} = 'data';
275     !!!next-input-character;
276     redo A;
277     } elsif ($self->{next_input_character} == -1) {
278     !!!parse-error;
279     $self->{state} = 'data';
280     # reconsume
281    
282     !!!emit ({type => 'character', data => '</'});
283    
284     redo A;
285     } else {
286     !!!parse-error;
287     $self->{state} = 'bogus comment';
288     ## $self->{next_input_character} is intentionally left as is
289     redo A;
290     }
291     } elsif ($self->{state} eq 'tag name') {
292     if ($self->{next_input_character} == 0x0009 or # HT
293     $self->{next_input_character} == 0x000A or # LF
294     $self->{next_input_character} == 0x000B or # VT
295     $self->{next_input_character} == 0x000C or # FF
296     $self->{next_input_character} == 0x0020) { # SP
297     $self->{state} = 'before attribute name';
298     !!!next-input-character;
299     redo A;
300     } elsif ($self->{next_input_character} == 0x003E) { # >
301     if ($self->{current_token}->{type} eq 'start tag') {
302     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
303     } elsif ($self->{current_token}->{type} eq 'end tag') {
304     $self->{content_model_flag} = 'PCDATA'; # MUST
305 wakaba 1.2 if ($self->{current_token}->{attributes}) {
306 wakaba 1.1 !!!parse-error;
307     }
308     } else {
309     die "$0: $self->{current_token}->{type}: Unknown token type";
310     }
311     $self->{state} = 'data';
312     !!!next-input-character;
313    
314     !!!emit ($self->{current_token}); # start tag or end tag
315     undef $self->{current_token};
316    
317     redo A;
318     } elsif (0x0041 <= $self->{next_input_character} and
319     $self->{next_input_character} <= 0x005A) { # A..Z
320     $self->{current_token}->{tag_name} .= chr ($self->{next_input_character} + 0x0020);
321     # start tag or end tag
322     ## Stay in this state
323     !!!next-input-character;
324     redo A;
325     } elsif ($self->{next_input_character} == 0x003C or # <
326     $self->{next_input_character} == -1) {
327     !!!parse-error;
328     if ($self->{current_token}->{type} eq 'start tag') {
329     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
330     } elsif ($self->{current_token}->{type} eq 'end tag') {
331     $self->{content_model_flag} = 'PCDATA'; # MUST
332 wakaba 1.2 if ($self->{current_token}->{attributes}) {
333 wakaba 1.1 !!!parse-error;
334     }
335     } else {
336     die "$0: $self->{current_token}->{type}: Unknown token type";
337     }
338     $self->{state} = 'data';
339     # reconsume
340    
341     !!!emit ($self->{current_token}); # start tag or end tag
342     undef $self->{current_token};
343    
344     redo A;
345     } elsif ($self->{next_input_character} == 0x002F) { # /
346     !!!next-input-character;
347     if ($self->{next_input_character} == 0x003E and # >
348     $self->{current_token}->{type} eq 'start tag' and
349     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
350     # permitted slash
351     #
352     } else {
353     !!!parse-error;
354     }
355     $self->{state} = 'before attribute name';
356     # next-input-character is already done
357     redo A;
358     } else {
359     $self->{current_token}->{tag_name} .= chr $self->{next_input_character};
360     # start tag or end tag
361     ## Stay in the state
362     !!!next-input-character;
363     redo A;
364     }
365     } elsif ($self->{state} eq 'before attribute name') {
366     if ($self->{next_input_character} == 0x0009 or # HT
367     $self->{next_input_character} == 0x000A or # LF
368     $self->{next_input_character} == 0x000B or # VT
369     $self->{next_input_character} == 0x000C or # FF
370     $self->{next_input_character} == 0x0020) { # SP
371     ## Stay in the state
372     !!!next-input-character;
373     redo A;
374     } elsif ($self->{next_input_character} == 0x003E) { # >
375     if ($self->{current_token}->{type} eq 'start tag') {
376     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
377     } elsif ($self->{current_token}->{type} eq 'end tag') {
378     $self->{content_model_flag} = 'PCDATA'; # MUST
379 wakaba 1.2 if ($self->{current_token}->{attributes}) {
380 wakaba 1.1 !!!parse-error;
381     }
382     } else {
383     die "$0: $self->{current_token}->{type}: Unknown token type";
384     }
385     $self->{state} = 'data';
386     !!!next-input-character;
387    
388     !!!emit ($self->{current_token}); # start tag or end tag
389     undef $self->{current_token};
390    
391     redo A;
392     } elsif (0x0041 <= $self->{next_input_character} and
393     $self->{next_input_character} <= 0x005A) { # A..Z
394     $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
395     value => ''};
396     $self->{state} = 'attribute name';
397     !!!next-input-character;
398     redo A;
399     } elsif ($self->{next_input_character} == 0x002F) { # /
400     !!!next-input-character;
401     if ($self->{next_input_character} == 0x003E and # >
402     $self->{current_token}->{type} eq 'start tag' and
403     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
404     # permitted slash
405     #
406     } else {
407     !!!parse-error;
408     }
409     ## Stay in the state
410     # next-input-character is already done
411     redo A;
412     } elsif ($self->{next_input_character} == 0x003C or # <
413     $self->{next_input_character} == -1) {
414     !!!parse-error;
415     if ($self->{current_token}->{type} eq 'start tag') {
416     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
417     } elsif ($self->{current_token}->{type} eq 'end tag') {
418     $self->{content_model_flag} = 'PCDATA'; # MUST
419 wakaba 1.2 if ($self->{current_token}->{attributes}) {
420 wakaba 1.1 !!!parse-error;
421     }
422     } else {
423     die "$0: $self->{current_token}->{type}: Unknown token type";
424     }
425     $self->{state} = 'data';
426     # reconsume
427    
428     !!!emit ($self->{current_token}); # start tag or end tag
429     undef $self->{current_token};
430    
431     redo A;
432     } else {
433     $self->{current_attribute} = {name => chr ($self->{next_input_character}),
434     value => ''};
435     $self->{state} = 'attribute name';
436     !!!next-input-character;
437     redo A;
438     }
439     } elsif ($self->{state} eq 'attribute name') {
440     my $before_leave = sub {
441 wakaba 1.2 if (exists $self->{current_token}->{attributes} # start tag or end tag
442 wakaba 1.1 ->{$self->{current_attribute}->{name}}) { # MUST
443     !!!parse-error;
444     ## Discard $self->{current_attribute} # MUST
445     } else {
446 wakaba 1.2 $self->{current_token}->{attributes}->{$self->{current_attribute}->{name}}
447 wakaba 1.1 = $self->{current_attribute};
448     }
449     }; # $before_leave
450    
451     if ($self->{next_input_character} == 0x0009 or # HT
452     $self->{next_input_character} == 0x000A or # LF
453     $self->{next_input_character} == 0x000B or # VT
454     $self->{next_input_character} == 0x000C or # FF
455     $self->{next_input_character} == 0x0020) { # SP
456     $before_leave->();
457     $self->{state} = 'after attribute name';
458     !!!next-input-character;
459     redo A;
460     } elsif ($self->{next_input_character} == 0x003D) { # =
461     $before_leave->();
462     $self->{state} = 'before attribute value';
463     !!!next-input-character;
464     redo A;
465     } elsif ($self->{next_input_character} == 0x003E) { # >
466     $before_leave->();
467     if ($self->{current_token}->{type} eq 'start tag') {
468     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
469     } elsif ($self->{current_token}->{type} eq 'end tag') {
470     $self->{content_model_flag} = 'PCDATA'; # MUST
471 wakaba 1.2 if ($self->{current_token}->{attributes}) {
472 wakaba 1.1 !!!parse-error;
473     }
474     } else {
475     die "$0: $self->{current_token}->{type}: Unknown token type";
476     }
477     $self->{state} = 'data';
478     !!!next-input-character;
479    
480     !!!emit ($self->{current_token}); # start tag or end tag
481     undef $self->{current_token};
482    
483     redo A;
484     } elsif (0x0041 <= $self->{next_input_character} and
485     $self->{next_input_character} <= 0x005A) { # A..Z
486     $self->{current_attribute}->{name} .= chr ($self->{next_input_character} + 0x0020);
487     ## Stay in the state
488     !!!next-input-character;
489     redo A;
490     } elsif ($self->{next_input_character} == 0x002F) { # /
491     $before_leave->();
492     !!!next-input-character;
493     if ($self->{next_input_character} == 0x003E and # >
494     $self->{current_token}->{type} eq 'start tag' and
495     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
496     # permitted slash
497     #
498     } else {
499     !!!parse-error;
500     }
501     $self->{state} = 'before attribute name';
502     # next-input-character is already done
503     redo A;
504     } elsif ($self->{next_input_character} == 0x003C or # <
505     $self->{next_input_character} == -1) {
506     !!!parse-error;
507     $before_leave->();
508     if ($self->{current_token}->{type} eq 'start tag') {
509     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
510     } elsif ($self->{current_token}->{type} eq 'end tag') {
511     $self->{content_model_flag} = 'PCDATA'; # MUST
512 wakaba 1.2 if ($self->{current_token}->{attributes}) {
513 wakaba 1.1 !!!parse-error;
514     }
515     } else {
516     die "$0: $self->{current_token}->{type}: Unknown token type";
517     }
518     $self->{state} = 'data';
519     # reconsume
520    
521     !!!emit ($self->{current_token}); # start tag or end tag
522     undef $self->{current_token};
523    
524     redo A;
525     } else {
526     $self->{current_attribute}->{name} .= chr ($self->{next_input_character});
527     ## Stay in the state
528     !!!next-input-character;
529     redo A;
530     }
531     } elsif ($self->{state} eq 'after attribute name') {
532     if ($self->{next_input_character} == 0x0009 or # HT
533     $self->{next_input_character} == 0x000A or # LF
534     $self->{next_input_character} == 0x000B or # VT
535     $self->{next_input_character} == 0x000C or # FF
536     $self->{next_input_character} == 0x0020) { # SP
537     ## Stay in the state
538     !!!next-input-character;
539     redo A;
540     } elsif ($self->{next_input_character} == 0x003D) { # =
541     $self->{state} = 'before attribute value';
542     !!!next-input-character;
543     redo A;
544     } elsif ($self->{next_input_character} == 0x003E) { # >
545     if ($self->{current_token}->{type} eq 'start tag') {
546     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
547     } elsif ($self->{current_token}->{type} eq 'end tag') {
548     $self->{content_model_flag} = 'PCDATA'; # MUST
549 wakaba 1.2 if ($self->{current_token}->{attributes}) {
550 wakaba 1.1 !!!parse-error;
551     }
552     } else {
553     die "$0: $self->{current_token}->{type}: Unknown token type";
554     }
555     $self->{state} = 'data';
556     !!!next-input-character;
557    
558     !!!emit ($self->{current_token}); # start tag or end tag
559     undef $self->{current_token};
560    
561     redo A;
562     } elsif (0x0041 <= $self->{next_input_character} and
563     $self->{next_input_character} <= 0x005A) { # A..Z
564     $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
565     value => ''};
566     $self->{state} = 'attribute name';
567     !!!next-input-character;
568     redo A;
569     } elsif ($self->{next_input_character} == 0x002F) { # /
570     !!!next-input-character;
571     if ($self->{next_input_character} == 0x003E and # >
572     $self->{current_token}->{type} eq 'start tag' and
573     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
574     # permitted slash
575     #
576     } else {
577     !!!parse-error;
578     }
579     $self->{state} = 'before attribute name';
580     # next-input-character is already done
581     redo A;
582     } elsif ($self->{next_input_character} == 0x003C or # <
583     $self->{next_input_character} == -1) {
584     !!!parse-error;
585     if ($self->{current_token}->{type} eq 'start tag') {
586     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
587     } elsif ($self->{current_token}->{type} eq 'end tag') {
588     $self->{content_model_flag} = 'PCDATA'; # MUST
589 wakaba 1.2 if ($self->{current_token}->{attributes}) {
590 wakaba 1.1 !!!parse-error;
591     }
592     } else {
593     die "$0: $self->{current_token}->{type}: Unknown token type";
594     }
595     $self->{state} = 'data';
596     # reconsume
597    
598     !!!emit ($self->{current_token}); # start tag or end tag
599     undef $self->{current_token};
600    
601     redo A;
602     } else {
603     $self->{current_attribute} = {name => chr ($self->{next_input_character}),
604     value => ''};
605     $self->{state} = 'attribute name';
606     !!!next-input-character;
607     redo A;
608     }
609     } elsif ($self->{state} eq 'before attribute value') {
610     if ($self->{next_input_character} == 0x0009 or # HT
611     $self->{next_input_character} == 0x000A or # LF
612     $self->{next_input_character} == 0x000B or # VT
613     $self->{next_input_character} == 0x000C or # FF
614     $self->{next_input_character} == 0x0020) { # SP
615     ## Stay in the state
616     !!!next-input-character;
617     redo A;
618     } elsif ($self->{next_input_character} == 0x0022) { # "
619     $self->{state} = 'attribute value (double-quoted)';
620     !!!next-input-character;
621     redo A;
622     } elsif ($self->{next_input_character} == 0x0026) { # &
623     $self->{state} = 'attribute value (unquoted)';
624     ## reconsume
625     redo A;
626     } elsif ($self->{next_input_character} == 0x0027) { # '
627     $self->{state} = 'attribute value (single-quoted)';
628     !!!next-input-character;
629     redo A;
630     } elsif ($self->{next_input_character} == 0x003E) { # >
631     if ($self->{current_token}->{type} eq 'start tag') {
632     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
633     } elsif ($self->{current_token}->{type} eq 'end tag') {
634     $self->{content_model_flag} = 'PCDATA'; # MUST
635 wakaba 1.2 if ($self->{current_token}->{attributes}) {
636 wakaba 1.1 !!!parse-error;
637     }
638     } else {
639     die "$0: $self->{current_token}->{type}: Unknown token type";
640     }
641     $self->{state} = 'data';
642     !!!next-input-character;
643    
644     !!!emit ($self->{current_token}); # start tag or end tag
645     undef $self->{current_token};
646    
647     redo A;
648     } elsif ($self->{next_input_character} == 0x003C or # <
649     $self->{next_input_character} == -1) {
650     !!!parse-error;
651     if ($self->{current_token}->{type} eq 'start tag') {
652     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
653     } elsif ($self->{current_token}->{type} eq 'end tag') {
654     $self->{content_model_flag} = 'PCDATA'; # MUST
655 wakaba 1.2 if ($self->{current_token}->{attributes}) {
656 wakaba 1.1 !!!parse-error;
657     }
658     } else {
659     die "$0: $self->{current_token}->{type}: Unknown token type";
660     }
661     $self->{state} = 'data';
662     ## reconsume
663    
664     !!!emit ($self->{current_token}); # start tag or end tag
665     undef $self->{current_token};
666    
667     redo A;
668     } else {
669     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
670     $self->{state} = 'attribute value (unquoted)';
671     !!!next-input-character;
672     redo A;
673     }
674     } elsif ($self->{state} eq 'attribute value (double-quoted)') {
675     if ($self->{next_input_character} == 0x0022) { # "
676     $self->{state} = 'before attribute name';
677     !!!next-input-character;
678     redo A;
679     } elsif ($self->{next_input_character} == 0x0026) { # &
680     $self->{last_attribute_value_state} = 'attribute value (double-quoted)';
681     $self->{state} = 'entity in attribute value';
682     !!!next-input-character;
683     redo A;
684     } elsif ($self->{next_input_character} == -1) {
685     !!!parse-error;
686     if ($self->{current_token}->{type} eq 'start tag') {
687     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
688     } elsif ($self->{current_token}->{type} eq 'end tag') {
689     $self->{content_model_flag} = 'PCDATA'; # MUST
690 wakaba 1.2 if ($self->{current_token}->{attributes}) {
691 wakaba 1.1 !!!parse-error;
692     }
693     } else {
694     die "$0: $self->{current_token}->{type}: Unknown token type";
695     }
696     $self->{state} = 'data';
697     ## reconsume
698    
699     !!!emit ($self->{current_token}); # start tag or end tag
700     undef $self->{current_token};
701    
702     redo A;
703     } else {
704     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
705     ## Stay in the state
706     !!!next-input-character;
707     redo A;
708     }
709     } elsif ($self->{state} eq 'attribute value (single-quoted)') {
710     if ($self->{next_input_character} == 0x0027) { # '
711     $self->{state} = 'before attribute name';
712     !!!next-input-character;
713     redo A;
714     } elsif ($self->{next_input_character} == 0x0026) { # &
715     $self->{last_attribute_value_state} = 'attribute value (single-quoted)';
716     $self->{state} = 'entity in attribute value';
717     !!!next-input-character;
718     redo A;
719     } elsif ($self->{next_input_character} == -1) {
720     !!!parse-error;
721     if ($self->{current_token}->{type} eq 'start tag') {
722     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
723     } elsif ($self->{current_token}->{type} eq 'end tag') {
724     $self->{content_model_flag} = 'PCDATA'; # MUST
725 wakaba 1.2 if ($self->{current_token}->{attributes}) {
726 wakaba 1.1 !!!parse-error;
727     }
728     } else {
729     die "$0: $self->{current_token}->{type}: Unknown token type";
730     }
731     $self->{state} = 'data';
732     ## reconsume
733    
734     !!!emit ($self->{current_token}); # start tag or end tag
735     undef $self->{current_token};
736    
737     redo A;
738     } else {
739     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
740     ## Stay in the state
741     !!!next-input-character;
742     redo A;
743     }
744     } elsif ($self->{state} eq 'attribute value (unquoted)') {
745     if ($self->{next_input_character} == 0x0009 or # HT
746     $self->{next_input_character} == 0x000A or # LF
747     $self->{next_input_character} == 0x000B or # HT
748     $self->{next_input_character} == 0x000C or # FF
749     $self->{next_input_character} == 0x0020) { # SP
750     $self->{state} = 'before attribute name';
751     !!!next-input-character;
752     redo A;
753     } elsif ($self->{next_input_character} == 0x0026) { # &
754     $self->{last_attribute_value_state} = 'attribute value (unquoted)';
755     $self->{state} = 'entity in attribute value';
756     !!!next-input-character;
757     redo A;
758     } elsif ($self->{next_input_character} == 0x003E) { # >
759     if ($self->{current_token}->{type} eq 'start tag') {
760     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
761     } elsif ($self->{current_token}->{type} eq 'end tag') {
762     $self->{content_model_flag} = 'PCDATA'; # MUST
763 wakaba 1.2 if ($self->{current_token}->{attributes}) {
764 wakaba 1.1 !!!parse-error;
765     }
766     } else {
767     die "$0: $self->{current_token}->{type}: Unknown token type";
768     }
769     $self->{state} = 'data';
770     !!!next-input-character;
771    
772     !!!emit ($self->{current_token}); # start tag or end tag
773     undef $self->{current_token};
774    
775     redo A;
776     } elsif ($self->{next_input_character} == 0x003C or # <
777     $self->{next_input_character} == -1) {
778     !!!parse-error;
779     if ($self->{current_token}->{type} eq 'start tag') {
780     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
781     } elsif ($self->{current_token}->{type} eq 'end tag') {
782     $self->{content_model_flag} = 'PCDATA'; # MUST
783 wakaba 1.2 if ($self->{current_token}->{attributes}) {
784 wakaba 1.1 !!!parse-error;
785     }
786     } else {
787     die "$0: $self->{current_token}->{type}: Unknown token type";
788     }
789     $self->{state} = 'data';
790     ## reconsume
791    
792     !!!emit ($self->{current_token}); # start tag or end tag
793     undef $self->{current_token};
794    
795     redo A;
796     } else {
797     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
798     ## Stay in the state
799     !!!next-input-character;
800     redo A;
801     }
802     } elsif ($self->{state} eq 'entity in attribute value') {
803     my $token = $self->_tokenize_attempt_to_consume_an_entity;
804    
805     unless (defined $token) {
806     $self->{current_attribute}->{value} .= '&';
807     } else {
808     $self->{current_attribute}->{value} .= $token->{data};
809     ## ISSUE: spec says "append the returned character token to the current attribute's value"
810     }
811    
812     $self->{state} = $self->{last_attribute_value_state};
813     # next-input-character is already done
814     redo A;
815     } elsif ($self->{state} eq 'bogus comment') {
816     ## (only happen if PCDATA state)
817    
818     my $token = {type => 'comment', data => ''};
819    
820     BC: {
821     if ($self->{next_input_character} == 0x003E) { # >
822     $self->{state} = 'data';
823     !!!next-input-character;
824    
825     !!!emit ($token);
826    
827     redo A;
828     } elsif ($self->{next_input_character} == -1) {
829     $self->{state} = 'data';
830     ## reconsume
831    
832     !!!emit ($token);
833    
834     redo A;
835     } else {
836     $token->{data} .= chr ($self->{next_input_character});
837     !!!next-input-character;
838     redo BC;
839     }
840     } # BC
841     } elsif ($self->{state} eq 'markup declaration open') {
842     ## (only happen if PCDATA state)
843    
844     my @next_char;
845     push @next_char, $self->{next_input_character};
846    
847     if ($self->{next_input_character} == 0x002D) { # -
848     !!!next-input-character;
849     push @next_char, $self->{next_input_character};
850     if ($self->{next_input_character} == 0x002D) { # -
851     $self->{current_token} = {type => 'comment', data => ''};
852     $self->{state} = 'comment';
853     !!!next-input-character;
854     redo A;
855     }
856     } elsif ($self->{next_input_character} == 0x0044 or # D
857     $self->{next_input_character} == 0x0064) { # d
858     !!!next-input-character;
859     push @next_char, $self->{next_input_character};
860     if ($self->{next_input_character} == 0x004F or # O
861     $self->{next_input_character} == 0x006F) { # o
862     !!!next-input-character;
863     push @next_char, $self->{next_input_character};
864     if ($self->{next_input_character} == 0x0043 or # C
865     $self->{next_input_character} == 0x0063) { # c
866     !!!next-input-character;
867     push @next_char, $self->{next_input_character};
868     if ($self->{next_input_character} == 0x0054 or # T
869     $self->{next_input_character} == 0x0074) { # t
870     !!!next-input-character;
871     push @next_char, $self->{next_input_character};
872     if ($self->{next_input_character} == 0x0059 or # Y
873     $self->{next_input_character} == 0x0079) { # y
874     !!!next-input-character;
875     push @next_char, $self->{next_input_character};
876     if ($self->{next_input_character} == 0x0050 or # P
877     $self->{next_input_character} == 0x0070) { # p
878     !!!next-input-character;
879     push @next_char, $self->{next_input_character};
880     if ($self->{next_input_character} == 0x0045 or # E
881     $self->{next_input_character} == 0x0065) { # e
882     ## ISSUE: What a stupid code this is!
883     $self->{state} = 'DOCTYPE';
884     !!!next-input-character;
885     redo A;
886     }
887     }
888     }
889     }
890     }
891     }
892     }
893    
894     !!!parse-error;
895     $self->{next_input_character} = shift @next_char;
896     !!!back-next-input-character (@next_char);
897     $self->{state} = 'bogus comment';
898     redo A;
899    
900     ## ISSUE: typos in spec: chacacters, is is a parse error
901     ## ISSUE: spec is somewhat unclear on "is the first character that will be in the comment"; what is "that will be in the comment" is what the algorithm defines, isn't it?
902     } elsif ($self->{state} eq 'comment') {
903     if ($self->{next_input_character} == 0x002D) { # -
904     $self->{state} = 'comment dash';
905     !!!next-input-character;
906     redo A;
907     } elsif ($self->{next_input_character} == -1) {
908     !!!parse-error;
909     $self->{state} = 'data';
910     ## reconsume
911    
912     !!!emit ($self->{current_token}); # comment
913     undef $self->{current_token};
914    
915     redo A;
916     } else {
917     $self->{current_token}->{data} .= chr ($self->{next_input_character}); # comment
918     ## Stay in the state
919     !!!next-input-character;
920     redo A;
921     }
922     } elsif ($self->{state} eq 'comment dash') {
923     if ($self->{next_input_character} == 0x002D) { # -
924     $self->{state} = 'comment end';
925     !!!next-input-character;
926     redo A;
927     } elsif ($self->{next_input_character} == -1) {
928     !!!parse-error;
929     $self->{state} = 'data';
930     ## reconsume
931    
932     !!!emit ($self->{current_token}); # comment
933     undef $self->{current_token};
934    
935     redo A;
936     } else {
937     $self->{current_token}->{data} .= '-' . chr ($self->{next_input_character}); # comment
938     $self->{state} = 'comment';
939     !!!next-input-character;
940     redo A;
941     }
942     } elsif ($self->{state} eq 'comment end') {
943     if ($self->{next_input_character} == 0x003E) { # >
944     $self->{state} = 'data';
945     !!!next-input-character;
946    
947     !!!emit ($self->{current_token}); # comment
948     undef $self->{current_token};
949    
950     redo A;
951     } elsif ($self->{next_input_character} == 0x002D) { # -
952     !!!parse-error;
953     $self->{current_token}->{data} .= '-'; # comment
954     ## Stay in the state
955     !!!next-input-character;
956     redo A;
957     } elsif ($self->{next_input_character} == -1) {
958     !!!parse-error;
959     $self->{state} = 'data';
960     ## reconsume
961    
962     !!!emit ($self->{current_token}); # comment
963     undef $self->{current_token};
964    
965     redo A;
966     } else {
967     !!!parse-error;
968     $self->{current_token}->{data} .= '--' . chr ($self->{next_input_character}); # comment
969     $self->{state} = 'comment';
970     !!!next-input-character;
971     redo A;
972     }
973     } elsif ($self->{state} eq 'DOCTYPE') {
974     if ($self->{next_input_character} == 0x0009 or # HT
975     $self->{next_input_character} == 0x000A or # LF
976     $self->{next_input_character} == 0x000B or # VT
977     $self->{next_input_character} == 0x000C or # FF
978     $self->{next_input_character} == 0x0020) { # SP
979     $self->{state} = 'before DOCTYPE name';
980     !!!next-input-character;
981     redo A;
982     } else {
983     !!!parse-error;
984     $self->{state} = 'before DOCTYPE name';
985     ## reconsume
986     redo A;
987     }
988     } elsif ($self->{state} eq 'before DOCTYPE name') {
989     if ($self->{next_input_character} == 0x0009 or # HT
990     $self->{next_input_character} == 0x000A or # LF
991     $self->{next_input_character} == 0x000B or # VT
992     $self->{next_input_character} == 0x000C or # FF
993     $self->{next_input_character} == 0x0020) { # SP
994     ## Stay in the state
995     !!!next-input-character;
996     redo A;
997     } elsif (0x0061 <= $self->{next_input_character} and
998     $self->{next_input_character} <= 0x007A) { # a..z
999     $self->{current_token} = {type => 'DOCTYPE',
1000     name => chr ($self->{next_input_character} - 0x0020),
1001     error => 1};
1002     $self->{state} = 'DOCTYPE name';
1003     !!!next-input-character;
1004     redo A;
1005     } elsif ($self->{next_input_character} == 0x003E) { # >
1006     !!!parse-error;
1007     $self->{state} = 'data';
1008     !!!next-input-character;
1009    
1010     !!!emit ({type => 'DOCTYPE', name => '', error => 1});
1011    
1012     redo A;
1013     } elsif ($self->{next_input_character} == -1) {
1014     !!!parse-error;
1015     $self->{state} = 'data';
1016     ## reconsume
1017    
1018     !!!emit ({type => 'DOCTYPE', name => '', error => 1});
1019    
1020     redo A;
1021     } else {
1022     $self->{current_token} = {type => 'DOCTYPE',
1023     name => chr ($self->{next_input_character}),
1024     error => 1};
1025     $self->{state} = 'DOCTYPE name';
1026     !!!next-input-character;
1027     redo A;
1028     }
1029     } elsif ($self->{state} eq 'DOCTYPE name') {
1030     if ($self->{next_input_character} == 0x0009 or # HT
1031     $self->{next_input_character} == 0x000A or # LF
1032     $self->{next_input_character} == 0x000B or # VT
1033     $self->{next_input_character} == 0x000C or # FF
1034     $self->{next_input_character} == 0x0020) { # SP
1035     $self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML'); # DOCTYPE
1036     $self->{state} = 'after DOCTYPE name';
1037     !!!next-input-character;
1038     redo A;
1039     } elsif ($self->{next_input_character} == 0x003E) { # >
1040     $self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML'); # DOCTYPE
1041     $self->{state} = 'data';
1042     !!!next-input-character;
1043    
1044     !!!emit ($self->{current_token}); # DOCTYPE
1045     undef $self->{current_token};
1046    
1047     redo A;
1048     } elsif (0x0061 <= $self->{next_input_character} and
1049     $self->{next_input_character} <= 0x007A) { # a..z
1050     $self->{current_token}->{name} .= chr ($self->{next_input_character} - 0x0020); # DOCTYPE
1051     #$self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML');
1052     ## Stay in the state
1053     !!!next-input-character;
1054     redo A;
1055     } elsif ($self->{next_input_character} == -1) {
1056     !!!parse-error;
1057     $self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML'); # DOCTYPE
1058     $self->{state} = 'data';
1059     ## reconsume
1060    
1061     !!!emit ($self->{current_token});
1062     undef $self->{current_token};
1063    
1064     redo A;
1065     } else {
1066 wakaba 1.3 $self->{current_token}->{name}
1067     .= chr ($self->{next_input_character}); # DOCTYPE
1068 wakaba 1.1 #$self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML');
1069     ## Stay in the state
1070     !!!next-input-character;
1071     redo A;
1072     }
1073     } elsif ($self->{state} eq 'after DOCTYPE name') {
1074     if ($self->{next_input_character} == 0x0009 or # HT
1075     $self->{next_input_character} == 0x000A or # LF
1076     $self->{next_input_character} == 0x000B or # VT
1077     $self->{next_input_character} == 0x000C or # FF
1078     $self->{next_input_character} == 0x0020) { # SP
1079     ## Stay in the state
1080     !!!next-input-character;
1081     redo A;
1082     } elsif ($self->{next_input_character} == 0x003E) { # >
1083     $self->{state} = 'data';
1084     !!!next-input-character;
1085    
1086     !!!emit ($self->{current_token}); # DOCTYPE
1087     undef $self->{current_token};
1088    
1089     redo A;
1090     } elsif ($self->{next_input_character} == -1) {
1091     !!!parse-error;
1092     $self->{state} = 'data';
1093     ## reconsume
1094    
1095     !!!emit ($self->{current_token}); # DOCTYPE
1096     undef $self->{current_token};
1097    
1098     redo A;
1099     } else {
1100     !!!parse-error;
1101     $self->{current_token}->{error} = 1; # DOCTYPE
1102     $self->{state} = 'bogus DOCTYPE';
1103     !!!next-input-character;
1104     redo A;
1105     }
1106     } elsif ($self->{state} eq 'bogus DOCTYPE') {
1107     if ($self->{next_input_character} == 0x003E) { # >
1108     $self->{state} = 'data';
1109     !!!next-input-character;
1110    
1111     !!!emit ($self->{current_token}); # DOCTYPE
1112     undef $self->{current_token};
1113    
1114     redo A;
1115     } elsif ($self->{next_input_character} == -1) {
1116     !!!parse-error;
1117     $self->{state} = 'data';
1118     ## reconsume
1119    
1120     !!!emit ($self->{current_token}); # DOCTYPE
1121     undef $self->{current_token};
1122    
1123     redo A;
1124     } else {
1125     ## Stay in the state
1126     !!!next-input-character;
1127     redo A;
1128     }
1129     } else {
1130     die "$0: $self->{state}: Unknown state";
1131     }
1132     } # A
1133    
1134     die "$0: _get_next_token: unexpected case";
1135     } # _get_next_token
1136    
## Attempt to consume a numeric character reference.  The |&| has
## already been consumed by the caller; on entry
## |$self->{next_input_character}| is the character that follows it.
##
## Returns a hash reference |{type => 'character', data => ...}| when
## a decimal (|&#...|) or hexadecimal (|&#x...|) reference has been
## consumed, and leaves |$r| undefined otherwise.  When |#| is not
## followed by a usable digit, the characters read ahead are pushed
## back and |next_input_character| is reset to |#| so that they are
## tokenized again as ordinary input.
##
## NOTE(review): the |!!!...| lines are macros expanded by a
## preprocessor whose definitions are not visible here.  This code
## assumes |!!!back-next-input-character| queues its arguments, in
## reading order, behind the current |next_input_character| (as its
## use in the "markup declaration open" state suggests) -- confirm.
sub _tokenize_attempt_to_consume_an_entity ($) {
  my $self = shift;
  my $r;

  if ($self->{next_input_character} == 0x0023) { # #
    !!!next-input-character;
    my $num;
    if ($self->{next_input_character} == 0x0078 or # x
        $self->{next_input_character} == 0x0058) { # X
      X: {
        ## Every pass (including each |redo X|) consumes one more
        ## character.  |$x_char| is only used while |$num| is still
        ## undefined, i.e. on the first pass, where it holds |x|/|X|.
        my $x_char = $self->{next_input_character};
        !!!next-input-character;
        if (0x0030 <= $self->{next_input_character} and
            $self->{next_input_character} <= 0x0039) { # 0..9
          $num ||= 0;
          $num *= 0x10;
          $num += $self->{next_input_character} - 0x0030;
          redo X;
        } elsif (0x0061 <= $self->{next_input_character} and
                 $self->{next_input_character} <= 0x0066) { # a..f
          ## ISSUE: the spec says U+0078, which is apparently incorrect
          $num ||= 0;
          $num *= 0x10;
          $num += $self->{next_input_character} - 0x0060 + 9;
          redo X;
        } elsif (0x0041 <= $self->{next_input_character} and
                 $self->{next_input_character} <= 0x0046) { # A..F
          ## ISSUE: the spec says U+0058, which is apparently incorrect
          $num ||= 0;
          $num *= 0x10;
          $num += $self->{next_input_character} - 0x0040 + 9;
          redo X;
        } elsif (not defined $num) { # no hexadecimal digit
          !!!parse-error;
          ## Unread both the |x|/|X| and the character that followed
          ## it, then resume from |#|.  Pushing back only |$x_char|
          ## would silently drop the character after it.
          !!!back-next-input-character ($x_char, $self->{next_input_character});
          $self->{next_input_character} = 0x0023; # #
          last X; ## nothing is returned
        } elsif ($self->{next_input_character} == 0x003B) { # ;
          !!!next-input-character;
        } else {
          ## Missing terminating |;|
          !!!parse-error;
        }

        ## TODO: check the definition for |a valid Unicode character|.
        if ($num > 1114111 or $num == 0) {
          $num = 0xFFFD; # REPLACEMENT CHARACTER
          ## ISSUE: Why this is not an error?
        }

        $r = {type => 'character', data => chr $num};
      } # X
    } elsif (0x0030 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x0039) { # 0..9
      my $code = $self->{next_input_character} - 0x0030;
      !!!next-input-character;

      while (0x0030 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x0039) { # 0..9
        $code *= 10;
        $code += $self->{next_input_character} - 0x0030;

        !!!next-input-character;
      }

      if ($self->{next_input_character} == 0x003B) { # ;
        !!!next-input-character;
      } else {
        ## Missing terminating |;|
        !!!parse-error;
      }

      ## TODO: check the definition for |a valid Unicode character|.
      if ($code > 1114111 or $code == 0) {
        $code = 0xFFFD; # REPLACEMENT CHARACTER
        ## ISSUE: Why this is not an error?
      }

      $r = {type => 'character', data => chr $code};
    } else {
      ## |#| followed by neither |x|/|X| nor a digit: unread the
      ## character and resume from |#|.
      !!!parse-error;
      !!!back-next-input-character ($self->{next_input_character});
      $self->{next_input_character} = 0x0023; # #
    }
  ## NOTE(review): the brace after the macro name closes the outer
  ## |if|; what the macro itself expands to is not visible here.
  !!!consume-entity}
  return $r;
} # _tokenize_attempt_to_consume_an_entity
1222    
## Set up the tree construction stage: load |What::NanoDOM|, store a
## new document object in |$self->{document}| and switch its strict
## error checking off.
sub _initialize_tree_constructor ($) {
  my ($self) = @_;
  require What::NanoDOM;
  my $doc = What::NanoDOM::Document->new;
  $self->{document} = $doc;
  $doc->strict_error_checking (0);
  ## TODO: Turn mutation events off # MUST
  ## TODO: Turn loose Document option (manakai extension) on
} # _initialize_tree_constructor
1231    
## Finish the tree construction stage: switch strict error checking
## of |$self->{document}| back on.
sub _terminate_tree_constructor ($) {
  my ($self) = @_;
  my $doc = $self->{document};
  $doc->strict_error_checking (1);
  ## TODO: Turn mutation events on
} # _terminate_tree_constructor
1237    
1238     ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
1239    
1240     sub _construct_tree ($) {
1241     my ($self) = @_;
1242    
1243     ## When an interactive UA render the $self->{document} available
1244     ## to the user, or when it begin accepting user input, are
1245     ## not defined.
1246    
1247     ## Append a character: collect it and all subsequent consecutive
1248     ## characters and insert one Text node whose data is concatenation
1249     ## of all those characters. # MUST
1250    
1251     my $token;
1252     !!!next-token;
1253    
1254     my $phase = 'initial'; # MUST
1255    
1256     my $open_elements = [];
1257     my $active_formatting_elements = [];
1258     my $head_element;
1259     my $form_element;
1260     my $insertion_mode = 'before head';
1261    
## Reconstruct the active formatting elements.
##
## Walks backwards through |@$active_formatting_elements| from its
## last entry until it reaches a marker, an entry whose element is in
## |@$open_elements|, or the first entry of the list.  It then walks
## forwards again: each entry gets a shallow clone of its element,
## which is appended to the current node, pushed onto
## |@$open_elements| and stored in place of the original entry.
##
## Entries are |[element, tag_name]| pairs; a marker is an entry whose
## first member is the string |#marker|.  |$i| is kept as a NEGATIVE
## index (-1 is the last entry) throughout.
my $reconstruct_active_formatting_elements = sub { # MUST
  ## Step 1
  return unless @$active_formatting_elements;

  ## Step 3
  my $i = -1;
  my $entry = $active_formatting_elements->[$i];

  ## Step 2
  return if $entry->[0] eq '#marker';
  for (@$open_elements) {
    if ($entry->[0] eq $_->[0]) {
      return;
    }
  }

  ## Step 4
  S4: {
    ## No earlier entry exists: start creating from this one.
    last S4 if $active_formatting_elements->[0]->[0] eq $entry->[0];

    ## Step 5
    $i--;
    $entry = $active_formatting_elements->[$i];

    ## Step 6
    if ($entry->[0] eq '#marker') {
      #
    } else {
      my $in_open_elements;
      OE: for (@$open_elements) {
        if ($entry->[0] eq $_->[0]) {
          $in_open_elements = 1;
          last OE;
        }
      }
      if ($in_open_elements) {
        #
      } else {
        redo S4;
      }
    }

    ## Step 7
    $i++;
    $entry = $active_formatting_elements->[$i];
  } # S4

  S7: {
    ## Step 8
    my $clone = $entry->[0]->clone_node (0);

    ## Step 9
    $open_elements->[-1]->[0]->append_child ($clone);
    push @$open_elements, [$clone, $entry->[1]];

    ## Step 10
    $active_formatting_elements->[$i] = $open_elements->[-1];

    ## Step 11
    ## |$i| is a negative index, so the last entry is -1.  Comparing
    ## only against |$#$active_formatting_elements| never matches a
    ## negative index, which would let |$i| wrap around to 0 and clone
    ## the entries a second time.
    unless ($i == -1 or $i == $#$active_formatting_elements) {
      ## Step 7'
      $i++;
      $entry = $active_formatting_elements->[$i];

      redo S7;
    }
  } # S7
}; # $reconstruct_active_formatting_elements
1329    
## Clear the list of active formatting elements up to the last marker:
## find the last entry whose first member is |#marker| and drop it
## together with every entry after it.  The list is left untouched
## when it contains no marker.
my $clear_up_to_marker = sub {
  my $index = $#$active_formatting_elements;
  while ($index >= 0) {
    if ($active_formatting_elements->[$index]->[0] eq '#marker') {
      splice @$active_formatting_elements, $index;
      return;
    }
    $index--;
  }
}; # $clear_up_to_marker
1338    
## Reset the insertion mode appropriately.
##
## Walks |@$open_elements| from the current (last) node towards the
## first one and sets |$insertion_mode| from the tag name of the first
## node that determines a mode.  An |html| node selects
## |before head| or |after head| depending on whether |$head_element|
## is defined.  When the first node of the stack is reached without a
## match, the mode becomes |in body|.
my $reset_insertion_mode = sub {
  ## Step 1
  my $last;

  ## Step 2
  my $i = -1;
  my $node = $open_elements->[$i];

  ## Step 3
  S3: {
    $last = 1 if $open_elements->[0]->[0] eq $node->[0];
    ## TODO: the element whose inner_html is set is neither td nor th, then $node = the element

    ## Step 4..13
    ## |tbody|, |thead| and |tfoot| all select "in table body"; the
    ## parsing algorithm defines no separate "in table head" or
    ## "in table foot" insertion mode.
    my $new_mode = {
                    select => 'in select',
                    td => 'in cell',
                    th => 'in cell',
                    tr => 'in row',
                    tbody => 'in table body',
                    thead => 'in table body',
                    tfoot => 'in table body',
                    caption => 'in caption',
                    colgroup => 'in column group',
                    table => 'in table',
                    head => 'in body', # not in head!
                    body => 'in body',
                    frameset => 'in frameset',
                   }->{$node->[1]};
    $insertion_mode = $new_mode and return if defined $new_mode;

    ## Step 14
    if ($node->[1] eq 'html') {
      unless (defined $head_element) {
        $insertion_mode = 'before head';
      } else {
        $insertion_mode = 'after head';
      }
      return;
    }

    ## Step 15
    $insertion_mode = 'in body' and return if $last;

    ## Step 16
    $i--;
    $node = $open_elements->[$i];

    ## Step 17
    redo S3;
  } # S3
}; # $reset_insertion_mode
1391    
## Process a |style| start tag: create the element, attach it to the
## |head| element (when in the "in head" mode and a |head| element is
## known) or to the current node, then gather the following character
## tokens under the CDATA content model as the element's text.  A
## token other than the |style| end tag after the text is a parse
## error; in either case the next token is then fetched.
my $style_start_tag = sub {
  my $style_el; !!!create-element ($style_el, 'style');
  ## $insertion_mode eq 'in head' and ... (always true)
  my $parent_el = ($insertion_mode eq 'in head' and defined $head_element)
      ? $head_element
      : $open_elements->[-1]->[0];
  $parent_el->append_child ($style_el);
  $self->{content_model_flag} = 'CDATA';

  my @chunks;
  !!!next-token;
  until ($token->{type} ne 'character') {
    push @chunks, $token->{data};
    !!!next-token;
  } # stop if non-character token or tokenizer stops tokenising
  my $text = join '', @chunks;
  $style_el->manakai_append_text ($text) if length $text;

  $self->{content_model_flag} = 'PCDATA';

  unless ($token->{type} eq 'end tag' and $token->{tag_name} eq 'style') {
    !!!parse-error;
    ## ISSUE: And ignore?
  }
  ## (a matching end tag is simply ignored)
  !!!next-token;
}; # $style_start_tag
1420    
## Process a |script| start tag: create the element, gather the
## following character tokens under the CDATA content model as its
## text, report a parse error unless the text is followed by the
## |script| end tag, and only then attach the element to the |head|
## element (when in the "in head" mode and a |head| element is known)
## or to the current node.  Finally the next token is fetched.
my $script_start_tag = sub {
  my $script_el; !!!create-element ($script_el, 'script');
  ## TODO: mark as "parser-inserted"

  $self->{content_model_flag} = 'CDATA';

  my @chunks;
  !!!next-token;
  until ($token->{type} ne 'character') {
    push @chunks, $token->{data};
    !!!next-token;
  } # stop if non-character token or tokenizer stops tokenising
  my $text = join '', @chunks;
  $script_el->manakai_append_text ($text) if length $text;

  $self->{content_model_flag} = 'PCDATA';

  unless ($token->{type} eq 'end tag' and
          $token->{tag_name} eq 'script') {
    !!!parse-error;
    ## ISSUE: And ignore?
    ## TODO: mark as "already executed"
  }
  ## (a matching end tag is simply ignored)

  ## TODO: inner_html mode then mark as "already executed" and skip
  if (1) {
    ## TODO: $old_insertion_point = current insertion point
    ## TODO: insertion point = just before the next input character

    my $parent_el = ($insertion_mode eq 'in head' and defined $head_element)
        ? $head_element
        : $open_elements->[-1]->[0];
    $parent_el->append_child ($script_el);

    ## TODO: insertion point = $old_insertion_point (might be "undefined")

    ## TODO: if there is a script that will execute as soon as the parser resume, then...
  }

  !!!next-token;
}; # $script_start_tag
1463    
## Process an end tag of a formatting element.  The only argument is
## the tag name.
##
## The step numbers in the comments below are those of the algorithm
## being implemented.  The whole procedure is repeated (|redo FET|)
## until one of the |return| branches is taken; each of those fetches
## the next token first.
##
## Entries of |@$open_elements| and |@$active_formatting_elements|
## are |[element, tag_name]| pairs, so DOM methods have to be called
## on member |[0]| of an entry, never on the entry itself.
my $formatting_end_tag = sub {
  my $tag_name = shift;

  FET: {
    ## Step 1
    ## Find the last active formatting entry with this tag name that
    ## comes after the last marker.
    my $formatting_element;
    my $formatting_element_i_in_active;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[1] eq $tag_name) {
        $formatting_element = $active_formatting_elements->[$_];
        $formatting_element_i_in_active = $_;
        last AFE;
      } elsif ($active_formatting_elements->[$_]->[0] eq '#marker') {
        last AFE;
      }
    } # AFE
    unless (defined $formatting_element) {
      !!!parse-error;
      ## Ignore the token
      !!!next-token;
      return;
    }
    ## has an element in scope
    my $in_scope = 1;
    my $formatting_element_i_in_open;
    INSCOPE: for (reverse 0..$#$open_elements) {
      my $node = $open_elements->[$_];
      if ($node->[0] eq $formatting_element->[0]) {
        if ($in_scope) {
          $formatting_element_i_in_open = $_;
          last INSCOPE;
        } else { # in open elements but not in scope
          !!!parse-error;
          ## Ignore the token
          !!!next-token;
          return;
        }
      } elsif ({
                table => 1, caption => 1, td => 1, th => 1,
                button => 1, marquee => 1, object => 1, html => 1,
               }->{$node->[1]}) {
        $in_scope = 0;
      }
    } # INSCOPE
    unless (defined $formatting_element_i_in_open) {
      !!!parse-error;
      ## Remove $formatting_element itself; it is not necessarily the
      ## last entry of the list, so |pop| would remove the wrong one.
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      !!!next-token; ## TODO: ok?
      return;
    }
    if (not $open_elements->[-1]->[0] eq $formatting_element->[0]) {
      !!!parse-error;
    }

    ## Step 2
    ## Scanning from the current node up to the formatting element,
    ## the last qualifying node seen is the one nearest to the
    ## formatting element.
    my $furthest_block;
    my $furthest_block_i_in_open;
    OE: for (reverse 0..$#$open_elements) {
      my $node = $open_elements->[$_];
      if (not $formatting_category->{$node->[1]} and
          #not $phrasing_category->{$node->[1]} and
          ($special_category->{$node->[1]} or
           $scoping_category->{$node->[1]})) {
        $furthest_block = $node;
        $furthest_block_i_in_open = $_;
      } elsif ($node->[0] eq $formatting_element->[0]) {
        last OE;
      }
    } # OE

    ## Step 3
    unless (defined $furthest_block) { # MUST
      splice @$open_elements, $formatting_element_i_in_open;
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      !!!next-token;
      return;
    }

    ## Step 4
    my $common_ancestor_node = $open_elements->[$formatting_element_i_in_open - 1];

    ## Step 5
    my $furthest_block_parent = $furthest_block->[0]->parent_node;
    if (defined $furthest_block_parent) {
      $furthest_block_parent->remove_child ($furthest_block->[0]);
    }

    ## Step 6
    my $bookmark_prev_el
        = $active_formatting_elements->[$formatting_element_i_in_active - 1]
            ->[0];

    ## Step 7
    my $node = $furthest_block;
    my $node_i_in_open = $furthest_block_i_in_open;
    my $last_node = $furthest_block;
    S7: {
      ## Step 1
      $node_i_in_open--;
      $node = $open_elements->[$node_i_in_open];

      ## Step 2
      ## A node that is not an active formatting element is dropped
      ## from the stack of open elements and the previous node is
      ## examined instead.
      my $node_i_in_active;
      S7S2: {
        for (reverse 0..$#$active_formatting_elements) {
          if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
            $node_i_in_active = $_;
            last S7S2;
          }
        }
        splice @$open_elements, $node_i_in_open, 1;
        redo S7;
      } # S7S2

      ## Step 3
      last S7 if $node->[0] eq $formatting_element->[0];

      ## Step 4
      if ($last_node->[0] eq $furthest_block->[0]) {
        $bookmark_prev_el = $node->[0];
      }

      ## Step 5
      if ($node->[0]->has_child_nodes ()) {
        my $clone = [$node->[0]->clone_node (0), $node->[1]];
        $active_formatting_elements->[$node_i_in_active] = $clone;
        $open_elements->[$node_i_in_open] = $clone;
        $node = $clone;
      }

      ## Step 6
      ## Both are [element, tag_name] pairs: operate on the elements.
      $node->[0]->append_child ($last_node->[0]);

      ## Step 7
      $last_node = $node;

      ## Step 8
      redo S7;
    } # S7

    ## Step 8
    ## Both are [element, tag_name] pairs: operate on the elements.
    $common_ancestor_node->[0]->append_child ($last_node->[0]);

    ## Step 9
    my $clone = [$formatting_element->[0]->clone_node (0),
                 $formatting_element->[1]];

    ## Step 10
    ## Copy the child list first since it changes while being moved.
    my @cn = @{$furthest_block->[0]->child_nodes};
    $clone->[0]->append_child ($_) for @cn;

    ## Step 11
    $furthest_block->[0]->append_child ($clone->[0]);

    ## Step 12
    my $i;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
        splice @$active_formatting_elements, $_, 1;
        $i-- and last AFE if defined $i;
      } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
        $i = $_;
      }
    } # AFE
    splice @$active_formatting_elements, $i + 1, 0, $clone;

    ## Step 13
    undef $i;
    OE: for (reverse 0..$#$open_elements) {
      if ($open_elements->[$_]->[0] eq $formatting_element->[0]) {
        splice @$open_elements, $_, 1;
        $i-- and last OE if defined $i;
      } elsif ($open_elements->[$_]->[0] eq $furthest_block->[0]) {
        $i = $_;
      }
    } # OE
    splice @$open_elements, $i + 1, 1, $clone;

    ## Step 14
    redo FET;
  } # FET
}; # $formatting_end_tag
1646    
1647     my $in_body = sub {
1648     my $insert = shift;
1649     if ($token->{type} eq 'start tag') {
1650     if ($token->{tag_name} eq 'script') {
1651     $script_start_tag->();
1652     return;
1653     } elsif ($token->{tag_name} eq 'style') {
1654     $style_start_tag->();
1655     return;
1656     } elsif ({
1657     base => 1, link => 1, meta => 1, title => 1,
1658     }->{$token->{tag_name}}) {
1659     !!!parse-error;
1660     ## NOTE: This is an "as if in head" code clone
1661     my $el;
1662     !!!create-element ($el, $token->{tag_name}, $token->{attributes});
1663     if (defined $head_element) {
1664     $head_element->append_child ($el);
1665     } else {
1666     $insert->($el);
1667     }
1668    
1669     ## ISSUE: Issue on magical <base> in the spec
1670    
1671     !!!next-token;
1672     return;
1673     } elsif ($token->{tag_name} eq 'body') {
1674     !!!parse-error;
1675    
1676     if (@$open_elements == 1 or
1677     $open_elements->[1]->[1] ne 'body') {
1678     ## Ignore the token
1679     } else {
1680     my $body_el = $open_elements->[1]->[0];
1681     for my $attr_name (keys %{$token->{attributes}}) {
1682     unless ($body_el->has_attribute_ns (undef, $attr_name)) {
1683     $body_el->set_attribute_ns
1684     (undef, [undef, $attr_name],
1685     $token->{attributes}->{$attr_name}->{value});
1686     }
1687     }
1688     }
1689     !!!next-token;
1690     return;
1691     } elsif ({
1692     address => 1, blockquote => 1, center => 1, dir => 1,
1693     div => 1, dl => 1, fieldset => 1, listing => 1,
1694     menu => 1, ol => 1, p => 1, ul => 1,
1695     pre => 1,
1696     }->{$token->{tag_name}}) {
1697     ## has a p element in scope
1698     INSCOPE: for (reverse @$open_elements) {
1699     if ($_->[1] eq 'p') {
1700     !!!back-token;
1701     $token = {type => 'end tag', tag_name => 'p'};
1702     return;
1703     } elsif ({
1704     table => 1, caption => 1, td => 1, th => 1,
1705     button => 1, marquee => 1, object => 1, html => 1,
1706     }->{$_->[1]}) {
1707     last INSCOPE;
1708     }
1709     } # INSCOPE
1710    
1711     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
1712     if ($token->{tag_name} eq 'pre') {
1713     !!!next-token;
1714     if ($token->{type} eq 'character') {
1715     $token->{data} =~ s/^\x0A//;
1716     unless (length $token->{data}) {
1717     !!!next-token;
1718     }
1719     }
1720     } else {
1721     !!!next-token;
1722     }
1723     return;
1724     } elsif ($token->{tag_name} eq 'form') {
1725     if (defined $form_element) {
1726     !!!parse-error;
1727     ## Ignore the token
1728     } else {
1729     ## has a p element in scope
1730     INSCOPE: for (reverse @$open_elements) {
1731     if ($_->[1] eq 'p') {
1732     !!!back-token;
1733     $token = {type => 'end tag', tag_name => 'p'};
1734     return;
1735     } elsif ({
1736     table => 1, caption => 1, td => 1, th => 1,
1737     button => 1, marquee => 1, object => 1, html => 1,
1738     }->{$_->[1]}) {
1739     last INSCOPE;
1740     }
1741     } # INSCOPE
1742    
1743     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
1744     $form_element = $open_elements->[-1]->[0];
1745     !!!next-token;
1746     return;
1747     }
1748     } elsif ($token->{tag_name} eq 'li') {
1749     ## has a p element in scope
1750     INSCOPE: for (reverse @$open_elements) {
1751     if ($_->[1] eq 'p') {
1752     !!!back-token;
1753     $token = {type => 'end tag', tag_name => 'p'};
1754     return;
1755     } elsif ({
1756     table => 1, caption => 1, td => 1, th => 1,
1757     button => 1, marquee => 1, object => 1, html => 1,
1758     }->{$_->[1]}) {
1759     last INSCOPE;
1760     }
1761     } # INSCOPE
1762    
1763     ## Step 1
1764     my $i = -1;
1765     my $node = $open_elements->[$i];
1766     LI: { ## Walk the stack of open elements from the top ($i is a negative index) looking for an open li
1767     ## Step 2: an open li was found; remove it and every element above it from the stack
1768     if ($node->[1] eq 'li') {
1769     splice @$open_elements, $i;
1770     last LI;
1771     }
1772    
1773     ## Step 3: stop at a non-formatting special/scoping element other than address and div
1774     if (not $formatting_category->{$node->[1]} and
1775     #not $phrasing_category->{$node->[1]} and
1776     ($special_category->{$node->[1]} or
1777     $scoping_category->{$node->[1]}) and
1778     $node->[1] ne 'address' and $node->[1] ne 'div') {
1779     last LI;
1780     }
1781    
1782     ## Step 4: move to the previous entry in the stack (one step further from the top)
1783     $i--;
1784     $node = $open_elements->[$i];
1785     redo LI;
1786     } # LI
1787    
1788     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
1789     !!!next-token;
1790     return;
1791     } elsif ($token->{tag_name} eq 'dd' or $token->{tag_name} eq 'dt') {
1792     ## has a p element in scope
1793     INSCOPE: for (reverse @$open_elements) {
1794     if ($_->[1] eq 'p') {
1795     !!!back-token;
1796     $token = {type => 'end tag', tag_name => 'p'};
1797     return;
1798     } elsif ({
1799     table => 1, caption => 1, td => 1, th => 1,
1800     button => 1, marquee => 1, object => 1, html => 1,
1801     }->{$_->[1]}) {
1802     last INSCOPE;
1803     }
1804     } # INSCOPE
1805    
1806     ## Step 1
1807     my $i = -1;
1808     my $node = $open_elements->[$i];
1809     LI: { ## Walk the stack of open elements from the top ($i is a negative index) looking for an open dt/dd
1810     ## Step 2: an open dt or dd was found; remove it and every element above it from the stack
1811     if ($node->[1] eq 'dt' or $node->[1] eq 'dd') {
1812     splice @$open_elements, $i;
1813     last LI;
1814     }
1815    
1816     ## Step 3: stop at a non-formatting special/scoping element other than address and div
1817     if (not $formatting_category->{$node->[1]} and
1818     #not $phrasing_category->{$node->[1]} and
1819     ($special_category->{$node->[1]} or
1820     $scoping_category->{$node->[1]}) and
1821     $node->[1] ne 'address' and $node->[1] ne 'div') {
1822     last LI;
1823     }
1824    
1825     ## Step 4: move to the previous entry in the stack (one step further from the top)
1826     $i--;
1827     $node = $open_elements->[$i];
1828     redo LI;
1829     } # LI
1830    
1831     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
1832     !!!next-token;
1833     return;
1834     } elsif ($token->{tag_name} eq 'plaintext') {
1835     ## has a p element in scope
1836     INSCOPE: for (reverse @$open_elements) {
1837     if ($_->[1] eq 'p') {
1838     !!!back-token;
1839     $token = {type => 'end tag', tag_name => 'p'};
1840     return;
1841     } elsif ({
1842     table => 1, caption => 1, td => 1, th => 1,
1843     button => 1, marquee => 1, object => 1, html => 1,
1844     }->{$_->[1]}) {
1845     last INSCOPE;
1846     }
1847     } # INSCOPE
1848    
1849     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
1850    
1851     $self->{content_model_flag} = 'PLAINTEXT';
1852    
1853     !!!next-token;
1854     return;
1855     } elsif ({
1856     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
1857     }->{$token->{tag_name}}) {
1858     ## has a p element in scope
1859     INSCOPE: for (reverse 0..$#$open_elements) {
1860     my $node = $open_elements->[$_];
1861     if ($node->[1] eq 'p') {
1862     !!!back-token;
1863     $token = {type => 'end tag', tag_name => 'p'};
1864     return;
1865     } elsif ({
1866     table => 1, caption => 1, td => 1, th => 1,
1867     button => 1, marquee => 1, object => 1, html => 1,
1868     }->{$node->[1]}) {
1869     last INSCOPE;
1870     }
1871     } # INSCOPE
1872    
1873     ## has an element in scope
1874     my $i;
1875     INSCOPE: for (reverse 0..$#$open_elements) {
1876     my $node = $open_elements->[$_];
1877     if ({
1878     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
1879     }->{$node->[1]}) {
1880     $i = $_;
1881     last INSCOPE;
1882     } elsif ({
1883     table => 1, caption => 1, td => 1, th => 1,
1884     button => 1, marquee => 1, object => 1, html => 1,
1885     }->{$node->[1]}) {
1886     last INSCOPE;
1887     }
1888     } # INSCOPE
1889    
1890     if (defined $i) {
1891     !!!parse-error;
1892     splice @$open_elements, $i;
1893     }
1894    
1895     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
1896    
1897     !!!next-token;
1898     return;
1899     } elsif ($token->{tag_name} eq 'a') {
1900     AFE: for my $i (reverse 0..$#$active_formatting_elements) {
1901     my $node = $active_formatting_elements->[$i];
1902     if ($node->[1] eq 'a') {
1903     !!!parse-error;
1904    
1905     !!!back-token;
1906     $token = {type => 'end tag', tag_name => 'a'};
1907     $formatting_end_tag->($token->{tag_name});
1908    
1909     splice @$active_formatting_elements, $i;
1910     OE: for (reverse 0..$#$open_elements) {
1911     if ($open_elements->[$_]->[0] eq $node->[0]) {
1912     splice @$open_elements, $_;
1913     last OE;
1914     }
1915     } # OE
1916     last AFE;
1917     } elsif ($node->[0] eq '#marker') {
1918     last AFE;
1919     }
1920     } # AFE
1921    
1922     $reconstruct_active_formatting_elements->();
1923    
1924     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
1925     push @$active_formatting_elements, $open_elements->[-1];
1926    
1927     !!!next-token;
1928     return;
1929     } elsif ({ ## Start tag of a formatting element
1930     b => 1, big => 1, em => 1, font => 1, i => 1,
1931     nobr => 1, s => 1, small => 1, strike => 1,
1932     strong => 1, tt => 1, u => 1,
1933     }->{$token->{tag_name}}) {
1934     $reconstruct_active_formatting_elements->();
1935    
1936     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
1937     push @$active_formatting_elements, $open_elements->[-1]; ## record the element just inserted
1938    
1939     !!!next-token;
1940     return;
1941     } elsif ($token->{tag_name} eq 'button') {
1942     ## has a button element in scope
1943     INSCOPE: for (reverse 0..$#$open_elements) {
1944     my $node = $open_elements->[$_];
1945     if ($node->[1] eq 'button') {
1946     !!!parse-error;
1947     !!!back-token;
1948     $token = {type => 'end tag', tag_name => 'button'};
1949     return;
1950     } elsif ({
1951     table => 1, caption => 1, td => 1, th => 1,
1952     button => 1, marquee => 1, object => 1, html => 1,
1953     }->{$node->[1]}) {
1954     last INSCOPE;
1955     }
1956     } # INSCOPE
1957    
1958     $reconstruct_active_formatting_elements->();
1959    
1960     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
1961     push @$active_formatting_elements, ['#marker', ''];
1962    
1963     !!!next-token;
1964     return;
1965     } elsif ($token->{tag_name} eq 'marquee' or
1966     $token->{tag_name} eq 'object') {
1967     $reconstruct_active_formatting_elements->();
1968    
1969     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
1970     push @$active_formatting_elements, ['#marker', ''];
1971    
1972     !!!next-token;
1973     return;
1974     } elsif ($token->{tag_name} eq 'xmp') {
1975     $reconstruct_active_formatting_elements->();
1976    
1977     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
1978    
1979     $self->{content_model_flag} = 'CDATA';
1980    
1981     !!!next-token;
1982     return;
1983     } elsif ($token->{tag_name} eq 'table') {
1984     ## has a p element in scope: if so, process an implied </p> first and reprocess this tag later
1985     INSCOPE: for (reverse @$open_elements) {
1986     if ($_->[1] eq 'p') {
1987     !!!back-token;
1988     $token = {type => 'end tag', tag_name => 'p'};
1989     return;
1990     } elsif ({
1991     table => 1, caption => 1, td => 1, th => 1,
1992     button => 1, marquee => 1, object => 1, html => 1,
1993     }->{$_->[1]}) {
1994     last INSCOPE;
1995     }
1996     } # INSCOPE
1997    
1998     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
1999    
2000     $insertion_mode = 'in table'; ## subsequent tokens are handled by the 'in table' insertion mode
2001    
2002     !!!next-token;
2003     return;
2004     } elsif ({
2005     area => 1, basefont => 1, bgsound => 1, br => 1,
2006     embed => 1, img => 1, param => 1, spacer => 1, wbr => 1,
2007     image => 1,
2008     }->{$token->{tag_name}}) {
2009     if ($token->{tag_name} eq 'image') {
2010     !!!parse-error;
2011     $token->{tag_name} = 'img';
2012     }
2013    
2014     $reconstruct_active_formatting_elements->();
2015    
2016     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2017     pop @$open_elements;
2018    
2019     !!!next-token;
2020     return;
2021     } elsif ($token->{tag_name} eq 'hr') {
2022     ## has a p element in scope
2023     INSCOPE: for (reverse @$open_elements) {
2024     if ($_->[1] eq 'p') {
2025     !!!back-token;
2026     $token = {type => 'end tag', tag_name => 'p'};
2027     return;
2028     } elsif ({
2029     table => 1, caption => 1, td => 1, th => 1,
2030     button => 1, marquee => 1, object => 1, html => 1,
2031     }->{$_->[1]}) {
2032     last INSCOPE;
2033     }
2034     } # INSCOPE
2035    
2036     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2037     pop @$open_elements;
2038    
2039     !!!next-token;
2040     return;
2041     } elsif ($token->{tag_name} eq 'input') {
2042     $reconstruct_active_formatting_elements->();
2043    
2044     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2045     ## TODO: associate with $form_element if defined
2046     pop @$open_elements;
2047    
2048     !!!next-token;
2049     return;
2050     } elsif ($token->{tag_name} eq 'isindex') {
2051     !!!parse-error;
2052    
2053     if (defined $form_element) {
2054     ## Ignore the token
2055     !!!next-token;
2056     return;
2057     } else {
2058     my $at = $token->{attributes};
2059     $at->{name} = {name => 'name', value => 'isindex'};
2060     my @tokens = (
2061     {type => 'start tag', tag_name => 'form'},
2062     {type => 'start tag', tag_name => 'hr'},
2063     {type => 'start tag', tag_name => 'p'},
2064     {type => 'start tag', tag_name => 'label'},
2065     {type => 'character',
2066     data => 'This is a searchable index. Insert your search keywords here: '}, # SHOULD
2067     ## TODO: make this configurable
2068     {type => 'start tag', tag_name => 'input', attributes => $at},
2069     #{type => 'character', data => ''}, # SHOULD
2070     {type => 'end tag', tag_name => 'label'},
2071     {type => 'end tag', tag_name => 'p'},
2072     {type => 'start tag', tag_name => 'hr'},
2073     {type => 'end tag', tag_name => 'form'},
2074     );
2075     $token = shift @tokens;
2076     !!!back-token (@tokens);
2077     return;
2078     }
2079     } elsif ({
2080     textarea => 1,
2081     noembed => 1,
2082     noframes => 1,
2083     noscript => 0, ## TODO: 1 if scripting is enabled
2084     }->{$token->{tag_name}}) {
2085     my $tag_name = $token->{tag_name};
2086     my $el;
2087     !!!create-element ($el, $token->{tag_name}, $token->{attributes});
2088    
2089     if ($token->{tag_name} eq 'textarea') {
2090     ## TODO: form_element if defined
2091     $self->{content_model_flag} = 'RCDATA';
2092     } else {
2093     $self->{content_model_flag} = 'CDATA';
2094     }
2095    
2096     $insert->($el);
2097    
2098     my $text = '';
2099     !!!next-token;
2100     while ($token->{type} eq 'character') {
2101     $text .= $token->{data};
2102     !!!next-token;
2103     }
2104     if (length $text) {
2105     $el->manakai_append_text ($text);
2106     }
2107    
2108     $self->{content_model_flag} = 'PCDATA';
2109    
2110     if ($token->{type} eq 'end tag' and
2111     $token->{tag_name} eq $tag_name) {
2112     ## Ignore the token
2113     } else {
2114     !!!parse-error;
2115     ## ISSUE: And ignore?
2116     }
2117     !!!next-token;
2118     return;
2119     } elsif ($token->{tag_name} eq 'select') { ## the token type is already known to be 'start tag' here, so test the tag name
2120     $reconstruct_active_formatting_elements->();
2121    
2122     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2123    
2124     $insertion_mode = 'in select';
2125     !!!next-token;
2126     return;
2127     } elsif ({
2128     caption => 1, col => 1, colgroup => 1, frame => 1,
2129     frameset => 1, head => 1, option => 1, optgroup => 1,
2130     tbody => 1, td => 1, tfoot => 1, th => 1,
2131     thead => 1, tr => 1,
2132     }->{$token->{tag_name}}) {
2133     !!!parse-error;
2134     ## Ignore the token
2135     !!!next-token;
2136     return;
2137    
2138     ## ISSUE: An issue on HTML5 new elements in the spec.
2139     } else {
2140     $reconstruct_active_formatting_elements->();
2141    
2142     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2143    
2144     !!!next-token;
2145     return;
2146     }
2147     } elsif ($token->{type} eq 'end tag') {
2148     if ($token->{tag_name} eq 'body') {
2149     if (@$open_elements > 1 and $open_elements->[1]->[1] eq 'body') {
2150     ## ISSUE: There is an issue in the spec.
2151     if ($open_elements->[-1]->[1] ne 'body') {
2152     !!!parse-error;
2153     }
2154     $insertion_mode = 'after body';
2155     !!!next-token;
2156     return;
2157     } else {
2158     !!!parse-error;
2159     ## Ignore the token
2160     !!!next-token;
2161     return;
2162     }
2163     } elsif ($token->{tag_name} eq 'html') {
2164     if (@$open_elements > 1 and $open_elements->[1]->[1] eq 'body') {
2165     ## ISSUE: There is an issue in the spec.
2166     if ($open_elements->[-1]->[1] ne 'body') {
2167     !!!parse-error;
2168     }
2169     $insertion_mode = 'after body';
2170     ## reprocess
2171     return;
2172     } else {
2173     !!!parse-error;
2174     ## Ignore the token
2175     !!!next-token;
2176     return;
2177     }
2178     } elsif ({
2179     address => 1, blockquote => 1, center => 1, dir => 1,
2180     div => 1, dl => 1, fieldset => 1, listing => 1,
2181     menu => 1, ol => 1, pre => 1, ul => 1,
2182     form => 1,
2183     p => 1,
2184     dd => 1, dt => 1, li => 1,
2185     button => 1, marquee => 1, object => 1,
2186     }->{$token->{tag_name}}) {
2187     ## has an element in scope
2188     my $i;
2189     INSCOPE: for (reverse 0..$#$open_elements) {
2190     my $node = $open_elements->[$_];
2191     if ($node->[1] eq $token->{tag_name}) {
2192     ## generate implied end tags
2193     if ({
2194     dd => ($token->{tag_name} ne 'dd'),
2195     dt => ($token->{tag_name} ne 'dt'),
2196     li => ($token->{tag_name} ne 'li'),
2197     p => ($token->{tag_name} ne 'p'),
2198     td => 1, th => 1, tr => 1,
2199     }->{$open_elements->[-1]->[1]}) {
2200     !!!back-token;
2201     $token = {type => 'end tag',
2202     tag_name => $open_elements->[-1]->[1]}; # MUST
2203     return;
2204     }
2205     $i = $_;
2206     last INSCOPE unless $token->{tag_name} eq 'p';
2207     } elsif ({
2208     table => 1, caption => 1, td => 1, th => 1,
2209     button => 1, marquee => 1, object => 1, html => 1,
2210     }->{$node->[1]}) {
2211     last INSCOPE;
2212     }
2213     } # INSCOPE
2214    
2215     if ($open_elements->[-1]->[1] ne $token->{tag_name}) {
2216     !!!parse-error;
2217     }
2218    
2219     splice @$open_elements, $i if defined $i;
2220     undef $form_element if $token->{tag_name} eq 'form';
2221     $clear_up_to_marker->()
2222     if {
2223     button => 1, marquee => 1, object => 1,
2224     }->{$token->{tag_name}};
2225     !!!next-token;
2226     return;
2227     } elsif ({
2228     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2229     }->{$token->{tag_name}}) {
2230     ## has an element in scope
2231     my $i;
2232     INSCOPE: for (reverse 0..$#$open_elements) {
2233     my $node = $open_elements->[$_];
2234     if ({
2235     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2236     }->{$node->[1]}) {
2237     ## generate implied end tags
2238     if ({
2239     dd => 1, dt => 1, li => 1, p => 1,
2240     td => 1, th => 1, tr => 1,
2241     }->{$open_elements->[-1]->[1]}) {
2242     !!!back-token;
2243     $token = {type => 'end tag',
2244     tag_name => $open_elements->[-1]->[1]}; # MUST
2245     return;
2246     }
2247     $i = $_;
2248     last INSCOPE;
2249     } elsif ({
2250     table => 1, caption => 1, td => 1, th => 1,
2251     button => 1, marquee => 1, object => 1, html => 1,
2252     }->{$node->[1]}) {
2253     last INSCOPE;
2254     }
2255     } # INSCOPE
2256    
2257     if ($open_elements->[-1]->[1] ne $token->{tag_name}) {
2258     !!!parse-error;
2259     }
2260    
2261     splice @$open_elements, $i if defined $i;
2262     !!!next-token;
2263     return;
2264     } elsif ({ ## End tag of a or of a formatting element: delegate to $formatting_end_tag
2265     a => 1,
2266     b => 1, big => 1, em => 1, font => 1, i => 1,
2267     nobr => 1, s => 1, small => 1, strike => 1,
2268     strong => 1, tt => 1, u => 1,
2269     }->{$token->{tag_name}}) {
2270     $formatting_end_tag->($token->{tag_name});
2271     return;
2272     } elsif ({
2273     caption => 1, col => 1, colgroup => 1, frame => 1,
2274     frameset => 1, head => 1, option => 1, optgroup => 1,
2275     tbody => 1, td => 1, tfoot => 1, th => 1,
2276     thead => 1, tr => 1,
2277     area => 1, basefont => 1, bgsound => 1, br => 1,
2278     embed => 1, hr => 1, iframe => 1, image => 1,
2279     img => 1, input => 1, isindex=> 1, noembed => 1,
2280     noframes => 1, param => 1, select => 1, spacer => 1,
2281     table => 1, textarea => 1, wbr => 1,
2282     noscript => 0, ## TODO: if scripting is enabled
2283     }->{$token->{tag_name}}) {
2284     !!!parse-error;
2285     ## Ignore the token
2286     !!!next-token;
2287     return;
2288    
2289     ## ISSUE: Issue on HTML5 new elements in spec
2290    
2291     } else {
2292     ## Step 1
2293     my $node_i = -1;
2294     my $node = $open_elements->[$node_i];
2295    
2296     ## Step 2
2297     S2: { ## Walk down the stack of open elements ($node_i is a negative index) looking for a match for the end tag
2298     if ($node->[1] eq $token->{tag_name}) {
2299     ## Step 1
2300     ## generate implied end tags
2301     if ({
2302     dd => 1, dt => 1, li => 1, p => 1,
2303     td => 1, th => 1, tr => 1,
2304     }->{$open_elements->[-1]->[1]}) {
2305     !!!back-token;
2306     $token = {type => 'end tag',
2307     tag_name => $open_elements->[-1]->[1]}; # MUST
2308     return;
2309     }
2310    
2311     ## Step 2
2312     if ($token->{tag_name} ne $open_elements->[-1]->[1]) {
2313     !!!parse-error;
2314     }
2315    
2316     ## Step 3: pop the matched node and everything above it, then consume the end tag
2317     splice @$open_elements, $node_i;
2318     !!!next-token; last S2; ## without fetching the next token the caller would reprocess this same end tag
2319     } else {
2320     ## Step 3
2321     if (not $formatting_category->{$node->[1]} and
2322     #not $phrasing_category->{$node->[1]} and
2323     ($special_category->{$node->[1]} or
2324     $scoping_category->{$node->[1]})) {
2325     !!!parse-error;
2326     ## Ignore the token
2327     !!!next-token;
2328     last S2;
2329     }
2330     }
2331    
2332     ## Step 4
2333     $node_i--;
2334     $node = $open_elements->[$node_i];
2335    
2336     ## Step 5;
2337     redo S2;
2338     } # S2
2339     }
2340     }
2341     }; # $in_body
2342    
2343     B: {
2344     if ($phase eq 'initial') {
2345     if ($token->{type} eq 'DOCTYPE') {
2346     if ($token->{error}) {
2347     ## ISSUE: Spec currently left this case undefined.
2348     }
2349     my $doctype = $self->{document}->create_document_type_definition
2350     ($token->{name});
2351     $self->{document}->append_child ($doctype);
2352     $phase = 'root element';
2353     !!!next-token;
2354     redo B;
2355     } elsif ({
2356     comment => 1,
2357     'start tag' => 1,
2358     'end tag' => 1,
2359     'end-of-file' => 1,
2360     }->{$token->{type}}) {
2361     ## ISSUE: Spec currently left this case undefined.
2362     $phase = 'root element';
2363     ## reprocess
2364     redo B;
2365     } elsif ($token->{type} eq 'character') {
2366     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
2367     $self->{document}->manakai_append_text ($1);
2368     ## ISSUE: DOM3 Core does not allow Document > Text
2369     unless (length $token->{data}) {
2370     ## Stay in the phase
2371     !!!next-token;
2372     redo B;
2373     }
2374     }
2375     ## ISSUE: Spec currently left this case undefined.
2376     $phase = 'root element';
2377     ## reprocess
2378     redo B;
2379     } else {
2380     die "$0: $token->{type}: Unknown token";
2381     }
2382     } elsif ($phase eq 'root element') {
2383     if ($token->{type} eq 'DOCTYPE') {
2384     !!!parse-error;
2385     ## Ignore the token
2386     ## Stay in the phase
2387     !!!next-token;
2388     redo B;
2389     } elsif ($token->{type} eq 'comment') {
2390     my $comment = $self->{document}->create_comment ($token->{data});
2391     $self->{document}->append_child ($comment);
2392     ## Stay in the phase
2393     !!!next-token;
2394     redo B;
2395     } elsif ($token->{type} eq 'character') {
2396     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
2397     $self->{document}->manakai_append_text ($1);
2398     ## ISSUE: DOM3 Core does not allow Document > Text
2399     unless (length $token->{data}) {
2400     ## Stay in the phase
2401     !!!next-token;
2402     redo B;
2403     }
2404     }
2405     #
2406     } elsif ({
2407     'start tag' => 1,
2408     'end tag' => 1,
2409     'end-of-file' => 1,
2410     }->{$token->{type}}) {
2411     ## ISSUE: There is an issue in the spec
2412     #
2413     } else {
2414     die "$0: $token->{type}: Unknown token";
2415     }
2416     my $root_element; !!!create-element ($root_element, 'html');
2417     $self->{document}->append_child ($root_element);
2418     $open_elements = [[$root_element, 'html']];
2419     $phase = 'main';
2420     ## reprocess
2421     redo B;
2422     } elsif ($phase eq 'main') {
2423     if ($token->{type} eq 'DOCTYPE') {
2424     !!!parse-error;
2425     ## Ignore the token
2426     ## Stay in the phase
2427     !!!next-token;
2428     redo B;
2429     } elsif ($token->{type} eq 'start tag' and
2430     $token->{tag_name} eq 'html') {
2431     ## TODO: unless it is the first start tag token, parse-error
2432     my $top_el = $open_elements->[0]->[0];
2433     for my $attr_name (keys %{$token->{attributes}}) { ## copy attributes the root element does not already have
2434     unless ($top_el->has_attribute_ns (undef, $attr_name)) {
2435     $top_el->set_attribute_ns (undef, [undef, $attr_name],
2436     $token->{attributes}->{$attr_name}->{value}); ## each entry is keyed by attribute name and holds a {value}
2437     }
2438     }
2439     !!!next-token;
2440     redo B;
2441     } elsif ($token->{type} eq 'end-of-file') {
2442     ## Generate implied end tags
2443     if ({
2444     dd => 1, dt => 1, li => 1, p => 1, td => 1, th => 1, tr => 1,
2445     }->{$open_elements->[-1]->[1]}) {
2446     !!!back-token;
2447     $token = {type => 'end tag', tag_name => $open_elements->[-1]->[1]};
2448     redo B;
2449     }
2450    
2451     if (@$open_elements > 2 or
2452     (@$open_elements == 2 and $open_elements->[1]->[1] ne 'body')) {
2453     !!!parse-error;
2454     } else {
2455     ## TODO: inner_html parser and @$open_elements > 1 and $open_elements->[1] ne 'body', then parse-error
2456     }
2457    
2458     ## Stop parsing
2459     last B;
2460    
2461     ## ISSUE: There is an issue in the spec.
2462     } else {
2463     if ($insertion_mode eq 'before head') {
2464     if ($token->{type} eq 'character') {
2465     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
2466     $open_elements->[-1]->[0]->manakai_append_text ($1);
2467     unless (length $token->{data}) {
2468     !!!next-token;
2469     redo B;
2470     }
2471     }
2472     ## As if <head>
2473     !!!create-element ($head_element, 'head');
2474     $open_elements->[-1]->[0]->append_child ($head_element);
2475     push @$open_elements, [$head_element, 'head'];
2476     $insertion_mode = 'in head';
2477     ## reprocess
2478     redo B;
2479     } elsif ($token->{type} eq 'comment') {
2480     my $comment = $self->{document}->create_comment ($token->{data});
2481     $open_elements->[-1]->[0]->append_child ($comment);
2482     !!!next-token;
2483     redo B;
2484     } elsif ($token->{type} eq 'start tag') {
2485     my $attr = $token->{tag_name} eq 'head' ? $token->{attributes} : {};
2486     !!!create-element ($head_element, 'head', $attr);
2487     $open_elements->[-1]->[0]->append_child ($head_element);
2488     push @$open_elements, [$head_element, 'head'];
2489     $insertion_mode = 'in head';
2490     if ($token->{tag_name} eq 'head') {
2491     !!!next-token;
2492     #} elsif ({
2493     # base => 1, link => 1, meta => 1,
2494     # script => 1, style => 1, title => 1,
2495     # }->{$token->{tag_name}}) {
2496     # ## reprocess
2497     } else {
2498     ## reprocess
2499     }
2500     redo B;
2501     } elsif ($token->{type} eq 'end tag') {
2502     if ($token->{tag_name} eq 'html') {
2503     ## As if <head>
2504     !!!create-element ($head_element, 'head');
2505     $open_elements->[-1]->[0]->append_child ($head_element);
2506     push @$open_elements, [$head_element, 'head'];
2507     $insertion_mode = 'in head';
2508     ## reprocess
2509     redo B;
2510     } else {
2511     !!!parse-error;
2512     !!!next-token; ## Ignore the token: fetch the next one so that redo B does not loop forever on this end tag
2513     redo B;
2514     }
2515     } else {
2516     die "$0: $token->{type}: Unknown type";
2517     }
2518     } elsif ($insertion_mode eq 'in head') {
2519     if ($token->{type} eq 'character') {
2520     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
2521     $open_elements->[-1]->[0]->manakai_append_text ($1);
2522     unless (length $token->{data}) {
2523     !!!next-token;
2524     redo B;
2525     }
2526     }
2527    
2528     #
2529     } elsif ($token->{type} eq 'comment') {
2530     my $comment = $self->{document}->create_comment ($token->{data});
2531     $open_elements->[-1]->[0]->append_child ($comment);
2532     !!!next-token;
2533     redo B;
2534     } elsif ($token->{type} eq 'start tag') {
2535     if ($token->{tag_name} eq 'title') {
2536     my $title_el; !!!create-element ($title_el, 'title');
2537     (defined $head_element ? $head_element : $open_elements->[-1]->[0])
2538     ->append_child ($title_el);
2539     $self->{content_model_flag} = 'RCDATA';
2540    
2541     my $text = '';
2542     !!!next-token;
2543     while ($token->{type} eq 'character') {
2544     $text .= $token->{data};
2545     !!!next-token;
2546     }
2547     if (length $text) {
2548     $title_el->manakai_append_text ($text);
2549     }
2550    
2551     $self->{content_model_flag} = 'PCDATA';
2552    
2553     if ($token->{type} eq 'end tag' and
2554     $token->{tag_name} eq 'title') {
2555     ## Ignore the token
2556     } else {
2557     !!!parse-error;
2558     ## ISSUE: And ignore?
2559     }
2560     !!!next-token;
2561     redo B;
2562     } elsif ($token->{tag_name} eq 'style') {
2563     $style_start_tag->();
2564     redo B;
2565     } elsif ($token->{tag_name} eq 'script') {
2566     $script_start_tag->();
2567     redo B;
2568     } elsif ({base => 1, link => 1, meta => 1}->{$token->{tag_name}}) {
2569     ## NOTE: There are "as if in head" code clones
2570     my $el;
2571     !!!create-element ($el, $token->{tag_name}, $token->{attributes});
2572     (defined $head_element ? $head_element : $open_elements->[-1]->[0])
2573     ->append_child ($el);
2574    
2575     ## ISSUE: Issue on magical <base> in the spec
2576    
2577     !!!next-token;
2578     redo B;
2579     } elsif ($token->{tag_name} eq 'head') {
2580     !!!parse-error;
2581     ## Ignore the token
2582     !!!next-token;
2583     redo B;
2584     } else {
2585     #
2586     }
2587     } elsif ($token->{type} eq 'end tag') {
2588     if ($token->{tag_name} eq 'head') {
2589     if ($open_elements->[-1]->[1] eq 'head') {
2590     pop @$open_elements;
2591     } else {
2592     !!!parse-error;
2593     }
2594     $insertion_mode = 'after head';
2595     !!!next-token;
2596     redo B;
2597     } elsif ($token->{tag_name} eq 'html') {
2598     #
2599     } else {
2600     !!!parse-error;
2601     ## Ignore the token
2602     !!!next-token;
2603     redo B;
2604     }
2605     } else {
2606     #
2607     }
2608    
2609     if ($open_elements->[-1]->[1] eq 'head') {
2610     ## As if </head>
2611     pop @$open_elements;
2612     }
2613     $insertion_mode = 'after head';
2614     ## reprocess
2615     redo B;
2616    
2617     ## ISSUE: An issue in the spec.
2618     } elsif ($insertion_mode eq 'after head') {
2619     if ($token->{type} eq 'character') {
2620     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
2621     $open_elements->[-1]->[0]->manakai_append_text ($1);
2622     unless (length $token->{data}) {
2623     !!!next-token;
2624     redo B;
2625     }
2626     }
2627    
2628     #
2629     } elsif ($token->{type} eq 'comment') {
2630     my $comment = $self->{document}->create_comment ($token->{data});
2631     $open_elements->[-1]->[0]->append_child ($comment);
2632     !!!next-token;
2633     redo B;
2634     } elsif ($token->{type} eq 'start tag') {
2635     if ($token->{tag_name} eq 'body') {
2636     !!!insert-element ('body', $token->{attributes});
2637     $insertion_mode = 'in body';
2638     !!!next-token;
2639     redo B;
2640     } elsif ($token->{tag_name} eq 'frameset') {
2641     !!!insert-element ('frameset', $token->{attributes});
2642     $insertion_mode = 'in frameset';
2643     !!!next-token;
2644     redo B;
2645     } elsif ({
2646     base => 1, link => 1, meta => 1,
2647     script=> 1, style => 1, title => 1,
2648     }->{$token->{tag_name}}) {
2649     !!!parse-error;
2650     $insertion_mode = 'in head';
2651     ## reprocess
2652     redo B;
2653     } else {
2654     #
2655     }
2656     } else {
2657     #
2658     }
2659    
2660     ## As if <body>
2661     !!!insert-element ('body');
2662     $insertion_mode = 'in body';
2663     ## reprocess
2664     redo B;
2665     } elsif ($insertion_mode eq 'in body') {
2666     if ($token->{type} eq 'character') {
2667     ## NOTE: There is a code clone of "character in body".
2668     $reconstruct_active_formatting_elements->();
2669    
2670     $open_elements->[-1]->[0]->manakai_append_text ($token->{data});
2671    
2672     !!!next-token;
2673     redo B;
2674     } elsif ($token->{type} eq 'comment') {
2675     ## NOTE: There is a code clone of "comment in body".
2676     my $comment = $self->{document}->create_comment ($token->{data});
2677     $open_elements->[-1]->[0]->append_child ($comment);
2678     !!!next-token;
2679     redo B;
2680     } else {
2681     $in_body->(sub {
2682     $open_elements->[-1]->[0]->append_child (shift);
2683     });
2684     redo B;
2685     }
2686     } elsif ($insertion_mode eq 'in table') {
2687     if ($token->{type} eq 'character') {
2688     $reconstruct_active_formatting_elements->();
2689    
2690     $open_elements->[-1]->[0]->manakai_append_text ($token->{data});
2691    
2692     !!!next-token;
2693     redo B;
2694     } elsif ($token->{type} eq 'comment') {
2695     my $comment = $self->{document}->create_comment ($token->{data});
2696     $open_elements->[-1]->[0]->append_child ($comment);
2697     !!!next-token;
2698     redo B;
2699     } elsif ($token->{type} eq 'start tag') {
2700     if ({
2701     caption => 1,
2702     colgroup => 1,
2703     tbody => 1, tfoot => 1, thead => 1,
2704     }->{$token->{tag_name}}) {
2705     ## Clear back to table context
2706     while ($open_elements->[-1]->[1] ne 'table' and
2707     $open_elements->[-1]->[1] ne 'html') {
2708     !!!parse-error;
2709     pop @$open_elements;
2710     }
2711    
2712     push @$active_formatting_elements, ['#marker', '']
2713     if $token->{tag_name} eq 'caption';
2714    
2715     !!!insert-element ($token->{tag_name}, $token->{attributes});
2716     $insertion_mode = {
2717     caption => 'in caption',
2718     colgroup => 'in column group',
2719     tbody => 'in table body',
2720     tfoot => 'in table body',
2721     thead => 'in table body',
2722     }->{$token->{tag_name}};
2723     !!!next-token;
2724     redo B;
2725     } elsif ({
2726     col => 1,
2727     td => 1, th => 1, tr => 1,
2728     }->{$token->{tag_name}}) {
2729     ## Clear back to table context
2730     while ($open_elements->[-1]->[1] ne 'table' and
2731     $open_elements->[-1]->[1] ne 'html') {
2732     !!!parse-error;
2733     pop @$open_elements;
2734     }
2735    
2736     !!!insert-element ($token->{tag_name} eq 'col' ? 'colgroup' : 'tbody');
2737     $insertion_mode = $token->{tag_name} eq 'col'
2738     ? 'in column group' : 'in table body';
2739     ## reprocess
2740     redo B;
2741     } elsif ($token->{tag_name} eq 'table') {
2742     ## NOTE: There are code clones for this "table in table"
2743     !!!parse-error;
2744    
2745     ## As if </table>
2746     ## have a table element in table scope
2747     my $i;
2748     INSCOPE: for (reverse 0..$#$open_elements) {
2749     my $node = $open_elements->[$_];
2750     if ($node->[1] eq 'table') {
2751     $i = $_;
2752     last INSCOPE;
2753     } elsif ({
2754     table => 1, html => 1,
2755     }->{$node->[1]}) {
2756     last INSCOPE;
2757     }
2758     } # INSCOPE
2759     unless (defined $i) {
2760     !!!parse-error;
2761     ## Ignore tokens </table><table>
2762     !!!next-token;
2763     redo B;
2764     }
2765    
2766     ## generate implied end tags
2767     if ({
2768     dd => 1, dt => 1, li => 1, p => 1,
2769     td => 1, th => 1, tr => 1,
2770     }->{$open_elements->[-1]->[1]}) {
2771     !!!back-token; # <table>
2772     $token = {type => 'end tag', tag_name => 'table'};
2773     !!!back-token;
2774     $token = {type => 'end tag',
2775     tag_name => $open_elements->[-1]->[1]}; # MUST
2776     redo B;
2777     }
2778    
2779     if ($open_elements->[-1]->[1] ne 'table') {
2780     !!!parse-error;
2781     }
2782    
2783     splice @$open_elements, $i;
2784    
2785     $reset_insertion_mode->();
2786    
2787     ## reprocess
2788     redo B;
2789     } else {
2790     #
2791     }
2792     } elsif ($token->{type} eq 'end tag') {
2793     if ($token->{tag_name} eq 'table') {
2794     ## have a table element in table scope
2795     my $i;
2796     INSCOPE: for (reverse 0..$#$open_elements) {
2797     my $node = $open_elements->[$_];
2798     if ($node->[1] eq $token->{tag_name}) {
2799     $i = $_;
2800     last INSCOPE;
2801     } elsif ({
2802     table => 1, html => 1,
2803     }->{$node->[1]}) {
2804     last INSCOPE;
2805     }
2806     } # INSCOPE
2807     unless (defined $i) {
2808     !!!parse-error;
2809     ## Ignore the token
2810     !!!next-token;
2811     redo B;
2812     }
2813    
2814     ## generate implied end tags
2815     if ({
2816     dd => 1, dt => 1, li => 1, p => 1,
2817     td => 1, th => 1, tr => 1,
2818     }->{$open_elements->[-1]->[1]}) {
2819     !!!back-token;
2820     $token = {type => 'end tag',
2821     tag_name => $open_elements->[-1]->[1]}; # MUST
2822     redo B;
2823     }
2824    
2825     if ($open_elements->[-1]->[1] ne 'table') {
2826     !!!parse-error;
2827     }
2828    
2829     splice @$open_elements, $i;
2830    
2831     $reset_insertion_mode->();
2832    
2833     !!!next-token;
2834     redo B;
2835     } elsif ({
2836     body => 1, caption => 1, col => 1, colgroup => 1,
2837     html => 1, tbody => 1, td => 1, tfoot => 1, th => 1,
2838     thead => 1, tr => 1,
2839     }->{$token->{tag_name}}) {
2840     !!!parse-error;
2841     ## Ignore the token
2842     !!!next-token;
2843     redo B;
2844     } else {
2845     #
2846     }
2847     } else {
2848     #
2849     }
2850    
2851     ## NOTE: There are code clones of "misc in table".
2852     !!!parse-error;
2853     $in_body->(sub {
2854     my $child = shift;
2855     if ({
2856     table => 1, tbody => 1, tfoot => 1,
2857     thead => 1, tr => 1,
2858     }->{$open_elements->[-1]->[1]}) {
2859     # MUST
2860     my $foster_parent_element;
2861     my $next_sibling;
2862     OE: for (reverse 0..$#$open_elements) {
2863     if ($open_elements->[$_]->[1] eq 'table') {
2864     my $parent = $open_elements->[$_]->[0]->parent_node;
2865     if (defined $parent and $parent->node_type == 1) {
2866     $foster_parent_element = $parent;
2867     $next_sibling = $open_elements->[$_]->[0];
2868     } else {
2869     $foster_parent_element
2870     = $open_elements->[$_ - 1]->[0];
2871     }
2872     last OE;
2873     }
2874     } # OE
2875     $foster_parent_element = $open_elements->[0]->[0]
2876     unless defined $foster_parent_element;
2877     $foster_parent_element->insert_before
2878     ($child, $next_sibling);
2879     } else {
2880     $open_elements->[-1]->[0]->append_child ($child);
2881     }
2882     });
2883     redo B;
2884     } elsif ($insertion_mode eq 'in caption') {
2885     if ($token->{type} eq 'start tag') {
2886     if ({
2887     caption => 1, col => 1, colgroup => 1, tbody => 1,
2888     td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
2889     }->{$token->{tag_name}}) {
2890     !!!parse-error;
2891    
2892     ## As if </caption>
2893     ## have a table element in table scope
2894     my $i;
2895     INSCOPE: for (reverse 0..$#$open_elements) {
2896     my $node = $open_elements->[$_];
2897     if ($node->[1] eq 'caption') {
2898     $i = $_;
2899     last INSCOPE;
2900     } elsif ({
2901     table => 1, html => 1,
2902     }->{$node->[1]}) {
2903     last INSCOPE;
2904     }
2905     } # INSCOPE
2906     unless (defined $i) {
2907     !!!parse-error;
2908     ## Ignore the token
2909     !!!next-token;
2910     redo B;
2911     }
2912    
2913     ## generate implied end tags
2914     if ({
2915     dd => 1, dt => 1, li => 1, p => 1,
2916     td => 1, th => 1, tr => 1,
2917     }->{$open_elements->[-1]->[1]}) {
2918     !!!back-token; # <?>
2919     $token = {type => 'end tag', tag_name => 'caption'};
2920     !!!back-token;
2921     $token = {type => 'end tag',
2922     tag_name => $open_elements->[-1]->[1]}; # MUST
2923     redo B;
2924     }
2925    
2926     if ($open_elements->[-1]->[1] ne 'caption') {
2927     !!!parse-error;
2928     }
2929    
2930     splice @$open_elements, $i;
2931    
2932     $clear_up_to_marker->();
2933    
2934     $insertion_mode = 'in table';
2935    
2936     ## reprocess
2937     redo B;
2938     } else {
2939     #
2940     }
2941     } elsif ($token->{type} eq 'end tag') {
2942     if ($token->{tag_name} eq 'caption') {
2943     ## have a table element in table scope
2944     my $i;
2945     INSCOPE: for (reverse 0..$#$open_elements) {
2946     my $node = $open_elements->[$_];
2947     if ($node->[1] eq $token->{tag_name}) {
2948     $i = $_;
2949     last INSCOPE;
2950     } elsif ({
2951     table => 1, html => 1,
2952     }->{$node->[1]}) {
2953     last INSCOPE;
2954     }
2955     } # INSCOPE
2956     unless (defined $i) {
2957     !!!parse-error;
2958     ## Ignore the token
2959     !!!next-token;
2960     redo B;
2961     }
2962    
2963     ## generate implied end tags
2964     if ({
2965     dd => 1, dt => 1, li => 1, p => 1,
2966     td => 1, th => 1, tr => 1,
2967     }->{$open_elements->[-1]->[1]}) {
2968     !!!back-token;
2969     $token = {type => 'end tag',
2970     tag_name => $open_elements->[-1]->[1]}; # MUST
2971     redo B;
2972     }
2973    
2974     if ($open_elements->[-1]->[1] ne 'caption') {
2975     !!!parse-error;
2976     }
2977    
2978     splice @$open_elements, $i;
2979    
2980     $clear_up_to_marker->();
2981    
2982     $insertion_mode = 'in table';
2983    
2984     !!!next-token;
2985     redo B;
2986     } elsif ($token->{tag_name} eq 'table') {
2987     !!!parse-error;
2988    
2989     ## As if </caption>
2990     ## have a table element in table scope
2991     my $i;
2992     INSCOPE: for (reverse 0..$#$open_elements) {
2993     my $node = $open_elements->[$_];
2994     if ($node->[1] eq 'caption') {
2995     $i = $_;
2996     last INSCOPE;
2997     } elsif ({
2998     table => 1, html => 1,
2999     }->{$node->[1]}) {
3000     last INSCOPE;
3001     }
3002     } # INSCOPE
3003     unless (defined $i) {
3004     !!!parse-error;
3005     ## Ignore the token
3006     !!!next-token;
3007     redo B;
3008     }
3009    
3010     ## generate implied end tags
3011     if ({
3012     dd => 1, dt => 1, li => 1, p => 1,
3013     td => 1, th => 1, tr => 1,
3014     }->{$open_elements->[-1]->[1]}) {
3015     !!!back-token; # </table>
3016     $token = {type => 'end tag', tag_name => 'caption'};
3017     !!!back-token;
3018     $token = {type => 'end tag',
3019     tag_name => $open_elements->[-1]->[1]}; # MUST
3020     redo B;
3021     }
3022    
3023     if ($open_elements->[-1]->[1] ne 'caption') {
3024     !!!parse-error;
3025     }
3026    
3027     splice @$open_elements, $i;
3028    
3029     $clear_up_to_marker->();
3030    
3031     $insertion_mode = 'in table';
3032    
3033     ## reprocess
3034     redo B;
3035     } elsif ({
3036     body => 1, col => 1, colgroup => 1,
3037     html => 1, tbody => 1, td => 1, tfoot => 1,
3038     th => 1, thead => 1, tr => 1,
3039     }->{$token->{tag_name}}) {
         ## These end tags are not acted upon in the "in caption" mode:
         ## report a parse error and drop the token.
3040     !!!parse-error;
3041     ## Ignore the token
         ## The tokenizer has to be advanced before the loop is restarted;
         ## otherwise the very same end tag is reprocessed forever.
         !!!next-token;
3042     redo B;
3043     } else {
3044     #
3045     }
3046     } else {
3047     #
3048     }
3049    
3050     $in_body->(sub {
3051     $open_elements->[-1]->[0]->append_child (shift);
3052     });
3053     redo B;
3054     } elsif ($insertion_mode eq 'in column group') {
3055     if ($token->{type} eq 'character') {
3056     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3057     $open_elements->[-1]->[0]->manakai_append_text ($1);
3058     unless (length $token->{data}) {
3059     !!!next-token;
3060     redo B;
3061     }
3062     }
3063    
3064     #
3065     } elsif ($token->{type} eq 'comment') {
3066     my $comment = $self->{document}->create_comment ($token->{data});
3067     $open_elements->[-1]->[0]->append_child ($comment);
3068     !!!next-token;
3069     redo B;
3070     } elsif ($token->{type} eq 'start tag') {
3071     if ($token->{tag_name} eq 'col') {
3072     !!!insert-element ($token->{tag_name}, $token->{attributes});
3073     pop @$open_elements;
3074     !!!next-token;
3075     redo B;
3076     } else {
3077     #
3078     }
3079     } elsif ($token->{type} eq 'end tag') {
3080     if ($token->{tag_name} eq 'colgroup') {
3081     if ($open_elements->[-1]->[1] eq 'html') {
3082     !!!parse-error;
3083     ## Ignore the token
3084     !!!next-token;
3085     redo B;
3086     } else {
3087     pop @$open_elements; # colgroup
3088     $insertion_mode = 'in table';
3089     !!!next-token;
3090     redo B;
3091     }
3092     } elsif ($token->{tag_name} eq 'col') {
3093     !!!parse-error;
3094     ## Ignore the token
3095     !!!next-token;
3096     redo B;
3097     } else {
3098     #
3099     }
3100     } else {
3101     #
3102     }
3103    
3104     ## As if </colgroup>
3105     if ($open_elements->[-1]->[1] eq 'html') {
3106     !!!parse-error;
3107     ## Ignore the token
3108     !!!next-token;
3109     redo B;
3110     } else {
3111     pop @$open_elements; # colgroup
3112     $insertion_mode = 'in table';
3113     ## reprocess
3114     redo B;
3115     }
3116     } elsif ($insertion_mode eq 'in table body') {
3117     if ($token->{type} eq 'character') {
3118     ## Copied from 'in table'
3119     $reconstruct_active_formatting_elements->();
3120    
3121     $open_elements->[-1]->[0]->manakai_append_text ($token->{data});
3122    
3123     !!!next-token;
3124     redo B;
3125     } elsif ($token->{type} eq 'comment') {
3126     ## Copied from 'in table'
3127     my $comment = $self->{document}->create_comment ($token->{data});
3128     $open_elements->[-1]->[0]->append_child ($comment);
3129     !!!next-token;
3130     redo B;
3131     } elsif ($token->{type} eq 'start tag') {
3132     if ({
3133     tr => 1,
3134     th => 1, td => 1,
3135     }->{$token->{tag_name}}) {
3136     ## Clear back to table body context
3137     while (not {
3138     tbody => 1, tfoot => 1, thead => 1, html => 1,
3139     }->{$open_elements->[-1]->[1]}) {
3140     !!!parse-error;
3141     pop @$open_elements;
3142     }
3143    
3144     $insertion_mode = 'in row';
3145     if ($token->{tag_name} eq 'tr') {
3146     !!!insert-element ($token->{tag_name}, $token->{attributes});
3147     !!!next-token;
3148     } else {
3149     !!!insert-element ('tr');
3150     ## reprocess
3151     }
3152     redo B;
3153     } elsif ({
3154     caption => 1, col => 1, colgroup => 1,
3155     tbody => 1, tfoot => 1, thead => 1,
3156     }->{$token->{tag_name}}) {
3157     ## have an element in table scope
3158     my $i;
3159     INSCOPE: for (reverse 0..$#$open_elements) {
3160     my $node = $open_elements->[$_];
3161     if ({
3162     tbody => 1, thead => 1, tfoot => 1,
3163     }->{$node->[1]}) {
3164     $i = $_;
3165     last INSCOPE;
3166     } elsif ({
3167     table => 1, html => 1,
3168     }->{$node->[1]}) {
3169     last INSCOPE;
3170     }
3171     } # INSCOPE
3172     unless (defined $i) {
3173     !!!parse-error;
3174     ## Ignore the token
3175     !!!next-token;
3176     redo B;
3177     }
3178    
3179     ## Clear back to table body context
3180     while (not {
3181     tbody => 1, tfoot => 1, thead => 1, html => 1,
3182     }->{$open_elements->[-1]->[1]}) {
3183     !!!parse-error;
3184     pop @$open_elements;
3185     }
3186    
3187     ## As if <{current node}>
3188     ## have an element in table scope
3189     ## true by definition
3190    
3191     ## Clear back to table body context
3192     ## nop by definition
3193    
3194     pop @$open_elements;
3195     $insertion_mode = 'in table';
3196     ## reprocess
3197     redo B;
3198     } elsif ($token->{tag_name} eq 'table') {
3199     ## NOTE: This is a code clone of "table in table"
3200     !!!parse-error;
3201    
3202     ## As if </table>
3203     ## have a table element in table scope
3204     my $i;
3205     INSCOPE: for (reverse 0..$#$open_elements) {
3206     my $node = $open_elements->[$_];
3207     if ($node->[1] eq 'table') {
3208     $i = $_;
3209     last INSCOPE;
3210     } elsif ({
3211     table => 1, html => 1,
3212     }->{$node->[1]}) {
3213     last INSCOPE;
3214     }
3215     } # INSCOPE
3216     unless (defined $i) {
3217     !!!parse-error;
3218     ## Ignore tokens </table><table>
3219     !!!next-token;
3220     redo B;
3221     }
3222    
3223     ## generate implied end tags
3224     if ({
3225     dd => 1, dt => 1, li => 1, p => 1,
3226     td => 1, th => 1, tr => 1,
3227     }->{$open_elements->[-1]->[1]}) {
3228     !!!back-token; # <table>
3229     $token = {type => 'end tag', tag_name => 'table'};
3230     !!!back-token;
3231     $token = {type => 'end tag',
3232     tag_name => $open_elements->[-1]->[1]}; # MUST
3233     redo B;
3234     }
3235    
3236     if ($open_elements->[-1]->[1] ne 'table') {
3237     !!!parse-error;
3238     }
3239    
3240     splice @$open_elements, $i;
3241    
3242     $reset_insertion_mode->();
3243    
3244     ## reprocess
3245     redo B;
3246     } else {
3247     #
3248     }
3249     } elsif ($token->{type} eq 'end tag') {
3250     if ({
3251     tbody => 1, tfoot => 1, thead => 1,
3252     }->{$token->{tag_name}}) {
3253     ## have an element in table scope
3254     my $i;
3255     INSCOPE: for (reverse 0..$#$open_elements) {
3256     my $node = $open_elements->[$_];
3257     if ($node->[1] eq $token->{tag_name}) {
3258     $i = $_;
3259     last INSCOPE;
3260     } elsif ({
3261     table => 1, html => 1,
3262     }->{$node->[1]}) {
3263     last INSCOPE;
3264     }
3265     } # INSCOPE
3266     unless (defined $i) {
3267     !!!parse-error;
3268     ## Ignore the token
3269     !!!next-token;
3270     redo B;
3271     }
3272    
3273     ## Clear back to table body context
3274     while (not {
3275     tbody => 1, tfoot => 1, thead => 1, html => 1,
3276     }->{$open_elements->[-1]->[1]}) {
3277     !!!parse-error;
3278     pop @$open_elements;
3279     }
3280    
3281     pop @$open_elements;
3282     $insertion_mode = 'in table';
3283     !!!next-token;
3284     redo B;
3285     } elsif ($token->{tag_name} eq 'table') {
3286     ## have an element in table scope
3287     my $i;
3288     INSCOPE: for (reverse 0..$#$open_elements) {
3289     my $node = $open_elements->[$_];
3290     if ({
3291     tbody => 1, thead => 1, tfoot => 1,
3292     }->{$node->[1]}) {
3293     $i = $_;
3294     last INSCOPE;
3295     } elsif ({
3296     table => 1, html => 1,
3297     }->{$node->[1]}) {
3298     last INSCOPE;
3299     }
3300     } # INSCOPE
3301     unless (defined $i) {
3302     !!!parse-error;
3303     ## Ignore the token
3304     !!!next-token;
3305     redo B;
3306     }
3307    
3308     ## Clear back to table body context
3309     while (not {
3310     tbody => 1, tfoot => 1, thead => 1, html => 1,
3311     }->{$open_elements->[-1]->[1]}) {
3312     !!!parse-error;
3313     pop @$open_elements;
3314     }
3315    
3316     ## As if <{current node}>
3317     ## have an element in table scope
3318     ## true by definition
3319    
3320     ## Clear back to table body context
3321     ## nop by definition
3322    
3323     pop @$open_elements;
3324     $insertion_mode = 'in table';
3325     ## reprocess
3326     redo B;
3327     } elsif ({
3328     body => 1, caption => 1, col => 1, colgroup => 1,
3329     html => 1, td => 1, th => 1, tr => 1,
3330     }->{$token->{tag_name}}) {
3331     !!!parse-error;
3332     ## Ignore the token
3333     !!!next-token;
3334     redo B;
3335     } else {
3336     #
3337     }
3338     } else {
3339     #
3340     }
3341    
3342     ## As if in table
3343     ## NOTE: This is a code clone of "misc in table".
3344     !!!parse-error;
3345     $in_body->(sub {
3346     my $child = shift;
3347     if ({
3348     table => 1, tbody => 1, tfoot => 1,
3349     thead => 1, tr => 1,
3350     }->{$open_elements->[-1]->[1]}) {
3351     # MUST
3352     my $foster_parent_element;
3353     my $next_sibling;
3354     OE: for (reverse 0..$#$open_elements) {
3355     if ($open_elements->[$_]->[1] eq 'table') {
3356     my $parent = $open_elements->[$_]->[0]->parent_node;
3357     if (defined $parent and $parent->node_type == 1) {
3358     $foster_parent_element = $parent;
3359     $next_sibling = $open_elements->[$_]->[0];
3360     } else {
3361     $foster_parent_element
3362     = $open_elements->[$_ - 1]->[0];
3363     }
3364     last OE;
3365     }
3366     } # OE
3367     $foster_parent_element = $open_elements->[0]->[0]
3368     unless defined $foster_parent_element;
3369     $foster_parent_element->insert_before
3370     ($child, $next_sibling);
3371     } else {
3372     $open_elements->[-1]->[0]->append_child ($child);
3373     }
3374     });
3375     redo B;
3376     } elsif ($insertion_mode eq 'in row') {
3377     if ($token->{type} eq 'character') {
3378     ## Copied from 'in table'
3379     $reconstruct_active_formatting_elements->();
3380    
3381     $open_elements->[-1]->[0]->manakai_append_text ($token->{data});
3382    
3383     !!!next-token;
3384     redo B;
3385     } elsif ($token->{type} eq 'comment') {
3386     ## Copied from 'in table'
3387     my $comment = $self->{document}->create_comment ($token->{data});
3388     $open_elements->[-1]->[0]->append_child ($comment);
3389     !!!next-token;
3390     redo B;
3391     } elsif ($token->{type} eq 'start tag') {
3392     if ($token->{tag_name} eq 'th' or
3393     $token->{tag_name} eq 'td') {
         ## A |td| or |th| start tag in the "in row" mode opens a new cell.
3394     ## Clear back to table row context
         ## Pop until the current node is |tr| (or the root |html|), as the
         ## other "table row context" loops of this mode do.  Stopping
         ## on |th|/|td| instead would pop the open |tr| itself.
3395     while (not {
3396     tr => 1, html => 1,
3397     }->{$open_elements->[-1]->[1]}) {
3398     !!!parse-error;
3399     pop @$open_elements;
3400     }
3401    
3402     !!!insert-element ($token->{tag_name}, $token->{attributes});
3403     $insertion_mode = 'in cell';
3404    
         ## Marker entry in the list of active formatting elements; the
         ## "in cell" end tag handling calls |$clear_up_to_marker|.
3405     push @$active_formatting_elements, ['#marker', ''];
3406    
3407     !!!next-token;
3408     redo B;
3409     } elsif ({
3410     caption => 1, col => 1, colgroup => 1,
3411     tbody => 1, tfoot => 1, thead => 1, tr => 1,
3412     }->{$token->{tag_name}}) {
3413     ## As if </tr>
3414     ## have an element in table scope
3415     my $i;
3416     INSCOPE: for (reverse 0..$#$open_elements) {
3417     my $node = $open_elements->[$_];
3418     if ($node->[1] eq 'tr') {
3419     $i = $_;
3420     last INSCOPE;
3421     } elsif ({
3422     table => 1, html => 1,
3423     }->{$node->[1]}) {
3424     last INSCOPE;
3425     }
3426     } # INSCOPE
3427     unless (defined $i) {
3428     !!!parse-error;
3429     ## Ignore the token
3430     !!!next-token;
3431     redo B;
3432     }
3433    
3434     ## Clear back to table row context
3435     while (not {
3436     tr => 1, html => 1,
3437     }->{$open_elements->[-1]->[1]}) {
3438     !!!parse-error;
3439     pop @$open_elements;
3440     }
3441    
3442     pop @$open_elements; # tr
3443     $insertion_mode = 'in table body';
3444     ## reprocess
3445     redo B;
3446     } elsif ($token->{tag_name} eq 'table') {
3447     ## NOTE: This is a code clone of "table in table"
3448     !!!parse-error;
3449    
3450     ## As if </table>
3451     ## have a table element in table scope
3452     my $i;
3453     INSCOPE: for (reverse 0..$#$open_elements) {
3454     my $node = $open_elements->[$_];
3455     if ($node->[1] eq 'table') {
3456     $i = $_;
3457     last INSCOPE;
3458     } elsif ({
3459     table => 1, html => 1,
3460     }->{$node->[1]}) {
3461     last INSCOPE;
3462     }
3463     } # INSCOPE
3464     unless (defined $i) {
3465     !!!parse-error;
3466     ## Ignore tokens </table><table>
3467     !!!next-token;
3468     redo B;
3469     }
3470    
3471     ## generate implied end tags
3472     if ({
3473     dd => 1, dt => 1, li => 1, p => 1,
3474     td => 1, th => 1, tr => 1,
3475     }->{$open_elements->[-1]->[1]}) {
3476     !!!back-token; # <table>
3477     $token = {type => 'end tag', tag_name => 'table'};
3478     !!!back-token;
3479     $token = {type => 'end tag',
3480     tag_name => $open_elements->[-1]->[1]}; # MUST
3481     redo B;
3482     }
3483    
3484     if ($open_elements->[-1]->[1] ne 'table') {
3485     !!!parse-error;
3486     }
3487    
3488     splice @$open_elements, $i;
3489    
3490     $reset_insertion_mode->();
3491    
3492     ## reprocess
3493     redo B;
3494     } else {
3495     #
3496     }
3497     } elsif ($token->{type} eq 'end tag') {
3498     if ($token->{tag_name} eq 'tr') {
3499     ## have an element in table scope
3500     my $i;
3501     INSCOPE: for (reverse 0..$#$open_elements) {
3502     my $node = $open_elements->[$_];
3503     if ($node->[1] eq $token->{tag_name}) {
3504     $i = $_;
3505     last INSCOPE;
3506     } elsif ({
3507     table => 1, html => 1,
3508     }->{$node->[1]}) {
3509     last INSCOPE;
3510     }
3511     } # INSCOPE
3512     unless (defined $i) {
3513     !!!parse-error;
3514     ## Ignore the token
3515     !!!next-token;
3516     redo B;
3517     }
3518    
3519     ## Clear back to table row context
3520     while (not {
3521     tr => 1, html => 1,
3522     }->{$open_elements->[-1]->[1]}) {
3523     !!!parse-error;
3524     pop @$open_elements;
3525     }
3526    
3527     pop @$open_elements; # tr
3528     $insertion_mode = 'in table body';
3529     !!!next-token;
3530     redo B;
3531     } elsif ($token->{tag_name} eq 'table') {
3532     ## As if </tr>
3533     ## have an element in table scope
3534     my $i;
3535     INSCOPE: for (reverse 0..$#$open_elements) {
3536     my $node = $open_elements->[$_];
3537     if ($node->[1] eq 'tr') {
3538     $i = $_;
3539     last INSCOPE;
3540     } elsif ({
3541     table => 1, html => 1,
3542     }->{$node->[1]}) {
3543     last INSCOPE;
3544     }
3545     } # INSCOPE
3546     unless (defined $i) {
3547     !!!parse-error;
3548     ## Ignore the token
3549     !!!next-token;
3550     redo B;
3551     }
3552    
3553     ## Clear back to table row context
3554     while (not {
3555     tr => 1, html => 1,
3556     }->{$open_elements->[-1]->[1]}) {
3557     !!!parse-error;
3558     pop @$open_elements;
3559     }
3560    
3561     pop @$open_elements; # tr
3562     $insertion_mode = 'in table body';
3563     ## reprocess
3564     redo B;
3565     } elsif ({
3566     tbody => 1, tfoot => 1, thead => 1,
3567     }->{$token->{tag_name}}) {
         ## |</tbody>|, |</tfoot>| or |</thead>| in the "in row" mode:
         ## if such an element is in table scope, act as if |</tr>| had
         ## been seen and then reprocess the token in "in table body".
3568     ## have an element in table scope
3569     my $i;
3570     INSCOPE: for (reverse 0..$#$open_elements) {
3571     my $node = $open_elements->[$_];
3572     if ($node->[1] eq $token->{tag_name}) {
3573     $i = $_;
3574     last INSCOPE;
3575     } elsif ({
3576     table => 1, html => 1,
3577     }->{$node->[1]}) {
3578     last INSCOPE;
3579     }
3580     } # INSCOPE
3581     unless (defined $i) {
3582     !!!parse-error;
3583     ## Ignore the token
3584     !!!next-token;
3585     redo B;
3586     }
3587    
3588     ## As if </tr>
3589     ## have an element in table scope
         ## A separate variable is used for the |tr| lookup so that the
         ## |my $i| above is not redeclared (and masked) in the same scope.
3590     my $tr_i;
3591     INSCOPE: for (reverse 0..$#$open_elements) {
3592     my $node = $open_elements->[$_];
3593     if ($node->[1] eq 'tr') {
3594     $tr_i = $_;
3595     last INSCOPE;
3596     } elsif ({
3597     table => 1, html => 1,
3598     }->{$node->[1]}) {
3599     last INSCOPE;
3600     }
3601     } # INSCOPE
3602     unless (defined $tr_i) {
3603     !!!parse-error;
3604     ## Ignore the token
3605     !!!next-token;
3606     redo B;
3607     }
3608    
3609     ## Clear back to table row context
3610     while (not {
3611     tr => 1, html => 1,
3612     }->{$open_elements->[-1]->[1]}) {
3613     !!!parse-error;
3614     pop @$open_elements;
3615     }
3616    
3617     pop @$open_elements; # tr
3618     $insertion_mode = 'in table body';
3619     ## reprocess
3620     redo B;
3621     } elsif ({
3622     body => 1, caption => 1, col => 1,
3623     colgroup => 1, html => 1, td => 1, th => 1,
3624     }->{$token->{tag_name}}) {
3625     !!!parse-error;
3626     ## Ignore the token
3627     !!!next-token;
3628     redo B;
3629     } else {
3630     #
3631     }
3632     } else {
3633     #
3634     }
3635    
3636     ## As if in table
3637     ## NOTE: This is a code clone of "misc in table".
3638     !!!parse-error;
3639     $in_body->(sub {
3640     my $child = shift;
3641     if ({
3642     table => 1, tbody => 1, tfoot => 1,
3643     thead => 1, tr => 1,
3644     }->{$open_elements->[-1]->[1]}) {
3645     # MUST
3646     my $foster_parent_element;
3647     my $next_sibling;
3648     OE: for (reverse 0..$#$open_elements) {
3649     if ($open_elements->[$_]->[1] eq 'table') {
3650     my $parent = $open_elements->[$_]->[0]->parent_node;
3651     if (defined $parent and $parent->node_type == 1) {
3652     $foster_parent_element = $parent;
3653     $next_sibling = $open_elements->[$_]->[0];
3654     } else {
3655     $foster_parent_element
3656     = $open_elements->[$_ - 1]->[0];
3657     }
3658     last OE;
3659     }
3660     } # OE
3661     $foster_parent_element = $open_elements->[0]->[0]
3662     unless defined $foster_parent_element;
3663     $foster_parent_element->insert_before
3664     ($child, $next_sibling);
3665     } else {
3666     $open_elements->[-1]->[0]->append_child ($child);
3667     }
3668     });
3669     redo B;
3670     } elsif ($insertion_mode eq 'in cell') {
3671     if ($token->{type} eq 'character') {
3672     ## NOTE: This is a code clone of "character in body".
3673     $reconstruct_active_formatting_elements->();
3674    
3675     $open_elements->[-1]->[0]->manakai_append_text ($token->{data});
3676    
3677     !!!next-token;
3678     redo B;
3679     } elsif ($token->{type} eq 'comment') {
3680     ## NOTE: This is a code clone of "comment in body".
3681     my $comment = $self->{document}->create_comment ($token->{data});
3682     $open_elements->[-1]->[0]->append_child ($comment);
3683     !!!next-token;
3684     redo B;
3685     } elsif ($token->{type} eq 'start tag') {
3686     if ({
3687     caption => 1, col => 1, colgroup => 1,
3688     tbody => 1, td => 1, tfoot => 1, th => 1,
3689     thead => 1, tr => 1,
3690     }->{$token->{tag_name}}) {
3691     ## have an element in table scope
3692     my $tn;
3693     INSCOPE: for (reverse 0..$#$open_elements) {
3694     my $node = $open_elements->[$_];
3695     if ($node->[1] eq 'td' or $node->[1] eq 'th') {
3696     $tn = $node->[1];
3697     last INSCOPE;
3698     } elsif ({
3699     table => 1, html => 1,
3700     }->{$node->[1]}) {
3701     last INSCOPE;
3702     }
3703     } # INSCOPE
3704     unless (defined $tn) {
3705     !!!parse-error;
3706     ## Ignore the token
3707     !!!next-token;
3708     redo B;
3709     }
3710    
3711     ## Close the cell
3712     !!!back-token; # <?>
3713     $token = {type => 'end tag', tag_name => $tn};
3714     redo B;
3715     } else {
3716     #
3717     }
3718     } elsif ($token->{type} eq 'end tag') {
3719     if ($token->{type} eq 'td' or $token->{type} eq 'th') {
3720     ## have an element in table scope
3721     my $i;
3722     INSCOPE: for (reverse 0..$#$open_elements) {
3723     my $node = $open_elements->[$_];
3724     if ($node->[1] eq $token->{tag_name}) {
3725     $i = $_;
3726     last INSCOPE;
3727     } elsif ({
3728     table => 1, html => 1,
3729     }->{$node->[1]}) {
3730     last INSCOPE;
3731     }
3732     } # INSCOPE
3733     unless (defined $i) {
3734     !!!parse-error;
3735     ## Ignore the token
3736     !!!next-token;
3737     redo B;
3738     }
3739    
3740     ## generate implied end tags
3741     if ({
3742     dd => 1, dt => 1, li => 1, p => 1,
3743     td => ($token->{tag_name} eq 'th'),
3744     th => ($token->{tag_name} eq 'td'),
3745     tr => 1,
3746     }->{$open_elements->[-1]->[1]}) {
3747     !!!back-token;
3748     $token = {type => 'end tag',
3749     tag_name => $open_elements->[-1]->[1]}; # MUST
3750     redo B;
3751     }
3752    
3753     if ($open_elements->[-1]->[1] ne $token->{tag_name}) {
3754     !!!parse-error;
3755     }
3756    
3757     splice @$open_elements, $i;
3758    
3759     $clear_up_to_marker->();
3760    
3761     $insertion_mode = 'in row';
3762    
3763     !!!next-token;
3764     redo B;
3765     } elsif ({
3766     body => 1, caption => 1, col => 1,
3767     colgroup => 1, html => 1,
3768     }->{$token->{tag_name}}) {
3769     !!!parse-error;
3770     ## Ignore the token
3771     !!!next-token;
3772     redo B;
3773     } elsif ({
3774     table => 1, tbody => 1, tfoot => 1,
3775     thead => 1, tr => 1,
3776     }->{$token->{tag_name}}) {
         ## |</table>|, |</tbody>|, |</tfoot>|, |</thead>| or |</tr>| in the
         ## "in cell" mode: if the element is in table scope, close the
         ## open cell first and then reprocess this token.
3777     ## have an element in table scope
3778     my $i;
3779     my $tn;
3780     INSCOPE: for (reverse 0..$#$open_elements) {
3781     my $node = $open_elements->[$_];
3782     if ($node->[1] eq $token->{tag_name}) {
         ## Only the index is recorded.  |$tn| must keep the cell name
         ## found on the way down; overwriting it with the token's own
         ## tag name would re-enter this arm without end.
3783     $i = $_;
3785     last INSCOPE;
3786     } elsif ($node->[1] eq 'td' or $node->[1] eq 'th') {
3787     $tn = $node->[1];
3788     ## NOTE: There is exactly one |td| or |th| element
3789     ## in scope in the stack of open elements by definition.
3790     } elsif ({
3791     table => 1, html => 1,
3792     }->{$node->[1]}) {
3793     last INSCOPE;
3794     }
3795     } # INSCOPE
3796     unless (defined $i) {
3797     !!!parse-error;
3798     ## Ignore the token
3799     !!!next-token;
3800     redo B;
3801     }
3802    
3803     ## Close the cell
         ## The original token is pushed back and an end tag for the open
         ## cell (|td| or |th|) is processed in its place.
3804     !!!back-token; # </?>
3805     $token = {type => 'end tag', tag_name => $tn};
3806     redo B;
3807     } else {
3808     #
3809     }
3810     } else {
3811     #
3812     }
3813    
3814     $in_body->(sub {
3815     $open_elements->[-1]->[0]->append_child (shift);
3816     });
3817     redo B;
3818     } elsif ($insertion_mode eq 'in select') {
3819     if ($token->{type} eq 'character') {
3820     $open_elements->[-1]->[0]->manakai_append_text ($token->{data});
3821     !!!next-token;
3822     redo B;
3823     } elsif ($token->{type} eq 'comment') {
3824     my $comment = $self->{document}->create_comment ($token->{data});
3825     $open_elements->[-1]->[0]->append_child ($comment);
3826     !!!next-token;
3827     redo B;
3828     } elsif ($token->{type} eq 'start tag') {
3829     if ($token->{tag_name} eq 'option') {
3830     if ($open_elements->[-1]->[1] eq 'option') {
3831     ## As if </option>
3832     pop @$open_elements;
3833     }
3834    
3835     !!!insert-element ($token->{tag_name}, $token->{attributes});
3836     !!!next-token;
3837     redo B;
3838     } elsif ($token->{tag_name} eq 'optgroup') {
3839     if ($open_elements->[-1]->[1] eq 'option') {
3840     ## As if </option>
3841     pop @$open_elements;
3842     }
3843    
3844     if ($open_elements->[-1]->[1] eq 'optgroup') {
3845     ## As if </optgroup>
3846     pop @$open_elements;
3847     }
3848    
3849     !!!insert-element ($token->{tag_name}, $token->{attributes});
3850     !!!next-token;
3851     redo B;
3852     } elsif ($token->{tag_name} eq 'select') {
3853     !!!parse-error;
3854     ## As if </select> instead
3855     ## have an element in table scope
3856     my $i;
3857     INSCOPE: for (reverse 0..$#$open_elements) {
3858     my $node = $open_elements->[$_];
3859     if ($node->[1] eq $token->{tag_name}) {
3860     $i = $_;
3861     last INSCOPE;
3862     } elsif ({
3863     table => 1, html => 1,
3864     }->{$node->[1]}) {
3865     last INSCOPE;
3866     }
3867     } # INSCOPE
3868     unless (defined $i) {
3869     !!!parse-error;
3870     ## Ignore the token
3871     !!!next-token;
3872     redo B;
3873     }
3874    
3875     splice @$open_elements, $i;
3876    
3877     $reset_insertion_mode->();
3878    
3879     !!!next-token;
3880     redo B;
3881     } else {
3882     #
3883     }
3884     } elsif ($token->{type} eq 'end tag') {
3885     if ($token->{tag_name} eq 'optgroup') {
3886     if ($open_elements->[-1]->[1] eq 'option' and
3887     $open_elements->[-2]->[1] eq 'optgroup') {
3888     ## As if </option>
3889     splice @$open_elements, -2;
3890     } elsif ($open_elements->[-1]->[1] eq 'optgroup') {
3891     pop @$open_elements;
3892     } else {
3893     !!!parse-error;
3894     ## Ignore the token
3895     }
3896     !!!next-token;
3897     redo B;
3898     } elsif ($token->{tag_name} eq 'option') {
3899     if ($open_elements->[-1]->[1] eq 'option') {
3900     pop @$open_elements;
3901     } else {
3902     !!!parse-error;
3903     ## Ignore the token
3904     }
3905     !!!next-token;
3906     redo B;
3907     } elsif ($token->{tag_name} eq 'select') {
3908     ## have an element in table scope
3909     my $i;
3910     INSCOPE: for (reverse 0..$#$open_elements) {
3911     my $node = $open_elements->[$_];
3912     if ($node->[1] eq $token->{tag_name}) {
3913     $i = $_;
3914     last INSCOPE;
3915     } elsif ({
3916     table => 1, html => 1,
3917     }->{$node->[1]}) {
3918     last INSCOPE;
3919     }
3920     } # INSCOPE
3921     unless (defined $i) {
3922     !!!parse-error;
3923     ## Ignore the token
3924     !!!next-token;
3925     redo B;
3926     }
3927    
3928     splice @$open_elements, $i;
3929    
3930     $reset_insertion_mode->();
3931    
3932     !!!next-token;
3933     redo B;
3934     } elsif ({
3935     caption => 1, table => 1, tbody => 1,
3936     tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
3937     }->{$token->{tag_name}}) {
3938     !!!parse-error;
3939    
3940     ## have an element in table scope
3941     my $i;
3942     INSCOPE: for (reverse 0..$#$open_elements) {
3943     my $node = $open_elements->[$_];
3944     if ($node->[1] eq $token->{tag_name}) {
3945     $i = $_;
3946     last INSCOPE;
3947     } elsif ({
3948     table => 1, html => 1,
3949     }->{$node->[1]}) {
3950     last INSCOPE;
3951     }
3952     } # INSCOPE
3953     unless (defined $i) {
3954     ## Ignore the token
3955     !!!next-token;
3956     redo B;
3957     }
3958    
3959     ## As if </select>
3960     ## have an element in table scope
3961     undef $i;
3962     INSCOPE: for (reverse 0..$#$open_elements) {
3963     my $node = $open_elements->[$_];
3964     if ($node->[1] eq 'select') {
3965     $i = $_;
3966     last INSCOPE;
3967     } elsif ({
3968     table => 1, html => 1,
3969     }->{$node->[1]}) {
3970     last INSCOPE;
3971     }
3972     } # INSCOPE
3973     unless (defined $i) {
3974     !!!parse-error;
3975     ## Ignore the </select> token
3976     !!!next-token; ## TODO: ok?
3977     redo B;
3978     }
3979    
3980     splice @$open_elements, $i;
3981    
3982     $reset_insertion_mode->();
3983    
3984     ## reprocess
3985     redo B;
3986     } else {
3987     #
3988     }
3989     } else {
3990     #
3991     }
3992    
3993     !!!parse-error;
3994     ## Ignore the token
3995     redo B;
3996     } elsif ($insertion_mode eq 'after body') {
3997     if ($token->{type} eq 'character') {
3998     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3999     ## As if in body
4000     $reconstruct_active_formatting_elements->();
4001    
4002     $open_elements->[-1]->[0]->manakai_append_text ($token->{data});
4003    
4004     unless (length $token->{data}) {
4005     !!!next-token;
4006     redo B;
4007     }
4008     }
4009    
4010     #
4011     } elsif ($token->{type} eq 'comment') {
4012     my $comment = $self->{document}->create_comment ($token->{data});
4013     $open_elements->[0]->[0]->append_child ($comment);
4014     !!!next-token;
4015     redo B;
4016     } elsif ($token->{type} eq 'end tag') {
4017     if ($token->{type} eq 'html') {
4018     ## TODO: if inner_html, parse-error, ignore the token; otherwise,
4019    
4020     $phase = 'trailing end';
4021     !!!next-token;
4022     redo B;
4023     } else {
4024     #
4025     }
4026     } else {
4027     #
4028     }
4029    
4030     !!!parse-error;
4031     $insertion_mode = 'in body';
4032     ## reprocess
4033     redo B;
4034     } elsif ($insertion_mode eq 'in frameset') {
4035     if ($token->{type} eq 'character') {
4036     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4037     $open_elements->[-1]->[0]->manakai_append_text ($token->{data});
4038    
4039     unless (length $token->{data}) {
4040     !!!next-token;
4041     redo B;
4042     }
4043     }
4044    
4045     #
4046     } elsif ($token->{type} eq 'comment') {
4047     my $comment = $self->{document}->create_comment ($token->{data});
4048     $open_elements->[-1]->[0]->append_child ($comment);
4049     !!!next-token;
4050     redo B;
4051     } elsif ($token->{type} eq 'start tag') {
4052     if ($token->{tag_name} eq 'frameset') {
4053     !!!insert-element ($token->{tag_name}, $token->{attributes});
4054     !!!next-token;
4055     redo B;
4056     } elsif ($token->{tag_name} eq 'frame') {
4057     !!!insert-element ($token->{tag_name}, $token->{attributes});
4058     pop @$open_elements;
4059     !!!next-token;
4060     redo B;
4061     } elsif ($token->{tag_name} eq 'noframes') {
4062     $in_body->(sub {
4063     $open_elements->[-1]->[0]->append_child (shift);
4064     });
4065     redo B;
4066     } else {
4067     #
4068     }
4069     } elsif ($token->{type} eq 'end tag') {
4070     if ($token->{tag_name} eq 'frameset') {
4071     if ($open_elements->[-1]->[1] eq 'html' and
4072     @$open_elements == 1) {
4073     !!!parse-error;
4074     ## Ignore the token
4075     !!!next-token;
4076     } else {
4077     pop @$open_elements;
4078     !!!next-token;
4079     }
4080    
4081     ## if not inner_html and
4082     if ($open_elements->[-1]->[1] ne 'frameset') {
4083     $insertion_mode = 'after frameset';
4084     }
4085     redo B;
4086     } else {
4087     #
4088     }
4089     } else {
4090     #
4091     }
4092    
4093     !!!parse-error;
4094     ## Ignore the token
4095     !!!next-token;
4096     redo B;
4097     } elsif ($insertion_mode eq 'after frameset') {
4098     if ($token->{type} eq 'character') {
4099     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4100     $open_elements->[-1]->[0]->manakai_append_text ($token->{data});
4101    
4102     unless (length $token->{data}) {
4103     !!!next-token;
4104     redo B;
4105     }
4106     }
4107    
4108     #
4109     } elsif ($token->{type} eq 'comment') {
4110     my $comment = $self->{document}->create_comment ($token->{data});
4111     $open_elements->[-1]->[0]->append_child ($comment);
4112     !!!next-token;
4113     redo B;
4114     } elsif ($token->{type} eq 'start tag') {
4115     if ($token->{tag_name} eq 'noframes') {
4116     $in_body->(sub {
4117     $open_elements->[-1]->[0]->append_child (shift);
4118     });
4119     redo B;
4120     } else {
4121     #
4122     }
4123     } elsif ($token->{type} eq 'end tag') {
4124     if ($token->{tag_name} eq 'html') {
4125     $phase = 'trailing end';
4126     !!!next-token;
4127     redo B;
4128     } else {
4129     #
4130     }
4131     } else {
4132     #
4133     }
4134    
4135     !!!parse-error;
4136     ## Ignore the token
4137     !!!next-token;
4138     redo B;
4139    
4140     ## ISSUE: An issue in spec there
4141     } else {
4142     die "$0: $insertion_mode: Unknown insertion mode";
4143     }
4144     }
4145     } elsif ($phase eq 'trailing end') {
4146     ## states in the main stage is preserved yet # MUST
4147    
4148     if ($token->{type} eq 'DOCTYPE') {
4149     !!!parse-error;
4150     ## Ignore the token
4151     !!!next-token;
4152     redo B;
4153     } elsif ($token->{type} eq 'comment') {
4154     my $comment = $self->{document}->create_comment ($token->{data});
4155     $self->{document}->append_child ($comment);
4156     !!!next-token;
4157     redo B;
4158     } elsif ($token->{type} eq 'character') {
4159     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4160     ## As if in the main phase.
4161     ## NOTE: The insertion mode in the main phase
4162     ## just before the phase has been changed to the trailing
4163     ## end phase is either "after body" or "after frameset".
4164     $reconstruct_active_formatting_elements->()
4165     if $phase eq 'main';
4166    
4167     $open_elements->[-1]->[0]->manakai_append_text ($token->{data});
4168    
4169     unless (length $token->{data}) {
4170     !!!next-token;
4171     redo B;
4172     }
4173     }
4174    
4175     !!!parse-error;
4176     $phase = 'main';
4177     ## reprocess
4178     redo B;
4179     } elsif ($token->{type} eq 'start tag' or
4180     $token->{type} eq 'end tag') {
4181     !!!parse-error;
4182     $phase = 'main';
4183     ## reprocess
4184     redo B;
4185     } elsif ($token->{type} eq 'end-of-file') {
4186     ## Stop parsing
4187     last B;
4188     } else {
4189     die "$0: $token->{type}: Unknown token";
4190     }
4191     }
4192     } # B
4193    
4194     ## Stop parsing # MUST
4195    
4196     ## TODO: script stuffs
4197     } # _construct_tree
4198    
## Serializes the children of |$node| into HTML markup and returns
## a reference to the resulting string.
##
## Arguments:
##   $class    - Invocant (not used by the algorithm).
##   $node     - DOM node whose child nodes are serialized.  |$node|
##               itself is not written out, but it and its ancestors
##               decide whether text is emitted raw or escaped.
##   $on_error - Optional code reference.  It is invoked with each
##               child node whose node type cannot be serialized.
##               If omitted, such nodes are silently skipped.
##
## Returns: a reference to the serialized string.
sub inner_html ($$$) {
  my ($class, $node, $on_error) = @_;

  ## Elements whose text content is written out without escaping.
  my $cdata_element = {
    style => 1, script => 1, xmp => 1, iframe => 1,
    noembed => 1, noframes => 1, noscript => 1,
  };

  ## Elements that are serialized as a start tag only: neither their
  ## children nor an end tag are emitted.
  my $void_element = {
    area => 1, base => 1, basefont => 1, bgsound => 1,
    br => 1, col => 1, embed => 1, frame => 1, hr => 1,
    img => 1, input => 1, link => 1, meta => 1, param => 1,
    spacer => 1, wbr => 1,
  };

  ## Step 1
  my $s = '';

  ## Text is emitted raw if |$node| or any of its ancestors is one of
  ## the XHTML-namespace elements listed in |$cdata_element|.
  my $in_cdata;
  my $parent = $node;
  while (defined $parent) {
    if ($parent->node_type == 1) {
      my $ns = $parent->namespace_uri; # may be undef (null namespace)
      if (defined $ns and
          $ns eq 'http://www.w3.org/1999/xhtml' and
          $cdata_element->{$parent->local_name}) { ## TODO: case thingy
        $in_cdata = 1;
      }
    }
    $parent = $parent->parent_node;
  }

  ## Step 2
  ## |@node| is a work queue processed from the front.  Besides node
  ## objects it holds plain strings: end tags to be appended verbatim
  ## and the marker |cdata-out|, which turns raw-text mode off again.
  my @node = @{$node->child_nodes};
  C: while (@node) {
    my $child = shift @node;
    unless (ref $child) {
      if ($child eq 'cdata-out') {
        $in_cdata = 0;
      } else {
        $s .= $child; # end tag
      }
      next C;
    }

    my $nt = $child->node_type;
    if ($nt == 1) { # Element
      my $tag_name = lc $child->tag_name; ## ISSUE: Definition of "lowercase"
      $s .= '<' . $tag_name;

      ## ISSUE: Non-html elements

      my @attrs = @{$child->attributes}; # sort order MUST be stable
      for my $attr (@attrs) { # order is implementation dependent
        my $attr_name = lc $attr->name; ## ISSUE: Definition of "lowercase"
        $s .= ' ' . $attr_name . '="';
        my $attr_value = $attr->value;
        ## escape (|&| first, so that the entities produced by the
        ## later substitutions are not escaped a second time)
        $attr_value =~ s/&/&amp;/g;
        $attr_value =~ s/</&lt;/g;
        $attr_value =~ s/>/&gt;/g;
        $attr_value =~ s/"/&quot;/g;
        $s .= $attr_value . '"';
      }
      $s .= '>';

      next C if $void_element->{$tag_name};

      if (not $in_cdata and $cdata_element->{$tag_name}) {
        ## The marker ends up right after this element's end tag in
        ## the queue, so raw-text mode covers exactly its subtree.
        unshift @node, 'cdata-out';
        $in_cdata = 1;
      }

      ## Depth-first: the children and then the end tag are processed
      ## before any following sibling.
      unshift @node, @{$child->child_nodes}, '</' . $tag_name . '>';
    } elsif ($nt == 3 or $nt == 4) { # Text or CDATA section
      if ($in_cdata) {
        $s .= $child->data;
      } else {
        my $value = $child->data;
        $value =~ s/&/&amp;/g;
        $value =~ s/</&lt;/g;
        $value =~ s/>/&gt;/g;
        $value =~ s/"/&quot;/g;
        $s .= $value;
      }
    } elsif ($nt == 8) { # Comment
      $s .= '<!--' . $child->data . '-->';
    } elsif ($nt == 10) { # Document type
      $s .= '<!DOCTYPE ' . $child->name . '>';
    } elsif ($nt == 5) { # entrefs
      ## The children replace the entity reference in place.  They
      ## have to go to the FRONT of the queue; appending them to the
      ## end would emit them after all following siblings and after
      ## the pending end tags.
      unshift @node, @{$child->child_nodes};
    } else {
      ## Unsupported node type: report it if the caller asked for it.
      $on_error->($child) if defined $on_error;
    }
  } # C

  ## Step 3
  return \$s;
} # inner_html
4294    
## A module file has to end with a true value so that |require| / |use| succeed.
4295 wakaba 1.1 1;
4296 wakaba 1.4 # $Date: 2007/04/30 09:59:35 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24