/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Contents of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.30 - (show annotations) (download) (as text)
Sat Jun 30 13:12:32 2007 UTC (18 years, 7 months ago) by wakaba
Branch: MAIN
Changes since 1.29: +12 -11 lines
File MIME type: application/x-wais-source
++ whatpm/t/ChangeLog	30 Jun 2007 12:28:52 -0000
2007-06-30  Wakaba  <wakaba@suika.fam.cx>

	* URIChecker.t: Error level names in test results have
	been changed.

	* tokenizer-test-1.test: A test for bogus SYSTEM identifier
	is added.

	* content-model-1.dat, content-model-2.dat, content-model-3.dat,
	content-model-4.dat: Error messages have been changed.

	* ContentChecker.t: Appends error level to the error
	message if any.

++ whatpm/Whatpm/ChangeLog	30 Jun 2007 13:03:50 -0000
2007-06-30  Wakaba  <wakaba@suika.fam.cx>

	* IMTChecker.pm: Report warning for unregistered
	and private types/subtypes.

	* ContentChecker.pm, HTML.pm.src, IMTChecker.pm,
	URIChecker.pm, HTMLTable.pm: Error messages are now
	consistent; they are all listed in
	<http://suika.fam.cx/gate/2005/sw/Whatpm%20Error%20Types>.

1 package Whatpm::HTML;
2 use strict;
3 our $VERSION=do{my @r=(q$Revision: 1.29 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4
5 ## ISSUE:
6 ## var doc = implementation.createDocument (null, null, null);
7 ## doc.write ('');
8 ## alert (doc.compatMode);
9
## Start tags in which a "/" placed directly before the closing ">" is
## tolerated.  The tokenizer looks the current tag name up here when it
## meets "/" inside a tag and reports a |nestc| parse error for a start
## tag that is not listed (or for any end tag).
my $permitted_slash_tag_name = {
  map { ($_ => 1) } qw(
    base link meta hr br img embed param area col input
  ),
};
23
## Maps each code point in the range 0x80..0x9F to a replacement code
## point.  The replacements match the Windows-1252 assignments for the
## corresponding bytes; positions Windows-1252 leaves undefined map to
## U+FFFD REPLACEMENT CHARACTER.
## NOTE(review): the consumer of this table is outside this excerpt --
## presumably numeric character reference handling; confirm there.
my $c1_entity_char = do {
  ## Replacement values listed in key order, starting at key 0x80.
  my @replacement = (
    0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, # 0x80..0x87
    0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD, # 0x88..0x8F
    0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, # 0x90..0x97
    0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178, # 0x98..0x9F
  );
  +{ map { ((0x80 + $_) => $replacement[$_]) } 0 .. $#replacement };
}; # $c1_entity_char
58
## Set of element names belonging to the "special" category (each name
## maps to 1).
## NOTE(review): the code consulting this set is outside this excerpt.
my $special_category = {
  map { ($_ => 1) } qw(
    address area base basefont bgsound blockquote body br center col
    colgroup dd dir div dl dt embed fieldset form frame frameset
    h1 h2 h3 h4 h5 h6 head hr iframe image img input isindex li link
    listing menu meta noembed noframes noscript ol optgroup option p
    param plaintext pre script select spacer style tbody textarea
    tfoot thead title tr ul wbr
  ),
};
## Set of element names belonging to the "scoping" category (each name
## maps to 1).
## NOTE(review): the code consulting this set is outside this excerpt.
my $scoping_category = {
  map { ($_ => 1) } qw(
    button caption html marquee object table td th
  ),
};
## Set of element names belonging to the "formatting" category (each
## name maps to 1).  The entry for |strike| used to be misspelled
## "strile", which silently left <strike> out of the set.
## NOTE(review): the code consulting this set is outside this excerpt.
my $formatting_category = {
  a => 1, b => 1, big => 1, em => 1, font => 1, i => 1, nobr => 1,
  s => 1, small => 1, strike => 1, strong => 1, tt => 1, u => 1,
};
# $phrasing_category: all other elements
80
## Parses a string of HTML and returns the document it was parsed into.
##
##   $doc = $class_or_parser->parse_string ($string, $document, $onerror);
##
## A new parser is obtained by calling |new| on the invocant.  $string
## is read through a reference to the caller's argument (it is not
## copied).  $document is stored as |$self->{document}| and is the
## return value.  $onerror is optional; when it is false a default
## handler that |warn|s is used.  The |parse_error| hook installed here
## forwards its own arguments to that handler, followed by
## |line => ...| and |column => ...| describing the current position.
##
## NOTE: |set_inner_html| copies most of this method's code
sub parse_string ($$$;$) {
  my $self = shift->new;
  my $input = \$_[0]; # reference to the caller's string
  $self->{document} = $_[1];

  ## Reader position, shared by the closures below.
  my $pos = 0;    # index of the next unread character in $$input
  my $line = 1;
  my $column = 0;

  $self->{set_next_input_character} = sub {
    my $self = shift;

    ## |prev_input_character| keeps the three most recent values of
    ## |next_input_character|, newest first.
    pop @{$self->{prev_input_character}};
    unshift @{$self->{prev_input_character}}, $self->{next_input_character};

    if ($pos >= length $$input) {
      $self->{next_input_character} = -1; # end of input
      return;
    }

    my $code = ord substr $$input, $pos++, 1;
    $self->{next_input_character} = $code;
    $column++;

    if ($code == 0x000A or $code == 0x000D) { # LF or CR
      if ($code == 0x000D) { # CR
        ## A CR, or a CR immediately followed by LF, is delivered as
        ## one LF.
        $pos++ if substr ($$input, $pos, 1) eq "\x0A";
        $self->{next_input_character} = 0x000A; # LF # MUST
      }
      $line++;
      $column = 0;
    } elsif ($code > 0x10FFFF) {
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    } elsif ($code == 0x0000) { # NULL
      !!!parse-error (type => 'NULL');
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    }
  };
  $self->{prev_input_character} = [-1, -1, -1];
  $self->{next_input_character} = -1;

  my $onerror = $_[2] || sub {
    my (%opt) = @_;
    warn "Parse error ($opt{type}) at line $opt{line} column $opt{column}\n";
  };
  $self->{parse_error} = sub {
    $onerror->(@_, line => $line, column => $column);
  };

  ## Run the parsing stages in order.
  for my $stage (qw(_initialize_tokenizer _initialize_tree_constructor
                    _construct_tree _terminate_tree_constructor)) {
    $self->$stage;
  }

  return $self->{document};
} # parse_string
134
## Creates a parser object with inert default hooks.
##
##   $parser = Whatpm::HTML->new;
##
## The returned object is a blessed hash holding two code references:
##   - |set_next_input_character| sets |next_input_character| to -1;
##   - |parse_error| does nothing.
## |parse_string| replaces both hooks with real implementations.
sub new ($) {
  my $class = shift;
  my $self = bless {}, $class;

  ## The default reader closure is stored inside $self, so capturing
  ## $self strongly would create a reference cycle and an object whose
  ## hook is never replaced could never be freed.  Capture a weakened
  ## copy instead.
  require Scalar::Util;
  my $weak_self = $self;
  Scalar::Util::weaken ($weak_self);

  $self->{set_next_input_character} = sub {
    $weak_self->{next_input_character} = -1;
  };
  $self->{parse_error} = sub {
    #
  };
  return $self;
} # new
146
147 ## Implementations MUST act as if state machine in the spec
148
## Resets the tokenizer fields of the parser and reads the first input
## character.
sub _initialize_tokenizer ($) {
  my ($self) = @_;

  ## Initial state and content model flag; marked "MUST be" in the
  ## original notes.
  @{$self}{qw(state content_model_flag)} = ('data', 'PCDATA');

  ## |current_token| is a start tag, end tag, comment, or DOCTYPE while
  ## one is being built; nothing is in progress yet.
  $self->{$_} = undef
      for qw(current_token current_attribute
             last_emitted_start_tag_name last_attribute_value_state);

  $self->{char} = [];
  ## |next_input_character| is filled in by the reader here.
  !!!next-input-character;
  $self->{token} = [];
  ## NOTE: |$self->{escape}| is a tokenizer field too, but it is not
  ## reset here.
} # _initialize_tokenizer
163
164 ## A token has:
165 ## ->{type} eq 'DOCTYPE', 'start tag', 'end tag', 'comment',
166 ## 'character', or 'end-of-file'
167 ## ->{name} (DOCTYPE, start tag (tag name), end tag (tag name))
168 ## ->{public_identifier} (DOCTYPE)
169 ## ->{system_identifier} (DOCTYPE)
170 ## ->{correct} == 1 or 0 (DOCTYPE)
171 ## ->{attributes} isa HASH (start tag, end tag)
172 ## ->{data} (comment, character)
173
174 ## Emitted token MUST immediately be handled by the tree construction state.
175
176 ## Before each step, UA MAY check to see if either one of the scripts in
177 ## "list of scripts that will execute as soon as possible" or the first
178 ## script in the "list of scripts that will execute asynchronously",
179 ## has completed loading. If one has, then it MUST be executed
180 ## and removed from the list.
181
182 sub _get_next_token ($) {
183 my $self = shift;
184 if (@{$self->{token}}) {
185 return shift @{$self->{token}};
186 }
187
188 A: {
189 if ($self->{state} eq 'data') {
190 if ($self->{next_input_character} == 0x0026) { # &
191 if ($self->{content_model_flag} eq 'PCDATA' or
192 $self->{content_model_flag} eq 'RCDATA') {
193 $self->{state} = 'entity data';
194 !!!next-input-character;
195 redo A;
196 } else {
197 #
198 }
199 } elsif ($self->{next_input_character} == 0x002D) { # -
200 if ($self->{content_model_flag} eq 'RCDATA' or
201 $self->{content_model_flag} eq 'CDATA') {
202 unless ($self->{escape}) {
203 if ($self->{prev_input_character}->[0] == 0x002D and # -
204 $self->{prev_input_character}->[1] == 0x0021 and # !
205 $self->{prev_input_character}->[2] == 0x003C) { # <
206 $self->{escape} = 1;
207 }
208 }
209 }
210
211 #
212 } elsif ($self->{next_input_character} == 0x003C) { # <
213 if ($self->{content_model_flag} eq 'PCDATA' or
214 (($self->{content_model_flag} eq 'CDATA' or
215 $self->{content_model_flag} eq 'RCDATA') and
216 not $self->{escape})) {
217 $self->{state} = 'tag open';
218 !!!next-input-character;
219 redo A;
220 } else {
221 #
222 }
223 } elsif ($self->{next_input_character} == 0x003E) { # >
224 if ($self->{escape} and
225 ($self->{content_model_flag} eq 'RCDATA' or
226 $self->{content_model_flag} eq 'CDATA')) {
227 if ($self->{prev_input_character}->[0] == 0x002D and # -
228 $self->{prev_input_character}->[1] == 0x002D) { # -
229 delete $self->{escape};
230 }
231 }
232
233 #
234 } elsif ($self->{next_input_character} == -1) {
235 !!!emit ({type => 'end-of-file'});
236 last A; ## TODO: ok?
237 }
238 # Anything else
239 my $token = {type => 'character',
240 data => chr $self->{next_input_character}};
241 ## Stay in the data state
242 !!!next-input-character;
243
244 !!!emit ($token);
245
246 redo A;
247 } elsif ($self->{state} eq 'entity data') {
248 ## (cannot happen in CDATA state)
249
250 my $token = $self->_tokenize_attempt_to_consume_an_entity (0);
251
252 $self->{state} = 'data';
253 # next-input-character is already done
254
255 unless (defined $token) {
256 !!!emit ({type => 'character', data => '&'});
257 } else {
258 !!!emit ($token);
259 }
260
261 redo A;
262 } elsif ($self->{state} eq 'tag open') {
263 if ($self->{content_model_flag} eq 'RCDATA' or
264 $self->{content_model_flag} eq 'CDATA') {
265 if ($self->{next_input_character} == 0x002F) { # /
266 !!!next-input-character;
267 $self->{state} = 'close tag open';
268 redo A;
269 } else {
270 ## reconsume
271 $self->{state} = 'data';
272
273 !!!emit ({type => 'character', data => '<'});
274
275 redo A;
276 }
277 } elsif ($self->{content_model_flag} eq 'PCDATA') {
278 if ($self->{next_input_character} == 0x0021) { # !
279 $self->{state} = 'markup declaration open';
280 !!!next-input-character;
281 redo A;
282 } elsif ($self->{next_input_character} == 0x002F) { # /
283 $self->{state} = 'close tag open';
284 !!!next-input-character;
285 redo A;
286 } elsif (0x0041 <= $self->{next_input_character} and
287 $self->{next_input_character} <= 0x005A) { # A..Z
288 $self->{current_token}
289 = {type => 'start tag',
290 tag_name => chr ($self->{next_input_character} + 0x0020)};
291 $self->{state} = 'tag name';
292 !!!next-input-character;
293 redo A;
294 } elsif (0x0061 <= $self->{next_input_character} and
295 $self->{next_input_character} <= 0x007A) { # a..z
296 $self->{current_token} = {type => 'start tag',
297 tag_name => chr ($self->{next_input_character})};
298 $self->{state} = 'tag name';
299 !!!next-input-character;
300 redo A;
301 } elsif ($self->{next_input_character} == 0x003E) { # >
302 !!!parse-error (type => 'empty start tag');
303 $self->{state} = 'data';
304 !!!next-input-character;
305
306 !!!emit ({type => 'character', data => '<>'});
307
308 redo A;
309 } elsif ($self->{next_input_character} == 0x003F) { # ?
310 !!!parse-error (type => 'pio');
311 $self->{state} = 'bogus comment';
312 ## $self->{next_input_character} is intentionally left as is
313 redo A;
314 } else {
315 !!!parse-error (type => 'bare stago');
316 $self->{state} = 'data';
317 ## reconsume
318
319 !!!emit ({type => 'character', data => '<'});
320
321 redo A;
322 }
323 } else {
324 die "$0: $self->{content_model_flag}: Unknown content model flag";
325 }
326 } elsif ($self->{state} eq 'close tag open') {
327 if ($self->{content_model_flag} eq 'RCDATA' or
328 $self->{content_model_flag} eq 'CDATA') {
329 if (defined $self->{last_emitted_start_tag_name}) {
330 ## NOTE: <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>
331 my @next_char;
332 TAGNAME: for (my $i = 0; $i < length $self->{last_emitted_start_tag_name}; $i++) {
333 push @next_char, $self->{next_input_character};
334 my $c = ord substr ($self->{last_emitted_start_tag_name}, $i, 1);
335 my $C = 0x0061 <= $c && $c <= 0x007A ? $c - 0x0020 : $c;
336 if ($self->{next_input_character} == $c or $self->{next_input_character} == $C) {
337 !!!next-input-character;
338 next TAGNAME;
339 } else {
340 $self->{next_input_character} = shift @next_char; # reconsume
341 !!!back-next-input-character (@next_char);
342 $self->{state} = 'data';
343
344 !!!emit ({type => 'character', data => '</'});
345
346 redo A;
347 }
348 }
349 push @next_char, $self->{next_input_character};
350
351 unless ($self->{next_input_character} == 0x0009 or # HT
352 $self->{next_input_character} == 0x000A or # LF
353 $self->{next_input_character} == 0x000B or # VT
354 $self->{next_input_character} == 0x000C or # FF
355 $self->{next_input_character} == 0x0020 or # SP
356 $self->{next_input_character} == 0x003E or # >
357 $self->{next_input_character} == 0x002F or # /
358 $self->{next_input_character} == -1) {
359 $self->{next_input_character} = shift @next_char; # reconsume
360 !!!back-next-input-character (@next_char);
361 $self->{state} = 'data';
362 !!!emit ({type => 'character', data => '</'});
363 redo A;
364 } else {
365 $self->{next_input_character} = shift @next_char;
366 !!!back-next-input-character (@next_char);
367 # and consume...
368 }
369 } else {
370 ## No start tag token has ever been emitted
371 # next-input-character is already done
372 $self->{state} = 'data';
373 !!!emit ({type => 'character', data => '</'});
374 redo A;
375 }
376 }
377
378 if (0x0041 <= $self->{next_input_character} and
379 $self->{next_input_character} <= 0x005A) { # A..Z
380 $self->{current_token} = {type => 'end tag',
381 tag_name => chr ($self->{next_input_character} + 0x0020)};
382 $self->{state} = 'tag name';
383 !!!next-input-character;
384 redo A;
385 } elsif (0x0061 <= $self->{next_input_character} and
386 $self->{next_input_character} <= 0x007A) { # a..z
387 $self->{current_token} = {type => 'end tag',
388 tag_name => chr ($self->{next_input_character})};
389 $self->{state} = 'tag name';
390 !!!next-input-character;
391 redo A;
392 } elsif ($self->{next_input_character} == 0x003E) { # >
393 !!!parse-error (type => 'empty end tag');
394 $self->{state} = 'data';
395 !!!next-input-character;
396 redo A;
397 } elsif ($self->{next_input_character} == -1) {
398 !!!parse-error (type => 'bare etago');
399 $self->{state} = 'data';
400 # reconsume
401
402 !!!emit ({type => 'character', data => '</'});
403
404 redo A;
405 } else {
406 !!!parse-error (type => 'bogus end tag');
407 $self->{state} = 'bogus comment';
408 ## $self->{next_input_character} is intentionally left as is
409 redo A;
410 }
411 } elsif ($self->{state} eq 'tag name') {
412 if ($self->{next_input_character} == 0x0009 or # HT
413 $self->{next_input_character} == 0x000A or # LF
414 $self->{next_input_character} == 0x000B or # VT
415 $self->{next_input_character} == 0x000C or # FF
416 $self->{next_input_character} == 0x0020) { # SP
417 $self->{state} = 'before attribute name';
418 !!!next-input-character;
419 redo A;
420 } elsif ($self->{next_input_character} == 0x003E) { # >
421 if ($self->{current_token}->{type} eq 'start tag') {
422 $self->{current_token}->{first_start_tag}
423 = not defined $self->{last_emitted_start_tag_name};
424 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
425 } elsif ($self->{current_token}->{type} eq 'end tag') {
426 $self->{content_model_flag} = 'PCDATA'; # MUST
427 if ($self->{current_token}->{attributes}) {
428 !!!parse-error (type => 'end tag attribute');
429 }
430 } else {
431 die "$0: $self->{current_token}->{type}: Unknown token type";
432 }
433 $self->{state} = 'data';
434 !!!next-input-character;
435
436 !!!emit ($self->{current_token}); # start tag or end tag
437
438 redo A;
439 } elsif (0x0041 <= $self->{next_input_character} and
440 $self->{next_input_character} <= 0x005A) { # A..Z
441 $self->{current_token}->{tag_name} .= chr ($self->{next_input_character} + 0x0020);
442 # start tag or end tag
443 ## Stay in this state
444 !!!next-input-character;
445 redo A;
446 } elsif ($self->{next_input_character} == -1) {
447 !!!parse-error (type => 'unclosed tag');
448 if ($self->{current_token}->{type} eq 'start tag') {
449 $self->{current_token}->{first_start_tag}
450 = not defined $self->{last_emitted_start_tag_name};
451 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
452 } elsif ($self->{current_token}->{type} eq 'end tag') {
453 $self->{content_model_flag} = 'PCDATA'; # MUST
454 if ($self->{current_token}->{attributes}) {
455 !!!parse-error (type => 'end tag attribute');
456 }
457 } else {
458 die "$0: $self->{current_token}->{type}: Unknown token type";
459 }
460 $self->{state} = 'data';
461 # reconsume
462
463 !!!emit ($self->{current_token}); # start tag or end tag
464
465 redo A;
466 } elsif ($self->{next_input_character} == 0x002F) { # /
467 !!!next-input-character;
468 if ($self->{next_input_character} == 0x003E and # >
469 $self->{current_token}->{type} eq 'start tag' and
470 $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
471 # permitted slash
472 #
473 } else {
474 !!!parse-error (type => 'nestc');
475 }
476 $self->{state} = 'before attribute name';
477 # next-input-character is already done
478 redo A;
479 } else {
480 $self->{current_token}->{tag_name} .= chr $self->{next_input_character};
481 # start tag or end tag
482 ## Stay in the state
483 !!!next-input-character;
484 redo A;
485 }
486 } elsif ($self->{state} eq 'before attribute name') {
487 if ($self->{next_input_character} == 0x0009 or # HT
488 $self->{next_input_character} == 0x000A or # LF
489 $self->{next_input_character} == 0x000B or # VT
490 $self->{next_input_character} == 0x000C or # FF
491 $self->{next_input_character} == 0x0020) { # SP
492 ## Stay in the state
493 !!!next-input-character;
494 redo A;
495 } elsif ($self->{next_input_character} == 0x003E) { # >
496 if ($self->{current_token}->{type} eq 'start tag') {
497 $self->{current_token}->{first_start_tag}
498 = not defined $self->{last_emitted_start_tag_name};
499 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
500 } elsif ($self->{current_token}->{type} eq 'end tag') {
501 $self->{content_model_flag} = 'PCDATA'; # MUST
502 if ($self->{current_token}->{attributes}) {
503 !!!parse-error (type => 'end tag attribute');
504 }
505 } else {
506 die "$0: $self->{current_token}->{type}: Unknown token type";
507 }
508 $self->{state} = 'data';
509 !!!next-input-character;
510
511 !!!emit ($self->{current_token}); # start tag or end tag
512
513 redo A;
514 } elsif (0x0041 <= $self->{next_input_character} and
515 $self->{next_input_character} <= 0x005A) { # A..Z
516 $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
517 value => ''};
518 $self->{state} = 'attribute name';
519 !!!next-input-character;
520 redo A;
521 } elsif ($self->{next_input_character} == 0x002F) { # /
522 !!!next-input-character;
523 if ($self->{next_input_character} == 0x003E and # >
524 $self->{current_token}->{type} eq 'start tag' and
525 $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
526 # permitted slash
527 #
528 } else {
529 !!!parse-error (type => 'nestc');
530 }
531 ## Stay in the state
532 # next-input-character is already done
533 redo A;
534 } elsif ($self->{next_input_character} == -1) {
535 !!!parse-error (type => 'unclosed tag');
536 if ($self->{current_token}->{type} eq 'start tag') {
537 $self->{current_token}->{first_start_tag}
538 = not defined $self->{last_emitted_start_tag_name};
539 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
540 } elsif ($self->{current_token}->{type} eq 'end tag') {
541 $self->{content_model_flag} = 'PCDATA'; # MUST
542 if ($self->{current_token}->{attributes}) {
543 !!!parse-error (type => 'end tag attribute');
544 }
545 } else {
546 die "$0: $self->{current_token}->{type}: Unknown token type";
547 }
548 $self->{state} = 'data';
549 # reconsume
550
551 !!!emit ($self->{current_token}); # start tag or end tag
552
553 redo A;
554 } else {
555 $self->{current_attribute} = {name => chr ($self->{next_input_character}),
556 value => ''};
557 $self->{state} = 'attribute name';
558 !!!next-input-character;
559 redo A;
560 }
561 } elsif ($self->{state} eq 'attribute name') {
562 my $before_leave = sub {
563 if (exists $self->{current_token}->{attributes} # start tag or end tag
564 ->{$self->{current_attribute}->{name}}) { # MUST
565 !!!parse-error (type => 'dupulicate attribute');
566 ## Discard $self->{current_attribute} # MUST
567 } else {
568 $self->{current_token}->{attributes}->{$self->{current_attribute}->{name}}
569 = $self->{current_attribute};
570 }
571 }; # $before_leave
572
573 if ($self->{next_input_character} == 0x0009 or # HT
574 $self->{next_input_character} == 0x000A or # LF
575 $self->{next_input_character} == 0x000B or # VT
576 $self->{next_input_character} == 0x000C or # FF
577 $self->{next_input_character} == 0x0020) { # SP
578 $before_leave->();
579 $self->{state} = 'after attribute name';
580 !!!next-input-character;
581 redo A;
582 } elsif ($self->{next_input_character} == 0x003D) { # =
583 $before_leave->();
584 $self->{state} = 'before attribute value';
585 !!!next-input-character;
586 redo A;
587 } elsif ($self->{next_input_character} == 0x003E) { # >
588 $before_leave->();
589 if ($self->{current_token}->{type} eq 'start tag') {
590 $self->{current_token}->{first_start_tag}
591 = not defined $self->{last_emitted_start_tag_name};
592 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
593 } elsif ($self->{current_token}->{type} eq 'end tag') {
594 $self->{content_model_flag} = 'PCDATA'; # MUST
595 if ($self->{current_token}->{attributes}) {
596 !!!parse-error (type => 'end tag attribute');
597 }
598 } else {
599 die "$0: $self->{current_token}->{type}: Unknown token type";
600 }
601 $self->{state} = 'data';
602 !!!next-input-character;
603
604 !!!emit ($self->{current_token}); # start tag or end tag
605
606 redo A;
607 } elsif (0x0041 <= $self->{next_input_character} and
608 $self->{next_input_character} <= 0x005A) { # A..Z
609 $self->{current_attribute}->{name} .= chr ($self->{next_input_character} + 0x0020);
610 ## Stay in the state
611 !!!next-input-character;
612 redo A;
613 } elsif ($self->{next_input_character} == 0x002F) { # /
614 $before_leave->();
615 !!!next-input-character;
616 if ($self->{next_input_character} == 0x003E and # >
617 $self->{current_token}->{type} eq 'start tag' and
618 $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
619 # permitted slash
620 #
621 } else {
622 !!!parse-error (type => 'nestc');
623 }
624 $self->{state} = 'before attribute name';
625 # next-input-character is already done
626 redo A;
627 } elsif ($self->{next_input_character} == -1) {
628 !!!parse-error (type => 'unclosed tag');
629 $before_leave->();
630 if ($self->{current_token}->{type} eq 'start tag') {
631 $self->{current_token}->{first_start_tag}
632 = not defined $self->{last_emitted_start_tag_name};
633 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
634 } elsif ($self->{current_token}->{type} eq 'end tag') {
635 $self->{content_model_flag} = 'PCDATA'; # MUST
636 if ($self->{current_token}->{attributes}) {
637 !!!parse-error (type => 'end tag attribute');
638 }
639 } else {
640 die "$0: $self->{current_token}->{type}: Unknown token type";
641 }
642 $self->{state} = 'data';
643 # reconsume
644
645 !!!emit ($self->{current_token}); # start tag or end tag
646
647 redo A;
648 } else {
649 $self->{current_attribute}->{name} .= chr ($self->{next_input_character});
650 ## Stay in the state
651 !!!next-input-character;
652 redo A;
653 }
654 } elsif ($self->{state} eq 'after attribute name') {
655 if ($self->{next_input_character} == 0x0009 or # HT
656 $self->{next_input_character} == 0x000A or # LF
657 $self->{next_input_character} == 0x000B or # VT
658 $self->{next_input_character} == 0x000C or # FF
659 $self->{next_input_character} == 0x0020) { # SP
660 ## Stay in the state
661 !!!next-input-character;
662 redo A;
663 } elsif ($self->{next_input_character} == 0x003D) { # =
664 $self->{state} = 'before attribute value';
665 !!!next-input-character;
666 redo A;
667 } elsif ($self->{next_input_character} == 0x003E) { # >
668 if ($self->{current_token}->{type} eq 'start tag') {
669 $self->{current_token}->{first_start_tag}
670 = not defined $self->{last_emitted_start_tag_name};
671 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
672 } elsif ($self->{current_token}->{type} eq 'end tag') {
673 $self->{content_model_flag} = 'PCDATA'; # MUST
674 if ($self->{current_token}->{attributes}) {
675 !!!parse-error (type => 'end tag attribute');
676 }
677 } else {
678 die "$0: $self->{current_token}->{type}: Unknown token type";
679 }
680 $self->{state} = 'data';
681 !!!next-input-character;
682
683 !!!emit ($self->{current_token}); # start tag or end tag
684
685 redo A;
686 } elsif (0x0041 <= $self->{next_input_character} and
687 $self->{next_input_character} <= 0x005A) { # A..Z
688 $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
689 value => ''};
690 $self->{state} = 'attribute name';
691 !!!next-input-character;
692 redo A;
693 } elsif ($self->{next_input_character} == 0x002F) { # /
694 !!!next-input-character;
695 if ($self->{next_input_character} == 0x003E and # >
696 $self->{current_token}->{type} eq 'start tag' and
697 $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
698 # permitted slash
699 #
700 } else {
701 !!!parse-error (type => 'nestc');
702 }
703 $self->{state} = 'before attribute name';
704 # next-input-character is already done
705 redo A;
706 } elsif ($self->{next_input_character} == -1) {
707 !!!parse-error (type => 'unclosed tag');
708 if ($self->{current_token}->{type} eq 'start tag') {
709 $self->{current_token}->{first_start_tag}
710 = not defined $self->{last_emitted_start_tag_name};
711 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
712 } elsif ($self->{current_token}->{type} eq 'end tag') {
713 $self->{content_model_flag} = 'PCDATA'; # MUST
714 if ($self->{current_token}->{attributes}) {
715 !!!parse-error (type => 'end tag attribute');
716 }
717 } else {
718 die "$0: $self->{current_token}->{type}: Unknown token type";
719 }
720 $self->{state} = 'data';
721 # reconsume
722
723 !!!emit ($self->{current_token}); # start tag or end tag
724
725 redo A;
726 } else {
727 $self->{current_attribute} = {name => chr ($self->{next_input_character}),
728 value => ''};
729 $self->{state} = 'attribute name';
730 !!!next-input-character;
731 redo A;
732 }
733 } elsif ($self->{state} eq 'before attribute value') {
734 if ($self->{next_input_character} == 0x0009 or # HT
735 $self->{next_input_character} == 0x000A or # LF
736 $self->{next_input_character} == 0x000B or # VT
737 $self->{next_input_character} == 0x000C or # FF
738 $self->{next_input_character} == 0x0020) { # SP
739 ## Stay in the state
740 !!!next-input-character;
741 redo A;
742 } elsif ($self->{next_input_character} == 0x0022) { # "
743 $self->{state} = 'attribute value (double-quoted)';
744 !!!next-input-character;
745 redo A;
746 } elsif ($self->{next_input_character} == 0x0026) { # &
747 $self->{state} = 'attribute value (unquoted)';
748 ## reconsume
749 redo A;
750 } elsif ($self->{next_input_character} == 0x0027) { # '
751 $self->{state} = 'attribute value (single-quoted)';
752 !!!next-input-character;
753 redo A;
754 } elsif ($self->{next_input_character} == 0x003E) { # >
755 if ($self->{current_token}->{type} eq 'start tag') {
756 $self->{current_token}->{first_start_tag}
757 = not defined $self->{last_emitted_start_tag_name};
758 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
759 } elsif ($self->{current_token}->{type} eq 'end tag') {
760 $self->{content_model_flag} = 'PCDATA'; # MUST
761 if ($self->{current_token}->{attributes}) {
762 !!!parse-error (type => 'end tag attribute');
763 }
764 } else {
765 die "$0: $self->{current_token}->{type}: Unknown token type";
766 }
767 $self->{state} = 'data';
768 !!!next-input-character;
769
770 !!!emit ($self->{current_token}); # start tag or end tag
771
772 redo A;
773 } elsif ($self->{next_input_character} == -1) {
774 !!!parse-error (type => 'unclosed tag');
775 if ($self->{current_token}->{type} eq 'start tag') {
776 $self->{current_token}->{first_start_tag}
777 = not defined $self->{last_emitted_start_tag_name};
778 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
779 } elsif ($self->{current_token}->{type} eq 'end tag') {
780 $self->{content_model_flag} = 'PCDATA'; # MUST
781 if ($self->{current_token}->{attributes}) {
782 !!!parse-error (type => 'end tag attribute');
783 }
784 } else {
785 die "$0: $self->{current_token}->{type}: Unknown token type";
786 }
787 $self->{state} = 'data';
788 ## reconsume
789
790 !!!emit ($self->{current_token}); # start tag or end tag
791
792 redo A;
793 } else {
794 $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
795 $self->{state} = 'attribute value (unquoted)';
796 !!!next-input-character;
797 redo A;
798 }
799 } elsif ($self->{state} eq 'attribute value (double-quoted)') {
800 if ($self->{next_input_character} == 0x0022) { # "
801 $self->{state} = 'before attribute name';
802 !!!next-input-character;
803 redo A;
804 } elsif ($self->{next_input_character} == 0x0026) { # &
805 $self->{last_attribute_value_state} = 'attribute value (double-quoted)';
806 $self->{state} = 'entity in attribute value';
807 !!!next-input-character;
808 redo A;
809 } elsif ($self->{next_input_character} == -1) {
810 !!!parse-error (type => 'unclosed attribute value');
811 if ($self->{current_token}->{type} eq 'start tag') {
812 $self->{current_token}->{first_start_tag}
813 = not defined $self->{last_emitted_start_tag_name};
814 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
815 } elsif ($self->{current_token}->{type} eq 'end tag') {
816 $self->{content_model_flag} = 'PCDATA'; # MUST
817 if ($self->{current_token}->{attributes}) {
818 !!!parse-error (type => 'end tag attribute');
819 }
820 } else {
821 die "$0: $self->{current_token}->{type}: Unknown token type";
822 }
823 $self->{state} = 'data';
824 ## reconsume
825
826 !!!emit ($self->{current_token}); # start tag or end tag
827
828 redo A;
829 } else {
830 $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
831 ## Stay in the state
832 !!!next-input-character;
833 redo A;
834 }
835 } elsif ($self->{state} eq 'attribute value (single-quoted)') {
836 if ($self->{next_input_character} == 0x0027) { # '
837 $self->{state} = 'before attribute name';
838 !!!next-input-character;
839 redo A;
840 } elsif ($self->{next_input_character} == 0x0026) { # &
841 $self->{last_attribute_value_state} = 'attribute value (single-quoted)';
842 $self->{state} = 'entity in attribute value';
843 !!!next-input-character;
844 redo A;
845 } elsif ($self->{next_input_character} == -1) {
846 !!!parse-error (type => 'unclosed attribute value');
847 if ($self->{current_token}->{type} eq 'start tag') {
848 $self->{current_token}->{first_start_tag}
849 = not defined $self->{last_emitted_start_tag_name};
850 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
851 } elsif ($self->{current_token}->{type} eq 'end tag') {
852 $self->{content_model_flag} = 'PCDATA'; # MUST
853 if ($self->{current_token}->{attributes}) {
854 !!!parse-error (type => 'end tag attribute');
855 }
856 } else {
857 die "$0: $self->{current_token}->{type}: Unknown token type";
858 }
859 $self->{state} = 'data';
860 ## reconsume
861
862 !!!emit ($self->{current_token}); # start tag or end tag
863
864 redo A;
865 } else {
866 $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
867 ## Stay in the state
868 !!!next-input-character;
869 redo A;
870 }
871 } elsif ($self->{state} eq 'attribute value (unquoted)') {
872 if ($self->{next_input_character} == 0x0009 or # HT
873 $self->{next_input_character} == 0x000A or # LF
874 $self->{next_input_character} == 0x000B or # HT
875 $self->{next_input_character} == 0x000C or # FF
876 $self->{next_input_character} == 0x0020) { # SP
877 $self->{state} = 'before attribute name';
878 !!!next-input-character;
879 redo A;
880 } elsif ($self->{next_input_character} == 0x0026) { # &
881 $self->{last_attribute_value_state} = 'attribute value (unquoted)';
882 $self->{state} = 'entity in attribute value';
883 !!!next-input-character;
884 redo A;
885 } elsif ($self->{next_input_character} == 0x003E) { # >
886 if ($self->{current_token}->{type} eq 'start tag') {
887 $self->{current_token}->{first_start_tag}
888 = not defined $self->{last_emitted_start_tag_name};
889 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
890 } elsif ($self->{current_token}->{type} eq 'end tag') {
891 $self->{content_model_flag} = 'PCDATA'; # MUST
892 if ($self->{current_token}->{attributes}) {
893 !!!parse-error (type => 'end tag attribute');
894 }
895 } else {
896 die "$0: $self->{current_token}->{type}: Unknown token type";
897 }
898 $self->{state} = 'data';
899 !!!next-input-character;
900
901 !!!emit ($self->{current_token}); # start tag or end tag
902
903 redo A;
904 } elsif ($self->{next_input_character} == -1) {
905 !!!parse-error (type => 'unclosed tag');
906 if ($self->{current_token}->{type} eq 'start tag') {
907 $self->{current_token}->{first_start_tag}
908 = not defined $self->{last_emitted_start_tag_name};
909 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
910 } elsif ($self->{current_token}->{type} eq 'end tag') {
911 $self->{content_model_flag} = 'PCDATA'; # MUST
912 if ($self->{current_token}->{attributes}) {
913 !!!parse-error (type => 'end tag attribute');
914 }
915 } else {
916 die "$0: $self->{current_token}->{type}: Unknown token type";
917 }
918 $self->{state} = 'data';
919 ## reconsume
920
921 !!!emit ($self->{current_token}); # start tag or end tag
922
923 redo A;
924 } else {
925 $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
926 ## Stay in the state
927 !!!next-input-character;
928 redo A;
929 }
930 } elsif ($self->{state} eq 'entity in attribute value') {
931 my $token = $self->_tokenize_attempt_to_consume_an_entity (1);
932
933 unless (defined $token) {
934 $self->{current_attribute}->{value} .= '&';
935 } else {
936 $self->{current_attribute}->{value} .= $token->{data};
937 ## ISSUE: spec says "append the returned character token to the current attribute's value"
938 }
939
940 $self->{state} = $self->{last_attribute_value_state};
941 # next-input-character is already done
942 redo A;
943 } elsif ($self->{state} eq 'bogus comment') {
944 ## (only happen if PCDATA state)
945
946 my $token = {type => 'comment', data => ''};
947
948 BC: {
949 if ($self->{next_input_character} == 0x003E) { # >
950 $self->{state} = 'data';
951 !!!next-input-character;
952
953 !!!emit ($token);
954
955 redo A;
956 } elsif ($self->{next_input_character} == -1) {
957 $self->{state} = 'data';
958 ## reconsume
959
960 !!!emit ($token);
961
962 redo A;
963 } else {
964 $token->{data} .= chr ($self->{next_input_character});
965 !!!next-input-character;
966 redo BC;
967 }
968 } # BC
969 } elsif ($self->{state} eq 'markup declaration open') {
970 ## (only happen if PCDATA state)
971
972 my @next_char;
973 push @next_char, $self->{next_input_character};
974
975 if ($self->{next_input_character} == 0x002D) { # -
976 !!!next-input-character;
977 push @next_char, $self->{next_input_character};
978 if ($self->{next_input_character} == 0x002D) { # -
979 $self->{current_token} = {type => 'comment', data => ''};
980 $self->{state} = 'comment start';
981 !!!next-input-character;
982 redo A;
983 }
984 } elsif ($self->{next_input_character} == 0x0044 or # D
985 $self->{next_input_character} == 0x0064) { # d
986 !!!next-input-character;
987 push @next_char, $self->{next_input_character};
988 if ($self->{next_input_character} == 0x004F or # O
989 $self->{next_input_character} == 0x006F) { # o
990 !!!next-input-character;
991 push @next_char, $self->{next_input_character};
992 if ($self->{next_input_character} == 0x0043 or # C
993 $self->{next_input_character} == 0x0063) { # c
994 !!!next-input-character;
995 push @next_char, $self->{next_input_character};
996 if ($self->{next_input_character} == 0x0054 or # T
997 $self->{next_input_character} == 0x0074) { # t
998 !!!next-input-character;
999 push @next_char, $self->{next_input_character};
1000 if ($self->{next_input_character} == 0x0059 or # Y
1001 $self->{next_input_character} == 0x0079) { # y
1002 !!!next-input-character;
1003 push @next_char, $self->{next_input_character};
1004 if ($self->{next_input_character} == 0x0050 or # P
1005 $self->{next_input_character} == 0x0070) { # p
1006 !!!next-input-character;
1007 push @next_char, $self->{next_input_character};
1008 if ($self->{next_input_character} == 0x0045 or # E
1009 $self->{next_input_character} == 0x0065) { # e
1010 ## ISSUE: What a stupid code this is!
1011 $self->{state} = 'DOCTYPE';
1012 !!!next-input-character;
1013 redo A;
1014 }
1015 }
1016 }
1017 }
1018 }
1019 }
1020 }
1021
1022 !!!parse-error (type => 'bogus comment');
1023 $self->{next_input_character} = shift @next_char;
1024 !!!back-next-input-character (@next_char);
1025 $self->{state} = 'bogus comment';
1026 redo A;
1027
1028 ## ISSUE: typos in spec: chacacters, is is a parse error
1029 ## ISSUE: spec is somewhat unclear on "is the first character that will be in the comment"; what is "that will be in the comment" is what the algorithm defines, isn't it?
1030 } elsif ($self->{state} eq 'comment start') {
1031 if ($self->{next_input_character} == 0x002D) { # -
1032 $self->{state} = 'comment start dash';
1033 !!!next-input-character;
1034 redo A;
1035 } elsif ($self->{next_input_character} == 0x003E) { # >
1036 !!!parse-error (type => 'bogus comment');
1037 $self->{state} = 'data';
1038 !!!next-input-character;
1039
1040 !!!emit ($self->{current_token}); # comment
1041
1042 redo A;
1043 } elsif ($self->{next_input_character} == -1) {
1044 !!!parse-error (type => 'unclosed comment');
1045 $self->{state} = 'data';
1046 ## reconsume
1047
1048 !!!emit ($self->{current_token}); # comment
1049
1050 redo A;
1051 } else {
1052 $self->{current_token}->{data} # comment
1053 .= chr ($self->{next_input_character});
1054 $self->{state} = 'comment';
1055 !!!next-input-character;
1056 redo A;
1057 }
1058 } elsif ($self->{state} eq 'comment start dash') {
1059 if ($self->{next_input_character} == 0x002D) { # -
1060 $self->{state} = 'comment end';
1061 !!!next-input-character;
1062 redo A;
1063 } elsif ($self->{next_input_character} == 0x003E) { # >
1064 !!!parse-error (type => 'bogus comment');
1065 $self->{state} = 'data';
1066 !!!next-input-character;
1067
1068 !!!emit ($self->{current_token}); # comment
1069
1070 redo A;
1071 } elsif ($self->{next_input_character} == -1) {
1072 !!!parse-error (type => 'unclosed comment');
1073 $self->{state} = 'data';
1074 ## reconsume
1075
1076 !!!emit ($self->{current_token}); # comment
1077
1078 redo A;
1079 } else {
1080 $self->{current_token}->{data} # comment
1081 .= chr ($self->{next_input_character});
1082 $self->{state} = 'comment';
1083 !!!next-input-character;
1084 redo A;
1085 }
1086 } elsif ($self->{state} eq 'comment') {
1087 if ($self->{next_input_character} == 0x002D) { # -
1088 $self->{state} = 'comment end dash';
1089 !!!next-input-character;
1090 redo A;
1091 } elsif ($self->{next_input_character} == -1) {
1092 !!!parse-error (type => 'unclosed comment');
1093 $self->{state} = 'data';
1094 ## reconsume
1095
1096 !!!emit ($self->{current_token}); # comment
1097
1098 redo A;
1099 } else {
1100 $self->{current_token}->{data} .= chr ($self->{next_input_character}); # comment
1101 ## Stay in the state
1102 !!!next-input-character;
1103 redo A;
1104 }
1105 } elsif ($self->{state} eq 'comment end dash') {
1106 if ($self->{next_input_character} == 0x002D) { # -
1107 $self->{state} = 'comment end';
1108 !!!next-input-character;
1109 redo A;
1110 } elsif ($self->{next_input_character} == -1) {
1111 !!!parse-error (type => 'unclosed comment');
1112 $self->{state} = 'data';
1113 ## reconsume
1114
1115 !!!emit ($self->{current_token}); # comment
1116
1117 redo A;
1118 } else {
1119 $self->{current_token}->{data} .= '-' . chr ($self->{next_input_character}); # comment
1120 $self->{state} = 'comment';
1121 !!!next-input-character;
1122 redo A;
1123 }
1124 } elsif ($self->{state} eq 'comment end') {
1125 if ($self->{next_input_character} == 0x003E) { # >
1126 $self->{state} = 'data';
1127 !!!next-input-character;
1128
1129 !!!emit ($self->{current_token}); # comment
1130
1131 redo A;
1132 } elsif ($self->{next_input_character} == 0x002D) { # -
1133 !!!parse-error (type => 'dash in comment');
1134 $self->{current_token}->{data} .= '-'; # comment
1135 ## Stay in the state
1136 !!!next-input-character;
1137 redo A;
1138 } elsif ($self->{next_input_character} == -1) {
1139 !!!parse-error (type => 'unclosed comment');
1140 $self->{state} = 'data';
1141 ## reconsume
1142
1143 !!!emit ($self->{current_token}); # comment
1144
1145 redo A;
1146 } else {
1147 !!!parse-error (type => 'dash in comment');
1148 $self->{current_token}->{data} .= '--' . chr ($self->{next_input_character}); # comment
1149 $self->{state} = 'comment';
1150 !!!next-input-character;
1151 redo A;
1152 }
1153 } elsif ($self->{state} eq 'DOCTYPE') {
1154 if ($self->{next_input_character} == 0x0009 or # HT
1155 $self->{next_input_character} == 0x000A or # LF
1156 $self->{next_input_character} == 0x000B or # VT
1157 $self->{next_input_character} == 0x000C or # FF
1158 $self->{next_input_character} == 0x0020) { # SP
1159 $self->{state} = 'before DOCTYPE name';
1160 !!!next-input-character;
1161 redo A;
1162 } else {
1163 !!!parse-error (type => 'no space before DOCTYPE name');
1164 $self->{state} = 'before DOCTYPE name';
1165 ## reconsume
1166 redo A;
1167 }
1168 } elsif ($self->{state} eq 'before DOCTYPE name') {
1169 if ($self->{next_input_character} == 0x0009 or # HT
1170 $self->{next_input_character} == 0x000A or # LF
1171 $self->{next_input_character} == 0x000B or # VT
1172 $self->{next_input_character} == 0x000C or # FF
1173 $self->{next_input_character} == 0x0020) { # SP
1174 ## Stay in the state
1175 !!!next-input-character;
1176 redo A;
1177 } elsif ($self->{next_input_character} == 0x003E) { # >
1178 !!!parse-error (type => 'no DOCTYPE name');
1179 $self->{state} = 'data';
1180 !!!next-input-character;
1181
1182 !!!emit ({type => 'DOCTYPE'}); # incorrect
1183
1184 redo A;
1185 } elsif ($self->{next_input_character} == -1) {
1186 !!!parse-error (type => 'no DOCTYPE name');
1187 $self->{state} = 'data';
1188 ## reconsume
1189
1190 !!!emit ({type => 'DOCTYPE'}); # incorrect
1191
1192 redo A;
1193 } else {
1194 $self->{current_token}
1195 = {type => 'DOCTYPE',
1196 name => chr ($self->{next_input_character}),
1197 correct => 1};
1198 ## ISSUE: "Set the token's name name to the" in the spec
1199 $self->{state} = 'DOCTYPE name';
1200 !!!next-input-character;
1201 redo A;
1202 }
1203 } elsif ($self->{state} eq 'DOCTYPE name') {
1204 ## ISSUE: Redundant "First," in the spec.
1205 if ($self->{next_input_character} == 0x0009 or # HT
1206 $self->{next_input_character} == 0x000A or # LF
1207 $self->{next_input_character} == 0x000B or # VT
1208 $self->{next_input_character} == 0x000C or # FF
1209 $self->{next_input_character} == 0x0020) { # SP
1210 $self->{state} = 'after DOCTYPE name';
1211 !!!next-input-character;
1212 redo A;
1213 } elsif ($self->{next_input_character} == 0x003E) { # >
1214 $self->{state} = 'data';
1215 !!!next-input-character;
1216
1217 !!!emit ($self->{current_token}); # DOCTYPE
1218
1219 redo A;
1220 } elsif ($self->{next_input_character} == -1) {
1221 !!!parse-error (type => 'unclosed DOCTYPE');
1222 $self->{state} = 'data';
1223 ## reconsume
1224
1225 delete $self->{current_token}->{correct};
1226 !!!emit ($self->{current_token}); # DOCTYPE
1227
1228 redo A;
1229 } else {
1230 $self->{current_token}->{name}
1231 .= chr ($self->{next_input_character}); # DOCTYPE
1232 ## Stay in the state
1233 !!!next-input-character;
1234 redo A;
1235 }
1236 } elsif ($self->{state} eq 'after DOCTYPE name') {
1237 if ($self->{next_input_character} == 0x0009 or # HT
1238 $self->{next_input_character} == 0x000A or # LF
1239 $self->{next_input_character} == 0x000B or # VT
1240 $self->{next_input_character} == 0x000C or # FF
1241 $self->{next_input_character} == 0x0020) { # SP
1242 ## Stay in the state
1243 !!!next-input-character;
1244 redo A;
1245 } elsif ($self->{next_input_character} == 0x003E) { # >
1246 $self->{state} = 'data';
1247 !!!next-input-character;
1248
1249 !!!emit ($self->{current_token}); # DOCTYPE
1250
1251 redo A;
1252 } elsif ($self->{next_input_character} == -1) {
1253 !!!parse-error (type => 'unclosed DOCTYPE');
1254 $self->{state} = 'data';
1255 ## reconsume
1256
1257 delete $self->{current_token}->{correct};
1258 !!!emit ($self->{current_token}); # DOCTYPE
1259
1260 redo A;
1261 } elsif ($self->{next_input_character} == 0x0050 or # P
1262 $self->{next_input_character} == 0x0070) { # p
1263 !!!next-input-character;
1264 if ($self->{next_input_character} == 0x0055 or # U
1265 $self->{next_input_character} == 0x0075) { # u
1266 !!!next-input-character;
1267 if ($self->{next_input_character} == 0x0042 or # B
1268 $self->{next_input_character} == 0x0062) { # b
1269 !!!next-input-character;
1270 if ($self->{next_input_character} == 0x004C or # L
1271 $self->{next_input_character} == 0x006C) { # l
1272 !!!next-input-character;
1273 if ($self->{next_input_character} == 0x0049 or # I
1274 $self->{next_input_character} == 0x0069) { # i
1275 !!!next-input-character;
1276 if ($self->{next_input_character} == 0x0043 or # C
1277 $self->{next_input_character} == 0x0063) { # c
1278 $self->{state} = 'before DOCTYPE public identifier';
1279 !!!next-input-character;
1280 redo A;
1281 }
1282 }
1283 }
1284 }
1285 }
1286
1287 #
1288 } elsif ($self->{next_input_character} == 0x0053 or # S
1289 $self->{next_input_character} == 0x0073) { # s
1290 !!!next-input-character;
1291 if ($self->{next_input_character} == 0x0059 or # Y
1292 $self->{next_input_character} == 0x0079) { # y
1293 !!!next-input-character;
1294 if ($self->{next_input_character} == 0x0053 or # S
1295 $self->{next_input_character} == 0x0073) { # s
1296 !!!next-input-character;
1297 if ($self->{next_input_character} == 0x0054 or # T
1298 $self->{next_input_character} == 0x0074) { # t
1299 !!!next-input-character;
1300 if ($self->{next_input_character} == 0x0045 or # E
1301 $self->{next_input_character} == 0x0065) { # e
1302 !!!next-input-character;
1303 if ($self->{next_input_character} == 0x004D or # M
1304 $self->{next_input_character} == 0x006D) { # m
1305 $self->{state} = 'before DOCTYPE system identifier';
1306 !!!next-input-character;
1307 redo A;
1308 }
1309 }
1310 }
1311 }
1312 }
1313
1314 #
1315 } else {
1316 !!!next-input-character;
1317 #
1318 }
1319
1320 !!!parse-error (type => 'string after DOCTYPE name');
1321 $self->{state} = 'bogus DOCTYPE';
1322 # next-input-character is already done
1323 redo A;
1324 } elsif ($self->{state} eq 'before DOCTYPE public identifier') {
1325 if ({
1326 0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
1327 #0x000D => 1, # HT, LF, VT, FF, SP, CR
1328 }->{$self->{next_input_character}}) {
1329 ## Stay in the state
1330 !!!next-input-character;
1331 redo A;
1332 } elsif ($self->{next_input_character} eq 0x0022) { # "
1333 $self->{current_token}->{public_identifier} = ''; # DOCTYPE
1334 $self->{state} = 'DOCTYPE public identifier (double-quoted)';
1335 !!!next-input-character;
1336 redo A;
1337 } elsif ($self->{next_input_character} eq 0x0027) { # '
1338 $self->{current_token}->{public_identifier} = ''; # DOCTYPE
1339 $self->{state} = 'DOCTYPE public identifier (single-quoted)';
1340 !!!next-input-character;
1341 redo A;
1342 } elsif ($self->{next_input_character} eq 0x003E) { # >
1343 !!!parse-error (type => 'no PUBLIC literal');
1344
1345 $self->{state} = 'data';
1346 !!!next-input-character;
1347
1348 delete $self->{current_token}->{correct};
1349 !!!emit ($self->{current_token}); # DOCTYPE
1350
1351 redo A;
1352 } elsif ($self->{next_input_character} == -1) {
1353 !!!parse-error (type => 'unclosed DOCTYPE');
1354
1355 $self->{state} = 'data';
1356 ## reconsume
1357
1358 delete $self->{current_token}->{correct};
1359 !!!emit ($self->{current_token}); # DOCTYPE
1360
1361 redo A;
1362 } else {
1363 !!!parse-error (type => 'string after PUBLIC');
1364 $self->{state} = 'bogus DOCTYPE';
1365 !!!next-input-character;
1366 redo A;
1367 }
1368 } elsif ($self->{state} eq 'DOCTYPE public identifier (double-quoted)') {
1369 if ($self->{next_input_character} == 0x0022) { # "
1370 $self->{state} = 'after DOCTYPE public identifier';
1371 !!!next-input-character;
1372 redo A;
1373 } elsif ($self->{next_input_character} == -1) {
1374 !!!parse-error (type => 'unclosed PUBLIC literal');
1375
1376 $self->{state} = 'data';
1377 ## reconsume
1378
1379 delete $self->{current_token}->{correct};
1380 !!!emit ($self->{current_token}); # DOCTYPE
1381
1382 redo A;
1383 } else {
1384 $self->{current_token}->{public_identifier} # DOCTYPE
1385 .= chr $self->{next_input_character};
1386 ## Stay in the state
1387 !!!next-input-character;
1388 redo A;
1389 }
1390 } elsif ($self->{state} eq 'DOCTYPE public identifier (single-quoted)') {
1391 if ($self->{next_input_character} == 0x0027) { # '
1392 $self->{state} = 'after DOCTYPE public identifier';
1393 !!!next-input-character;
1394 redo A;
1395 } elsif ($self->{next_input_character} == -1) {
1396 !!!parse-error (type => 'unclosed PUBLIC literal');
1397
1398 $self->{state} = 'data';
1399 ## reconsume
1400
1401 delete $self->{current_token}->{correct};
1402 !!!emit ($self->{current_token}); # DOCTYPE
1403
1404 redo A;
1405 } else {
1406 $self->{current_token}->{public_identifier} # DOCTYPE
1407 .= chr $self->{next_input_character};
1408 ## Stay in the state
1409 !!!next-input-character;
1410 redo A;
1411 }
1412 } elsif ($self->{state} eq 'after DOCTYPE public identifier') {
1413 if ({
1414 0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
1415 #0x000D => 1, # HT, LF, VT, FF, SP, CR
1416 }->{$self->{next_input_character}}) {
1417 ## Stay in the state
1418 !!!next-input-character;
1419 redo A;
1420 } elsif ($self->{next_input_character} == 0x0022) { # "
1421 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
1422 $self->{state} = 'DOCTYPE system identifier (double-quoted)';
1423 !!!next-input-character;
1424 redo A;
1425 } elsif ($self->{next_input_character} == 0x0027) { # '
1426 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
1427 $self->{state} = 'DOCTYPE system identifier (single-quoted)';
1428 !!!next-input-character;
1429 redo A;
1430 } elsif ($self->{next_input_character} == 0x003E) { # >
1431 $self->{state} = 'data';
1432 !!!next-input-character;
1433
1434 !!!emit ($self->{current_token}); # DOCTYPE
1435
1436 redo A;
1437 } elsif ($self->{next_input_character} == -1) {
1438 !!!parse-error (type => 'unclosed DOCTYPE');
1439
1440 $self->{state} = 'data';
1441 ## reconsume
1442
1443 delete $self->{current_token}->{correct};
1444 !!!emit ($self->{current_token}); # DOCTYPE
1445
1446 redo A;
1447 } else {
1448 !!!parse-error (type => 'string after PUBLIC literal');
1449 $self->{state} = 'bogus DOCTYPE';
1450 !!!next-input-character;
1451 redo A;
1452 }
1453 } elsif ($self->{state} eq 'before DOCTYPE system identifier') {
1454 if ({
1455 0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
1456 #0x000D => 1, # HT, LF, VT, FF, SP, CR
1457 }->{$self->{next_input_character}}) {
1458 ## Stay in the state
1459 !!!next-input-character;
1460 redo A;
1461 } elsif ($self->{next_input_character} == 0x0022) { # "
1462 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
1463 $self->{state} = 'DOCTYPE system identifier (double-quoted)';
1464 !!!next-input-character;
1465 redo A;
1466 } elsif ($self->{next_input_character} == 0x0027) { # '
1467 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
1468 $self->{state} = 'DOCTYPE system identifier (single-quoted)';
1469 !!!next-input-character;
1470 redo A;
1471 } elsif ($self->{next_input_character} == 0x003E) { # >
1472 !!!parse-error (type => 'no SYSTEM literal');
1473 $self->{state} = 'data';
1474 !!!next-input-character;
1475
1476 delete $self->{current_token}->{correct};
1477 !!!emit ($self->{current_token}); # DOCTYPE
1478
1479 redo A;
1480 } elsif ($self->{next_input_character} == -1) {
1481 !!!parse-error (type => 'unclosed DOCTYPE');
1482
1483 $self->{state} = 'data';
1484 ## reconsume
1485
1486 delete $self->{current_token}->{correct};
1487 !!!emit ($self->{current_token}); # DOCTYPE
1488
1489 redo A;
1490 } else {
1491 !!!parse-error (type => 'string after SYSTEM');
1492 $self->{state} = 'bogus DOCTYPE';
1493 !!!next-input-character;
1494 redo A;
1495 }
1496 } elsif ($self->{state} eq 'DOCTYPE system identifier (double-quoted)') {
1497 if ($self->{next_input_character} == 0x0022) { # "
1498 $self->{state} = 'after DOCTYPE system identifier';
1499 !!!next-input-character;
1500 redo A;
1501 } elsif ($self->{next_input_character} == -1) {
1502 !!!parse-error (type => 'unclosed SYSTEM literal');
1503
1504 $self->{state} = 'data';
1505 ## reconsume
1506
1507 delete $self->{current_token}->{correct};
1508 !!!emit ($self->{current_token}); # DOCTYPE
1509
1510 redo A;
1511 } else {
1512 $self->{current_token}->{system_identifier} # DOCTYPE
1513 .= chr $self->{next_input_character};
1514 ## Stay in the state
1515 !!!next-input-character;
1516 redo A;
1517 }
1518 } elsif ($self->{state} eq 'DOCTYPE system identifier (single-quoted)') {
1519 if ($self->{next_input_character} == 0x0027) { # '
1520 $self->{state} = 'after DOCTYPE system identifier';
1521 !!!next-input-character;
1522 redo A;
1523 } elsif ($self->{next_input_character} == -1) {
1524 !!!parse-error (type => 'unclosed SYSTEM literal');
1525
1526 $self->{state} = 'data';
1527 ## reconsume
1528
1529 delete $self->{current_token}->{correct};
1530 !!!emit ($self->{current_token}); # DOCTYPE
1531
1532 redo A;
1533 } else {
1534 $self->{current_token}->{system_identifier} # DOCTYPE
1535 .= chr $self->{next_input_character};
1536 ## Stay in the state
1537 !!!next-input-character;
1538 redo A;
1539 }
1540 } elsif ($self->{state} eq 'after DOCTYPE system identifier') {
1541 if ({
1542 0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
1543 #0x000D => 1, # HT, LF, VT, FF, SP, CR
1544 }->{$self->{next_input_character}}) {
1545 ## Stay in the state
1546 !!!next-input-character;
1547 redo A;
1548 } elsif ($self->{next_input_character} == 0x003E) { # >
1549 $self->{state} = 'data';
1550 !!!next-input-character;
1551
1552 !!!emit ($self->{current_token}); # DOCTYPE
1553
1554 redo A;
1555 } elsif ($self->{next_input_character} == -1) {
1556 !!!parse-error (type => 'unclosed DOCTYPE');
1557
1558 $self->{state} = 'data';
1559 ## reconsume
1560
1561 delete $self->{current_token}->{correct};
1562 !!!emit ($self->{current_token}); # DOCTYPE
1563
1564 redo A;
1565 } else {
1566 !!!parse-error (type => 'string after SYSTEM literal');
1567 $self->{state} = 'bogus DOCTYPE';
1568 !!!next-input-character;
1569 redo A;
1570 }
1571 } elsif ($self->{state} eq 'bogus DOCTYPE') {
1572 if ($self->{next_input_character} == 0x003E) { # >
1573 $self->{state} = 'data';
1574 !!!next-input-character;
1575
1576 delete $self->{current_token}->{correct};
1577 !!!emit ($self->{current_token}); # DOCTYPE
1578
1579 redo A;
1580 } elsif ($self->{next_input_character} == -1) {
1581 !!!parse-error (type => 'unclosed DOCTYPE');
1582 $self->{state} = 'data';
1583 ## reconsume
1584
1585 delete $self->{current_token}->{correct};
1586 !!!emit ($self->{current_token}); # DOCTYPE
1587
1588 redo A;
1589 } else {
1590 ## Stay in the state
1591 !!!next-input-character;
1592 redo A;
1593 }
1594 } else {
1595 die "$0: $self->{state}: Unknown state";
1596 }
1597 } # A
1598
1599 die "$0: _get_next_token: unexpected case";
1600 } # _get_next_token
1601
## Attempt to consume an entity (character reference).
##
## This method is called after a U+0026 AMPERSAND (&) has been
## consumed; |$self->{next_input_character}| is the code point that
## follows the "&" (or -1 at the end of the input).
##
## Arguments:
##   $in_attr - True if the "&" occurred in an attribute value.  In
##       that case a named entity is only recognized when it is
##       terminated by ";".
##
## Returns:
##   A character token ({type => 'character', data => $string}) whose
##   |data| replaces the "&" and whatever has been consumed after it,
##   or |undef| if the input is left as it was just after the "&" (the
##   caller then handles the "&" itself).  When |undef| is returned
##   after something has been read ahead, the characters read are
##   pushed back so that no input is lost.
sub _tokenize_attempt_to_consume_an_entity ($$) {
  my ($self, $in_attr) = @_;

  if ({
       0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, # HT, LF, VT, FF,
       0x0020 => 1, 0x003C => 1, 0x0026 => 1, -1 => 1, # SP, <, &, EOF # 0x000D # CR
      }->{$self->{next_input_character}}) {
    ## Don't consume
    ## No error
    return undef;
  } elsif ($self->{next_input_character} == 0x0023) { # #
    !!!next-input-character;

    ## Code point designated by the numeric character reference.
    my $code;

    if ($self->{next_input_character} == 0x0078 or # x
        $self->{next_input_character} == 0x0058) { # X
      ## Hexadecimal form.  Remember which of "x" and "X" was used so
      ## that it can be pushed back if no digit follows.
      my $x_char = $self->{next_input_character};
      !!!next-input-character;

      X: {
        my $c = $self->{next_input_character};
        my $digit;
        if (0x0030 <= $c and $c <= 0x0039) { # 0..9
          $digit = $c - 0x0030;
        } elsif (0x0061 <= $c and $c <= 0x0066) { # a..f
          $digit = $c - 0x0061 + 10;
        } elsif (0x0041 <= $c and $c <= 0x0046) { # A..F
          $digit = $c - 0x0041 + 10;
        }

        if (defined $digit) {
          $code = ($code || 0) * 0x10 + $digit;
          !!!next-input-character;
          redo X;
        }
      } # X

      unless (defined $code) { # no hexadecimal digit
        !!!parse-error (type => 'bare hcro');
        ## Push back both the "x" and the character that follows it
        ## *before* overwriting the next input character with "#";
        ## otherwise the character after the "x" would be lost.
        !!!back-next-input-character ($x_char, $self->{next_input_character});
        $self->{next_input_character} = 0x0023; # #
        return undef;
      }
    } elsif (0x0030 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x0039) { # 0..9
      ## Decimal form.
      $code = $self->{next_input_character} - 0x0030;
      !!!next-input-character;

      while (0x0030 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x0039) { # 0..9
        $code *= 10;
        $code += $self->{next_input_character} - 0x0030;

        !!!next-input-character;
      }
    } else {
      ## "&#" followed by neither "x"/"X" nor a decimal digit.
      !!!parse-error (type => 'bare nero');
      !!!back-next-input-character ($self->{next_input_character});
      $self->{next_input_character} = 0x0023; # #
      return undef;
    }

    ## The terminating ";" is consumed if present; its absence is an
    ## error but the reference is still resolved.
    if ($self->{next_input_character} == 0x003B) { # ;
      !!!next-input-character;
    } else {
      !!!parse-error (type => 'no refc');
    }

    ## Replace code points that must not result from a reference.
    if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) {
      !!!parse-error (type => sprintf 'invalid character reference:U+%04X', $code);
      $code = 0xFFFD;
    } elsif ($code > 0x10FFFF) {
      !!!parse-error (type => sprintf 'invalid character reference:U-%08X', $code);
      $code = 0xFFFD;
    } elsif ($code == 0x000D) {
      !!!parse-error (type => 'CR character reference');
      $code = 0x000A;
    } elsif (0x80 <= $code and $code <= 0x9F) {
      !!!parse-error (type => sprintf 'C1 character reference:U+%04X', $code);
      $code = $c1_entity_char->{$code};
    }

    return {type => 'character', data => chr $code};
  } elsif ((0x0041 <= $self->{next_input_character} and
            $self->{next_input_character} <= 0x005A) or
           (0x0061 <= $self->{next_input_character} and
            $self->{next_input_character} <= 0x007A)) {
    ## Named entity: A..Z or a..z starts the name.
    my $entity_name = chr $self->{next_input_character};
    !!!next-input-character;

    ## |$value| is the text to return: either the replacement of the
    ## last matched entity followed by any characters read after it,
    ## or simply the characters read so far.
    my $value = $entity_name;
    ## 1: matched with ";", -1: matched without ";", 0: no match.
    my $match = 0;
    require Whatpm::_NamedEntityList;
    our $EntityChar;

    while (length $entity_name < 10 and
           ## NOTE: Some number greater than the maximum length of entity name
           ((0x0041 <= $self->{next_input_character} and # A
             $self->{next_input_character} <= 0x005A) or # Z
            (0x0061 <= $self->{next_input_character} and # a
             $self->{next_input_character} <= 0x007A) or # z
            (0x0030 <= $self->{next_input_character} and # 0
             $self->{next_input_character} <= 0x0039) or # 9
            $self->{next_input_character} == 0x003B)) { # ;
      $entity_name .= chr $self->{next_input_character};
      if (defined $EntityChar->{$entity_name}) {
        if ($self->{next_input_character} == 0x003B) { # ;
          $value = $EntityChar->{$entity_name};
          $match = 1;
          !!!next-input-character;
          last;
        } elsif (not $in_attr) {
          ## A name without ";" is accepted outside of attribute
          ## values; keep reading in case a longer name matches.
          $value = $EntityChar->{$entity_name};
          $match = -1;
        } else {
          $value .= chr $self->{next_input_character};
        }
      } else {
        $value .= chr $self->{next_input_character};
      }
      !!!next-input-character;
    }

    if ($match > 0) {
      return {type => 'character', data => $value};
    } elsif ($match < 0) {
      !!!parse-error (type => 'no refc');
      return {type => 'character', data => $value};
    } else {
      !!!parse-error (type => 'bare ero');
      ## NOTE: No characters are consumed in the spec.
      return {type => 'character', data => '&'.$value};
    }
  } else {
    ## no characters are consumed
    !!!parse-error (type => 'bare ero');
    return undef;
  }
} # _tokenize_attempt_to_consume_an_entity
1761
## Set up the document object before the tree construction stage:
## |strict_error_checking| is set to 0 and |manakai_is_html| to 1.
##
## NOTE: |$self->{document}| MUST be specified before this method is
## called.
sub _initialize_tree_constructor ($) {
  my ($self) = @_;
  my $doc = $self->{document};

  $doc->strict_error_checking (0);
  ## TODO: Turn mutation events off # MUST
  ## TODO: Turn loose Document option (manakai extension) on
  $doc->manakai_is_html (1); # MUST
} # _initialize_tree_constructor
1770
## Counterpart of |_initialize_tree_constructor|: sets the document's
## |strict_error_checking| back to 1 after the tree has been built.
sub _terminate_tree_constructor ($) {
  my ($self) = @_;
  my $doc = $self->{document};

  $doc->strict_error_checking (1);
  ## TODO: Turn mutation events on
} # _terminate_tree_constructor
1776
1777 ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
1778
1779 { # tree construction stage
1780 my $token;
1781
## Entry point of the tree construction stage: fetches the first
## token, resets the per-parse state kept in |$self| and then runs the
## initial, root element and main phases in that order.
sub _construct_tree ($) {
  my $self = $_[0];

  ## NOTE: It is not defined when an interactive UA renders
  ## |$self->{document}| available to the user, or when it begins
  ## accepting user input.

  ## NOTE: Append a character: collect it and all subsequent
  ## consecutive characters and insert one Text node whose data is
  ## the concatenation of all those characters. # MUST

  !!!next-token;

  ## Reset the tree constructor state.
  $self->{insertion_mode} = 'before head';
  $self->{open_elements} = [];
  $self->{$_} = undef for qw(form_element head_element inner_html_node);

  $self->_tree_construction_initial; # MUST
  $self->_tree_construction_root_element;
  $self->_tree_construction_main;
} # _construct_tree
1805
## Initial phase of the tree construction stage.
##
## Processes the shared lexical |$token| (advancing it with
## |!!!next-token|) until the DOCTYPE has been handled or a token shows
## that the document has none:
##
##   - DOCTYPE token: reports 'not HTML5' unless it is a plain
##     |<!DOCTYPE HTML>|, appends a document type node to
##     |$self->{document}|, sets the document's compat mode to 'quirks'
##     or 'limited quirks' where the name, public identifier or system
##     identifier call for it, fetches the next token and returns.
##   - start tag, end tag, end-of-file, or character data that is not
##     entirely white space: reports 'no DOCTYPE', sets the compat mode
##     to 'quirks' and returns, leaving |$token| to be reprocessed by
##     the next phase.
##   - leading white space is dropped and comments are appended to the
##     document; both stay in this phase.
##
## Dies if the token has any other type.
sub _tree_construction_initial ($) {
  my $self = shift;
  INITIAL: {
    if ($token->{type} eq 'DOCTYPE') {
      ## NOTE: Conformance checkers MAY, instead of reporting "not HTML5"
      ## error, switch to a conformance checking mode for another
      ## language.
      my $doctype_name = $token->{name};
      $doctype_name = '' unless defined $doctype_name;
      $doctype_name =~ tr/a-z/A-Z/;
      if (not defined $token->{name} or # <!DOCTYPE>
          defined $token->{public_identifier} or
          defined $token->{system_identifier}) {
        !!!parse-error (type => 'not HTML5');
      } elsif ($doctype_name ne 'HTML') {
        ## ISSUE: ASCII case-insensitive? (in fact it does not matter)
        !!!parse-error (type => 'not HTML5');
      }

      my $doctype = $self->{document}->create_document_type_definition
          ($token->{name}); ## ISSUE: If name is missing (e.g. <!DOCTYPE>)?
      $doctype->public_id ($token->{public_identifier})
          if defined $token->{public_identifier};
      $doctype->system_id ($token->{system_identifier})
          if defined $token->{system_identifier};
      ## NOTE: Other DocumentType attributes are null or empty lists.
      ## ISSUE: internalSubset = null??
      $self->{document}->append_child ($doctype);

      if (not $token->{correct} or $doctype_name ne 'HTML') {
        $self->{document}->manakai_compat_mode ('quirks');
      } elsif (defined $token->{public_identifier}) {
        ## The public identifier is compared ASCII case-insensitively:
        ## it is upper-cased here, so every literal it is compared
        ## with below MUST be written in upper case as well.
        my $pubid = $token->{public_identifier};
        $pubid =~ tr/a-z/A-Z/;
        if ({
          "+//SILMARIL//DTD HTML PRO V0R11 19970101//EN" => 1,
          "-//ADVASOFT LTD//DTD HTML 3.0 ASWEDIT + EXTENSIONS//EN" => 1,
          "-//AS//DTD HTML 3.0 ASWEDIT + EXTENSIONS//EN" => 1,
          "-//IETF//DTD HTML 2.0 LEVEL 1//EN" => 1,
          "-//IETF//DTD HTML 2.0 LEVEL 2//EN" => 1,
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 1//EN" => 1,
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 2//EN" => 1,
          "-//IETF//DTD HTML 2.0 STRICT//EN" => 1,
          "-//IETF//DTD HTML 2.0//EN" => 1,
          "-//IETF//DTD HTML 2.1E//EN" => 1,
          "-//IETF//DTD HTML 3.0//EN" => 1,
          "-//IETF//DTD HTML 3.0//EN//" => 1,
          "-//IETF//DTD HTML 3.2 FINAL//EN" => 1,
          "-//IETF//DTD HTML 3.2//EN" => 1,
          "-//IETF//DTD HTML 3//EN" => 1,
          "-//IETF//DTD HTML LEVEL 0//EN" => 1,
          "-//IETF//DTD HTML LEVEL 0//EN//2.0" => 1,
          "-//IETF//DTD HTML LEVEL 1//EN" => 1,
          "-//IETF//DTD HTML LEVEL 1//EN//2.0" => 1,
          "-//IETF//DTD HTML LEVEL 2//EN" => 1,
          "-//IETF//DTD HTML LEVEL 2//EN//2.0" => 1,
          "-//IETF//DTD HTML LEVEL 3//EN" => 1,
          "-//IETF//DTD HTML LEVEL 3//EN//3.0" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 0//EN" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 0//EN//2.0" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 1//EN" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 1//EN//2.0" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 2//EN" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 2//EN//2.0" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 3//EN" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 3//EN//3.0" => 1,
          "-//IETF//DTD HTML STRICT//EN" => 1,
          "-//IETF//DTD HTML STRICT//EN//2.0" => 1,
          "-//IETF//DTD HTML STRICT//EN//3.0" => 1,
          "-//IETF//DTD HTML//EN" => 1,
          "-//IETF//DTD HTML//EN//2.0" => 1,
          "-//IETF//DTD HTML//EN//3.0" => 1,
          "-//METRIUS//DTD METRIUS PRESENTATIONAL//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML STRICT//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 TABLES//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML STRICT//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 TABLES//EN" => 1,
          "-//NETSCAPE COMM. CORP.//DTD HTML//EN" => 1,
          "-//NETSCAPE COMM. CORP.//DTD STRICT HTML//EN" => 1,
          "-//O'REILLY AND ASSOCIATES//DTD HTML 2.0//EN" => 1,
          "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED 1.0//EN" => 1,
          "-//SPYGLASS//DTD HTML 2.0 EXTENDED//EN" => 1,
          "-//SQ//DTD HTML 2.0 HOTMETAL + EXTENSIONS//EN" => 1,
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA HTML//EN" => 1,
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA STRICT HTML//EN" => 1,
          "-//W3C//DTD HTML 3 1995-03-24//EN" => 1,
          "-//W3C//DTD HTML 3.2 DRAFT//EN" => 1,
          "-//W3C//DTD HTML 3.2 FINAL//EN" => 1,
          "-//W3C//DTD HTML 3.2//EN" => 1,
          "-//W3C//DTD HTML 3.2S DRAFT//EN" => 1,
          "-//W3C//DTD HTML 4.0 FRAMESET//EN" => 1,
          "-//W3C//DTD HTML 4.0 TRANSITIONAL//EN" => 1,
          "-//W3C//DTD HTML EXPERIMENTAL 19960712//EN" => 1,
          "-//W3C//DTD HTML EXPERIMENTAL 970421//EN" => 1,
          "-//W3C//DTD W3 HTML//EN" => 1,
          "-//W3O//DTD W3 HTML 3.0//EN" => 1,
          "-//W3O//DTD W3 HTML 3.0//EN//" => 1,
          "-//W3O//DTD W3 HTML STRICT 3.0//EN//" => 1,
          "-//WEBTECHS//DTD MOZILLA HTML 2.0//EN" => 1,
          "-//WEBTECHS//DTD MOZILLA HTML//EN" => 1,
          "-/W3C/DTD HTML 4.0 TRANSITIONAL/EN" => 1,
          "HTML" => 1,
        }->{$pubid}) {
          $self->{document}->manakai_compat_mode ('quirks');
        } elsif ($pubid eq "-//W3C//DTD HTML 4.01 FRAMESET//EN" or
                 $pubid eq "-//W3C//DTD HTML 4.01 TRANSITIONAL//EN") {
          ## HTML 4.01 Frameset / Transitional: a missing system
          ## identifier selects quirks mode, a present one selects
          ## limited quirks mode.
          if (defined $token->{system_identifier}) {
            $self->{document}->manakai_compat_mode ('limited quirks');
          } else {
            $self->{document}->manakai_compat_mode ('quirks');
          }
        } elsif ($pubid eq "-//W3C//DTD XHTML 1.0 FRAMESET//EN" or
                 $pubid eq "-//W3C//DTD XHTML 1.0 TRANSITIONAL//EN") {
          $self->{document}->manakai_compat_mode ('limited quirks');
        }
      }
      if (defined $token->{system_identifier}) {
        ## The system identifier is compared in lower case.
        my $sysid = $token->{system_identifier};
        $sysid =~ tr/A-Z/a-z/;
        if ($sysid eq "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
          $self->{document}->manakai_compat_mode ('quirks');
        }
      }

      ## Go to the root element phase.
      !!!next-token;
      return;
    } elsif ({
      'start tag' => 1,
      'end tag' => 1,
      'end-of-file' => 1,
    }->{$token->{type}}) {
      !!!parse-error (type => 'no DOCTYPE');
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the root element phase
      ## reprocess
      return;
    } elsif ($token->{type} eq 'character') {
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D
        ## Ignore the leading white space

        unless (length $token->{data}) {
          ## Nothing but white space: stay in the phase
          !!!next-token;
          redo INITIAL;
        }
      }

      !!!parse-error (type => 'no DOCTYPE');
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the root element phase
      ## reprocess
      return;
    } elsif ($token->{type} eq 'comment') {
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);

      ## Stay in the phase.
      !!!next-token;
      redo INITIAL;
    } else {
      die "$0: $token->{type}: Unknown token";
    }
  } # INITIAL
} # _tree_construction_initial
1973
## Root element phase of the tree construction stage.
##
## Skips what may precede the root element (DOCTYPE tokens are reported
## as 'in html:#DOCTYPE' and dropped, comments are appended to the
## document, leading white space is discarded), then creates the |html|
## element, appends it to the document and pushes it onto the stack of
## open elements.  The token that ended the skipping is left in the
## shared lexical |$token| so that the next phase reprocesses it.
##
## Dies if the token has an unknown type.
sub _tree_construction_root_element ($) {
  my ($self) = @_;

  TOKEN: while (1) {
    my $type = $token->{type};

    if ($type eq 'DOCTYPE') {
      !!!parse-error (type => 'in html:#DOCTYPE');
      ## Ignore the token and stay in the phase
      !!!next-token;
      next TOKEN;
    }

    if ($type eq 'comment') {
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);
      ## Stay in the phase
      !!!next-token;
      next TOKEN;
    }

    if ($type eq 'character') {
      ## Drop leading white space; a token consisting of nothing else
      ## is ignored altogether.
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)// and # \x0D
          not length $token->{data}) {
        !!!next-token;
        next TOKEN;
      }
    } elsif ($type eq 'start tag' or
             $type eq 'end tag' or
             $type eq 'end-of-file') {
      ## ISSUE: There is an issue in the spec
    } else {
      die "$0: $token->{type}: Unknown token";
    }

    last TOKEN;
  } # TOKEN

  my $root_element; !!!create-element ($root_element, 'html');
  $self->{document}->append_child ($root_element);
  push @{$self->{open_elements}}, [$root_element, 'html'];
  ## Go to the main phase and reprocess the current token there.
  return;
} # _tree_construction_root_element
2020
## Resets |$self->{insertion_mode}| according to the stack of open
## elements.
##
## Walks |$self->{open_elements}| from the current node (the last
## entry) towards the first entry and selects the mode from the local
## name (|->[1]|) of the first node that has an entry in the table
## below.  When the first entry of the stack is reached and
## |$self->{inner_html_node}| is defined, that node is examined in
## place of the stack entry unless it is a |td| or |th|.  An |html|
## node selects 'before head' or 'after head', depending on whether
## |$self->{head_element}| is defined; if nothing matches at all the
## mode becomes 'in body'.
##
## Returns nothing.
sub _reset_insertion_mode ($) {
  my $self = shift;

  ## Steps 4..13: insertion mode selected by the local name of a node.
  ## NOTE: |tbody|, |thead| and |tfoot| all select 'in table body';
  ## the HTML5 algorithm has no separate mode for table heads or feet.
  my $mode_for = {
    select => 'in select',
    td => 'in cell',
    th => 'in cell',
    tr => 'in row',
    tbody => 'in table body',
    thead => 'in table body',
    tfoot => 'in table body',
    caption => 'in caption',
    colgroup => 'in column group',
    table => 'in table',
    head => 'in body', # not in head!
    body => 'in body',
    frameset => 'in frameset',
  };

  ## Step 1
  my $last;

  ## Step 2
  my $i = -1;
  my $node = $self->{open_elements}->[$i];

  ## Step 3
  S3: {
    ## ISSUE: Oops! "If node is the first node in the stack of open
    ## elements, then set last to true. If the context element of the
    ## HTML fragment parsing algorithm is neither a td element nor a
    ## th element, then set node to the context element. (fragment case)":
    ## The second "if" is in the scope of the first "if"!?
    if ($self->{open_elements}->[0]->[0] eq $node->[0]) {
      $last = 1;
      if (defined $self->{inner_html_node}) {
        if ($self->{inner_html_node}->[1] eq 'td' or
            $self->{inner_html_node}->[1] eq 'th') {
          #
        } else {
          $node = $self->{inner_html_node};
        }
      }
    }

    ## Step 4..13
    my $new_mode = $mode_for->{$node->[1]};
    if (defined $new_mode) {
      $self->{insertion_mode} = $new_mode;
      return;
    }

    ## Step 14
    if ($node->[1] eq 'html') {
      unless (defined $self->{head_element}) {
        $self->{insertion_mode} = 'before head';
      } else {
        $self->{insertion_mode} = 'after head';
      }
      return;
    }

    ## Step 15
    if ($last) {
      $self->{insertion_mode} = 'in body';
      return;
    }

    ## Step 16
    $i--;
    $node = $self->{open_elements}->[$i];

    ## Step 17
    redo S3;
  } # S3
} # _reset_insertion_mode
2089
2090 sub _tree_construction_main ($) {
2091 my $self = shift;
2092
2093 my $phase = 'main';
2094
2095 my $active_formatting_elements = [];
2096
## Reconstructs the active formatting elements.
##
## |$insert| is a code reference that is called with each newly cloned
## element node to put it into the tree.  Does nothing when the list
## of active formatting elements is empty, or when its last entry is a
## marker or is already on the stack of open elements.  Otherwise it
## steps backwards through the list to find where to start, and from
## there to the end of the list replaces each entry by a shallow clone
## that is inserted via |$insert| and pushed onto the stack of open
## elements.
my $reconstruct_active_formatting_elements = sub { # MUST
  my ($insert) = @_;

  ## Step 1
  return unless @$active_formatting_elements;

  ## Step 3
  my $i = -1;
  my $entry = $active_formatting_elements->[$i];

  ## Step 2
  return if $entry->[0] eq '#marker';
  for my $open (@{$self->{open_elements}}) {
    return if $entry->[0] eq $open->[0];
  }

  REWIND: while (1) {
    ## Step 4: nothing precedes the first entry of the list
    last REWIND if $active_formatting_elements->[0]->[0] eq $entry->[0];

    ## Step 5
    $i--;
    $entry = $active_formatting_elements->[$i];

    ## Step 6: keep rewinding until a marker or an entry that is on
    ## the stack of open elements is met
    my $stop = $entry->[0] eq '#marker';
    unless ($stop) {
      for my $open (@{$self->{open_elements}}) {
        if ($entry->[0] eq $open->[0]) {
          $stop = 1;
          last;
        }
      }
    }
    next REWIND unless $stop;

    ## Step 7
    $i++;
    $entry = $active_formatting_elements->[$i];
    last REWIND;
  } # REWIND

  ADVANCE: while (1) {
    ## Step 8
    my $clone = [$entry->[0]->clone_node (0), $entry->[1]];

    ## Step 9
    $insert->($clone->[0]);
    push @{$self->{open_elements}}, $clone;

    ## Step 10
    $active_formatting_elements->[$i] = $self->{open_elements}->[-1];

    ## Step 11: done once the last entry of the list has been replaced
    last ADVANCE
        if $clone->[0] eq $active_formatting_elements->[-1]->[0];

    ## Step 7'
    $i++;
    $entry = $active_formatting_elements->[$i];
  } # ADVANCE
}; # $reconstruct_active_formatting_elements
2167
## Removes the last marker in the list of active formatting elements
## together with every entry that follows it.  The list is left
## untouched when it contains no marker.
my $clear_up_to_marker = sub {
  my $i = $#$active_formatting_elements;
  while ($i >= 0) {
    if ($active_formatting_elements->[$i]->[0] eq '#marker') {
      splice @$active_formatting_elements, $i;
      return;
    }
    $i--;
  }
}; # $clear_up_to_marker
2176
## Handles a start tag whose content is CDATA or RCDATA.
##
## |$content_model_flag| is the flag ('CDATA' or 'RCDATA') given to the
## tokenizer while the content is read; |$insert| is a code reference
## that is called with the new element to put it into the tree.  The
## element is created from the current start tag token, the data of
## all character tokens that follow is collected into a single Text
## node child, and the content model flag is set back to 'PCDATA'.  A
## parse error is reported unless the token after the text is the
## matching end tag; that token is consumed in either case.
my $parse_rcdata = sub ($$) {
  my $content_model_flag = shift;
  my $insert = shift;

  ## Step 1
  my $start_tag_name = $token->{tag_name};
  my $el;
  !!!create-element ($el, $start_tag_name, $token->{attributes});

  ## Step 2
  $insert->($el); # /context node/->append_child ($el)

  ## Step 3
  $self->{content_model_flag} = $content_model_flag; # CDATA or RCDATA
  delete $self->{escape}; # MUST

  ## Step 4
  my @chunk;
  !!!next-token;
  until ($token->{type} ne 'character') { # or until stop tokenizing
    push @chunk, $token->{data};
    !!!next-token;
  }
  my $data = join '', @chunk;

  ## Step 5
  if (length $data) {
    $el->append_child ($self->{document}->create_text_node ($data));
  }

  ## Step 6
  $self->{content_model_flag} = 'PCDATA';

  ## Step 7: the matching end tag is silently ignored, anything else
  ## is an error
  unless ($token->{type} eq 'end tag' and
          $token->{tag_name} eq $start_tag_name) {
    !!!parse-error (type => 'in '.$content_model_flag.':#'.$token->{type});
  }
  !!!next-token;
}; # $parse_rcdata
2217
## Handles a |script| start tag.
##
## |$insert| is a code reference that is called with the new element to
## put it into the tree.  The element is created from the current start
## tag token, the content is read with the content model flag set to
## 'CDATA' and appended to the element as text, and the flag is set back
## to 'PCDATA'.  A parse error is reported unless the token after the
## text is a |script| end tag.  The element is inserted only when
## |$self->{inner_html_node}| is not defined.  The token after the
## text is consumed in every case.
my $script_start_tag = sub ($) {
  my ($insert) = @_;
  my $script_el;
  !!!create-element ($script_el, 'script', $token->{attributes});
  ## TODO: mark as "parser-inserted"

  $self->{content_model_flag} = 'CDATA';
  delete $self->{escape}; # MUST

  ## Collect the data of every character token that follows; stop at
  ## the first non-character token or when the tokenizer stops
  ## tokenising.
  my @chunk;
  !!!next-token;
  until ($token->{type} ne 'character') {
    push @chunk, $token->{data};
    !!!next-token;
  }
  my $script_text = join '', @chunk;
  $script_el->manakai_append_text ($script_text) if length $script_text;

  $self->{content_model_flag} = 'PCDATA';

  ## A |script| end tag is silently ignored; anything else is an error.
  unless ($token->{type} eq 'end tag' and
          $token->{tag_name} eq 'script') {
    !!!parse-error (type => 'in CDATA:#'.$token->{type});
    ## ISSUE: And ignore?
    ## TODO: mark as "already executed"
  }

  unless (defined $self->{inner_html_node}) {
    ## TODO: $old_insertion_point = current insertion point
    ## TODO: insertion point = just before the next input character

    $insert->($script_el);

    ## TODO: insertion point = $old_insertion_point (might be "undefined")

    ## TODO: if there is a script that will execute as soon as the parser resume, then...
  } else {
    ## TODO: mark as "already executed"
  }

  !!!next-token;
}; # $script_start_tag
2263
## Handles an end tag for a formatting element whose tag name is
## |$tag_name|, repairing mis-nested formatting markup.
##
## The formatting element is the last entry of the list of active
## formatting elements, after the last marker, that has the given tag
## name.  The steps below repeat (|redo FET|) until one of the
## following ends the processing; in each case the next token is
## fetched before returning:
##
##   - no such entry exists, or the element is on the stack of open
##     elements but not in scope: a parse error is reported and the
##     token is ignored;
##   - the element is not on the stack of open elements: a parse error
##     is reported and the entry is removed from the list;
##   - no "furthest block" follows the element on the stack: the stack
##     is cut back to just above the element and the entry is removed
##     from the list.
##
## Otherwise the furthest block is re-parented under the node above the
## formatting element, with clones of the formatting elements in
## between, and a clone of the formatting element takes over the
## children of the furthest block.
my $formatting_end_tag = sub {
  my $tag_name = shift;

  FET: {
    ## Step 1
    my $formatting_element;
    my $formatting_element_i_in_active;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[1] eq $tag_name) {
        $formatting_element = $active_formatting_elements->[$_];
        $formatting_element_i_in_active = $_;
        last AFE;
      } elsif ($active_formatting_elements->[$_]->[0] eq '#marker') {
        last AFE;
      }
    } # AFE
    unless (defined $formatting_element) {
      !!!parse-error (type => 'unmatched end tag:'.$tag_name);
      ## Ignore the token
      !!!next-token;
      return;
    }
    ## has an element in scope
    my $in_scope = 1;
    my $formatting_element_i_in_open;
    INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if ($node->[0] eq $formatting_element->[0]) {
        if ($in_scope) {
          $formatting_element_i_in_open = $_;
          last INSCOPE;
        } else { # in open elements but not in scope
          !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
          ## Ignore the token
          !!!next-token;
          return;
        }
      } elsif ({
        table => 1, caption => 1, td => 1, th => 1,
        button => 1, marquee => 1, object => 1, html => 1,
      }->{$node->[1]}) {
        $in_scope = 0;
      }
    } # INSCOPE
    unless (defined $formatting_element_i_in_open) {
      !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
      ## Remove the formatting element itself from the list.  It is
      ## not necessarily the last entry, so a plain |pop| would remove
      ## the wrong one.
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      !!!next-token; ## TODO: ok?
      return;
    }
    if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
      !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
    }

    ## Step 2
    ## The stack is scanned from the current node up to the formatting
    ## element and every match overwrites the previous one, so the
    ## match nearest to the formatting element wins.
    my $furthest_block;
    my $furthest_block_i_in_open;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if (not $formatting_category->{$node->[1]} and
          #not $phrasing_category->{$node->[1]} and
          ($special_category->{$node->[1]} or
           $scoping_category->{$node->[1]})) {
        $furthest_block = $node;
        $furthest_block_i_in_open = $_;
      } elsif ($node->[0] eq $formatting_element->[0]) {
        last OE;
      }
    } # OE

    ## Step 3
    unless (defined $furthest_block) { # MUST
      splice @{$self->{open_elements}}, $formatting_element_i_in_open;
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      !!!next-token;
      return;
    }

    ## Step 4
    my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1];

    ## Step 5
    my $furthest_block_parent = $furthest_block->[0]->parent_node;
    if (defined $furthest_block_parent) {
      $furthest_block_parent->remove_child ($furthest_block->[0]);
    }

    ## Step 6
    ## The bookmark is kept as the node of the entry that precedes the
    ## formatting element in the list.
    my $bookmark_prev_el
        = $active_formatting_elements->[$formatting_element_i_in_active - 1]
            ->[0];

    ## Step 7
    my $node = $furthest_block;
    my $node_i_in_open = $furthest_block_i_in_open;
    my $last_node = $furthest_block;
    S7: {
      ## Step 1
      $node_i_in_open--;
      $node = $self->{open_elements}->[$node_i_in_open];

      ## Step 2
      ## A node that is not in the list of active formatting elements
      ## is dropped from the stack and the next node above is tried.
      my $node_i_in_active;
      S7S2: {
        for (reverse 0..$#$active_formatting_elements) {
          if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
            $node_i_in_active = $_;
            last S7S2;
          }
        }
        splice @{$self->{open_elements}}, $node_i_in_open, 1;
        redo S7;
      } # S7S2

      ## Step 3
      last S7 if $node->[0] eq $formatting_element->[0];

      ## Step 4
      if ($last_node->[0] eq $furthest_block->[0]) {
        $bookmark_prev_el = $node->[0];
      }

      ## Step 5
      if ($node->[0]->has_child_nodes ()) {
        my $clone = [$node->[0]->clone_node (0), $node->[1]];
        $active_formatting_elements->[$node_i_in_active] = $clone;
        $self->{open_elements}->[$node_i_in_open] = $clone;
        $node = $clone;
      }

      ## Step 6
      $node->[0]->append_child ($last_node->[0]);

      ## Step 7
      $last_node = $node;

      ## Step 8
      redo S7;
    } # S7

    ## Step 8
    $common_ancestor_node->[0]->append_child ($last_node->[0]);

    ## Step 9
    my $clone = [$formatting_element->[0]->clone_node (0),
                 $formatting_element->[1]];

    ## Step 10
    ## The child list is copied first because appending a child to the
    ## clone takes it out of the furthest block.
    my @cn = @{$furthest_block->[0]->child_nodes};
    $clone->[0]->append_child ($_) for @cn;

    ## Step 11
    $furthest_block->[0]->append_child ($clone->[0]);

    ## Step 12
    ## Remove the formatting element from the list and insert the clone
    ## right after the bookmarked entry.  |$i| is decremented when the
    ## removal happens below an already recorded bookmark index.
    my $i;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
        splice @$active_formatting_elements, $_, 1;
        $i-- and last AFE if defined $i;
      } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
        $i = $_;
      }
    } # AFE
    splice @$active_formatting_elements, $i + 1, 0, $clone;

    ## Step 13
    ## Remove the formatting element from the stack and insert the
    ## clone immediately after the furthest block.
    undef $i;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
        splice @{$self->{open_elements}}, $_, 1;
        $i-- and last OE if defined $i;
      } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
        $i = $_;
      }
    } # OE
    ## NOTE: The length argument is 0: the clone is inserted, and any
    ## element below the furthest block stays on the stack.
    splice @{$self->{open_elements}}, $i + 1, 0, $clone;

    ## Step 14
    redo FET;
  } # FET
}; # $formatting_end_tag
2446
## Appends the node given as the first argument to the current node,
## i.e. the node of the last entry of the stack of open elements.
my $insert_to_current = sub {
  my $current_node = $self->{open_elements}->[-1]->[0];
  $current_node->append_child ($_[0]);
}; # $insert_to_current
2450
## Inserts |$child| into the tree, foster-parenting it when the current
## node is a |table|, |tbody|, |tfoot|, |thead| or |tr| element.
##
## In that case the last |table| on the stack of open elements is
## looked up: if its parent node has node type 1 (an element), the
## child is inserted into that parent just before the table; otherwise
## it is appended to the node of the stack entry preceding the table.
## When the stack holds no |table|, the child is appended to the node
## of the first stack entry.  When the current node is none of the
## elements listed above, the child is simply appended to it.
my $insert_to_foster = sub {
  my ($child) = @_;
  my $open = $self->{open_elements};
  my %table_context = map { $_ => 1 } qw(table tbody tfoot thead tr);

  return $open->[-1]->[0]->append_child ($child)
      unless $table_context{$open->[-1]->[1]};

  # MUST
  my ($foster_parent_element, $next_sibling);
  for my $i (reverse 0..$#$open) {
    next unless $open->[$i]->[1] eq 'table';

    my $table = $open->[$i]->[0];
    my $parent = $table->parent_node;
    if (defined $parent and $parent->node_type == 1) {
      $foster_parent_element = $parent;
      $next_sibling = $table;
    } else {
      $foster_parent_element = $open->[$i - 1]->[0];
    }
    last;
  }
  $foster_parent_element = $open->[0]->[0]
      unless defined $foster_parent_element;

  ## An undefined |$next_sibling| is passed on as it is.
  $foster_parent_element->insert_before ($child, $next_sibling);
}; # $insert_to_foster
2481
2482 my $in_body = sub {
2483 my $insert = shift;
2484 if ($token->{type} eq 'start tag') {
2485 if ($token->{tag_name} eq 'script') {
2486 ## NOTE: This is an "as if in head" code clone
2487 $script_start_tag->($insert);
2488 return;
2489 } elsif ($token->{tag_name} eq 'style') {
2490 ## NOTE: This is an "as if in head" code clone
2491 $parse_rcdata->('CDATA', $insert);
2492 return;
2493 } elsif ({
2494 base => 1, link => 1, meta => 1,
2495 }->{$token->{tag_name}}) {
2496 ## NOTE: This is an "as if in head" code clone, only "-t" differs
2497 !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2498 pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
2499 !!!next-token;
2500 ## TODO: Extracting |charset| from |meta|.
2501 return;
2502 } elsif ($token->{tag_name} eq 'title') {
2503 !!!parse-error (type => 'in body:title');
2504 ## NOTE: This is an "as if in head" code clone
2505 $parse_rcdata->('RCDATA', $insert);
2506 return;
2507 } elsif ($token->{tag_name} eq 'body') {
2508 !!!parse-error (type => 'in body:body');
2509
2510 if (@{$self->{open_elements}} == 1 or
2511 $self->{open_elements}->[1]->[1] ne 'body') {
2512 ## Ignore the token
2513 } else {
2514 my $body_el = $self->{open_elements}->[1]->[0];
2515 for my $attr_name (keys %{$token->{attributes}}) {
2516 unless ($body_el->has_attribute_ns (undef, $attr_name)) {
2517 $body_el->set_attribute_ns
2518 (undef, [undef, $attr_name],
2519 $token->{attributes}->{$attr_name}->{value});
2520 }
2521 }
2522 }
2523 !!!next-token;
2524 return;
2525 } elsif ({
2526 address => 1, blockquote => 1, center => 1, dir => 1,
2527 div => 1, dl => 1, fieldset => 1, listing => 1,
2528 menu => 1, ol => 1, p => 1, ul => 1,
2529 pre => 1,
2530 }->{$token->{tag_name}}) {
2531 ## has a p element in scope
2532 INSCOPE: for (reverse @{$self->{open_elements}}) {
2533 if ($_->[1] eq 'p') {
2534 !!!back-token;
2535 $token = {type => 'end tag', tag_name => 'p'};
2536 return;
2537 } elsif ({
2538 table => 1, caption => 1, td => 1, th => 1,
2539 button => 1, marquee => 1, object => 1, html => 1,
2540 }->{$_->[1]}) {
2541 last INSCOPE;
2542 }
2543 } # INSCOPE
2544
2545 !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2546 if ($token->{tag_name} eq 'pre') {
2547 !!!next-token;
2548 if ($token->{type} eq 'character') {
2549 $token->{data} =~ s/^\x0A//;
2550 unless (length $token->{data}) {
2551 !!!next-token;
2552 }
2553 }
2554 } else {
2555 !!!next-token;
2556 }
2557 return;
2558 } elsif ($token->{tag_name} eq 'form') {
2559 if (defined $self->{form_element}) {
2560 !!!parse-error (type => 'in form:form');
2561 ## Ignore the token
2562 !!!next-token;
2563 return;
2564 } else {
2565 ## has a p element in scope
2566 INSCOPE: for (reverse @{$self->{open_elements}}) {
2567 if ($_->[1] eq 'p') {
2568 !!!back-token;
2569 $token = {type => 'end tag', tag_name => 'p'};
2570 return;
2571 } elsif ({
2572 table => 1, caption => 1, td => 1, th => 1,
2573 button => 1, marquee => 1, object => 1, html => 1,
2574 }->{$_->[1]}) {
2575 last INSCOPE;
2576 }
2577 } # INSCOPE
2578
2579 !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2580 $self->{form_element} = $self->{open_elements}->[-1]->[0];
2581 !!!next-token;
2582 return;
2583 }
2584 } elsif ($token->{tag_name} eq 'li') {
2585 ## has a p element in scope
2586 INSCOPE: for (reverse @{$self->{open_elements}}) {
2587 if ($_->[1] eq 'p') {
2588 !!!back-token;
2589 $token = {type => 'end tag', tag_name => 'p'};
2590 return;
2591 } elsif ({
2592 table => 1, caption => 1, td => 1, th => 1,
2593 button => 1, marquee => 1, object => 1, html => 1,
2594 }->{$_->[1]}) {
2595 last INSCOPE;
2596 }
2597 } # INSCOPE
2598
2599 ## Step 1
2600 my $i = -1;
2601 my $node = $self->{open_elements}->[$i];
2602 LI: {
2603 ## Step 2
2604 if ($node->[1] eq 'li') {
2605 if ($i != -1) {
2606 !!!parse-error (type => 'end tag missing:'.
2607 $self->{open_elements}->[-1]->[1]);
2608 }
2609 splice @{$self->{open_elements}}, $i;
2610 last LI;
2611 }
2612
2613 ## Step 3
2614 if (not $formatting_category->{$node->[1]} and
2615 #not $phrasing_category->{$node->[1]} and
2616 ($special_category->{$node->[1]} or
2617 $scoping_category->{$node->[1]}) and
2618 $node->[1] ne 'address' and $node->[1] ne 'div') {
2619 last LI;
2620 }
2621
2622 ## Step 4
2623 $i--;
2624 $node = $self->{open_elements}->[$i];
2625 redo LI;
2626 } # LI
2627
2628 !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2629 !!!next-token;
2630 return;
2631 } elsif ($token->{tag_name} eq 'dd' or $token->{tag_name} eq 'dt') {
2632 ## has a p element in scope
2633 INSCOPE: for (reverse @{$self->{open_elements}}) {
2634 if ($_->[1] eq 'p') {
2635 !!!back-token;
2636 $token = {type => 'end tag', tag_name => 'p'};
2637 return;
2638 } elsif ({
2639 table => 1, caption => 1, td => 1, th => 1,
2640 button => 1, marquee => 1, object => 1, html => 1,
2641 }->{$_->[1]}) {
2642 last INSCOPE;
2643 }
2644 } # INSCOPE
2645
2646 ## Step 1
2647 my $i = -1;
2648 my $node = $self->{open_elements}->[$i];
2649 LI: {
2650 ## Step 2
2651 if ($node->[1] eq 'dt' or $node->[1] eq 'dd') {
2652 if ($i != -1) {
2653 !!!parse-error (type => 'end tag missing:'.
2654 $self->{open_elements}->[-1]->[1]);
2655 }
2656 splice @{$self->{open_elements}}, $i;
2657 last LI;
2658 }
2659
2660 ## Step 3
2661 if (not $formatting_category->{$node->[1]} and
2662 #not $phrasing_category->{$node->[1]} and
2663 ($special_category->{$node->[1]} or
2664 $scoping_category->{$node->[1]}) and
2665 $node->[1] ne 'address' and $node->[1] ne 'div') {
2666 last LI;
2667 }
2668
2669 ## Step 4
2670 $i--;
2671 $node = $self->{open_elements}->[$i];
2672 redo LI;
2673 } # LI
2674
2675 !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2676 !!!next-token;
2677 return;
2678 } elsif ($token->{tag_name} eq 'plaintext') {
2679 ## has a p element in scope
2680 INSCOPE: for (reverse @{$self->{open_elements}}) {
2681 if ($_->[1] eq 'p') {
2682 !!!back-token;
2683 $token = {type => 'end tag', tag_name => 'p'};
2684 return;
2685 } elsif ({
2686 table => 1, caption => 1, td => 1, th => 1,
2687 button => 1, marquee => 1, object => 1, html => 1,
2688 }->{$_->[1]}) {
2689 last INSCOPE;
2690 }
2691 } # INSCOPE
2692
2693 !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2694
2695 $self->{content_model_flag} = 'PLAINTEXT';
2696
2697 !!!next-token;
2698 return;
2699 } elsif ({
2700 h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2701 }->{$token->{tag_name}}) {
2702 ## has a p element in scope
2703 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2704 my $node = $self->{open_elements}->[$_];
2705 if ($node->[1] eq 'p') {
2706 !!!back-token;
2707 $token = {type => 'end tag', tag_name => 'p'};
2708 return;
2709 } elsif ({
2710 table => 1, caption => 1, td => 1, th => 1,
2711 button => 1, marquee => 1, object => 1, html => 1,
2712 }->{$node->[1]}) {
2713 last INSCOPE;
2714 }
2715 } # INSCOPE
2716
2717 ## NOTE: See <http://html5.org/tools/web-apps-tracker?from=925&to=926>
2718 ## has an element in scope
2719 #my $i;
2720 #INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2721 # my $node = $self->{open_elements}->[$_];
2722 # if ({
2723 # h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2724 # }->{$node->[1]}) {
2725 # $i = $_;
2726 # last INSCOPE;
2727 # } elsif ({
2728 # table => 1, caption => 1, td => 1, th => 1,
2729 # button => 1, marquee => 1, object => 1, html => 1,
2730 # }->{$node->[1]}) {
2731 # last INSCOPE;
2732 # }
2733 #} # INSCOPE
2734 #
2735 #if (defined $i) {
2736 # !!! parse-error (type => 'in hn:hn');
2737 # splice @{$self->{open_elements}}, $i;
2738 #}
2739
2740 !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2741
2742 !!!next-token;
2743 return;
2744 } elsif ($token->{tag_name} eq 'a') {
2745 AFE: for my $i (reverse 0..$#$active_formatting_elements) {
2746 my $node = $active_formatting_elements->[$i];
2747 if ($node->[1] eq 'a') {
2748 !!!parse-error (type => 'in a:a');
2749
2750 !!!back-token;
2751 $token = {type => 'end tag', tag_name => 'a'};
2752 $formatting_end_tag->($token->{tag_name});
2753
2754 AFE2: for (reverse 0..$#$active_formatting_elements) {
2755 if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
2756 splice @$active_formatting_elements, $_, 1;
2757 last AFE2;
2758 }
2759 } # AFE2
2760 OE: for (reverse 0..$#{$self->{open_elements}}) {
2761 if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
2762 splice @{$self->{open_elements}}, $_, 1;
2763 last OE;
2764 }
2765 } # OE
2766 last AFE;
2767 } elsif ($node->[0] eq '#marker') {
2768 last AFE;
2769 }
2770 } # AFE
2771
2772 $reconstruct_active_formatting_elements->($insert_to_current);
2773
2774 !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2775 push @$active_formatting_elements, $self->{open_elements}->[-1];
2776
2777 !!!next-token;
2778 return;
2779 } elsif ({
2780 b => 1, big => 1, em => 1, font => 1, i => 1,
2781 s => 1, small => 1, strile => 1,
2782 strong => 1, tt => 1, u => 1,
2783 }->{$token->{tag_name}}) {
2784 $reconstruct_active_formatting_elements->($insert_to_current);
2785
2786 !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2787 push @$active_formatting_elements, $self->{open_elements}->[-1];
2788
2789 !!!next-token;
2790 return;
2791 } elsif ($token->{tag_name} eq 'nobr') {
2792 $reconstruct_active_formatting_elements->($insert_to_current);
2793
2794 ## has a |nobr| element in scope
2795 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2796 my $node = $self->{open_elements}->[$_];
2797 if ($node->[1] eq 'nobr') {
2798 !!!back-token;
2799 $token = {type => 'end tag', tag_name => 'nobr'};
2800 return;
2801 } elsif ({
2802 table => 1, caption => 1, td => 1, th => 1,
2803 button => 1, marquee => 1, object => 1, html => 1,
2804 }->{$node->[1]}) {
2805 last INSCOPE;
2806 }
2807 } # INSCOPE
2808
2809 !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2810 push @$active_formatting_elements, $self->{open_elements}->[-1];
2811
2812 !!!next-token;
2813 return;
2814 } elsif ($token->{tag_name} eq 'button') {
2815 ## has a button element in scope
2816 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2817 my $node = $self->{open_elements}->[$_];
2818 if ($node->[1] eq 'button') {
2819 !!!parse-error (type => 'in button:button');
2820 !!!back-token;
2821 $token = {type => 'end tag', tag_name => 'button'};
2822 return;
2823 } elsif ({
2824 table => 1, caption => 1, td => 1, th => 1,
2825 button => 1, marquee => 1, object => 1, html => 1,
2826 }->{$node->[1]}) {
2827 last INSCOPE;
2828 }
2829 } # INSCOPE
2830
2831 $reconstruct_active_formatting_elements->($insert_to_current);
2832
2833 !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2834 push @$active_formatting_elements, ['#marker', ''];
2835
2836 !!!next-token;
2837 return;
2838 } elsif ($token->{tag_name} eq 'marquee' or
2839 $token->{tag_name} eq 'object') {
2840 $reconstruct_active_formatting_elements->($insert_to_current);
2841
2842 !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2843 push @$active_formatting_elements, ['#marker', ''];
2844
2845 !!!next-token;
2846 return;
2847 } elsif ($token->{tag_name} eq 'xmp') {
2848 $reconstruct_active_formatting_elements->($insert_to_current);
2849 $parse_rcdata->('CDATA', $insert);
2850 return;
2851 } elsif ($token->{tag_name} eq 'table') {
2852 ## has a p element in scope
2853 INSCOPE: for (reverse @{$self->{open_elements}}) {
2854 if ($_->[1] eq 'p') {
2855 !!!back-token;
2856 $token = {type => 'end tag', tag_name => 'p'};
2857 return;
2858 } elsif ({
2859 table => 1, caption => 1, td => 1, th => 1,
2860 button => 1, marquee => 1, object => 1, html => 1,
2861 }->{$_->[1]}) {
2862 last INSCOPE;
2863 }
2864 } # INSCOPE
2865
2866 !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2867
2868 $self->{insertion_mode} = 'in table';
2869
2870 !!!next-token;
2871 return;
2872 } elsif ({
2873 area => 1, basefont => 1, bgsound => 1, br => 1,
2874 embed => 1, img => 1, param => 1, spacer => 1, wbr => 1,
2875 image => 1,
2876 }->{$token->{tag_name}}) {
2877 if ($token->{tag_name} eq 'image') {
2878 !!!parse-error (type => 'image');
2879 $token->{tag_name} = 'img';
2880 }
2881
2882 $reconstruct_active_formatting_elements->($insert_to_current);
2883
2884 !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2885 pop @{$self->{open_elements}};
2886
2887 !!!next-token;
2888 return;
2889 } elsif ($token->{tag_name} eq 'hr') {
2890 ## has a p element in scope
2891 INSCOPE: for (reverse @{$self->{open_elements}}) {
2892 if ($_->[1] eq 'p') {
2893 !!!back-token;
2894 $token = {type => 'end tag', tag_name => 'p'};
2895 return;
2896 } elsif ({
2897 table => 1, caption => 1, td => 1, th => 1,
2898 button => 1, marquee => 1, object => 1, html => 1,
2899 }->{$_->[1]}) {
2900 last INSCOPE;
2901 }
2902 } # INSCOPE
2903
2904 !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2905 pop @{$self->{open_elements}};
2906
2907 !!!next-token;
2908 return;
2909 } elsif ($token->{tag_name} eq 'input') {
2910 $reconstruct_active_formatting_elements->($insert_to_current);
2911
2912 !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2913 ## TODO: associate with $self->{form_element} if defined
2914 pop @{$self->{open_elements}};
2915
2916 !!!next-token;
2917 return;
2918 } elsif ($token->{tag_name} eq 'isindex') {
2919 !!!parse-error (type => 'isindex');
2920
2921 if (defined $self->{form_element}) {
2922 ## Ignore the token
2923 !!!next-token;
2924 return;
2925 } else {
2926 my $at = $token->{attributes};
2927 my $form_attrs;
2928 $form_attrs->{action} = $at->{action} if $at->{action};
2929 my $prompt_attr = $at->{prompt};
2930 $at->{name} = {name => 'name', value => 'isindex'};
2931 delete $at->{action};
2932 delete $at->{prompt};
2933 my @tokens = (
2934 {type => 'start tag', tag_name => 'form',
2935 attributes => $form_attrs},
2936 {type => 'start tag', tag_name => 'hr'},
2937 {type => 'start tag', tag_name => 'p'},
2938 {type => 'start tag', tag_name => 'label'},
2939 );
2940 if ($prompt_attr) {
2941 push @tokens, {type => 'character', data => $prompt_attr->{value}};
2942 } else {
2943 push @tokens, {type => 'character',
2944 data => 'This is a searchable index. Insert your search keywords here: '}; # SHOULD
2945 ## TODO: make this configurable
2946 }
2947 push @tokens,
2948 {type => 'start tag', tag_name => 'input', attributes => $at},
2949 #{type => 'character', data => ''}, # SHOULD
2950 {type => 'end tag', tag_name => 'label'},
2951 {type => 'end tag', tag_name => 'p'},
2952 {type => 'start tag', tag_name => 'hr'},
2953 {type => 'end tag', tag_name => 'form'};
2954 $token = shift @tokens;
2955 !!!back-token (@tokens);
2956 return;
2957 }
2958 } elsif ($token->{tag_name} eq 'textarea') {
2959 my $tag_name = $token->{tag_name};
2960 my $el;
2961 !!!create-element ($el, $token->{tag_name}, $token->{attributes});
2962
2963 ## TODO: $self->{form_element} if defined
2964 $self->{content_model_flag} = 'RCDATA';
2965 delete $self->{escape}; # MUST
2966
2967 $insert->($el);
2968
2969 my $text = '';
2970 !!!next-token;
2971 if ($token->{type} eq 'character') {
2972 $token->{data} =~ s/^\x0A//;
2973 unless (length $token->{data}) {
2974 !!!next-token;
2975 }
2976 }
2977 while ($token->{type} eq 'character') {
2978 $text .= $token->{data};
2979 !!!next-token;
2980 }
2981 if (length $text) {
2982 $el->manakai_append_text ($text);
2983 }
2984
2985 $self->{content_model_flag} = 'PCDATA';
2986
2987 if ($token->{type} eq 'end tag' and
2988 $token->{tag_name} eq $tag_name) {
2989 ## Ignore the token
2990 } else {
2991 !!!parse-error (type => 'in RCDATA:#'.$token->{type});
2992 }
2993 !!!next-token;
2994 return;
2995 } elsif ({
2996 iframe => 1,
2997 noembed => 1,
2998 noframes => 1,
2999 noscript => 0, ## TODO: 1 if scripting is enabled
3000 }->{$token->{tag_name}}) {
3001 $parse_rcdata->('CDATA', $insert);
3002 return;
3003 } elsif ($token->{tag_name} eq 'select') {
3004 $reconstruct_active_formatting_elements->($insert_to_current);
3005
3006 !!!insert-element-t ($token->{tag_name}, $token->{attributes});
3007
3008 $self->{insertion_mode} = 'in select';
3009 !!!next-token;
3010 return;
3011 } elsif ({
3012 caption => 1, col => 1, colgroup => 1, frame => 1,
3013 frameset => 1, head => 1, option => 1, optgroup => 1,
3014 tbody => 1, td => 1, tfoot => 1, th => 1,
3015 thead => 1, tr => 1,
3016 }->{$token->{tag_name}}) {
3017 !!!parse-error (type => 'in body:'.$token->{tag_name});
3018 ## Ignore the token
3019 !!!next-token;
3020 return;
3021
3022 ## ISSUE: An issue on HTML5 new elements in the spec.
3023 } else {
3024 $reconstruct_active_formatting_elements->($insert_to_current);
3025
3026 !!!insert-element-t ($token->{tag_name}, $token->{attributes});
3027
3028 !!!next-token;
3029 return;
3030 }
3031 } elsif ($token->{type} eq 'end tag') {
3032 if ($token->{tag_name} eq 'body') {
3033 if (@{$self->{open_elements}} > 1 and
3034 $self->{open_elements}->[1]->[1] eq 'body') {
3035 for (@{$self->{open_elements}}) {
3036 unless ({
3037 dd => 1, dt => 1, li => 1, p => 1, td => 1,
3038 th => 1, tr => 1, body => 1, html => 1,
3039 }->{$_->[1]}) {
3040 !!!parse-error (type => 'not closed:'.$_->[1]);
3041 }
3042 }
3043
3044 $self->{insertion_mode} = 'after body';
3045 !!!next-token;
3046 return;
3047 } else {
3048 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3049 ## Ignore the token
3050 !!!next-token;
3051 return;
3052 }
3053 } elsif ($token->{tag_name} eq 'html') {
3054 if (@{$self->{open_elements}} > 1 and $self->{open_elements}->[1]->[1] eq 'body') {
3055 ## ISSUE: There is an issue in the spec.
3056 if ($self->{open_elements}->[-1]->[1] ne 'body') {
3057 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[1]->[1]);
3058 }
3059 $self->{insertion_mode} = 'after body';
3060 ## reprocess
3061 return;
3062 } else {
3063 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3064 ## Ignore the token
3065 !!!next-token;
3066 return;
3067 }
3068 } elsif ({
3069 address => 1, blockquote => 1, center => 1, dir => 1,
3070 div => 1, dl => 1, fieldset => 1, listing => 1,
3071 menu => 1, ol => 1, pre => 1, ul => 1,
3072 p => 1,
3073 dd => 1, dt => 1, li => 1,
3074 button => 1, marquee => 1, object => 1,
3075 }->{$token->{tag_name}}) {
3076 ## has an element in scope
3077 my $i;
3078 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3079 my $node = $self->{open_elements}->[$_];
3080 if ($node->[1] eq $token->{tag_name}) {
3081 ## generate implied end tags
3082 if ({
3083 dd => ($token->{tag_name} ne 'dd'),
3084 dt => ($token->{tag_name} ne 'dt'),
3085 li => ($token->{tag_name} ne 'li'),
3086 p => ($token->{tag_name} ne 'p'),
3087 td => 1, th => 1, tr => 1,
3088 }->{$self->{open_elements}->[-1]->[1]}) {
3089 !!!back-token;
3090 $token = {type => 'end tag',
3091 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3092 return;
3093 }
3094 $i = $_;
3095 last INSCOPE unless $token->{tag_name} eq 'p';
3096 } elsif ({
3097 table => 1, caption => 1, td => 1, th => 1,
3098 button => 1, marquee => 1, object => 1, html => 1,
3099 }->{$node->[1]}) {
3100 last INSCOPE;
3101 }
3102 } # INSCOPE
3103
3104 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
3105 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3106 }
3107
3108 splice @{$self->{open_elements}}, $i if defined $i;
3109 $clear_up_to_marker->()
3110 if {
3111 button => 1, marquee => 1, object => 1,
3112 }->{$token->{tag_name}};
3113 !!!next-token;
3114 return;
3115 } elsif ($token->{tag_name} eq 'form') {
3116 ## has an element in scope
3117 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3118 my $node = $self->{open_elements}->[$_];
3119 if ($node->[1] eq $token->{tag_name}) {
3120 ## generate implied end tags
3121 if ({
3122 dd => 1, dt => 1, li => 1, p => 1,
3123 td => 1, th => 1, tr => 1,
3124 }->{$self->{open_elements}->[-1]->[1]}) {
3125 !!!back-token;
3126 $token = {type => 'end tag',
3127 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3128 return;
3129 }
3130 last INSCOPE;
3131 } elsif ({
3132 table => 1, caption => 1, td => 1, th => 1,
3133 button => 1, marquee => 1, object => 1, html => 1,
3134 }->{$node->[1]}) {
3135 last INSCOPE;
3136 }
3137 } # INSCOPE
3138
3139 if ($self->{open_elements}->[-1]->[1] eq $token->{tag_name}) {
3140 pop @{$self->{open_elements}};
3141 } else {
3142 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3143 }
3144
3145 undef $self->{form_element};
3146 !!!next-token;
3147 return;
3148 } elsif ({
3149 h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
3150 }->{$token->{tag_name}}) {
3151 ## has an element in scope
3152 my $i;
3153 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3154 my $node = $self->{open_elements}->[$_];
3155 if ({
3156 h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
3157 }->{$node->[1]}) {
3158 ## generate implied end tags
3159 if ({
3160 dd => 1, dt => 1, li => 1, p => 1,
3161 td => 1, th => 1, tr => 1,
3162 }->{$self->{open_elements}->[-1]->[1]}) {
3163 !!!back-token;
3164 $token = {type => 'end tag',
3165 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3166 return;
3167 }
3168 $i = $_;
3169 last INSCOPE;
3170 } elsif ({
3171 table => 1, caption => 1, td => 1, th => 1,
3172 button => 1, marquee => 1, object => 1, html => 1,
3173 }->{$node->[1]}) {
3174 last INSCOPE;
3175 }
3176 } # INSCOPE
3177
3178 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
3179 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3180 }
3181
3182 splice @{$self->{open_elements}}, $i if defined $i;
3183 !!!next-token;
3184 return;
3185 } elsif ({
3186 a => 1,
3187 b => 1, big => 1, em => 1, font => 1, i => 1,
3188 nobr => 1, s => 1, small => 1, strile => 1,
3189 strong => 1, tt => 1, u => 1,
3190 }->{$token->{tag_name}}) {
3191 $formatting_end_tag->($token->{tag_name});
3192 ## TODO: <http://html5.org/tools/web-apps-tracker?from=883&to=884>
3193 return;
3194 } elsif ({
3195 caption => 1, col => 1, colgroup => 1, frame => 1,
3196 frameset => 1, head => 1, option => 1, optgroup => 1,
3197 tbody => 1, td => 1, tfoot => 1, th => 1,
3198 thead => 1, tr => 1,
3199 area => 1, basefont => 1, bgsound => 1, br => 1,
3200 embed => 1, hr => 1, iframe => 1, image => 1,
3201 img => 1, input => 1, isindex => 1, noembed => 1,
3202 noframes => 1, param => 1, select => 1, spacer => 1,
3203 table => 1, textarea => 1, wbr => 1,
3204 noscript => 0, ## TODO: if scripting is enabled
3205 }->{$token->{tag_name}}) {
3206 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3207 ## Ignore the token
3208 !!!next-token;
3209 return;
3210
3211 ## ISSUE: Issue on HTML5 new elements in spec
3212
3213 } else {
3214 ## Step 1
3215 my $node_i = -1;
3216 my $node = $self->{open_elements}->[$node_i];
3217
3218 ## Step 2
3219 S2: {
3220 if ($node->[1] eq $token->{tag_name}) {
3221 ## Step 1
3222 ## generate implied end tags
3223 if ({
3224 dd => 1, dt => 1, li => 1, p => 1,
3225 td => 1, th => 1, tr => 1,
3226 }->{$self->{open_elements}->[-1]->[1]}) {
3227 !!!back-token;
3228 $token = {type => 'end tag',
3229 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3230 return;
3231 }
3232
3233 ## Step 2
3234 if ($token->{tag_name} ne $self->{open_elements}->[-1]->[1]) {
3235 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3236 }
3237
3238 ## Step 3
3239 splice @{$self->{open_elements}}, $node_i;
3240
3241 !!!next-token;
3242 last S2;
3243 } else {
3244 ## Step 3
3245 if (not $formatting_category->{$node->[1]} and
3246 #not $phrasing_category->{$node->[1]} and
3247 ($special_category->{$node->[1]} or
3248 $scoping_category->{$node->[1]})) {
3249 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3250 ## Ignore the token
3251 !!!next-token;
3252 last S2;
3253 }
3254 }
3255
3256 ## Step 4
3257 $node_i--;
3258 $node = $self->{open_elements}->[$node_i];
3259
3260 ## Step 5;
3261 redo S2;
3262 } # S2
3263 return;
3264 }
3265 }
3266 }; # $in_body
3267
3268 B: {
3269 if ($phase eq 'main') {
3270 if ($token->{type} eq 'DOCTYPE') {
3271 !!!parse-error (type => 'in html:#DOCTYPE');
3272 ## Ignore the token
3273 ## Stay in the phase
3274 !!!next-token;
3275 redo B;
3276 } elsif ($token->{type} eq 'start tag' and
3277 $token->{tag_name} eq 'html') {
3278 ## ISSUE: "aa<html>" is not a parse error.
3279 ## ISSUE: "<html>" in fragment is not a parse error.
3280 unless ($token->{first_start_tag}) {
3281 !!!parse-error (type => 'not first start tag');
3282 }
3283 my $top_el = $self->{open_elements}->[0]->[0];
3284 for my $attr_name (keys %{$token->{attributes}}) {
3285 unless ($top_el->has_attribute_ns (undef, $attr_name)) {
3286 $top_el->set_attribute_ns
3287 (undef, [undef, $attr_name],
3288 $token->{attributes}->{$attr_name}->{value});
3289 }
3290 }
3291 !!!next-token;
3292 redo B;
3293 } elsif ($token->{type} eq 'end-of-file') {
3294 ## Generate implied end tags
3295 if ({
3296 dd => 1, dt => 1, li => 1, p => 1, td => 1, th => 1, tr => 1,
3297 }->{$self->{open_elements}->[-1]->[1]}) {
3298 !!!back-token;
3299 $token = {type => 'end tag', tag_name => $self->{open_elements}->[-1]->[1]};
3300 redo B;
3301 }
3302
3303 if (@{$self->{open_elements}} > 2 or
3304 (@{$self->{open_elements}} == 2 and $self->{open_elements}->[1]->[1] ne 'body')) {
3305 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3306 } elsif (defined $self->{inner_html_node} and
3307 @{$self->{open_elements}} > 1 and
3308 $self->{open_elements}->[1]->[1] ne 'body') {
3309 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3310 }
3311
3312 ## Stop parsing
3313 last B;
3314
3315 ## ISSUE: There is an issue in the spec.
3316 } else {
3317 if ($self->{insertion_mode} eq 'before head') {
3318 if ($token->{type} eq 'character') {
3319 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3320 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3321 unless (length $token->{data}) {
3322 !!!next-token;
3323 redo B;
3324 }
3325 }
3326 ## As if <head>
3327 !!!create-element ($self->{head_element}, 'head');
3328 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3329 push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3330 $self->{insertion_mode} = 'in head';
3331 ## reprocess
3332 redo B;
3333 } elsif ($token->{type} eq 'comment') {
3334 my $comment = $self->{document}->create_comment ($token->{data});
3335 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3336 !!!next-token;
3337 redo B;
3338 } elsif ($token->{type} eq 'start tag') {
3339 my $attr = $token->{tag_name} eq 'head' ? $token->{attributes} : {};
3340 !!!create-element ($self->{head_element}, 'head', $attr);
3341 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3342 push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3343 $self->{insertion_mode} = 'in head';
3344 if ($token->{tag_name} eq 'head') {
3345 !!!next-token;
3346 #} elsif ({
3347 # base => 1, link => 1, meta => 1,
3348 # script => 1, style => 1, title => 1,
3349 # }->{$token->{tag_name}}) {
3350 # ## reprocess
3351 } else {
3352 ## reprocess
3353 }
3354 redo B;
3355 } elsif ($token->{type} eq 'end tag') {
3356 if ({head => 1, body => 1, html => 1}->{$token->{tag_name}}) {
3357 ## As if <head>
3358 !!!create-element ($self->{head_element}, 'head');
3359 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3360 push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3361 $self->{insertion_mode} = 'in head';
3362 ## reprocess
3363 redo B;
3364 } else {
3365 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3366 ## Ignore the token ## ISSUE: An issue in the spec.
3367 !!!next-token;
3368 redo B;
3369 }
3370 } else {
3371 die "$0: $token->{type}: Unknown type";
3372 }
3373 } elsif ($self->{insertion_mode} eq 'in head' or
3374 $self->{insertion_mode} eq 'in head noscript' or
3375 $self->{insertion_mode} eq 'after head') {
3376 if ($token->{type} eq 'character') {
3377 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3378 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3379 unless (length $token->{data}) {
3380 !!!next-token;
3381 redo B;
3382 }
3383 }
3384
3385 #
3386 } elsif ($token->{type} eq 'comment') {
3387 my $comment = $self->{document}->create_comment ($token->{data});
3388 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3389 !!!next-token;
3390 redo B;
3391 } elsif ($token->{type} eq 'start tag') {
3392 if ({base => ($self->{insertion_mode} eq 'in head' or
3393 $self->{insertion_mode} eq 'after head'),
3394 link => 1, meta => 1}->{$token->{tag_name}}) {
3395 ## NOTE: There is a "as if in head" code clone.
3396 if ($self->{insertion_mode} eq 'after head') {
3397 !!!parse-error (type => 'after head:'.$token->{tag_name});
3398 push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3399 }
3400 !!!insert-element ($token->{tag_name}, $token->{attributes});
3401 pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
3402 ## TODO: Extracting |charset| from |meta|.
3403 pop @{$self->{open_elements}}
3404 if $self->{insertion_mode} eq 'after head';
3405 !!!next-token;
3406 redo B;
3407 } elsif ($token->{tag_name} eq 'title' and
3408 $self->{insertion_mode} eq 'in head') {
3409 ## NOTE: There is a "as if in head" code clone.
3410 if ($self->{insertion_mode} eq 'after head') {
3411 !!!parse-error (type => 'after head:'.$token->{tag_name});
3412 push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3413 }
3414 $parse_rcdata->('RCDATA', $insert_to_current);
3415 pop @{$self->{open_elements}}
3416 if $self->{insertion_mode} eq 'after head';
3417 redo B;
3418 } elsif ($token->{tag_name} eq 'style') {
3419 ## NOTE: Or (scripting is enabled and tag_name eq 'noscript' and
3420 ## insertion mode 'in head')
3421 ## NOTE: There is a "as if in head" code clone.
3422 if ($self->{insertion_mode} eq 'after head') {
3423 !!!parse-error (type => 'after head:'.$token->{tag_name});
3424 push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3425 }
3426 $parse_rcdata->('CDATA', $insert_to_current);
3427 pop @{$self->{open_elements}}
3428 if $self->{insertion_mode} eq 'after head';
3429 redo B;
3430 } elsif ($token->{tag_name} eq 'noscript') {
3431 if ($self->{insertion_mode} eq 'in head') {
3432 ## NOTE: and scripting is disabled
3433 !!!insert-element ($token->{tag_name}, $token->{attributes});
3434 $self->{insertion_mode} = 'in head noscript';
3435 !!!next-token;
3436 redo B;
3437 } elsif ($self->{insertion_mode} eq 'in head noscript') {
3438 !!!parse-error (type => 'in noscript:noscript');
3439 ## Ignore the token
3440 redo B;
3441 } else {
3442 #
3443 }
3444 } elsif ($token->{tag_name} eq 'head' and
3445 $self->{insertion_mode} ne 'after head') {
3446 !!!parse-error (type => 'in head:head'); # or in head noscript
3447 ## Ignore the token
3448 !!!next-token;
3449 redo B;
3450 } elsif ($self->{insertion_mode} ne 'in head noscript' and
3451 $token->{tag_name} eq 'script') {
3452 if ($self->{insertion_mode} eq 'after head') {
3453 !!!parse-error (type => 'after head:'.$token->{tag_name});
3454 push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3455 }
3456 ## NOTE: There is a "as if in head" code clone.
3457 $script_start_tag->($insert_to_current);
3458 pop @{$self->{open_elements}}
3459 if $self->{insertion_mode} eq 'after head';
3460 redo B;
3461 } elsif ($self->{insertion_mode} eq 'after head' and
3462 $token->{tag_name} eq 'body') {
3463 !!!insert-element ('body', $token->{attributes});
3464 $self->{insertion_mode} = 'in body';
3465 !!!next-token;
3466 redo B;
3467 } elsif ($self->{insertion_mode} eq 'after head' and
3468 $token->{tag_name} eq 'frameset') {
3469 !!!insert-element ('frameset', $token->{attributes});
3470 $self->{insertion_mode} = 'in frameset';
3471 !!!next-token;
3472 redo B;
3473 } else {
3474 #
3475 }
3476 } elsif ($token->{type} eq 'end tag') {
3477 if ($self->{insertion_mode} eq 'in head' and
3478 $token->{tag_name} eq 'head') {
3479 pop @{$self->{open_elements}};
3480 $self->{insertion_mode} = 'after head';
3481 !!!next-token;
3482 redo B;
3483 } elsif ($self->{insertion_mode} eq 'in head noscript' and
3484 $token->{tag_name} eq 'noscript') {
3485 pop @{$self->{open_elements}};
3486 $self->{insertion_mode} = 'in head';
3487 !!!next-token;
3488 redo B;
3489 } elsif ($self->{insertion_mode} eq 'in head' and
3490 ($token->{tag_name} eq 'body' or
3491 $token->{tag_name} eq 'html')) {
3492 #
3493 } elsif ($self->{insertion_mode} ne 'after head') {
3494 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3495 ## Ignore the token
3496 !!!next-token;
3497 redo B;
3498 } else {
3499 #
3500 }
3501 } else {
3502 #
3503 }
3504
3505 ## As if </head> or </noscript> or <body>
3506 if ($self->{insertion_mode} eq 'in head') {
3507 pop @{$self->{open_elements}};
3508 $self->{insertion_mode} = 'after head';
3509 } elsif ($self->{insertion_mode} eq 'in head noscript') {
3510 pop @{$self->{open_elements}};
3511 !!!parse-error (type => 'in noscript:'.(defined $token->{tag_name} ? ($token->{type} eq 'end tag' ? '/' : '') . $token->{tag_name} : '#' . $token->{type}));
3512 $self->{insertion_mode} = 'in head';
3513 } else { # 'after head'
3514 !!!insert-element ('body');
3515 $self->{insertion_mode} = 'in body';
3516 }
3517 ## reprocess
3518 redo B;
3519
3520 ## ISSUE: An issue in the spec.
3521 } elsif ($self->{insertion_mode} eq 'in body') {
3522 if ($token->{type} eq 'character') {
3523 ## NOTE: There is a code clone of "character in body".
3524 $reconstruct_active_formatting_elements->($insert_to_current);
3525
3526 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3527
3528 !!!next-token;
3529 redo B;
3530 } elsif ($token->{type} eq 'comment') {
3531 ## NOTE: There is a code clone of "comment in body".
3532 my $comment = $self->{document}->create_comment ($token->{data});
3533 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3534 !!!next-token;
3535 redo B;
3536 } else {
3537 $in_body->($insert_to_current);
3538 redo B;
3539 }
3540 } elsif ($self->{insertion_mode} eq 'in table') {
3541 if ($token->{type} eq 'character') {
3542 ## NOTE: There are "character in table" code clones.
3543 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3544 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3545
3546 unless (length $token->{data}) {
3547 !!!next-token;
3548 redo B;
3549 }
3550 }
3551
3552 !!!parse-error (type => 'in table:#character');
3553
3554 ## As if in body, but insert into foster parent element
3555 ## ISSUE: Spec says that "whenever a node would be inserted
3556 ## into the current node" while characters might not
3557 ## result in a new Text node.
3558 $reconstruct_active_formatting_elements->($insert_to_foster);
3559
3560 if ({
3561 table => 1, tbody => 1, tfoot => 1,
3562 thead => 1, tr => 1,
3563 }->{$self->{open_elements}->[-1]->[1]}) {
3564 # MUST
3565 my $foster_parent_element;
3566 my $next_sibling;
3567 my $prev_sibling;
3568 OE: for (reverse 0..$#{$self->{open_elements}}) {
3569 if ($self->{open_elements}->[$_]->[1] eq 'table') {
3570 my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
3571 if (defined $parent and $parent->node_type == 1) {
3572 $foster_parent_element = $parent;
3573 $next_sibling = $self->{open_elements}->[$_]->[0];
3574 $prev_sibling = $next_sibling->previous_sibling;
3575 } else {
3576 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
3577 $prev_sibling = $foster_parent_element->last_child;
3578 }
3579 last OE;
3580 }
3581 } # OE
3582 $foster_parent_element = $self->{open_elements}->[0]->[0] and
3583 $prev_sibling = $foster_parent_element->last_child
3584 unless defined $foster_parent_element;
3585 if (defined $prev_sibling and
3586 $prev_sibling->node_type == 3) {
3587 $prev_sibling->manakai_append_text ($token->{data});
3588 } else {
3589 $foster_parent_element->insert_before
3590 ($self->{document}->create_text_node ($token->{data}),
3591 $next_sibling);
3592 }
3593 } else {
3594 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3595 }
3596
3597 !!!next-token;
3598 redo B;
3599 } elsif ($token->{type} eq 'comment') {
3600 my $comment = $self->{document}->create_comment ($token->{data});
3601 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3602 !!!next-token;
3603 redo B;
3604 } elsif ($token->{type} eq 'start tag') {
3605 if ({
3606 caption => 1,
3607 colgroup => 1,
3608 tbody => 1, tfoot => 1, thead => 1,
3609 }->{$token->{tag_name}}) {
3610 ## Clear back to table context
3611 while ($self->{open_elements}->[-1]->[1] ne 'table' and
3612 $self->{open_elements}->[-1]->[1] ne 'html') {
3613 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3614 pop @{$self->{open_elements}};
3615 }
3616
3617 push @$active_formatting_elements, ['#marker', '']
3618 if $token->{tag_name} eq 'caption';
3619
3620 !!!insert-element ($token->{tag_name}, $token->{attributes});
3621 $self->{insertion_mode} = {
3622 caption => 'in caption',
3623 colgroup => 'in column group',
3624 tbody => 'in table body',
3625 tfoot => 'in table body',
3626 thead => 'in table body',
3627 }->{$token->{tag_name}};
3628 !!!next-token;
3629 redo B;
3630 } elsif ({
3631 col => 1,
3632 td => 1, th => 1, tr => 1,
3633 }->{$token->{tag_name}}) {
3634 ## Clear back to table context
3635 while ($self->{open_elements}->[-1]->[1] ne 'table' and
3636 $self->{open_elements}->[-1]->[1] ne 'html') {
3637 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3638 pop @{$self->{open_elements}};
3639 }
3640
3641 !!!insert-element ($token->{tag_name} eq 'col' ? 'colgroup' : 'tbody');
3642 $self->{insertion_mode} = $token->{tag_name} eq 'col'
3643 ? 'in column group' : 'in table body';
3644 ## reprocess
3645 redo B;
3646 } elsif ($token->{tag_name} eq 'table') {
3647 ## NOTE: There are code clones for this "table in table"
3648 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3649
3650 ## As if </table>
3651 ## have a table element in table scope
3652 my $i;
3653 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3654 my $node = $self->{open_elements}->[$_];
3655 if ($node->[1] eq 'table') {
3656 $i = $_;
3657 last INSCOPE;
3658 } elsif ({
3659 table => 1, html => 1,
3660 }->{$node->[1]}) {
3661 last INSCOPE;
3662 }
3663 } # INSCOPE
3664 unless (defined $i) {
3665 !!!parse-error (type => 'unmatched end tag:table');
3666 ## Ignore tokens </table><table>
3667 !!!next-token;
3668 redo B;
3669 }
3670
3671 ## generate implied end tags
3672 if ({
3673 dd => 1, dt => 1, li => 1, p => 1,
3674 td => 1, th => 1, tr => 1,
3675 }->{$self->{open_elements}->[-1]->[1]}) {
3676 !!!back-token; # <table>
3677 $token = {type => 'end tag', tag_name => 'table'};
3678 !!!back-token;
3679 $token = {type => 'end tag',
3680 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3681 redo B;
3682 }
3683
3684 if ($self->{open_elements}->[-1]->[1] ne 'table') {
3685 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3686 }
3687
3688 splice @{$self->{open_elements}}, $i;
3689
3690 $self->_reset_insertion_mode;
3691
3692 ## reprocess
3693 redo B;
3694 } else {
3695 #
3696 }
3697 } elsif ($token->{type} eq 'end tag') {
3698 if ($token->{tag_name} eq 'table') {
3699 ## have a table element in table scope
3700 my $i;
3701 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3702 my $node = $self->{open_elements}->[$_];
3703 if ($node->[1] eq $token->{tag_name}) {
3704 $i = $_;
3705 last INSCOPE;
3706 } elsif ({
3707 table => 1, html => 1,
3708 }->{$node->[1]}) {
3709 last INSCOPE;
3710 }
3711 } # INSCOPE
3712 unless (defined $i) {
3713 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3714 ## Ignore the token
3715 !!!next-token;
3716 redo B;
3717 }
3718
3719 ## generate implied end tags
3720 if ({
3721 dd => 1, dt => 1, li => 1, p => 1,
3722 td => 1, th => 1, tr => 1,
3723 }->{$self->{open_elements}->[-1]->[1]}) {
3724 !!!back-token;
3725 $token = {type => 'end tag',
3726 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3727 redo B;
3728 }
3729
3730 if ($self->{open_elements}->[-1]->[1] ne 'table') {
3731 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3732 }
3733
3734 splice @{$self->{open_elements}}, $i;
3735
3736 $self->_reset_insertion_mode;
3737
3738 !!!next-token;
3739 redo B;
3740 } elsif ({
3741 body => 1, caption => 1, col => 1, colgroup => 1,
3742 html => 1, tbody => 1, td => 1, tfoot => 1, th => 1,
3743 thead => 1, tr => 1,
3744 }->{$token->{tag_name}}) {
3745 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3746 ## Ignore the token
3747 !!!next-token;
3748 redo B;
3749 } else {
3750 #
3751 }
3752 } else {
3753 #
3754 }
3755
3756 !!!parse-error (type => 'in table:'.$token->{tag_name});
3757 $in_body->($insert_to_foster);
3758 redo B;
3759 } elsif ($self->{insertion_mode} eq 'in caption') {
3760 if ($token->{type} eq 'character') {
3761 ## NOTE: This is a code clone of "character in body".
3762 $reconstruct_active_formatting_elements->($insert_to_current);
3763
3764 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3765
3766 !!!next-token;
3767 redo B;
3768 } elsif ($token->{type} eq 'comment') {
3769 ## NOTE: This is a code clone of "comment in body".
3770 my $comment = $self->{document}->create_comment ($token->{data});
3771 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3772 !!!next-token;
3773 redo B;
3774 } elsif ($token->{type} eq 'start tag') {
3775 if ({
3776 caption => 1, col => 1, colgroup => 1, tbody => 1,
3777 td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
3778 }->{$token->{tag_name}}) {
3779 !!!parse-error (type => 'not closed:caption');
3780
3781 ## As if </caption>
3782 ## have a caption element in table scope
3783 my $i;
3784 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3785 my $node = $self->{open_elements}->[$_];
3786 if ($node->[1] eq 'caption') {
3787 $i = $_;
3788 last INSCOPE;
3789 } elsif ({
3790 table => 1, html => 1,
3791 }->{$node->[1]}) {
3792 last INSCOPE;
3793 }
3794 } # INSCOPE
3795 unless (defined $i) {
3796 !!!parse-error (type => 'unmatched end tag:caption');
3797 ## Ignore the token
3798 !!!next-token;
3799 redo B;
3800 }
3801
3802 ## generate implied end tags
3803 if ({
3804 dd => 1, dt => 1, li => 1, p => 1,
3805 td => 1, th => 1, tr => 1,
3806 }->{$self->{open_elements}->[-1]->[1]}) {
3807 !!!back-token; # <?>
3808 $token = {type => 'end tag', tag_name => 'caption'};
3809 !!!back-token;
3810 $token = {type => 'end tag',
3811 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3812 redo B;
3813 }
3814
3815 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
3816 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3817 }
3818
3819 splice @{$self->{open_elements}}, $i;
3820
3821 $clear_up_to_marker->();
3822
3823 $self->{insertion_mode} = 'in table';
3824
3825 ## reprocess
3826 redo B;
3827 } else {
3828 #
3829 }
3830 } elsif ($token->{type} eq 'end tag') {
3831 if ($token->{tag_name} eq 'caption') {
3832 ## have a caption element in table scope
3833 my $i;
3834 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3835 my $node = $self->{open_elements}->[$_];
3836 if ($node->[1] eq $token->{tag_name}) {
3837 $i = $_;
3838 last INSCOPE;
3839 } elsif ({
3840 table => 1, html => 1,
3841 }->{$node->[1]}) {
3842 last INSCOPE;
3843 }
3844 } # INSCOPE
3845 unless (defined $i) {
3846 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3847 ## Ignore the token
3848 !!!next-token;
3849 redo B;
3850 }
3851
3852 ## generate implied end tags
3853 if ({
3854 dd => 1, dt => 1, li => 1, p => 1,
3855 td => 1, th => 1, tr => 1,
3856 }->{$self->{open_elements}->[-1]->[1]}) {
3857 !!!back-token;
3858 $token = {type => 'end tag',
3859 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3860 redo B;
3861 }
3862
3863 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
3864 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3865 }
3866
3867 splice @{$self->{open_elements}}, $i;
3868
3869 $clear_up_to_marker->();
3870
3871 $self->{insertion_mode} = 'in table';
3872
3873 !!!next-token;
3874 redo B;
3875 } elsif ($token->{tag_name} eq 'table') {
3876 !!!parse-error (type => 'not closed:caption');
3877
3878 ## As if </caption>
3879 ## have a table element in table scope
3880 my $i;
3881 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3882 my $node = $self->{open_elements}->[$_];
3883 if ($node->[1] eq 'caption') {
3884 $i = $_;
3885 last INSCOPE;
3886 } elsif ({
3887 table => 1, html => 1,
3888 }->{$node->[1]}) {
3889 last INSCOPE;
3890 }
3891 } # INSCOPE
3892 unless (defined $i) {
3893 !!!parse-error (type => 'unmatched end tag:caption');
3894 ## Ignore the token
3895 !!!next-token;
3896 redo B;
3897 }
3898
3899 ## generate implied end tags
3900 if ({
3901 dd => 1, dt => 1, li => 1, p => 1,
3902 td => 1, th => 1, tr => 1,
3903 }->{$self->{open_elements}->[-1]->[1]}) {
3904 !!!back-token; # </table>
3905 $token = {type => 'end tag', tag_name => 'caption'};
3906 !!!back-token;
3907 $token = {type => 'end tag',
3908 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3909 redo B;
3910 }
3911
3912 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
3913 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3914 }
3915
3916 splice @{$self->{open_elements}}, $i;
3917
3918 $clear_up_to_marker->();
3919
3920 $self->{insertion_mode} = 'in table';
3921
3922 ## reprocess
3923 redo B;
3924 } elsif ({
3925 body => 1, col => 1, colgroup => 1,
3926 html => 1, tbody => 1, td => 1, tfoot => 1,
3927 th => 1, thead => 1, tr => 1,
3928 }->{$token->{tag_name}}) {
## An end tag for |body|, |col|, |colgroup|, |html|, |tbody|, |td|,
## |tfoot|, |th|, |thead|, or |tr| while in caption is a parse error.
!!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
## Ignore the token.  The next token has to be fetched before the
## loop is restarted; otherwise |redo B| would be handed this same end
## tag again in the same insertion mode and would never terminate.
!!!next-token;
redo B;
3932 } else {
3933 #
3934 }
3935 } else {
3936 #
3937 }
3938
3939 $in_body->($insert_to_current);
3940 redo B;
3941 } elsif ($self->{insertion_mode} eq 'in column group') {
3942 if ($token->{type} eq 'character') {
3943 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3944 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3945 unless (length $token->{data}) {
3946 !!!next-token;
3947 redo B;
3948 }
3949 }
3950
3951 #
3952 } elsif ($token->{type} eq 'comment') {
3953 my $comment = $self->{document}->create_comment ($token->{data});
3954 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3955 !!!next-token;
3956 redo B;
3957 } elsif ($token->{type} eq 'start tag') {
3958 if ($token->{tag_name} eq 'col') {
3959 !!!insert-element ($token->{tag_name}, $token->{attributes});
3960 pop @{$self->{open_elements}};
3961 !!!next-token;
3962 redo B;
3963 } else {
3964 #
3965 }
3966 } elsif ($token->{type} eq 'end tag') {
3967 if ($token->{tag_name} eq 'colgroup') {
3968 if ($self->{open_elements}->[-1]->[1] eq 'html') {
3969 !!!parse-error (type => 'unmatched end tag:colgroup');
3970 ## Ignore the token
3971 !!!next-token;
3972 redo B;
3973 } else {
3974 pop @{$self->{open_elements}}; # colgroup
3975 $self->{insertion_mode} = 'in table';
3976 !!!next-token;
3977 redo B;
3978 }
3979 } elsif ($token->{tag_name} eq 'col') {
3980 !!!parse-error (type => 'unmatched end tag:col');
3981 ## Ignore the token
3982 !!!next-token;
3983 redo B;
3984 } else {
3985 #
3986 }
3987 } else {
3988 #
3989 }
3990
3991 ## As if </colgroup>
3992 if ($self->{open_elements}->[-1]->[1] eq 'html') {
3993 !!!parse-error (type => 'unmatched end tag:colgroup');
3994 ## Ignore the token
3995 !!!next-token;
3996 redo B;
3997 } else {
3998 pop @{$self->{open_elements}}; # colgroup
3999 $self->{insertion_mode} = 'in table';
4000 ## reprocess
4001 redo B;
4002 }
4003 } elsif ($self->{insertion_mode} eq 'in table body') {
4004 if ($token->{type} eq 'character') {
4005 ## NOTE: This is a "character in table" code clone.
4006 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4007 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4008
4009 unless (length $token->{data}) {
4010 !!!next-token;
4011 redo B;
4012 }
4013 }
4014
4015 !!!parse-error (type => 'in table:#character');
4016
4017 ## As if in body, but insert into foster parent element
4018 ## ISSUE: Spec says that "whenever a node would be inserted
4019 ## into the current node" while characters might not be
4020 ## result in a new Text node.
4021 $reconstruct_active_formatting_elements->($insert_to_foster);
4022
4023 if ({
4024 table => 1, tbody => 1, tfoot => 1,
4025 thead => 1, tr => 1,
4026 }->{$self->{open_elements}->[-1]->[1]}) {
4027 # MUST
4028 my $foster_parent_element;
4029 my $next_sibling;
4030 my $prev_sibling;
4031 OE: for (reverse 0..$#{$self->{open_elements}}) {
4032 if ($self->{open_elements}->[$_]->[1] eq 'table') {
4033 my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
4034 if (defined $parent and $parent->node_type == 1) {
4035 $foster_parent_element = $parent;
4036 $next_sibling = $self->{open_elements}->[$_]->[0];
4037 $prev_sibling = $next_sibling->previous_sibling;
4038 } else {
4039 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
4040 $prev_sibling = $foster_parent_element->last_child;
4041 }
4042 last OE;
4043 }
4044 } # OE
4045 $foster_parent_element = $self->{open_elements}->[0]->[0] and
4046 $prev_sibling = $foster_parent_element->last_child
4047 unless defined $foster_parent_element;
4048 if (defined $prev_sibling and
4049 $prev_sibling->node_type == 3) {
4050 $prev_sibling->manakai_append_text ($token->{data});
4051 } else {
4052 $foster_parent_element->insert_before
4053 ($self->{document}->create_text_node ($token->{data}),
4054 $next_sibling);
4055 }
4056 } else {
4057 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4058 }
4059
4060 !!!next-token;
4061 redo B;
4062 } elsif ($token->{type} eq 'comment') {
4063 ## Copied from 'in table'
4064 my $comment = $self->{document}->create_comment ($token->{data});
4065 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4066 !!!next-token;
4067 redo B;
4068 } elsif ($token->{type} eq 'start tag') {
4069 if ({
4070 tr => 1,
4071 th => 1, td => 1,
4072 }->{$token->{tag_name}}) {
4073 unless ($token->{tag_name} eq 'tr') {
4074 !!!parse-error (type => 'missing start tag:tr');
4075 }
4076
4077 ## Clear back to table body context
4078 while (not {
4079 tbody => 1, tfoot => 1, thead => 1, html => 1,
4080 }->{$self->{open_elements}->[-1]->[1]}) {
4081 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4082 pop @{$self->{open_elements}};
4083 }
4084
4085 $self->{insertion_mode} = 'in row';
4086 if ($token->{tag_name} eq 'tr') {
4087 !!!insert-element ($token->{tag_name}, $token->{attributes});
4088 !!!next-token;
4089 } else {
4090 !!!insert-element ('tr');
4091 ## reprocess
4092 }
4093 redo B;
4094 } elsif ({
4095 caption => 1, col => 1, colgroup => 1,
4096 tbody => 1, tfoot => 1, thead => 1,
4097 }->{$token->{tag_name}}) {
4098 ## have an element in table scope
4099 my $i;
4100 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4101 my $node = $self->{open_elements}->[$_];
4102 if ({
4103 tbody => 1, thead => 1, tfoot => 1,
4104 }->{$node->[1]}) {
4105 $i = $_;
4106 last INSCOPE;
4107 } elsif ({
4108 table => 1, html => 1,
4109 }->{$node->[1]}) {
4110 last INSCOPE;
4111 }
4112 } # INSCOPE
4113 unless (defined $i) {
4114 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4115 ## Ignore the token
4116 !!!next-token;
4117 redo B;
4118 }
4119
4120 ## Clear back to table body context
4121 while (not {
4122 tbody => 1, tfoot => 1, thead => 1, html => 1,
4123 }->{$self->{open_elements}->[-1]->[1]}) {
4124 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4125 pop @{$self->{open_elements}};
4126 }
4127
4128 ## As if <{current node}>
4129 ## have an element in table scope
4130 ## true by definition
4131
4132 ## Clear back to table body context
4133 ## nop by definition
4134
4135 pop @{$self->{open_elements}};
4136 $self->{insertion_mode} = 'in table';
4137 ## reprocess
4138 redo B;
4139 } elsif ($token->{tag_name} eq 'table') {
4140 ## NOTE: This is a code clone of "table in table"
4141 !!!parse-error (type => 'not closed:table');
4142
4143 ## As if </table>
4144 ## have a table element in table scope
4145 my $i;
4146 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4147 my $node = $self->{open_elements}->[$_];
4148 if ($node->[1] eq 'table') {
4149 $i = $_;
4150 last INSCOPE;
4151 } elsif ({
4152 table => 1, html => 1,
4153 }->{$node->[1]}) {
4154 last INSCOPE;
4155 }
4156 } # INSCOPE
4157 unless (defined $i) {
4158 !!!parse-error (type => 'unmatched end tag:table');
4159 ## Ignore tokens </table><table>
4160 !!!next-token;
4161 redo B;
4162 }
4163
4164 ## generate implied end tags
4165 if ({
4166 dd => 1, dt => 1, li => 1, p => 1,
4167 td => 1, th => 1, tr => 1,
4168 }->{$self->{open_elements}->[-1]->[1]}) {
4169 !!!back-token; # <table>
4170 $token = {type => 'end tag', tag_name => 'table'};
4171 !!!back-token;
4172 $token = {type => 'end tag',
4173 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4174 redo B;
4175 }
4176
4177 if ($self->{open_elements}->[-1]->[1] ne 'table') {
4178 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4179 }
4180
4181 splice @{$self->{open_elements}}, $i;
4182
4183 $self->_reset_insertion_mode;
4184
4185 ## reprocess
4186 redo B;
4187 } else {
4188 #
4189 }
4190 } elsif ($token->{type} eq 'end tag') {
4191 if ({
4192 tbody => 1, tfoot => 1, thead => 1,
4193 }->{$token->{tag_name}}) {
4194 ## have an element in table scope
4195 my $i;
4196 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4197 my $node = $self->{open_elements}->[$_];
4198 if ($node->[1] eq $token->{tag_name}) {
4199 $i = $_;
4200 last INSCOPE;
4201 } elsif ({
4202 table => 1, html => 1,
4203 }->{$node->[1]}) {
4204 last INSCOPE;
4205 }
4206 } # INSCOPE
4207 unless (defined $i) {
4208 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4209 ## Ignore the token
4210 !!!next-token;
4211 redo B;
4212 }
4213
4214 ## Clear back to table body context
4215 while (not {
4216 tbody => 1, tfoot => 1, thead => 1, html => 1,
4217 }->{$self->{open_elements}->[-1]->[1]}) {
4218 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4219 pop @{$self->{open_elements}};
4220 }
4221
4222 pop @{$self->{open_elements}};
4223 $self->{insertion_mode} = 'in table';
4224 !!!next-token;
4225 redo B;
4226 } elsif ($token->{tag_name} eq 'table') {
4227 ## have an element in table scope
4228 my $i;
4229 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4230 my $node = $self->{open_elements}->[$_];
4231 if ({
4232 tbody => 1, thead => 1, tfoot => 1,
4233 }->{$node->[1]}) {
4234 $i = $_;
4235 last INSCOPE;
4236 } elsif ({
4237 table => 1, html => 1,
4238 }->{$node->[1]}) {
4239 last INSCOPE;
4240 }
4241 } # INSCOPE
4242 unless (defined $i) {
4243 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4244 ## Ignore the token
4245 !!!next-token;
4246 redo B;
4247 }
4248
4249 ## Clear back to table body context
4250 while (not {
4251 tbody => 1, tfoot => 1, thead => 1, html => 1,
4252 }->{$self->{open_elements}->[-1]->[1]}) {
4253 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4254 pop @{$self->{open_elements}};
4255 }
4256
4257 ## As if <{current node}>
4258 ## have an element in table scope
4259 ## true by definition
4260
4261 ## Clear back to table body context
4262 ## nop by definition
4263
4264 pop @{$self->{open_elements}};
4265 $self->{insertion_mode} = 'in table';
4266 ## reprocess
4267 redo B;
4268 } elsif ({
4269 body => 1, caption => 1, col => 1, colgroup => 1,
4270 html => 1, td => 1, th => 1, tr => 1,
4271 }->{$token->{tag_name}}) {
4272 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4273 ## Ignore the token
4274 !!!next-token;
4275 redo B;
4276 } else {
4277 #
4278 }
4279 } else {
4280 #
4281 }
4282
4283 ## As if in table
4284 !!!parse-error (type => 'in table:'.$token->{tag_name});
4285 $in_body->($insert_to_foster);
4286 redo B;
4287 } elsif ($self->{insertion_mode} eq 'in row') {
4288 if ($token->{type} eq 'character') {
4289 ## NOTE: This is a "character in table" code clone.
4290 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4291 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4292
4293 unless (length $token->{data}) {
4294 !!!next-token;
4295 redo B;
4296 }
4297 }
4298
4299 !!!parse-error (type => 'in table:#character');
4300
4301 ## As if in body, but insert into foster parent element
4302 ## ISSUE: Spec says that "whenever a node would be inserted
4303 ## into the current node" while characters might not be
4304 ## result in a new Text node.
4305 $reconstruct_active_formatting_elements->($insert_to_foster);
4306
4307 if ({
4308 table => 1, tbody => 1, tfoot => 1,
4309 thead => 1, tr => 1,
4310 }->{$self->{open_elements}->[-1]->[1]}) {
4311 # MUST
4312 my $foster_parent_element;
4313 my $next_sibling;
4314 my $prev_sibling;
4315 OE: for (reverse 0..$#{$self->{open_elements}}) {
4316 if ($self->{open_elements}->[$_]->[1] eq 'table') {
4317 my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
4318 if (defined $parent and $parent->node_type == 1) {
4319 $foster_parent_element = $parent;
4320 $next_sibling = $self->{open_elements}->[$_]->[0];
4321 $prev_sibling = $next_sibling->previous_sibling;
4322 } else {
4323 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
4324 $prev_sibling = $foster_parent_element->last_child;
4325 }
4326 last OE;
4327 }
4328 } # OE
4329 $foster_parent_element = $self->{open_elements}->[0]->[0] and
4330 $prev_sibling = $foster_parent_element->last_child
4331 unless defined $foster_parent_element;
4332 if (defined $prev_sibling and
4333 $prev_sibling->node_type == 3) {
4334 $prev_sibling->manakai_append_text ($token->{data});
4335 } else {
4336 $foster_parent_element->insert_before
4337 ($self->{document}->create_text_node ($token->{data}),
4338 $next_sibling);
4339 }
4340 } else {
4341 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4342 }
4343
4344 !!!next-token;
4345 redo B;
4346 } elsif ($token->{type} eq 'comment') {
4347 ## Copied from 'in table'
4348 my $comment = $self->{document}->create_comment ($token->{data});
4349 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4350 !!!next-token;
4351 redo B;
4352 } elsif ($token->{type} eq 'start tag') {
4353 if ($token->{tag_name} eq 'th' or
4354 $token->{tag_name} eq 'td') {
4355 ## Clear back to table row context
4356 while (not {
4357 tr => 1, html => 1,
4358 }->{$self->{open_elements}->[-1]->[1]}) {
4359 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4360 pop @{$self->{open_elements}};
4361 }
4362
4363 !!!insert-element ($token->{tag_name}, $token->{attributes});
4364 $self->{insertion_mode} = 'in cell';
4365
4366 push @$active_formatting_elements, ['#marker', ''];
4367
4368 !!!next-token;
4369 redo B;
4370 } elsif ({
4371 caption => 1, col => 1, colgroup => 1,
4372 tbody => 1, tfoot => 1, thead => 1, tr => 1,
4373 }->{$token->{tag_name}}) {
4374 ## As if </tr>
4375 ## have an element in table scope
4376 my $i;
4377 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4378 my $node = $self->{open_elements}->[$_];
4379 if ($node->[1] eq 'tr') {
4380 $i = $_;
4381 last INSCOPE;
4382 } elsif ({
4383 table => 1, html => 1,
4384 }->{$node->[1]}) {
4385 last INSCOPE;
4386 }
4387 } # INSCOPE
unless (defined $i) {
  ## No |tr| element was found in table scope, so the implied </tr>
  ## cannot be applied.  The error type uses the same
  ## "unmatched end tag:" prefix as every other scope failure.
  !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
  ## Ignore the token
  !!!next-token;
  redo B;
}
4394
4395 ## Clear back to table row context
4396 while (not {
4397 tr => 1, html => 1,
4398 }->{$self->{open_elements}->[-1]->[1]}) {
4399 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4400 pop @{$self->{open_elements}};
4401 }
4402
4403 pop @{$self->{open_elements}}; # tr
4404 $self->{insertion_mode} = 'in table body';
4405 ## reprocess
4406 redo B;
4407 } elsif ($token->{tag_name} eq 'table') {
4408 ## NOTE: This is a code clone of "table in table"
4409 !!!parse-error (type => 'not closed:table');
4410
4411 ## As if </table>
4412 ## have a table element in table scope
4413 my $i;
4414 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4415 my $node = $self->{open_elements}->[$_];
4416 if ($node->[1] eq 'table') {
4417 $i = $_;
4418 last INSCOPE;
4419 } elsif ({
4420 table => 1, html => 1,
4421 }->{$node->[1]}) {
4422 last INSCOPE;
4423 }
4424 } # INSCOPE
4425 unless (defined $i) {
4426 !!!parse-error (type => 'unmatched end tag:table');
4427 ## Ignore tokens </table><table>
4428 !!!next-token;
4429 redo B;
4430 }
4431
4432 ## generate implied end tags
4433 if ({
4434 dd => 1, dt => 1, li => 1, p => 1,
4435 td => 1, th => 1, tr => 1,
4436 }->{$self->{open_elements}->[-1]->[1]}) {
4437 !!!back-token; # <table>
4438 $token = {type => 'end tag', tag_name => 'table'};
4439 !!!back-token;
4440 $token = {type => 'end tag',
4441 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4442 redo B;
4443 }
4444
4445 if ($self->{open_elements}->[-1]->[1] ne 'table') {
4446 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4447 }
4448
4449 splice @{$self->{open_elements}}, $i;
4450
4451 $self->_reset_insertion_mode;
4452
4453 ## reprocess
4454 redo B;
4455 } else {
4456 #
4457 }
4458 } elsif ($token->{type} eq 'end tag') {
4459 if ($token->{tag_name} eq 'tr') {
4460 ## have an element in table scope
4461 my $i;
4462 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4463 my $node = $self->{open_elements}->[$_];
4464 if ($node->[1] eq $token->{tag_name}) {
4465 $i = $_;
4466 last INSCOPE;
4467 } elsif ({
4468 table => 1, html => 1,
4469 }->{$node->[1]}) {
4470 last INSCOPE;
4471 }
4472 } # INSCOPE
4473 unless (defined $i) {
4474 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4475 ## Ignore the token
4476 !!!next-token;
4477 redo B;
4478 }
4479
4480 ## Clear back to table row context
4481 while (not {
4482 tr => 1, html => 1,
4483 }->{$self->{open_elements}->[-1]->[1]}) {
4484 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4485 pop @{$self->{open_elements}};
4486 }
4487
4488 pop @{$self->{open_elements}}; # tr
4489 $self->{insertion_mode} = 'in table body';
4490 !!!next-token;
4491 redo B;
4492 } elsif ($token->{tag_name} eq 'table') {
4493 ## As if </tr>
4494 ## have an element in table scope
4495 my $i;
4496 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4497 my $node = $self->{open_elements}->[$_];
4498 if ($node->[1] eq 'tr') {
4499 $i = $_;
4500 last INSCOPE;
4501 } elsif ({
4502 table => 1, html => 1,
4503 }->{$node->[1]}) {
4504 last INSCOPE;
4505 }
4506 } # INSCOPE
unless (defined $i) {
  ## No |tr| element was found in table scope, so the implied </tr>
  ## cannot be applied.  Report the tag name of the offending token
  ## (not its token type), as the 'in table body' counterpart does.
  !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
  ## Ignore the token
  !!!next-token;
  redo B;
}
4513
4514 ## Clear back to table row context
4515 while (not {
4516 tr => 1, html => 1,
4517 }->{$self->{open_elements}->[-1]->[1]}) {
4518 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4519 pop @{$self->{open_elements}};
4520 }
4521
4522 pop @{$self->{open_elements}}; # tr
4523 $self->{insertion_mode} = 'in table body';
4524 ## reprocess
4525 redo B;
4526 } elsif ({
4527 tbody => 1, tfoot => 1, thead => 1,
4528 }->{$token->{tag_name}}) {
4529 ## have an element in table scope
4530 my $i;
4531 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4532 my $node = $self->{open_elements}->[$_];
4533 if ($node->[1] eq $token->{tag_name}) {
4534 $i = $_;
4535 last INSCOPE;
4536 } elsif ({
4537 table => 1, html => 1,
4538 }->{$node->[1]}) {
4539 last INSCOPE;
4540 }
4541 } # INSCOPE
4542 unless (defined $i) {
4543 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4544 ## Ignore the token
4545 !!!next-token;
4546 redo B;
4547 }
4548
## As if </tr>
## have an element in table scope
## NOTE: |$i| is already declared (and set) by the scope check for the
## end tag itself earlier in this branch; reset it rather than
## redeclaring it, so that the search below starts from undef without
## masking the earlier lexical.
undef $i;
4552 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4553 my $node = $self->{open_elements}->[$_];
4554 if ($node->[1] eq 'tr') {
4555 $i = $_;
4556 last INSCOPE;
4557 } elsif ({
4558 table => 1, html => 1,
4559 }->{$node->[1]}) {
4560 last INSCOPE;
4561 }
4562 } # INSCOPE
4563 unless (defined $i) {
4564 !!!parse-error (type => 'unmatched end tag:tr');
4565 ## Ignore the token
4566 !!!next-token;
4567 redo B;
4568 }
4569
4570 ## Clear back to table row context
4571 while (not {
4572 tr => 1, html => 1,
4573 }->{$self->{open_elements}->[-1]->[1]}) {
4574 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4575 pop @{$self->{open_elements}};
4576 }
4577
4578 pop @{$self->{open_elements}}; # tr
4579 $self->{insertion_mode} = 'in table body';
4580 ## reprocess
4581 redo B;
4582 } elsif ({
4583 body => 1, caption => 1, col => 1,
4584 colgroup => 1, html => 1, td => 1, th => 1,
4585 }->{$token->{tag_name}}) {
4586 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4587 ## Ignore the token
4588 !!!next-token;
4589 redo B;
4590 } else {
4591 #
4592 }
4593 } else {
4594 #
4595 }
4596
4597 ## As if in table
4598 !!!parse-error (type => 'in table:'.$token->{tag_name});
4599 $in_body->($insert_to_foster);
4600 redo B;
4601 } elsif ($self->{insertion_mode} eq 'in cell') {
4602 if ($token->{type} eq 'character') {
4603 ## NOTE: This is a code clone of "character in body".
4604 $reconstruct_active_formatting_elements->($insert_to_current);
4605
4606 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4607
4608 !!!next-token;
4609 redo B;
4610 } elsif ($token->{type} eq 'comment') {
4611 ## NOTE: This is a code clone of "comment in body".
4612 my $comment = $self->{document}->create_comment ($token->{data});
4613 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4614 !!!next-token;
4615 redo B;
4616 } elsif ($token->{type} eq 'start tag') {
4617 if ({
4618 caption => 1, col => 1, colgroup => 1,
4619 tbody => 1, td => 1, tfoot => 1, th => 1,
4620 thead => 1, tr => 1,
4621 }->{$token->{tag_name}}) {
4622 ## have an element in table scope
4623 my $tn;
4624 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4625 my $node = $self->{open_elements}->[$_];
4626 if ($node->[1] eq 'td' or $node->[1] eq 'th') {
4627 $tn = $node->[1];
4628 last INSCOPE;
4629 } elsif ({
4630 table => 1, html => 1,
4631 }->{$node->[1]}) {
4632 last INSCOPE;
4633 }
4634 } # INSCOPE
4635 unless (defined $tn) {
4636 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4637 ## Ignore the token
4638 !!!next-token;
4639 redo B;
4640 }
4641
4642 ## Close the cell
4643 !!!back-token; # <?>
4644 $token = {type => 'end tag', tag_name => $tn};
4645 redo B;
4646 } else {
4647 #
4648 }
4649 } elsif ($token->{type} eq 'end tag') {
4650 if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
4651 ## have an element in table scope
4652 my $i;
4653 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4654 my $node = $self->{open_elements}->[$_];
4655 if ($node->[1] eq $token->{tag_name}) {
4656 $i = $_;
4657 last INSCOPE;
4658 } elsif ({
4659 table => 1, html => 1,
4660 }->{$node->[1]}) {
4661 last INSCOPE;
4662 }
4663 } # INSCOPE
4664 unless (defined $i) {
4665 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4666 ## Ignore the token
4667 !!!next-token;
4668 redo B;
4669 }
4670
4671 ## generate implied end tags
4672 if ({
4673 dd => 1, dt => 1, li => 1, p => 1,
4674 td => ($token->{tag_name} eq 'th'),
4675 th => ($token->{tag_name} eq 'td'),
4676 tr => 1,
4677 }->{$self->{open_elements}->[-1]->[1]}) {
4678 !!!back-token;
4679 $token = {type => 'end tag',
4680 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4681 redo B;
4682 }
4683
4684 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
4685 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4686 }
4687
4688 splice @{$self->{open_elements}}, $i;
4689
4690 $clear_up_to_marker->();
4691
4692 $self->{insertion_mode} = 'in row';
4693
4694 !!!next-token;
4695 redo B;
4696 } elsif ({
4697 body => 1, caption => 1, col => 1,
4698 colgroup => 1, html => 1,
4699 }->{$token->{tag_name}}) {
4700 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4701 ## Ignore the token
4702 !!!next-token;
4703 redo B;
4704 } elsif ({
4705 table => 1, tbody => 1, tfoot => 1,
4706 thead => 1, tr => 1,
4707 }->{$token->{tag_name}}) {
4708 ## have an element in table scope
4709 my $i;
4710 my $tn;
4711 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4712 my $node = $self->{open_elements}->[$_];
4713 if ($node->[1] eq $token->{tag_name}) {
4714 $i = $_;
4715 last INSCOPE;
4716 } elsif ($node->[1] eq 'td' or $node->[1] eq 'th') {
4717 $tn = $node->[1];
4718 ## NOTE: There is exactly one |td| or |th| element
4719 ## in scope in the stack of open elements by definition.
4720 } elsif ({
4721 table => 1, html => 1,
4722 }->{$node->[1]}) {
4723 last INSCOPE;
4724 }
4725 } # INSCOPE
4726 unless (defined $i) {
4727 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4728 ## Ignore the token
4729 !!!next-token;
4730 redo B;
4731 }
4732
4733 ## Close the cell
4734 !!!back-token; # </?>
4735 $token = {type => 'end tag', tag_name => $tn};
4736 redo B;
4737 } else {
4738 #
4739 }
4740 } else {
4741 #
4742 }
4743
4744 $in_body->($insert_to_current);
4745 redo B;
4746 } elsif ($self->{insertion_mode} eq 'in select') {
4747 if ($token->{type} eq 'character') {
4748 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4749 !!!next-token;
4750 redo B;
4751 } elsif ($token->{type} eq 'comment') {
4752 my $comment = $self->{document}->create_comment ($token->{data});
4753 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4754 !!!next-token;
4755 redo B;
4756 } elsif ($token->{type} eq 'start tag') {
4757 if ($token->{tag_name} eq 'option') {
4758 if ($self->{open_elements}->[-1]->[1] eq 'option') {
4759 ## As if </option>
4760 pop @{$self->{open_elements}};
4761 }
4762
4763 !!!insert-element ($token->{tag_name}, $token->{attributes});
4764 !!!next-token;
4765 redo B;
4766 } elsif ($token->{tag_name} eq 'optgroup') {
4767 if ($self->{open_elements}->[-1]->[1] eq 'option') {
4768 ## As if </option>
4769 pop @{$self->{open_elements}};
4770 }
4771
4772 if ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
4773 ## As if </optgroup>
4774 pop @{$self->{open_elements}};
4775 }
4776
4777 !!!insert-element ($token->{tag_name}, $token->{attributes});
4778 !!!next-token;
4779 redo B;
4780 } elsif ($token->{tag_name} eq 'select') {
4781 !!!parse-error (type => 'not closed:select');
4782 ## As if </select> instead
4783 ## have an element in table scope
4784 my $i;
4785 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4786 my $node = $self->{open_elements}->[$_];
4787 if ($node->[1] eq $token->{tag_name}) {
4788 $i = $_;
4789 last INSCOPE;
4790 } elsif ({
4791 table => 1, html => 1,
4792 }->{$node->[1]}) {
4793 last INSCOPE;
4794 }
4795 } # INSCOPE
4796 unless (defined $i) {
4797 !!!parse-error (type => 'unmatched end tag:select');
4798 ## Ignore the token
4799 !!!next-token;
4800 redo B;
4801 }
4802
4803 splice @{$self->{open_elements}}, $i;
4804
4805 $self->_reset_insertion_mode;
4806
4807 !!!next-token;
4808 redo B;
4809 } else {
4810 #
4811 }
4812 } elsif ($token->{type} eq 'end tag') {
4813 if ($token->{tag_name} eq 'optgroup') {
4814 if ($self->{open_elements}->[-1]->[1] eq 'option' and
4815 $self->{open_elements}->[-2]->[1] eq 'optgroup') {
4816 ## As if </option>
4817 splice @{$self->{open_elements}}, -2;
4818 } elsif ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
4819 pop @{$self->{open_elements}};
4820 } else {
4821 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4822 ## Ignore the token
4823 }
4824 !!!next-token;
4825 redo B;
4826 } elsif ($token->{tag_name} eq 'option') {
4827 if ($self->{open_elements}->[-1]->[1] eq 'option') {
4828 pop @{$self->{open_elements}};
4829 } else {
4830 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4831 ## Ignore the token
4832 }
4833 !!!next-token;
4834 redo B;
4835 } elsif ($token->{tag_name} eq 'select') {
4836 ## have an element in table scope
4837 my $i;
4838 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4839 my $node = $self->{open_elements}->[$_];
4840 if ($node->[1] eq $token->{tag_name}) {
4841 $i = $_;
4842 last INSCOPE;
4843 } elsif ({
4844 table => 1, html => 1,
4845 }->{$node->[1]}) {
4846 last INSCOPE;
4847 }
4848 } # INSCOPE
4849 unless (defined $i) {
4850 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4851 ## Ignore the token
4852 !!!next-token;
4853 redo B;
4854 }
4855
4856 splice @{$self->{open_elements}}, $i;
4857
4858 $self->_reset_insertion_mode;
4859
4860 !!!next-token;
4861 redo B;
4862 } elsif ({
4863 caption => 1, table => 1, tbody => 1,
4864 tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
4865 }->{$token->{tag_name}}) {
4866 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4867
4868 ## have an element in table scope
4869 my $i;
4870 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4871 my $node = $self->{open_elements}->[$_];
4872 if ($node->[1] eq $token->{tag_name}) {
4873 $i = $_;
4874 last INSCOPE;
4875 } elsif ({
4876 table => 1, html => 1,
4877 }->{$node->[1]}) {
4878 last INSCOPE;
4879 }
4880 } # INSCOPE
4881 unless (defined $i) {
4882 ## Ignore the token
4883 !!!next-token;
4884 redo B;
4885 }
4886
4887 ## As if </select>
4888 ## have an element in table scope
4889 undef $i;
4890 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4891 my $node = $self->{open_elements}->[$_];
4892 if ($node->[1] eq 'select') {
4893 $i = $_;
4894 last INSCOPE;
4895 } elsif ({
4896 table => 1, html => 1,
4897 }->{$node->[1]}) {
4898 last INSCOPE;
4899 }
4900 } # INSCOPE
4901 unless (defined $i) {
4902 !!!parse-error (type => 'unmatched end tag:select');
4903 ## Ignore the </select> token
4904 !!!next-token; ## TODO: ok?
4905 redo B;
4906 }
4907
4908 splice @{$self->{open_elements}}, $i;
4909
4910 $self->_reset_insertion_mode;
4911
4912 ## reprocess
4913 redo B;
4914 } else {
4915 #
4916 }
4917 } else {
4918 #
4919 }
4920
4921 !!!parse-error (type => 'in select:'.$token->{tag_name});
4922 ## Ignore the token
4923 !!!next-token;
4924 redo B;
4925 } elsif ($self->{insertion_mode} eq 'after body') {
4926 if ($token->{type} eq 'character') {
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
  ## Keep the stripped leading white space in a lexical;
  ## |$token->{data}| now holds only what follows it.
  my $leading_space = $1;

  ## As if in body
  $reconstruct_active_formatting_elements->($insert_to_current);

  ## Append the white space itself.  The remainder of the token (if
  ## any) is handled below, so it must not be inserted here.
  $self->{open_elements}->[-1]->[0]->manakai_append_text ($leading_space);

  unless (length $token->{data}) {
    ## The token consisted of white space only.
    !!!next-token;
    redo B;
  }
}
4938
4939 #
4940 !!!parse-error (type => 'after body:#'.$token->{type});
4941 } elsif ($token->{type} eq 'comment') {
4942 my $comment = $self->{document}->create_comment ($token->{data});
4943 $self->{open_elements}->[0]->[0]->append_child ($comment);
4944 !!!next-token;
4945 redo B;
4946 } elsif ($token->{type} eq 'start tag') {
4947 !!!parse-error (type => 'after body:'.$token->{tag_name});
4948 #
4949 } elsif ($token->{type} eq 'end tag') {
4950 if ($token->{tag_name} eq 'html') {
4951 if (defined $self->{inner_html_node}) {
4952 !!!parse-error (type => 'unmatched end tag:html');
4953 ## Ignore the token
4954 !!!next-token;
4955 redo B;
4956 } else {
4957 $phase = 'trailing end';
4958 !!!next-token;
4959 redo B;
4960 }
4961 } else {
4962 !!!parse-error (type => 'after body:/'.$token->{tag_name});
4963 }
4964 } else {
4965 !!!parse-error (type => 'after body:#'.$token->{type});
4966 }
4967
4968 $self->{insertion_mode} = 'in body';
4969 ## reprocess
4970 redo B;
4971 } elsif ($self->{insertion_mode} eq 'in frameset') {
4972 if ($token->{type} eq 'character') {
4973 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4974 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4975
4976 unless (length $token->{data}) {
4977 !!!next-token;
4978 redo B;
4979 }
4980 }
4981
4982 #
4983 } elsif ($token->{type} eq 'comment') {
4984 my $comment = $self->{document}->create_comment ($token->{data});
4985 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4986 !!!next-token;
4987 redo B;
4988 } elsif ($token->{type} eq 'start tag') {
4989 if ($token->{tag_name} eq 'frameset') {
4990 !!!insert-element ($token->{tag_name}, $token->{attributes});
4991 !!!next-token;
4992 redo B;
4993 } elsif ($token->{tag_name} eq 'frame') {
4994 !!!insert-element ($token->{tag_name}, $token->{attributes});
4995 pop @{$self->{open_elements}};
4996 !!!next-token;
4997 redo B;
4998 } elsif ($token->{tag_name} eq 'noframes') {
4999 $in_body->($insert_to_current);
5000 redo B;
5001 } else {
5002 #
5003 }
5004 } elsif ($token->{type} eq 'end tag') {
5005 if ($token->{tag_name} eq 'frameset') {
5006 if ($self->{open_elements}->[-1]->[1] eq 'html' and
5007 @{$self->{open_elements}} == 1) {
5008 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
5009 ## Ignore the token
5010 !!!next-token;
5011 } else {
5012 pop @{$self->{open_elements}};
5013 !!!next-token;
5014 }
5015
5016 ## if not inner_html and
5017 if ($self->{open_elements}->[-1]->[1] ne 'frameset') {
5018 $self->{insertion_mode} = 'after frameset';
5019 }
5020 redo B;
5021 } else {
5022 #
5023 }
5024 } else {
5025 #
5026 }
5027
5028 if (defined $token->{tag_name}) {
5029 !!!parse-error (type => 'in frameset:'.($token->{type} eq 'end tag' ? '/' : '').$token->{tag_name});
5030 } else {
5031 !!!parse-error (type => 'in frameset:#'.$token->{type});
5032 }
5033 ## Ignore the token
5034 !!!next-token;
5035 redo B;
5036 } elsif ($self->{insertion_mode} eq 'after frameset') {
5037 if ($token->{type} eq 'character') {
5038 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5039 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
5040
5041 unless (length $token->{data}) {
5042 !!!next-token;
5043 redo B;
5044 }
5045 }
5046
5047 #
5048 } elsif ($token->{type} eq 'comment') {
5049 my $comment = $self->{document}->create_comment ($token->{data});
5050 $self->{open_elements}->[-1]->[0]->append_child ($comment);
5051 !!!next-token;
5052 redo B;
5053 } elsif ($token->{type} eq 'start tag') {
5054 if ($token->{tag_name} eq 'noframes') {
5055 $in_body->($insert_to_current);
5056 redo B;
5057 } else {
5058 #
5059 }
5060 } elsif ($token->{type} eq 'end tag') {
5061 if ($token->{tag_name} eq 'html') {
5062 $phase = 'trailing end';
5063 !!!next-token;
5064 redo B;
5065 } else {
5066 #
5067 }
5068 } else {
5069 #
5070 }
5071
5072 if (defined $token->{tag_name}) {
5073 !!!parse-error (type => 'after frameset:'.($token->{tag_name} eq 'end tag' ? '/' : '').$token->{tag_name});
5074 } else {
5075 !!!parse-error (type => 'after frameset:#'.$token->{type});
5076 }
5077 ## Ignore the token
5078 !!!next-token;
5079 redo B;
5080
5081 ## ISSUE: An issue in spec there
5082 } else {
5083 die "$0: $self->{insertion_mode}: Unknown insertion mode";
5084 }
5085 }
5086 } elsif ($phase eq 'trailing end') {
5087 ## states in the main stage is preserved yet # MUST
5088
5089 if ($token->{type} eq 'DOCTYPE') {
5090 !!!parse-error (type => 'after html:#DOCTYPE');
5091 ## Ignore the token
5092 !!!next-token;
5093 redo B;
5094 } elsif ($token->{type} eq 'comment') {
5095 my $comment = $self->{document}->create_comment ($token->{data});
5096 $self->{document}->append_child ($comment);
5097 !!!next-token;
5098 redo B;
5099 } elsif ($token->{type} eq 'character') {
5100 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5101 my $data = $1;
5102 ## As if in the main phase.
5103 ## NOTE: The insertion mode in the main phase
5104 ## just before the phase has been changed to the trailing
5105 ## end phase is either "after body" or "after frameset".
5106 $reconstruct_active_formatting_elements->($insert_to_current)
5107 if $phase eq 'main';
5108
5109 $self->{open_elements}->[-1]->[0]->manakai_append_text ($data);
5110
5111 unless (length $token->{data}) {
5112 !!!next-token;
5113 redo B;
5114 }
5115 }
5116
5117 !!!parse-error (type => 'after html:#character');
5118 $phase = 'main';
5119 ## reprocess
5120 redo B;
5121 } elsif ($token->{type} eq 'start tag' or
5122 $token->{type} eq 'end tag') {
5123 !!!parse-error (type => 'after html:'.($token->{type} eq 'end tag' ? '/' : '').$token->{tag_name});
5124 $phase = 'main';
5125 ## reprocess
5126 redo B;
5127 } elsif ($token->{type} eq 'end-of-file') {
5128 ## Stop parsing
5129 last B;
5130 } else {
5131 die "$0: $token->{type}: Unknown token";
5132 }
5133 }
5134 } # B
5135
5136 ## Stop parsing # MUST
5137
5138 ## TODO: script stuffs
5139 } # _tree_construct_main
5140
## Replaces the content of |$node| with the result of parsing a string
## as HTML (the |innerHTML| setter).
##
## Arguments (called as a class method):
##   $node    - a Document (node type 9) or an Element (node type 1).
##   $_[0]    - the string to parse; it is accessed through a reference
##              and is not copied.
##   $onerror - optional code reference receiving parse errors as a
##              key/value list.  In the Element case |line| and |column|
##              are appended; when omitted, errors are reported by |warn|.
##
## Dies for any other node type.
##
## NOTE: |!!!parse-error| and |!!!next-token| are source macros of this
## |.src| file.  They presumably expand to code that refers to a lexical
## named |$self| (the code below introduces one solely for
## |!!!next-token|), so that name has to stay as it is.
sub set_inner_html ($$$) {
  my $class = shift;
  my $node = shift;
  my ($input, $onerror) = (\$_[0], $_[1]);

  my $nt = $node->node_type;
  unless ($nt == 9 or $nt == 1) {
    die "$0: |set_inner_html| is not defined for node of type $nt";
  }

  if ($nt == 9) {
    # MUST

    ## Step 1 # MUST
    ## TODO: If the document has an active parser, ...
    ## ISSUE: There is an issue in the spec.

    ## Step 2 # MUST
    ## Work on a snapshot of the child list while removing its members.
    my @old_children = @{$node->child_nodes};
    $node->remove_child ($_) for @old_children;

    ## Step 3, 4, 5 # MUST
    return $class->parse_string ($$input => $node, $onerror);
  }

  ## From here on |$node| is an Element.
  ## TODO: If non-html element

  ## NOTE: Most of this code is copied from |parse_string|

  ## Step 1 # MUST
  ## The fragment is parsed into a separate, temporary HTML document.
  my $this_doc = $node->owner_document;
  my $doc = $this_doc->implementation->create_document;
  $doc->manakai_is_html (1);
  my $p = $class->new;
  $p->{document} = $doc;

  ## Step 9 # MUST
  my $pos = 0;      # offset of the next unread character in |$$input|
  my $line = 1;
  my $column = 0;
  $p->{set_next_input_character} = sub {
    my $self = shift;

    ## Shift the history of previously seen characters by one.
    pop @{$self->{prev_input_character}};
    unshift @{$self->{prev_input_character}}, $self->{next_input_character};

    if ($pos >= length $$input) {
      ## End of input is represented by -1.
      $self->{next_input_character} = -1;
      return;
    }

    my $c = $self->{next_input_character} = ord substr $$input, $pos++, 1;
    $column++;

    if ($c == 0x000A) { # LF
      $line++;
      $column = 0;
    } elsif ($c == 0x000D) { # CR
      ## CR and CR LF are both reported as a single LF.
      $pos++ if substr ($$input, $pos, 1) eq "\x0A";
      $self->{next_input_character} = 0x000A; # LF # MUST
      $line++;
      $column = 0;
    } elsif ($c > 0x10FFFF) {
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    } elsif ($c == 0x0000) { # NULL
      !!!parse-error (type => 'NULL');
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    }
  };
  $p->{prev_input_character} = [-1, -1, -1];
  $p->{next_input_character} = -1;

  ## Errors go to the caller's handler, or to |warn| by default; the
  ## current input position is always appended.
  my $ponerror = $onerror || sub {
    my (%opt) = @_;
    warn "Parse error ($opt{type}) at line $opt{line} column $opt{column}\n";
  };
  $p->{parse_error} = sub {
    $ponerror->(@_, line => $line, column => $column);
  };

  $p->_initialize_tokenizer;
  $p->_initialize_tree_constructor;

  ## Step 2
  ## The tokenizer's initial content model depends on the context element.
  my %content_model_for = (
    title => 'RCDATA',
    textarea => 'RCDATA',
    style => 'CDATA',
    script => 'CDATA',
    xmp => 'CDATA',
    iframe => 'CDATA',
    noembed => 'CDATA',
    noframes => 'CDATA',
    noscript => 'CDATA',
    plaintext => 'PLAINTEXT',
  );
  my $node_ln = $node->local_name;
  $p->{content_model_flag} = $content_model_for{$node_ln} || 'PCDATA';
  ## ISSUE: What is "the name of the element"? local name?

  $p->{inner_html_node} = [$node, $node_ln];

  ## Step 4
  my $root = $doc->create_element_ns
      ('http://www.w3.org/1999/xhtml', [undef, 'html']);

  ## Step 5 # MUST
  $doc->append_child ($root);

  ## Step 6 # MUST
  push @{$p->{open_elements}}, [$root, 'html'];

  undef $p->{head_element};

  ## Step 7 # MUST
  $p->_reset_insertion_mode;

  ## Step 8 # MUST
  ## The nearest XHTML |form| among |$node| and its ancestors, if any,
  ## becomes the parser's form element.
  for (my $anode = $node; defined $anode; $anode = $anode->parent_node) {
    next unless $anode->node_type == 1;
    my $nsuri = $anode->namespace_uri;
    next unless defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml';
    if ($anode->local_name eq 'form') { ## TODO: case?
      $p->{form_element} = $anode;
      last;
    }
  }

  ## Step 3 # MUST
  ## Step 10 # MUST
  {
    my $self = $p;
    !!!next-token;
  }
  $p->_tree_construction_main;

  ## Step 11 # MUST
  my @stale_children = @{$node->child_nodes};
  $node->remove_child ($_) for @stale_children;
  ## ISSUE: mutation events? read-only?

  ## Step 12 # MUST
  ## Move the parsed nodes from the temporary root into |$node|.
  my @parsed_children = @{$root->child_nodes};
  for my $parsed (@parsed_children) {
    $this_doc->adopt_node ($parsed);
    $node->append_child ($parsed);
  }
  ## ISSUE: mutation events?

  return $p->_terminate_tree_constructor;
} # set_inner_html
5294
5295 } # tree construction stage
5296
## Serializes the children of |$node| as an HTML string (the
## |innerHTML| getter).
##
## Arguments:
##   (invocant) - ignored.
##   $node      - the node whose child nodes are serialized; the node
##                itself is not written.
##   $on_error  - optional code reference, invoked with every child node
##                whose node type is not handled here.
##
## Returns a reference to the resulting string.
sub get_inner_html ($$$) {
  my (undef, $node, $on_error) = @_;

  ## Elements whose text content is written literally, without escaping.
  my $literal_text_element = {
    style => 1, script => 1, xmp => 1, iframe => 1,
    noembed => 1, noframes => 1, noscript => 1,
    plaintext => 1,
  };

  ## Elements serialized as a start tag only (no content, no end tag).
  my $void_element = {
    area => 1, base => 1, basefont => 1, bgsound => 1,
    br => 1, col => 1, embed => 1, frame => 1, hr => 1,
    img => 1, input => 1, link => 1, meta => 1, param => 1,
    spacer => 1, wbr => 1,
  };

  ## Step 1
  my $s = '';

  ## Text is literal from the start if |$node| itself or one of its
  ## ancestors is an XHTML literal-text element.
  my $in_cdata;
  my $parent = $node;
  ANCESTOR: while (defined $parent) {
    if ($parent->node_type == 1) {
      my $nsuri = $parent->namespace_uri;
      my $ln = $parent->local_name;
      if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml' and
          defined $ln and $literal_text_element->{$ln}) { ## TODO: case thingy
        $in_cdata = 1;
        last ANCESTOR;
      }
    }
    $parent = $parent->parent_node;
  } # ANCESTOR

  ## Step 2
  ## |@node| is a work queue in document order.  Besides nodes it holds
  ## plain strings: end tags waiting to be written, and the marker
  ## |cdata-out| that switches literal-text mode off again.
  my @node = @{$node->child_nodes};
  C: while (@node) {
    my $child = shift @node;
    unless (ref $child) {
      if ($child eq 'cdata-out') {
        $in_cdata = 0;
      } else {
        $s .= $child; # end tag
      }
      next C;
    }

    my $nt = $child->node_type;
    if ($nt == 1) { # Element
      my $tag_name = $child->tag_name; ## TODO: manakai_tag_name
      $s .= '<' . $tag_name;
      ## NOTE: Non-HTML case:
      ## <http://permalink.gmane.org/gmane.org.w3c.whatwg.discuss/11191>

      my @attrs = @{$child->attributes}; # sort order MUST be stable
      for my $attr (@attrs) { # order is implementation dependent
        my $attr_name = $attr->name; ## TODO: manakai_name
        $s .= ' ' . $attr_name . '="';
        my $attr_value = $attr->value;
        ## escape
        $attr_value =~ s/&/&amp;/g;
        $attr_value =~ s/</&lt;/g;
        $attr_value =~ s/>/&gt;/g;
        $attr_value =~ s/"/&quot;/g;
        $s .= $attr_value . '"';
      }
      $s .= '>';

      next C if $void_element->{$tag_name};

      $s .= "\x0A" if $tag_name eq 'pre' or $tag_name eq 'textarea';

      if (not $in_cdata and $literal_text_element->{$tag_name}) {
        ## Leave literal-text mode once this element's end tag is out.
        unshift @node, 'cdata-out';
        $in_cdata = 1;
      }

      unshift @node, @{$child->child_nodes}, '</' . $tag_name . '>';
    } elsif ($nt == 3 or $nt == 4) { # Text, CDATASection
      if ($in_cdata) {
        $s .= $child->data;
      } else {
        my $value = $child->data;
        $value =~ s/&/&amp;/g;
        $value =~ s/</&lt;/g;
        $value =~ s/>/&gt;/g;
        $value =~ s/"/&quot;/g;
        $s .= $value;
      }
    } elsif ($nt == 8) { # Comment
      $s .= '<!--' . $child->data . '-->';
    } elsif ($nt == 10) { # DocumentType
      $s .= '<!DOCTYPE ' . $child->name . '>';
    } elsif ($nt == 5) { # entrefs
      ## The children take the place of the reference itself, so they go
      ## to the front of the queue; appending them to the end would
      ## write them after later siblings and pending end tags.
      unshift @node, @{$child->child_nodes};
    } else {
      $on_error->($child) if defined $on_error;
    }
    ## ISSUE: This code does not support PIs.
  } # C

  ## Step 3
  return \$s;
} # get_inner_html
5396
5397 1;
5398 # $Date: 2007/06/25 11:05:57 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24