/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Contents of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.27 - (show annotations) (download) (as text)
Sun Jun 24 14:24:21 2007 UTC (17 years, 4 months ago) by wakaba
Branch: MAIN
Changes since 1.26: +6 -6 lines
File MIME type: application/x-wais-source
++ whatpm/t/ChangeLog	24 Jun 2007 14:19:51 -0000
	* content-model-1.dat: Tests for |footer|
	content model are added.

	* content-model-2.dat: Tests for |ping|
	and |tabindex| attributes are added.  Tests for |datetime|
	attribute of |ins| and |del| elements are added.

	* content-model-4.dat: New test data.

	* ContentChecker.t: |content-model-4.dat| is added.

2007-06-24  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	24 Jun 2007 14:20:06 -0000
	* URIChecker.pm (check_iri_reference): A |decode| method name was
	incorrect.

	* ContentChecker.pm: Support for the |footer| element.
	Check URI syntax for space-separated URI attributes.
	Support for the |tabindex| attribute.  Support
	for |datetime| attribute.

2007-06-24  Wakaba  <wakaba@suika.fam.cx>

1 package Whatpm::HTML;
2 use strict;
3 our $VERSION=do{my @r=(q$Revision: 1.26 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4
5 ## ISSUE:
6 ## var doc = implementation.createDocument (null, null, null);
7 ## doc.write ('');
8 ## alert (doc.compatMode);
9
## Tag names for which a "/" placed immediately before the closing ">"
## of a start tag (e.g. |<br/>|) is accepted silently.  The tokenizer
## looks the current tag name up in this table when it sees "/" inside
## a tag; for any other tag name, or for an end tag, it reports a
## |nestc| parse error.
my $permitted_slash_tag_name = {
  map { $_ => 1 } qw(
    base link meta hr br img embed param area col input
  )
};
23
## Replacement table for the code points 0x80..0x9F.  Each key is a
## code point in that range and each value is the code point to use
## instead.  The values follow the Windows-1252 layout; positions that
## have no assignment there map to U+FFFD REPLACEMENT CHARACTER.
## NOTE(review): presumably applied to numeric character references;
## the consumer is not visible in this part of the file.
my $c1_entity_char = {};
{
  my $code = 0x80;
  $c1_entity_char->{$code++} = $_ for (
    0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, # 0x80..0x87
    0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD, # 0x88..0x8F
    0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, # 0x90..0x97
    0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178, # 0x98..0x9F
  );
} # $c1_entity_char
58
## Element names that belong to the "special" category.  Membership is
## tested by hash lookup; every listed name maps to 1.
my $special_category = {};
$special_category->{$_} = 1 for qw(
  address area base basefont bgsound
  blockquote body br center col colgroup
  dd dir div dl dt embed fieldset
  form frame frameset h1 h2 h3
  h4 h5 h6 head hr iframe image
  img input isindex li link listing
  menu meta noembed noframes noscript
  ol optgroup option p param plaintext
  pre script select spacer style tbody
  textarea tfoot thead title tr ul wbr
);
## Element names that belong to the "scoping" category.  Membership is
## tested by hash lookup; every listed name maps to 1.
my $scoping_category = {
  map { $_ => 1 } qw(
    button caption html marquee object
    table td th
  )
};
## Element names that belong to the "formatting" category.  Membership
## is tested by hash lookup; every listed name maps to 1.
##
## The list previously contained the misspelled key |strile|, so the
## |strike| element was never recognized as a formatting element.
my $formatting_category = {
  map { $_ => 1 } qw(
    a b big em font i nobr
    s small strike strong tt u
  )
};
# $phrasing_category: all other elements
80
## Parses an HTML document given as a character string and builds the
## result into an existing document object.
##
## Arguments (after the invocant):
##   $_[0]  The input string.  It is only read, through a reference,
##          so it is neither copied nor modified here.
##   $_[1]  The document object to populate; it is stored as
##          |$self->{document}|.
##   $_[2]  Optional error handler.  It receives the name/value pairs
##          supplied at the error site followed by
##          |line => ..., column => ...|.  If it is omitted or false,
##          parse errors are reported with |warn|.
##
## Returns the document object that was passed as |$_[1]|.
##
## A fresh parser object is created with |new| for every call.
sub parse_string ($$$;$) {
  my $self = shift->new;
  my $input = \$_[0];
  $self->{document} = $_[1];

  ## NOTE: |set_inner_html| copies most of this method's code

  ## Position of the most recently consumed character.  Lines are
  ## counted from 1; the column is reset to 0 after each newline.
  my ($line, $column) = (1, 0);

  ## Error reporting: the handler is always given the current position.
  my $onerror = $_[2];
  $onerror ||= sub {
    my %opt = @_;
    warn "Parse error ($opt{type}) at line $opt{line} column $opt{column}\n";
  };
  $self->{parse_error} = sub {
    $onerror->(@_, line => $line, column => $column);
  };

  ## Input stream: |next_input_character| holds the current code point
  ## (-1 at end of input) and |prev_input_character| the three code
  ## points before it, most recent first.
  my $offset = 0;
  $self->{prev_input_character} = [-1, -1, -1];
  $self->{next_input_character} = -1;
  $self->{set_next_input_character} = sub {
    my $self = shift;

    ## Slide the history window by one character.
    pop @{$self->{prev_input_character}};
    unshift @{$self->{prev_input_character}}, $self->{next_input_character};

    if ($offset >= length $$input) {
      $self->{next_input_character} = -1; # end of input
      return;
    }

    my $c = $self->{next_input_character}
        = ord substr ($$input, $offset++, 1);
    $column++;

    if ($c == 0x000A) { # LF
      $line++;
      $column = 0;
    } elsif ($c == 0x000D) { # CR
      ## CR and CR LF are both reported as a single LF.
      $offset++ if substr ($$input, $offset, 1) eq "\x0A";
      $self->{next_input_character} = 0x000A; # LF # MUST
      $line++;
      $column = 0;
    } elsif ($c > 0x10FFFF) {
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    } elsif ($c == 0x0000) { # NULL
      !!!parse-error (type => 'NULL');
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    }
  };

  ## Run the parser: tokenizer set-up, then tree construction.
  $self->_initialize_tokenizer;
  $self->_initialize_tree_constructor;
  $self->_construct_tree;
  $self->_terminate_tree_constructor;

  return $self->{document};
} # parse_string
134
## Creates a new parser object.
##
## The object starts with two default callbacks:
##   set_next_input_character  Sets |next_input_character| to -1 (end
##                             of input), i.e. the input is empty.
##   parse_error               Ignores the error.
## Both are expected to be replaced by the caller, as |parse_string|
## does.
##
## Returns the new object, blessed into the invocant class.
sub new ($) {
  my $class = shift;
  my $self = bless {}, $class;

  ## The default callback has to refer to the object that stores it.
  ## A strong reference here would form a cycle ($self -> closure ->
  ## $self) and the object would never be freed unless the callback
  ## were replaced, so the closure only holds a weak reference.
  require Scalar::Util;
  my $weak_self = $self;
  Scalar::Util::weaken ($weak_self);

  $self->{set_next_input_character} = sub {
    $weak_self->{next_input_character} = -1;
  };
  $self->{parse_error} = sub {
    #
  };
  return $self;
} # new
146
147 ## Implementations MUST act as if they used the state machine in the spec
148
## Resets the tokenizer part of the parser state and reads the first
## input character.  Takes no arguments besides the invocant.
sub _initialize_tokenizer ($) {
  my ($self) = @_;

  ## Initial tokenizer state and content model flag (the original
  ## comments mark these values as "MUST be").
  @$self{qw(state content_model_flag)} = ('data', 'PCDATA');

  ## Nothing is under construction yet.  |current_token| will later
  ## hold a start tag, end tag, comment, or DOCTYPE token.
  $self->{$_} = undef for qw(
    current_token
    current_attribute
    last_emitted_start_tag_name
    last_attribute_value_state
  );
  $self->{char} = [];

  ## Prime $self->{next_input_character} with the first character.
  !!!next-input-character;

  ## Queue of tokens waiting to be returned by |_get_next_token|.
  $self->{token} = [];
  ## $self->{escape} is not touched here.
} # _initialize_tokenizer
163
164 ## A token has:
165 ## ->{type} eq 'DOCTYPE', 'start tag', 'end tag', 'comment',
166 ## 'character', or 'end-of-file'
167 ## ->{name} (DOCTYPE), ->{tag_name} (start tag, end tag)
168 ## ->{public_identifier} (DOCTYPE)
169 ## ->{system_identifier} (DOCTYPE)
170 ## ->{correct} == 1 or 0 (DOCTYPE)
171 ## ->{attributes} isa HASH (start tag, end tag)
172 ## ->{data} (comment, character)
173
174 ## An emitted token MUST immediately be handled by the tree construction stage.
175
176 ## Before each step, UA MAY check to see if either one of the scripts in
177 ## "list of scripts that will execute as soon as possible" or the first
178 ## script in the "list of scripts that will execute asynchronously",
179 ## has completed loading. If one has, then it MUST be executed
180 ## and removed from the list.
181
182 sub _get_next_token ($) {
183 my $self = shift;
184 if (@{$self->{token}}) {
185 return shift @{$self->{token}};
186 }
187
188 A: {
189 if ($self->{state} eq 'data') {
190 if ($self->{next_input_character} == 0x0026) { # &
191 if ($self->{content_model_flag} eq 'PCDATA' or
192 $self->{content_model_flag} eq 'RCDATA') {
193 $self->{state} = 'entity data';
194 !!!next-input-character;
195 redo A;
196 } else {
197 #
198 }
199 } elsif ($self->{next_input_character} == 0x002D) { # -
200 if ($self->{content_model_flag} eq 'RCDATA' or
201 $self->{content_model_flag} eq 'CDATA') {
202 unless ($self->{escape}) {
203 if ($self->{prev_input_character}->[0] == 0x002D and # -
204 $self->{prev_input_character}->[1] == 0x0021 and # !
205 $self->{prev_input_character}->[2] == 0x003C) { # <
206 $self->{escape} = 1;
207 }
208 }
209 }
210
211 #
212 } elsif ($self->{next_input_character} == 0x003C) { # <
213 if ($self->{content_model_flag} eq 'PCDATA' or
214 (($self->{content_model_flag} eq 'CDATA' or
215 $self->{content_model_flag} eq 'RCDATA') and
216 not $self->{escape})) {
217 $self->{state} = 'tag open';
218 !!!next-input-character;
219 redo A;
220 } else {
221 #
222 }
223 } elsif ($self->{next_input_character} == 0x003E) { # >
224 if ($self->{escape} and
225 ($self->{content_model_flag} eq 'RCDATA' or
226 $self->{content_model_flag} eq 'CDATA')) {
227 if ($self->{prev_input_character}->[0] == 0x002D and # -
228 $self->{prev_input_character}->[1] == 0x002D) { # -
229 delete $self->{escape};
230 }
231 }
232
233 #
234 } elsif ($self->{next_input_character} == -1) {
235 !!!emit ({type => 'end-of-file'});
236 last A; ## TODO: ok?
237 }
238 # Anything else
239 my $token = {type => 'character',
240 data => chr $self->{next_input_character}};
241 ## Stay in the data state
242 !!!next-input-character;
243
244 !!!emit ($token);
245
246 redo A;
247 } elsif ($self->{state} eq 'entity data') {
248 ## (cannot happen in CDATA state)
249
250 my $token = $self->_tokenize_attempt_to_consume_an_entity (0);
251
252 $self->{state} = 'data';
253 # next-input-character is already done
254
255 unless (defined $token) {
256 !!!emit ({type => 'character', data => '&'});
257 } else {
258 !!!emit ($token);
259 }
260
261 redo A;
262 } elsif ($self->{state} eq 'tag open') {
263 if ($self->{content_model_flag} eq 'RCDATA' or
264 $self->{content_model_flag} eq 'CDATA') {
265 if ($self->{next_input_character} == 0x002F) { # /
266 !!!next-input-character;
267 $self->{state} = 'close tag open';
268 redo A;
269 } else {
270 ## reconsume
271 $self->{state} = 'data';
272
273 !!!emit ({type => 'character', data => '<'});
274
275 redo A;
276 }
277 } elsif ($self->{content_model_flag} eq 'PCDATA') {
278 if ($self->{next_input_character} == 0x0021) { # !
279 $self->{state} = 'markup declaration open';
280 !!!next-input-character;
281 redo A;
282 } elsif ($self->{next_input_character} == 0x002F) { # /
283 $self->{state} = 'close tag open';
284 !!!next-input-character;
285 redo A;
286 } elsif (0x0041 <= $self->{next_input_character} and
287 $self->{next_input_character} <= 0x005A) { # A..Z
288 $self->{current_token}
289 = {type => 'start tag',
290 tag_name => chr ($self->{next_input_character} + 0x0020)};
291 $self->{state} = 'tag name';
292 !!!next-input-character;
293 redo A;
294 } elsif (0x0061 <= $self->{next_input_character} and
295 $self->{next_input_character} <= 0x007A) { # a..z
296 $self->{current_token} = {type => 'start tag',
297 tag_name => chr ($self->{next_input_character})};
298 $self->{state} = 'tag name';
299 !!!next-input-character;
300 redo A;
301 } elsif ($self->{next_input_character} == 0x003E) { # >
302 !!!parse-error (type => 'empty start tag');
303 $self->{state} = 'data';
304 !!!next-input-character;
305
306 !!!emit ({type => 'character', data => '<>'});
307
308 redo A;
309 } elsif ($self->{next_input_character} == 0x003F) { # ?
310 !!!parse-error (type => 'pio');
311 $self->{state} = 'bogus comment';
312 ## $self->{next_input_character} is intentionally left as is
313 redo A;
314 } else {
315 !!!parse-error (type => 'bare stago');
316 $self->{state} = 'data';
317 ## reconsume
318
319 !!!emit ({type => 'character', data => '<'});
320
321 redo A;
322 }
323 } else {
324 die "$0: $self->{content_model_flag}: Unknown content model flag";
325 }
326 } elsif ($self->{state} eq 'close tag open') {
327 if ($self->{content_model_flag} eq 'RCDATA' or
328 $self->{content_model_flag} eq 'CDATA') {
329 if (defined $self->{last_emitted_start_tag_name}) {
330 my @next_char;
331 TAGNAME: for (my $i = 0; $i < length $self->{last_emitted_start_tag_name}; $i++) {
332 push @next_char, $self->{next_input_character};
333 my $c = ord substr ($self->{last_emitted_start_tag_name}, $i, 1);
334 my $C = 0x0061 <= $c && $c <= 0x007A ? $c - 0x0020 : $c;
335 if ($self->{next_input_character} == $c or $self->{next_input_character} == $C) {
336 !!!next-input-character;
337 next TAGNAME;
338 } else {
339 $self->{next_input_character} = shift @next_char; # reconsume
340 !!!back-next-input-character (@next_char);
341 $self->{state} = 'data';
342
343 !!!emit ({type => 'character', data => '</'});
344
345 redo A;
346 }
347 }
348 push @next_char, $self->{next_input_character};
349
350 unless ($self->{next_input_character} == 0x0009 or # HT
351 $self->{next_input_character} == 0x000A or # LF
352 $self->{next_input_character} == 0x000B or # VT
353 $self->{next_input_character} == 0x000C or # FF
354 $self->{next_input_character} == 0x0020 or # SP
355 $self->{next_input_character} == 0x003E or # >
356 $self->{next_input_character} == 0x002F or # /
357 $self->{next_input_character} == -1) {
358 $self->{next_input_character} = shift @next_char; # reconsume
359 !!!back-next-input-character (@next_char);
360 $self->{state} = 'data';
361 !!!emit ({type => 'character', data => '</'});
362 redo A;
363 } else {
364 $self->{next_input_character} = shift @next_char;
365 !!!back-next-input-character (@next_char);
366 # and consume...
367 }
368 } else {
369 ## No start tag token has ever been emitted
370 # next-input-character is already done
371 $self->{state} = 'data';
372 !!!emit ({type => 'character', data => '</'});
373 redo A;
374 }
375 }
376
377 if (0x0041 <= $self->{next_input_character} and
378 $self->{next_input_character} <= 0x005A) { # A..Z
379 $self->{current_token} = {type => 'end tag',
380 tag_name => chr ($self->{next_input_character} + 0x0020)};
381 $self->{state} = 'tag name';
382 !!!next-input-character;
383 redo A;
384 } elsif (0x0061 <= $self->{next_input_character} and
385 $self->{next_input_character} <= 0x007A) { # a..z
386 $self->{current_token} = {type => 'end tag',
387 tag_name => chr ($self->{next_input_character})};
388 $self->{state} = 'tag name';
389 !!!next-input-character;
390 redo A;
391 } elsif ($self->{next_input_character} == 0x003E) { # >
392 !!!parse-error (type => 'empty end tag');
393 $self->{state} = 'data';
394 !!!next-input-character;
395 redo A;
396 } elsif ($self->{next_input_character} == -1) {
397 !!!parse-error (type => 'bare etago');
398 $self->{state} = 'data';
399 # reconsume
400
401 !!!emit ({type => 'character', data => '</'});
402
403 redo A;
404 } else {
405 !!!parse-error (type => 'bogus end tag');
406 $self->{state} = 'bogus comment';
407 ## $self->{next_input_character} is intentionally left as is
408 redo A;
409 }
410 } elsif ($self->{state} eq 'tag name') {
411 if ($self->{next_input_character} == 0x0009 or # HT
412 $self->{next_input_character} == 0x000A or # LF
413 $self->{next_input_character} == 0x000B or # VT
414 $self->{next_input_character} == 0x000C or # FF
415 $self->{next_input_character} == 0x0020) { # SP
416 $self->{state} = 'before attribute name';
417 !!!next-input-character;
418 redo A;
419 } elsif ($self->{next_input_character} == 0x003E) { # >
420 if ($self->{current_token}->{type} eq 'start tag') {
421 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
422 } elsif ($self->{current_token}->{type} eq 'end tag') {
423 $self->{content_model_flag} = 'PCDATA'; # MUST
424 if ($self->{current_token}->{attributes}) {
425 !!!parse-error (type => 'end tag attribute');
426 }
427 } else {
428 die "$0: $self->{current_token}->{type}: Unknown token type";
429 }
430 $self->{state} = 'data';
431 !!!next-input-character;
432
433 !!!emit ($self->{current_token}); # start tag or end tag
434
435 redo A;
436 } elsif (0x0041 <= $self->{next_input_character} and
437 $self->{next_input_character} <= 0x005A) { # A..Z
438 $self->{current_token}->{tag_name} .= chr ($self->{next_input_character} + 0x0020);
439 # start tag or end tag
440 ## Stay in this state
441 !!!next-input-character;
442 redo A;
443 } elsif ($self->{next_input_character} == -1) {
444 !!!parse-error (type => 'unclosed tag');
445 if ($self->{current_token}->{type} eq 'start tag') {
446 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
447 } elsif ($self->{current_token}->{type} eq 'end tag') {
448 $self->{content_model_flag} = 'PCDATA'; # MUST
449 if ($self->{current_token}->{attributes}) {
450 !!!parse-error (type => 'end tag attribute');
451 }
452 } else {
453 die "$0: $self->{current_token}->{type}: Unknown token type";
454 }
455 $self->{state} = 'data';
456 # reconsume
457
458 !!!emit ($self->{current_token}); # start tag or end tag
459
460 redo A;
461 } elsif ($self->{next_input_character} == 0x002F) { # /
462 !!!next-input-character;
463 if ($self->{next_input_character} == 0x003E and # >
464 $self->{current_token}->{type} eq 'start tag' and
465 $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
466 # permitted slash
467 #
468 } else {
469 !!!parse-error (type => 'nestc');
470 }
471 $self->{state} = 'before attribute name';
472 # next-input-character is already done
473 redo A;
474 } else {
475 $self->{current_token}->{tag_name} .= chr $self->{next_input_character};
476 # start tag or end tag
477 ## Stay in the state
478 !!!next-input-character;
479 redo A;
480 }
481 } elsif ($self->{state} eq 'before attribute name') {
482 if ($self->{next_input_character} == 0x0009 or # HT
483 $self->{next_input_character} == 0x000A or # LF
484 $self->{next_input_character} == 0x000B or # VT
485 $self->{next_input_character} == 0x000C or # FF
486 $self->{next_input_character} == 0x0020) { # SP
487 ## Stay in the state
488 !!!next-input-character;
489 redo A;
490 } elsif ($self->{next_input_character} == 0x003E) { # >
491 if ($self->{current_token}->{type} eq 'start tag') {
492 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
493 } elsif ($self->{current_token}->{type} eq 'end tag') {
494 $self->{content_model_flag} = 'PCDATA'; # MUST
495 if ($self->{current_token}->{attributes}) {
496 !!!parse-error (type => 'end tag attribute');
497 }
498 } else {
499 die "$0: $self->{current_token}->{type}: Unknown token type";
500 }
501 $self->{state} = 'data';
502 !!!next-input-character;
503
504 !!!emit ($self->{current_token}); # start tag or end tag
505
506 redo A;
507 } elsif (0x0041 <= $self->{next_input_character} and
508 $self->{next_input_character} <= 0x005A) { # A..Z
509 $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
510 value => ''};
511 $self->{state} = 'attribute name';
512 !!!next-input-character;
513 redo A;
514 } elsif ($self->{next_input_character} == 0x002F) { # /
515 !!!next-input-character;
516 if ($self->{next_input_character} == 0x003E and # >
517 $self->{current_token}->{type} eq 'start tag' and
518 $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
519 # permitted slash
520 #
521 } else {
522 !!!parse-error (type => 'nestc');
523 }
524 ## Stay in the state
525 # next-input-character is already done
526 redo A;
527 } elsif ($self->{next_input_character} == -1) {
528 !!!parse-error (type => 'unclosed tag');
529 if ($self->{current_token}->{type} eq 'start tag') {
530 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
531 } elsif ($self->{current_token}->{type} eq 'end tag') {
532 $self->{content_model_flag} = 'PCDATA'; # MUST
533 if ($self->{current_token}->{attributes}) {
534 !!!parse-error (type => 'end tag attribute');
535 }
536 } else {
537 die "$0: $self->{current_token}->{type}: Unknown token type";
538 }
539 $self->{state} = 'data';
540 # reconsume
541
542 !!!emit ($self->{current_token}); # start tag or end tag
543
544 redo A;
545 } else {
546 $self->{current_attribute} = {name => chr ($self->{next_input_character}),
547 value => ''};
548 $self->{state} = 'attribute name';
549 !!!next-input-character;
550 redo A;
551 }
552 } elsif ($self->{state} eq 'attribute name') {
553 my $before_leave = sub {
554 if (exists $self->{current_token}->{attributes} # start tag or end tag
555 ->{$self->{current_attribute}->{name}}) { # MUST
556 !!!parse-error (type => 'dupulicate attribute');
557 ## Discard $self->{current_attribute} # MUST
558 } else {
559 $self->{current_token}->{attributes}->{$self->{current_attribute}->{name}}
560 = $self->{current_attribute};
561 }
562 }; # $before_leave
563
564 if ($self->{next_input_character} == 0x0009 or # HT
565 $self->{next_input_character} == 0x000A or # LF
566 $self->{next_input_character} == 0x000B or # VT
567 $self->{next_input_character} == 0x000C or # FF
568 $self->{next_input_character} == 0x0020) { # SP
569 $before_leave->();
570 $self->{state} = 'after attribute name';
571 !!!next-input-character;
572 redo A;
573 } elsif ($self->{next_input_character} == 0x003D) { # =
574 $before_leave->();
575 $self->{state} = 'before attribute value';
576 !!!next-input-character;
577 redo A;
578 } elsif ($self->{next_input_character} == 0x003E) { # >
579 $before_leave->();
580 if ($self->{current_token}->{type} eq 'start tag') {
581 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
582 } elsif ($self->{current_token}->{type} eq 'end tag') {
583 $self->{content_model_flag} = 'PCDATA'; # MUST
584 if ($self->{current_token}->{attributes}) {
585 !!!parse-error (type => 'end tag attribute');
586 }
587 } else {
588 die "$0: $self->{current_token}->{type}: Unknown token type";
589 }
590 $self->{state} = 'data';
591 !!!next-input-character;
592
593 !!!emit ($self->{current_token}); # start tag or end tag
594
595 redo A;
596 } elsif (0x0041 <= $self->{next_input_character} and
597 $self->{next_input_character} <= 0x005A) { # A..Z
598 $self->{current_attribute}->{name} .= chr ($self->{next_input_character} + 0x0020);
599 ## Stay in the state
600 !!!next-input-character;
601 redo A;
602 } elsif ($self->{next_input_character} == 0x002F) { # /
603 $before_leave->();
604 !!!next-input-character;
605 if ($self->{next_input_character} == 0x003E and # >
606 $self->{current_token}->{type} eq 'start tag' and
607 $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
608 # permitted slash
609 #
610 } else {
611 !!!parse-error (type => 'nestc');
612 }
613 $self->{state} = 'before attribute name';
614 # next-input-character is already done
615 redo A;
616 } elsif ($self->{next_input_character} == -1) {
617 !!!parse-error (type => 'unclosed tag');
618 $before_leave->();
619 if ($self->{current_token}->{type} eq 'start tag') {
620 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
621 } elsif ($self->{current_token}->{type} eq 'end tag') {
622 $self->{content_model_flag} = 'PCDATA'; # MUST
623 if ($self->{current_token}->{attributes}) {
624 !!!parse-error (type => 'end tag attribute');
625 }
626 } else {
627 die "$0: $self->{current_token}->{type}: Unknown token type";
628 }
629 $self->{state} = 'data';
630 # reconsume
631
632 !!!emit ($self->{current_token}); # start tag or end tag
633
634 redo A;
635 } else {
636 $self->{current_attribute}->{name} .= chr ($self->{next_input_character});
637 ## Stay in the state
638 !!!next-input-character;
639 redo A;
640 }
641 } elsif ($self->{state} eq 'after attribute name') {
642 if ($self->{next_input_character} == 0x0009 or # HT
643 $self->{next_input_character} == 0x000A or # LF
644 $self->{next_input_character} == 0x000B or # VT
645 $self->{next_input_character} == 0x000C or # FF
646 $self->{next_input_character} == 0x0020) { # SP
647 ## Stay in the state
648 !!!next-input-character;
649 redo A;
650 } elsif ($self->{next_input_character} == 0x003D) { # =
651 $self->{state} = 'before attribute value';
652 !!!next-input-character;
653 redo A;
654 } elsif ($self->{next_input_character} == 0x003E) { # >
655 if ($self->{current_token}->{type} eq 'start tag') {
656 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
657 } elsif ($self->{current_token}->{type} eq 'end tag') {
658 $self->{content_model_flag} = 'PCDATA'; # MUST
659 if ($self->{current_token}->{attributes}) {
660 !!!parse-error (type => 'end tag attribute');
661 }
662 } else {
663 die "$0: $self->{current_token}->{type}: Unknown token type";
664 }
665 $self->{state} = 'data';
666 !!!next-input-character;
667
668 !!!emit ($self->{current_token}); # start tag or end tag
669
670 redo A;
671 } elsif (0x0041 <= $self->{next_input_character} and
672 $self->{next_input_character} <= 0x005A) { # A..Z
673 $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
674 value => ''};
675 $self->{state} = 'attribute name';
676 !!!next-input-character;
677 redo A;
678 } elsif ($self->{next_input_character} == 0x002F) { # /
679 !!!next-input-character;
680 if ($self->{next_input_character} == 0x003E and # >
681 $self->{current_token}->{type} eq 'start tag' and
682 $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
683 # permitted slash
684 #
685 } else {
686 !!!parse-error (type => 'nestc');
687 }
688 $self->{state} = 'before attribute name';
689 # next-input-character is already done
690 redo A;
691 } elsif ($self->{next_input_character} == -1) {
692 !!!parse-error (type => 'unclosed tag');
693 if ($self->{current_token}->{type} eq 'start tag') {
694 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
695 } elsif ($self->{current_token}->{type} eq 'end tag') {
696 $self->{content_model_flag} = 'PCDATA'; # MUST
697 if ($self->{current_token}->{attributes}) {
698 !!!parse-error (type => 'end tag attribute');
699 }
700 } else {
701 die "$0: $self->{current_token}->{type}: Unknown token type";
702 }
703 $self->{state} = 'data';
704 # reconsume
705
706 !!!emit ($self->{current_token}); # start tag or end tag
707
708 redo A;
709 } else {
710 $self->{current_attribute} = {name => chr ($self->{next_input_character}),
711 value => ''};
712 $self->{state} = 'attribute name';
713 !!!next-input-character;
714 redo A;
715 }
716 } elsif ($self->{state} eq 'before attribute value') {
717 if ($self->{next_input_character} == 0x0009 or # HT
718 $self->{next_input_character} == 0x000A or # LF
719 $self->{next_input_character} == 0x000B or # VT
720 $self->{next_input_character} == 0x000C or # FF
721 $self->{next_input_character} == 0x0020) { # SP
722 ## Stay in the state
723 !!!next-input-character;
724 redo A;
725 } elsif ($self->{next_input_character} == 0x0022) { # "
726 $self->{state} = 'attribute value (double-quoted)';
727 !!!next-input-character;
728 redo A;
729 } elsif ($self->{next_input_character} == 0x0026) { # &
730 $self->{state} = 'attribute value (unquoted)';
731 ## reconsume
732 redo A;
733 } elsif ($self->{next_input_character} == 0x0027) { # '
734 $self->{state} = 'attribute value (single-quoted)';
735 !!!next-input-character;
736 redo A;
737 } elsif ($self->{next_input_character} == 0x003E) { # >
738 if ($self->{current_token}->{type} eq 'start tag') {
739 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
740 } elsif ($self->{current_token}->{type} eq 'end tag') {
741 $self->{content_model_flag} = 'PCDATA'; # MUST
742 if ($self->{current_token}->{attributes}) {
743 !!!parse-error (type => 'end tag attribute');
744 }
745 } else {
746 die "$0: $self->{current_token}->{type}: Unknown token type";
747 }
748 $self->{state} = 'data';
749 !!!next-input-character;
750
751 !!!emit ($self->{current_token}); # start tag or end tag
752
753 redo A;
754 } elsif ($self->{next_input_character} == -1) {
755 !!!parse-error (type => 'unclosed tag');
756 if ($self->{current_token}->{type} eq 'start tag') {
757 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
758 } elsif ($self->{current_token}->{type} eq 'end tag') {
759 $self->{content_model_flag} = 'PCDATA'; # MUST
760 if ($self->{current_token}->{attributes}) {
761 !!!parse-error (type => 'end tag attribute');
762 }
763 } else {
764 die "$0: $self->{current_token}->{type}: Unknown token type";
765 }
766 $self->{state} = 'data';
767 ## reconsume
768
769 !!!emit ($self->{current_token}); # start tag or end tag
770
771 redo A;
772 } else {
773 $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
774 $self->{state} = 'attribute value (unquoted)';
775 !!!next-input-character;
776 redo A;
777 }
778 } elsif ($self->{state} eq 'attribute value (double-quoted)') {
779 if ($self->{next_input_character} == 0x0022) { # "
780 $self->{state} = 'before attribute name';
781 !!!next-input-character;
782 redo A;
783 } elsif ($self->{next_input_character} == 0x0026) { # &
784 $self->{last_attribute_value_state} = 'attribute value (double-quoted)';
785 $self->{state} = 'entity in attribute value';
786 !!!next-input-character;
787 redo A;
788 } elsif ($self->{next_input_character} == -1) {
789 !!!parse-error (type => 'unclosed attribute value');
790 if ($self->{current_token}->{type} eq 'start tag') {
791 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
792 } elsif ($self->{current_token}->{type} eq 'end tag') {
793 $self->{content_model_flag} = 'PCDATA'; # MUST
794 if ($self->{current_token}->{attributes}) {
795 !!!parse-error (type => 'end tag attribute');
796 }
797 } else {
798 die "$0: $self->{current_token}->{type}: Unknown token type";
799 }
800 $self->{state} = 'data';
801 ## reconsume
802
803 !!!emit ($self->{current_token}); # start tag or end tag
804
805 redo A;
806 } else {
807 $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
808 ## Stay in the state
809 !!!next-input-character;
810 redo A;
811 }
812 } elsif ($self->{state} eq 'attribute value (single-quoted)') {
813 if ($self->{next_input_character} == 0x0027) { # '
814 $self->{state} = 'before attribute name';
815 !!!next-input-character;
816 redo A;
817 } elsif ($self->{next_input_character} == 0x0026) { # &
818 $self->{last_attribute_value_state} = 'attribute value (single-quoted)';
819 $self->{state} = 'entity in attribute value';
820 !!!next-input-character;
821 redo A;
822 } elsif ($self->{next_input_character} == -1) {
823 !!!parse-error (type => 'unclosed attribute value');
824 if ($self->{current_token}->{type} eq 'start tag') {
825 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
826 } elsif ($self->{current_token}->{type} eq 'end tag') {
827 $self->{content_model_flag} = 'PCDATA'; # MUST
828 if ($self->{current_token}->{attributes}) {
829 !!!parse-error (type => 'end tag attribute');
830 }
831 } else {
832 die "$0: $self->{current_token}->{type}: Unknown token type";
833 }
834 $self->{state} = 'data';
835 ## reconsume
836
837 !!!emit ($self->{current_token}); # start tag or end tag
838
839 redo A;
840 } else {
841 $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
842 ## Stay in the state
843 !!!next-input-character;
844 redo A;
845 }
846 } elsif ($self->{state} eq 'attribute value (unquoted)') {
847 if ($self->{next_input_character} == 0x0009 or # HT
848 $self->{next_input_character} == 0x000A or # LF
849 $self->{next_input_character} == 0x000B or # VT
850 $self->{next_input_character} == 0x000C or # FF
851 $self->{next_input_character} == 0x0020) { # SP
852 $self->{state} = 'before attribute name';
853 !!!next-input-character;
854 redo A;
855 } elsif ($self->{next_input_character} == 0x0026) { # &
856 $self->{last_attribute_value_state} = 'attribute value (unquoted)';
857 $self->{state} = 'entity in attribute value';
858 !!!next-input-character;
859 redo A;
860 } elsif ($self->{next_input_character} == 0x003E) { # >
861 if ($self->{current_token}->{type} eq 'start tag') {
862 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
863 } elsif ($self->{current_token}->{type} eq 'end tag') {
864 $self->{content_model_flag} = 'PCDATA'; # MUST
865 if ($self->{current_token}->{attributes}) {
866 !!!parse-error (type => 'end tag attribute');
867 }
868 } else {
869 die "$0: $self->{current_token}->{type}: Unknown token type";
870 }
871 $self->{state} = 'data';
872 !!!next-input-character;
873
874 !!!emit ($self->{current_token}); # start tag or end tag
875
876 redo A;
877 } elsif ($self->{next_input_character} == -1) {
878 !!!parse-error (type => 'unclosed tag');
879 if ($self->{current_token}->{type} eq 'start tag') {
880 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
881 } elsif ($self->{current_token}->{type} eq 'end tag') {
882 $self->{content_model_flag} = 'PCDATA'; # MUST
883 if ($self->{current_token}->{attributes}) {
884 !!!parse-error (type => 'end tag attribute');
885 }
886 } else {
887 die "$0: $self->{current_token}->{type}: Unknown token type";
888 }
889 $self->{state} = 'data';
890 ## reconsume
891
892 !!!emit ($self->{current_token}); # start tag or end tag
893
894 redo A;
895 } else {
896 $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
897 ## Stay in the state
898 !!!next-input-character;
899 redo A;
900 }
901 } elsif ($self->{state} eq 'entity in attribute value') {
902 my $token = $self->_tokenize_attempt_to_consume_an_entity (1);
903
904 unless (defined $token) {
905 $self->{current_attribute}->{value} .= '&';
906 } else {
907 $self->{current_attribute}->{value} .= $token->{data};
908 ## ISSUE: spec says "append the returned character token to the current attribute's value"
909 }
910
911 $self->{state} = $self->{last_attribute_value_state};
912 # next-input-character is already done
913 redo A;
914 } elsif ($self->{state} eq 'bogus comment') {
915 ## (only happen if PCDATA state)
916
917 my $token = {type => 'comment', data => ''};
918
919 BC: {
920 if ($self->{next_input_character} == 0x003E) { # >
921 $self->{state} = 'data';
922 !!!next-input-character;
923
924 !!!emit ($token);
925
926 redo A;
927 } elsif ($self->{next_input_character} == -1) {
928 $self->{state} = 'data';
929 ## reconsume
930
931 !!!emit ($token);
932
933 redo A;
934 } else {
935 $token->{data} .= chr ($self->{next_input_character});
936 !!!next-input-character;
937 redo BC;
938 }
939 } # BC
940 } elsif ($self->{state} eq 'markup declaration open') {
941 ## (only happen if PCDATA state)
942
943 my @next_char;
944 push @next_char, $self->{next_input_character};
945
946 if ($self->{next_input_character} == 0x002D) { # -
947 !!!next-input-character;
948 push @next_char, $self->{next_input_character};
949 if ($self->{next_input_character} == 0x002D) { # -
950 $self->{current_token} = {type => 'comment', data => ''};
951 $self->{state} = 'comment start';
952 !!!next-input-character;
953 redo A;
954 }
955 } elsif ($self->{next_input_character} == 0x0044 or # D
956 $self->{next_input_character} == 0x0064) { # d
957 !!!next-input-character;
958 push @next_char, $self->{next_input_character};
959 if ($self->{next_input_character} == 0x004F or # O
960 $self->{next_input_character} == 0x006F) { # o
961 !!!next-input-character;
962 push @next_char, $self->{next_input_character};
963 if ($self->{next_input_character} == 0x0043 or # C
964 $self->{next_input_character} == 0x0063) { # c
965 !!!next-input-character;
966 push @next_char, $self->{next_input_character};
967 if ($self->{next_input_character} == 0x0054 or # T
968 $self->{next_input_character} == 0x0074) { # t
969 !!!next-input-character;
970 push @next_char, $self->{next_input_character};
971 if ($self->{next_input_character} == 0x0059 or # Y
972 $self->{next_input_character} == 0x0079) { # y
973 !!!next-input-character;
974 push @next_char, $self->{next_input_character};
975 if ($self->{next_input_character} == 0x0050 or # P
976 $self->{next_input_character} == 0x0070) { # p
977 !!!next-input-character;
978 push @next_char, $self->{next_input_character};
979 if ($self->{next_input_character} == 0x0045 or # E
980 $self->{next_input_character} == 0x0065) { # e
981 ## ISSUE: What a stupid code this is!
982 $self->{state} = 'DOCTYPE';
983 !!!next-input-character;
984 redo A;
985 }
986 }
987 }
988 }
989 }
990 }
991 }
992
993 !!!parse-error (type => 'bogus comment open');
994 $self->{next_input_character} = shift @next_char;
995 !!!back-next-input-character (@next_char);
996 $self->{state} = 'bogus comment';
997 redo A;
998
999 ## ISSUE: typos in spec: chacacters, is is a parse error
1000 ## ISSUE: spec is somewhat unclear on "is the first character that will be in the comment"; what is "that will be in the comment" is what the algorithm defines, isn't it?
1001 } elsif ($self->{state} eq 'comment start') {
1002 if ($self->{next_input_character} == 0x002D) { # -
1003 $self->{state} = 'comment start dash';
1004 !!!next-input-character;
1005 redo A;
1006 } elsif ($self->{next_input_character} == 0x003E) { # >
1007 !!!parse-error (type => 'bogus comment');
1008 $self->{state} = 'data';
1009 !!!next-input-character;
1010
1011 !!!emit ($self->{current_token}); # comment
1012
1013 redo A;
1014 } elsif ($self->{next_input_character} == -1) {
1015 !!!parse-error (type => 'unclosed comment');
1016 $self->{state} = 'data';
1017 ## reconsume
1018
1019 !!!emit ($self->{current_token}); # comment
1020
1021 redo A;
1022 } else {
1023 $self->{current_token}->{data} # comment
1024 .= chr ($self->{next_input_character});
1025 $self->{state} = 'comment';
1026 !!!next-input-character;
1027 redo A;
1028 }
1029 } elsif ($self->{state} eq 'comment start dash') {
1030 if ($self->{next_input_character} == 0x002D) { # -
1031 $self->{state} = 'comment end';
1032 !!!next-input-character;
1033 redo A;
1034 } elsif ($self->{next_input_character} == 0x003E) { # >
1035 !!!parse-error (type => 'bogus comment');
1036 $self->{state} = 'data';
1037 !!!next-input-character;
1038
1039 !!!emit ($self->{current_token}); # comment
1040
1041 redo A;
1042 } elsif ($self->{next_input_character} == -1) {
1043 !!!parse-error (type => 'unclosed comment');
1044 $self->{state} = 'data';
1045 ## reconsume
1046
1047 !!!emit ($self->{current_token}); # comment
1048
1049 redo A;
1050 } else {
1051 $self->{current_token}->{data} # comment
1052 .= chr ($self->{next_input_character});
1053 $self->{state} = 'comment';
1054 !!!next-input-character;
1055 redo A;
1056 }
1057 } elsif ($self->{state} eq 'comment') {
1058 if ($self->{next_input_character} == 0x002D) { # -
1059 $self->{state} = 'comment end dash';
1060 !!!next-input-character;
1061 redo A;
1062 } elsif ($self->{next_input_character} == -1) {
1063 !!!parse-error (type => 'unclosed comment');
1064 $self->{state} = 'data';
1065 ## reconsume
1066
1067 !!!emit ($self->{current_token}); # comment
1068
1069 redo A;
1070 } else {
1071 $self->{current_token}->{data} .= chr ($self->{next_input_character}); # comment
1072 ## Stay in the state
1073 !!!next-input-character;
1074 redo A;
1075 }
1076 } elsif ($self->{state} eq 'comment end dash') {
1077 if ($self->{next_input_character} == 0x002D) { # -
1078 $self->{state} = 'comment end';
1079 !!!next-input-character;
1080 redo A;
1081 } elsif ($self->{next_input_character} == -1) {
1082 !!!parse-error (type => 'unclosed comment');
1083 $self->{state} = 'data';
1084 ## reconsume
1085
1086 !!!emit ($self->{current_token}); # comment
1087
1088 redo A;
1089 } else {
1090 $self->{current_token}->{data} .= '-' . chr ($self->{next_input_character}); # comment
1091 $self->{state} = 'comment';
1092 !!!next-input-character;
1093 redo A;
1094 }
1095 } elsif ($self->{state} eq 'comment end') {
1096 if ($self->{next_input_character} == 0x003E) { # >
1097 $self->{state} = 'data';
1098 !!!next-input-character;
1099
1100 !!!emit ($self->{current_token}); # comment
1101
1102 redo A;
1103 } elsif ($self->{next_input_character} == 0x002D) { # -
1104 !!!parse-error (type => 'dash in comment');
1105 $self->{current_token}->{data} .= '-'; # comment
1106 ## Stay in the state
1107 !!!next-input-character;
1108 redo A;
1109 } elsif ($self->{next_input_character} == -1) {
1110 !!!parse-error (type => 'unclosed comment');
1111 $self->{state} = 'data';
1112 ## reconsume
1113
1114 !!!emit ($self->{current_token}); # comment
1115
1116 redo A;
1117 } else {
1118 !!!parse-error (type => 'dash in comment');
1119 $self->{current_token}->{data} .= '--' . chr ($self->{next_input_character}); # comment
1120 $self->{state} = 'comment';
1121 !!!next-input-character;
1122 redo A;
1123 }
1124 } elsif ($self->{state} eq 'DOCTYPE') {
1125 if ($self->{next_input_character} == 0x0009 or # HT
1126 $self->{next_input_character} == 0x000A or # LF
1127 $self->{next_input_character} == 0x000B or # VT
1128 $self->{next_input_character} == 0x000C or # FF
1129 $self->{next_input_character} == 0x0020) { # SP
1130 $self->{state} = 'before DOCTYPE name';
1131 !!!next-input-character;
1132 redo A;
1133 } else {
1134 !!!parse-error (type => 'no space before DOCTYPE name');
1135 $self->{state} = 'before DOCTYPE name';
1136 ## reconsume
1137 redo A;
1138 }
1139 } elsif ($self->{state} eq 'before DOCTYPE name') {
1140 if ($self->{next_input_character} == 0x0009 or # HT
1141 $self->{next_input_character} == 0x000A or # LF
1142 $self->{next_input_character} == 0x000B or # VT
1143 $self->{next_input_character} == 0x000C or # FF
1144 $self->{next_input_character} == 0x0020) { # SP
1145 ## Stay in the state
1146 !!!next-input-character;
1147 redo A;
1148 } elsif ($self->{next_input_character} == 0x003E) { # >
1149 !!!parse-error (type => 'no DOCTYPE name');
1150 $self->{state} = 'data';
1151 !!!next-input-character;
1152
1153 !!!emit ({type => 'DOCTYPE'}); # incorrect
1154
1155 redo A;
1156 } elsif ($self->{next_input_character} == -1) {
1157 !!!parse-error (type => 'no DOCTYPE name');
1158 $self->{state} = 'data';
1159 ## reconsume
1160
1161 !!!emit ({type => 'DOCTYPE'}); # incorrect
1162
1163 redo A;
1164 } else {
1165 $self->{current_token}
1166 = {type => 'DOCTYPE',
1167 name => chr ($self->{next_input_character}),
1168 correct => 1};
1169 ## ISSUE: "Set the token's name name to the" in the spec
1170 $self->{state} = 'DOCTYPE name';
1171 !!!next-input-character;
1172 redo A;
1173 }
1174 } elsif ($self->{state} eq 'DOCTYPE name') {
1175 ## ISSUE: Redundant "First," in the spec.
1176 if ($self->{next_input_character} == 0x0009 or # HT
1177 $self->{next_input_character} == 0x000A or # LF
1178 $self->{next_input_character} == 0x000B or # VT
1179 $self->{next_input_character} == 0x000C or # FF
1180 $self->{next_input_character} == 0x0020) { # SP
1181 $self->{state} = 'after DOCTYPE name';
1182 !!!next-input-character;
1183 redo A;
1184 } elsif ($self->{next_input_character} == 0x003E) { # >
1185 $self->{state} = 'data';
1186 !!!next-input-character;
1187
1188 !!!emit ($self->{current_token}); # DOCTYPE
1189
1190 redo A;
1191 } elsif ($self->{next_input_character} == -1) {
1192 !!!parse-error (type => 'unclosed DOCTYPE');
1193 $self->{state} = 'data';
1194 ## reconsume
1195
1196 delete $self->{current_token}->{correct};
1197 !!!emit ($self->{current_token}); # DOCTYPE
1198
1199 redo A;
1200 } else {
1201 $self->{current_token}->{name}
1202 .= chr ($self->{next_input_character}); # DOCTYPE
1203 ## Stay in the state
1204 !!!next-input-character;
1205 redo A;
1206 }
1207 } elsif ($self->{state} eq 'after DOCTYPE name') {
1208 if ($self->{next_input_character} == 0x0009 or # HT
1209 $self->{next_input_character} == 0x000A or # LF
1210 $self->{next_input_character} == 0x000B or # VT
1211 $self->{next_input_character} == 0x000C or # FF
1212 $self->{next_input_character} == 0x0020) { # SP
1213 ## Stay in the state
1214 !!!next-input-character;
1215 redo A;
1216 } elsif ($self->{next_input_character} == 0x003E) { # >
1217 $self->{state} = 'data';
1218 !!!next-input-character;
1219
1220 !!!emit ($self->{current_token}); # DOCTYPE
1221
1222 redo A;
1223 } elsif ($self->{next_input_character} == -1) {
1224 !!!parse-error (type => 'unclosed DOCTYPE');
1225 $self->{state} = 'data';
1226 ## reconsume
1227
1228 delete $self->{current_token}->{correct};
1229 !!!emit ($self->{current_token}); # DOCTYPE
1230
1231 redo A;
1232 } elsif ($self->{next_input_character} == 0x0050 or # P
1233 $self->{next_input_character} == 0x0070) { # p
1234 !!!next-input-character;
1235 if ($self->{next_input_character} == 0x0055 or # U
1236 $self->{next_input_character} == 0x0075) { # u
1237 !!!next-input-character;
1238 if ($self->{next_input_character} == 0x0042 or # B
1239 $self->{next_input_character} == 0x0062) { # b
1240 !!!next-input-character;
1241 if ($self->{next_input_character} == 0x004C or # L
1242 $self->{next_input_character} == 0x006C) { # l
1243 !!!next-input-character;
1244 if ($self->{next_input_character} == 0x0049 or # I
1245 $self->{next_input_character} == 0x0069) { # i
1246 !!!next-input-character;
1247 if ($self->{next_input_character} == 0x0043 or # C
1248 $self->{next_input_character} == 0x0063) { # c
1249 $self->{state} = 'before DOCTYPE public identifier';
1250 !!!next-input-character;
1251 redo A;
1252 }
1253 }
1254 }
1255 }
1256 }
1257
1258 #
1259 } elsif ($self->{next_input_character} == 0x0053 or # S
1260 $self->{next_input_character} == 0x0073) { # s
1261 !!!next-input-character;
1262 if ($self->{next_input_character} == 0x0059 or # Y
1263 $self->{next_input_character} == 0x0079) { # y
1264 !!!next-input-character;
1265 if ($self->{next_input_character} == 0x0053 or # S
1266 $self->{next_input_character} == 0x0073) { # s
1267 !!!next-input-character;
1268 if ($self->{next_input_character} == 0x0054 or # T
1269 $self->{next_input_character} == 0x0074) { # t
1270 !!!next-input-character;
1271 if ($self->{next_input_character} == 0x0045 or # E
1272 $self->{next_input_character} == 0x0065) { # e
1273 !!!next-input-character;
1274 if ($self->{next_input_character} == 0x004D or # M
1275 $self->{next_input_character} == 0x006D) { # m
1276 $self->{state} = 'before DOCTYPE system identifier';
1277 !!!next-input-character;
1278 redo A;
1279 }
1280 }
1281 }
1282 }
1283 }
1284
1285 #
1286 } else {
1287 !!!next-input-character;
1288 #
1289 }
1290
1291 !!!parse-error (type => 'string after DOCTYPE name');
1292 $self->{state} = 'bogus DOCTYPE';
1293 # next-input-character is already done
1294 redo A;
1295 } elsif ($self->{state} eq 'before DOCTYPE public identifier') {
1296 if ({
1297 0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
1298 #0x000D => 1, # HT, LF, VT, FF, SP, CR
1299 }->{$self->{next_input_character}}) {
1300 ## Stay in the state
1301 !!!next-input-character;
1302 redo A;
1303 } elsif ($self->{next_input_character} eq 0x0022) { # "
1304 $self->{current_token}->{public_identifier} = ''; # DOCTYPE
1305 $self->{state} = 'DOCTYPE public identifier (double-quoted)';
1306 !!!next-input-character;
1307 redo A;
1308 } elsif ($self->{next_input_character} eq 0x0027) { # '
1309 $self->{current_token}->{public_identifier} = ''; # DOCTYPE
1310 $self->{state} = 'DOCTYPE public identifier (single-quoted)';
1311 !!!next-input-character;
1312 redo A;
1313 } elsif ($self->{next_input_character} eq 0x003E) { # >
1314 !!!parse-error (type => 'no PUBLIC literal');
1315
1316 $self->{state} = 'data';
1317 !!!next-input-character;
1318
1319 delete $self->{current_token}->{correct};
1320 !!!emit ($self->{current_token}); # DOCTYPE
1321
1322 redo A;
1323 } elsif ($self->{next_input_character} == -1) {
1324 !!!parse-error (type => 'unclosed DOCTYPE');
1325
1326 $self->{state} = 'data';
1327 ## reconsume
1328
1329 delete $self->{current_token}->{correct};
1330 !!!emit ($self->{current_token}); # DOCTYPE
1331
1332 redo A;
1333 } else {
1334 !!!parse-error (type => 'string after PUBLIC');
1335 $self->{state} = 'bogus DOCTYPE';
1336 !!!next-input-character;
1337 redo A;
1338 }
1339 } elsif ($self->{state} eq 'DOCTYPE public identifier (double-quoted)') {
1340 if ($self->{next_input_character} == 0x0022) { # "
1341 $self->{state} = 'after DOCTYPE public identifier';
1342 !!!next-input-character;
1343 redo A;
1344 } elsif ($self->{next_input_character} == -1) {
1345 !!!parse-error (type => 'unclosed PUBLIC literal');
1346
1347 $self->{state} = 'data';
1348 ## reconsume
1349
1350 delete $self->{current_token}->{correct};
1351 !!!emit ($self->{current_token}); # DOCTYPE
1352
1353 redo A;
1354 } else {
1355 $self->{current_token}->{public_identifier} # DOCTYPE
1356 .= chr $self->{next_input_character};
1357 ## Stay in the state
1358 !!!next-input-character;
1359 redo A;
1360 }
1361 } elsif ($self->{state} eq 'DOCTYPE public identifier (single-quoted)') {
1362 if ($self->{next_input_character} == 0x0027) { # '
1363 $self->{state} = 'after DOCTYPE public identifier';
1364 !!!next-input-character;
1365 redo A;
1366 } elsif ($self->{next_input_character} == -1) {
1367 !!!parse-error (type => 'unclosed PUBLIC literal');
1368
1369 $self->{state} = 'data';
1370 ## reconsume
1371
1372 delete $self->{current_token}->{correct};
1373 !!!emit ($self->{current_token}); # DOCTYPE
1374
1375 redo A;
1376 } else {
1377 $self->{current_token}->{public_identifier} # DOCTYPE
1378 .= chr $self->{next_input_character};
1379 ## Stay in the state
1380 !!!next-input-character;
1381 redo A;
1382 }
1383 } elsif ($self->{state} eq 'after DOCTYPE public identifier') {
1384 if ({
1385 0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
1386 #0x000D => 1, # HT, LF, VT, FF, SP, CR
1387 }->{$self->{next_input_character}}) {
1388 ## Stay in the state
1389 !!!next-input-character;
1390 redo A;
1391 } elsif ($self->{next_input_character} == 0x0022) { # "
1392 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
1393 $self->{state} = 'DOCTYPE system identifier (double-quoted)';
1394 !!!next-input-character;
1395 redo A;
1396 } elsif ($self->{next_input_character} == 0x0027) { # '
1397 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
1398 $self->{state} = 'DOCTYPE system identifier (single-quoted)';
1399 !!!next-input-character;
1400 redo A;
1401 } elsif ($self->{next_input_character} == 0x003E) { # >
1402 $self->{state} = 'data';
1403 !!!next-input-character;
1404
1405 !!!emit ($self->{current_token}); # DOCTYPE
1406
1407 redo A;
1408 } elsif ($self->{next_input_character} == -1) {
1409 !!!parse-error (type => 'unclosed DOCTYPE');
1410
1411 $self->{state} = 'data';
1412 ## reconsume
1413
1414 delete $self->{current_token}->{correct};
1415 !!!emit ($self->{current_token}); # DOCTYPE
1416
1417 redo A;
1418 } else {
1419 !!!parse-error (type => 'string after PUBLIC literal');
1420 $self->{state} = 'bogus DOCTYPE';
1421 !!!next-input-character;
1422 redo A;
1423 }
1424 } elsif ($self->{state} eq 'before DOCTYPE system identifier') {
1425 if ({
1426 0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
1427 #0x000D => 1, # HT, LF, VT, FF, SP, CR
1428 }->{$self->{next_input_character}}) {
1429 ## Stay in the state
1430 !!!next-input-character;
1431 redo A;
1432 } elsif ($self->{next_input_character} == 0x0022) { # "
1433 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
1434 $self->{state} = 'DOCTYPE system identifier (double-quoted)';
1435 !!!next-input-character;
1436 redo A;
1437 } elsif ($self->{next_input_character} == 0x0027) { # '
1438 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
1439 $self->{state} = 'DOCTYPE system identifier (single-quoted)';
1440 !!!next-input-character;
1441 redo A;
1442 } elsif ($self->{next_input_character} == 0x003E) { # >
1443 !!!parse-error (type => 'no SYSTEM literal');
1444 $self->{state} = 'data';
1445 !!!next-input-character;
1446
1447 delete $self->{current_token}->{correct};
1448 !!!emit ($self->{current_token}); # DOCTYPE
1449
1450 redo A;
1451 } elsif ($self->{next_input_character} == -1) {
1452 !!!parse-error (type => 'unclosed DOCTYPE');
1453
1454 $self->{state} = 'data';
1455 ## reconsume
1456
1457 delete $self->{current_token}->{correct};
1458 !!!emit ($self->{current_token}); # DOCTYPE
1459
1460 redo A;
1461 } else {
1462 !!!parse-error (type => 'string after PUBLIC literal');
1463 $self->{state} = 'bogus DOCTYPE';
1464 !!!next-input-character;
1465 redo A;
1466 }
1467 } elsif ($self->{state} eq 'DOCTYPE system identifier (double-quoted)') {
1468 if ($self->{next_input_character} == 0x0022) { # "
1469 $self->{state} = 'after DOCTYPE system identifier';
1470 !!!next-input-character;
1471 redo A;
1472 } elsif ($self->{next_input_character} == -1) {
1473 !!!parse-error (type => 'unclosed SYSTEM literal');
1474
1475 $self->{state} = 'data';
1476 ## reconsume
1477
1478 delete $self->{current_token}->{correct};
1479 !!!emit ($self->{current_token}); # DOCTYPE
1480
1481 redo A;
1482 } else {
1483 $self->{current_token}->{system_identifier} # DOCTYPE
1484 .= chr $self->{next_input_character};
1485 ## Stay in the state
1486 !!!next-input-character;
1487 redo A;
1488 }
1489 } elsif ($self->{state} eq 'DOCTYPE system identifier (single-quoted)') {
1490 if ($self->{next_input_character} == 0x0027) { # '
1491 $self->{state} = 'after DOCTYPE system identifier';
1492 !!!next-input-character;
1493 redo A;
1494 } elsif ($self->{next_input_character} == -1) {
1495 !!!parse-error (type => 'unclosed SYSTEM literal');
1496
1497 $self->{state} = 'data';
1498 ## reconsume
1499
1500 delete $self->{current_token}->{correct};
1501 !!!emit ($self->{current_token}); # DOCTYPE
1502
1503 redo A;
1504 } else {
1505 $self->{current_token}->{system_identifier} # DOCTYPE
1506 .= chr $self->{next_input_character};
1507 ## Stay in the state
1508 !!!next-input-character;
1509 redo A;
1510 }
1511 } elsif ($self->{state} eq 'after DOCTYPE system identifier') {
1512 if ({
1513 0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
1514 #0x000D => 1, # HT, LF, VT, FF, SP, CR
1515 }->{$self->{next_input_character}}) {
1516 ## Stay in the state
1517 !!!next-input-character;
1518 redo A;
1519 } elsif ($self->{next_input_character} == 0x003E) { # >
1520 $self->{state} = 'data';
1521 !!!next-input-character;
1522
1523 !!!emit ($self->{current_token}); # DOCTYPE
1524
1525 redo A;
1526 } elsif ($self->{next_input_character} == -1) {
1527 !!!parse-error (type => 'unclosed DOCTYPE');
1528
1529 $self->{state} = 'data';
1530 ## reconsume
1531
1532 delete $self->{current_token}->{correct};
1533 !!!emit ($self->{current_token}); # DOCTYPE
1534
1535 redo A;
1536 } else {
1537 !!!parse-error (type => 'string after SYSTEM literal');
1538 $self->{state} = 'bogus DOCTYPE';
1539 !!!next-input-character;
1540 redo A;
1541 }
1542 } elsif ($self->{state} eq 'bogus DOCTYPE') {
1543 if ($self->{next_input_character} == 0x003E) { # >
1544 $self->{state} = 'data';
1545 !!!next-input-character;
1546
1547 delete $self->{current_token}->{correct};
1548 !!!emit ($self->{current_token}); # DOCTYPE
1549
1550 redo A;
1551 } elsif ($self->{next_input_character} == -1) {
1552 !!!parse-error (type => 'unclosed DOCTYPE');
1553 $self->{state} = 'data';
1554 ## reconsume
1555
1556 delete $self->{current_token}->{correct};
1557 !!!emit ($self->{current_token}); # DOCTYPE
1558
1559 redo A;
1560 } else {
1561 ## Stay in the state
1562 !!!next-input-character;
1563 redo A;
1564 }
1565 } else {
1566 die "$0: $self->{state}: Unknown state";
1567 }
1568 } # A
1569
1570 die "$0: _get_next_token: unexpected case";
1571 } # _get_next_token
1572
## Attempt to consume an entity (character reference).
##
## The caller has already consumed the "&";
## |$self->{next_input_character}| is the character that follows it.
##
## Arguments:
##   $self    - The tokenizer object.
##   $in_attr - True if the reference occurs in an attribute value.  In
##              that case a named reference that is not terminated by
##              ";" is not expanded.
##
## Returns a |{type => 'character', data => ...}| token, or |undef| if
## no reference was consumed.  When |undef| is returned for a bare
## "&#" or "&#x", the consumed characters are pushed back so that "#"
## is the next input character again.
##
## NOTE: For an alphabetic name that matches no entity, the consumed
## characters are not pushed back; they are returned as the token data,
## prefixed by "&".
sub _tokenize_attempt_to_consume_an_entity ($$) {
  my ($self, $in_attr) = @_;

  if ({
    0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, # HT, LF, VT, FF,
    0x0020 => 1, 0x003C => 1, 0x0026 => 1, -1 => 1, # SP, <, &, EOF # 0x000D # CR
  }->{$self->{next_input_character}}) {
    ## Don't consume
    ## No error
    return undef;
  } elsif ($self->{next_input_character} == 0x0023) { # #
    !!!next-input-character;

    ## Code point of the numeric reference; stays undefined until the
    ## first digit is seen.
    my $code;

    if ($self->{next_input_character} == 0x0078 or # x
        $self->{next_input_character} == 0x0058) { # X
      my $x_char = $self->{next_input_character};
      !!!next-input-character;

      HEXDIGIT: while (1) {
        my $ch = $self->{next_input_character};
        my $digit;
        if (0x0030 <= $ch and $ch <= 0x0039) { # 0..9
          $digit = $ch - 0x0030;
        } elsif (0x0061 <= $ch and $ch <= 0x0066) { # a..f
          $digit = $ch - 0x0061 + 10;
        } elsif (0x0041 <= $ch and $ch <= 0x0046) { # A..F
          $digit = $ch - 0x0041 + 10;
        } else {
          last HEXDIGIT;
        }
        $code = ($code || 0) * 0x10 + $digit;
        !!!next-input-character;
      } # HEXDIGIT

      unless (defined $code) { # no hexadecimal digit
        !!!parse-error (type => 'bare hcro');
        ## Unconsume "#x": "#" becomes the next input character again,
        ## followed by the "x" and by the character that was found
        ## after it.  (That last character must be pushed back too,
        ## otherwise it is lost when the current character is
        ## overwritten below.)
        !!!back-next-input-character ($x_char, $self->{next_input_character});
        $self->{next_input_character} = 0x0023; # #
        return undef;
      }
    } elsif (0x0030 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x0039) { # 0..9
      $code = $self->{next_input_character} - 0x0030;
      !!!next-input-character;

      while (0x0030 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x0039) { # 0..9
        $code *= 10;
        $code += $self->{next_input_character} - 0x0030;

        !!!next-input-character;
      }
    } else {
      !!!parse-error (type => 'bare nero');
      ## Unconsume "#": it becomes the next input character again,
      ## followed by the character that was found after it.
      !!!back-next-input-character ($self->{next_input_character});
      $self->{next_input_character} = 0x0023; # #
      return undef;
    }

    ## Common tail of hexadecimal and decimal references.

    if ($self->{next_input_character} == 0x003B) { # ;
      !!!next-input-character;
    } else {
      !!!parse-error (type => 'no refc');
    }

    if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) {
      !!!parse-error (type => sprintf 'invalid character reference:U+%04X', $code);
      $code = 0xFFFD;
    } elsif ($code > 0x10FFFF) {
      !!!parse-error (type => sprintf 'invalid character reference:U-%08X', $code);
      $code = 0xFFFD;
    } elsif ($code == 0x000D) {
      !!!parse-error (type => 'CR character reference');
      $code = 0x000A;
    } elsif (0x80 <= $code and $code <= 0x9F) {
      ## C1 code points are replaced via the |$c1_entity_char| table.
      !!!parse-error (type => sprintf 'c1 entity:U+%04X', $code);
      $code = $c1_entity_char->{$code};
    }

    return {type => 'character', data => chr $code};
  } elsif ((0x0041 <= $self->{next_input_character} and
            $self->{next_input_character} <= 0x005A) or
           (0x0061 <= $self->{next_input_character} and
            $self->{next_input_character} <= 0x007A)) {
    my $entity_name = chr $self->{next_input_character};
    !!!next-input-character;

    ## $value: replacement text of the longest entity matched so far
    ## plus any characters consumed after it (or simply the characters
    ## consumed so far, if nothing has matched).
    ## $match: 1 = matched with ";", -1 = matched without ";",
    ## 0 = no match.
    my $value = $entity_name;
    my $match = 0;
    require Whatpm::_NamedEntityList;
    our $EntityChar;

    while (length $entity_name < 10 and
           ## NOTE: Some number greater than the maximum length of entity name
           ((0x0041 <= $self->{next_input_character} and # A
             $self->{next_input_character} <= 0x005A) or # Z
            (0x0061 <= $self->{next_input_character} and # a
             $self->{next_input_character} <= 0x007A) or # z
            (0x0030 <= $self->{next_input_character} and # 0
             $self->{next_input_character} <= 0x0039) or # 9
            $self->{next_input_character} == 0x003B)) { # ;
      $entity_name .= chr $self->{next_input_character};
      if (defined $EntityChar->{$entity_name}) {
        if ($self->{next_input_character} == 0x003B) { # ;
          $value = $EntityChar->{$entity_name};
          $match = 1;
          !!!next-input-character;
          last;
        } elsif (not $in_attr) {
          $value = $EntityChar->{$entity_name};
          $match = -1;
        } else {
          ## In an attribute value, a name without ";" is kept as is.
          $value .= chr $self->{next_input_character};
        }
      } else {
        $value .= chr $self->{next_input_character};
      }
      !!!next-input-character;
    }

    if ($match > 0) {
      return {type => 'character', data => $value};
    } elsif ($match < 0) {
      !!!parse-error (type => 'refc');
      return {type => 'character', data => $value};
    } else {
      !!!parse-error (type => 'bare ero');
      ## NOTE: No characters are consumed in the spec.
      return {type => 'character', data => '&'.$value};
    }
  } else {
    ## no characters are consumed
    !!!parse-error (type => 'bare ero');
    return undef;
  }
} # _tokenize_attempt_to_consume_an_entity
1732
## Prepare |$self->{document}| for the tree construction stage: strict
## error checking is switched off and the document is flagged as an
## HTML document.
sub _initialize_tree_constructor ($) {
  my ($self) = @_;
  ## NOTE: $self->{document} MUST be specified before this method is called
  my $doc = $self->{document};
  $doc->strict_error_checking (0);
  ## TODO: Turn mutation events off # MUST
  ## TODO: Turn loose Document option (manakai extension) on
  $doc->manakai_is_html (1); # MUST
} # _initialize_tree_constructor
1741
## Finish the tree construction stage: strict error checking of
## |$self->{document}| is switched back on.
sub _terminate_tree_constructor ($) {
  my ($self) = @_;
  my $doc = $self->{document};
  $doc->strict_error_checking (1);
  ## TODO: Turn mutation events on
} # _terminate_tree_constructor
1747
1748 ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
1749
1750 { # tree construction stage
1751 my $token;
1752
## Entry point of the tree construction stage.  Fetches the first
## token, resets the tree construction state kept in |$self|, and then
## runs the initial, root element, and main phases in that order.
sub _construct_tree ($) {
  my $self = $_[0];

  ## When an interactive UA render the $self->{document} available
  ## to the user, or when it begin accepting user input, are
  ## not defined.

  ## Append a character: collect it and all subsequent consecutive
  ## characters and insert one Text node whose data is concatenation
  ## of all those characters. # MUST

  !!!next-token;

  ## Reset the tree construction state.
  $self->{insertion_mode} = 'before head';
  $self->{form_element} = undef;
  $self->{head_element} = undef;
  $self->{open_elements} = [];
  $self->{inner_html_node} = undef;

  ## Run the phases.
  $self->_tree_construction_initial; # MUST
  $self->_tree_construction_root_element;
  $self->_tree_construction_main;
} # _construct_tree
1776
1777 sub _tree_construction_initial ($) {
1778 my $self = shift;
1779 INITIAL: {
1780 if ($token->{type} eq 'DOCTYPE') {
1781 ## NOTE: Conformance checkers MAY, instead of reporting "not HTML5"
1782 ## error, switch to a conformance checking mode for another
1783 ## language.
1784 my $doctype_name = $token->{name};
1785 $doctype_name = '' unless defined $doctype_name;
1786 $doctype_name =~ tr/a-z/A-Z/;
1787 if (not defined $token->{name} or # <!DOCTYPE>
1788 defined $token->{public_identifier} or
1789 defined $token->{system_identifier}) {
1790 !!!parse-error (type => 'not HTML5');
1791 } elsif ($doctype_name ne 'HTML') {
1792 ## ISSUE: ASCII case-insensitive? (in fact it does not matter)
1793 !!!parse-error (type => 'not HTML5');
1794 }
1795
1796 my $doctype = $self->{document}->create_document_type_definition
1797 ($token->{name}); ## ISSUE: If name is missing (e.g. <!DOCTYPE>)?
1798 $doctype->public_id ($token->{public_identifier})
1799 if defined $token->{public_identifier};
1800 $doctype->system_id ($token->{system_identifier})
1801 if defined $token->{system_identifier};
1802 ## NOTE: Other DocumentType attributes are null or empty lists.
1803 ## ISSUE: internalSubset = null??
1804 $self->{document}->append_child ($doctype);
1805
1806 if (not $token->{correct} or $doctype_name ne 'HTML') {
1807 $self->{document}->manakai_compat_mode ('quirks');
1808 } elsif (defined $token->{public_identifier}) {
1809 my $pubid = $token->{public_identifier};
1810 $pubid =~ tr/a-z/A-z/;
1811 if ({
1812 "+//SILMARIL//DTD HTML PRO V0R11 19970101//EN" => 1,
1813 "-//ADVASOFT LTD//DTD HTML 3.0 ASWEDIT + EXTENSIONS//EN" => 1,
1814 "-//AS//DTD HTML 3.0 ASWEDIT + EXTENSIONS//EN" => 1,
1815 "-//IETF//DTD HTML 2.0 LEVEL 1//EN" => 1,
1816 "-//IETF//DTD HTML 2.0 LEVEL 2//EN" => 1,
1817 "-//IETF//DTD HTML 2.0 STRICT LEVEL 1//EN" => 1,
1818 "-//IETF//DTD HTML 2.0 STRICT LEVEL 2//EN" => 1,
1819 "-//IETF//DTD HTML 2.0 STRICT//EN" => 1,
1820 "-//IETF//DTD HTML 2.0//EN" => 1,
1821 "-//IETF//DTD HTML 2.1E//EN" => 1,
1822 "-//IETF//DTD HTML 3.0//EN" => 1,
1823 "-//IETF//DTD HTML 3.0//EN//" => 1,
1824 "-//IETF//DTD HTML 3.2 FINAL//EN" => 1,
1825 "-//IETF//DTD HTML 3.2//EN" => 1,
1826 "-//IETF//DTD HTML 3//EN" => 1,
1827 "-//IETF//DTD HTML LEVEL 0//EN" => 1,
1828 "-//IETF//DTD HTML LEVEL 0//EN//2.0" => 1,
1829 "-//IETF//DTD HTML LEVEL 1//EN" => 1,
1830 "-//IETF//DTD HTML LEVEL 1//EN//2.0" => 1,
1831 "-//IETF//DTD HTML LEVEL 2//EN" => 1,
1832 "-//IETF//DTD HTML LEVEL 2//EN//2.0" => 1,
1833 "-//IETF//DTD HTML LEVEL 3//EN" => 1,
1834 "-//IETF//DTD HTML LEVEL 3//EN//3.0" => 1,
1835 "-//IETF//DTD HTML STRICT LEVEL 0//EN" => 1,
1836 "-//IETF//DTD HTML STRICT LEVEL 0//EN//2.0" => 1,
1837 "-//IETF//DTD HTML STRICT LEVEL 1//EN" => 1,
1838 "-//IETF//DTD HTML STRICT LEVEL 1//EN//2.0" => 1,
1839 "-//IETF//DTD HTML STRICT LEVEL 2//EN" => 1,
1840 "-//IETF//DTD HTML STRICT LEVEL 2//EN//2.0" => 1,
1841 "-//IETF//DTD HTML STRICT LEVEL 3//EN" => 1,
1842 "-//IETF//DTD HTML STRICT LEVEL 3//EN//3.0" => 1,
1843 "-//IETF//DTD HTML STRICT//EN" => 1,
1844 "-//IETF//DTD HTML STRICT//EN//2.0" => 1,
1845 "-//IETF//DTD HTML STRICT//EN//3.0" => 1,
1846 "-//IETF//DTD HTML//EN" => 1,
1847 "-//IETF//DTD HTML//EN//2.0" => 1,
1848 "-//IETF//DTD HTML//EN//3.0" => 1,
1849 "-//METRIUS//DTD METRIUS PRESENTATIONAL//EN" => 1,
1850 "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML STRICT//EN" => 1,
1851 "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML//EN" => 1,
1852 "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 TABLES//EN" => 1,
1853 "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML STRICT//EN" => 1,
1854 "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML//EN" => 1,
1855 "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 TABLES//EN" => 1,
1856 "-//NETSCAPE COMM. CORP.//DTD HTML//EN" => 1,
1857 "-//NETSCAPE COMM. CORP.//DTD STRICT HTML//EN" => 1,
1858 "-//O'REILLY AND ASSOCIATES//DTD HTML 2.0//EN" => 1,
1859 "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED 1.0//EN" => 1,
1860 "-//SPYGLASS//DTD HTML 2.0 EXTENDED//EN" => 1,
1861 "-//SQ//DTD HTML 2.0 HOTMETAL + EXTENSIONS//EN" => 1,
1862 "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA HTML//EN" => 1,
1863 "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA STRICT HTML//EN" => 1,
1864 "-//W3C//DTD HTML 3 1995-03-24//EN" => 1,
1865 "-//W3C//DTD HTML 3.2 DRAFT//EN" => 1,
1866 "-//W3C//DTD HTML 3.2 FINAL//EN" => 1,
1867 "-//W3C//DTD HTML 3.2//EN" => 1,
1868 "-//W3C//DTD HTML 3.2S DRAFT//EN" => 1,
1869 "-//W3C//DTD HTML 4.0 FRAMESET//EN" => 1,
1870 "-//W3C//DTD HTML 4.0 TRANSITIONAL//EN" => 1,
1871 "-//W3C//DTD HTML EXPERIMETNAL 19960712//EN" => 1,
1872 "-//W3C//DTD HTML EXPERIMENTAL 970421//EN" => 1,
1873 "-//W3C//DTD W3 HTML//EN" => 1,
1874 "-//W3O//DTD W3 HTML 3.0//EN" => 1,
1875 "-//W3O//DTD W3 HTML 3.0//EN//" => 1,
1876 "-//W3O//DTD W3 HTML STRICT 3.0//EN//" => 1,
1877 "-//WEBTECHS//DTD MOZILLA HTML 2.0//EN" => 1,
1878 "-//WEBTECHS//DTD MOZILLA HTML//EN" => 1,
1879 "-/W3C/DTD HTML 4.0 TRANSITIONAL/EN" => 1,
1880 "HTML" => 1,
1881 }->{$pubid}) {
1882 $self->{document}->manakai_compat_mode ('quirks');
1883 } elsif ($pubid eq "-//W3C//DTD HTML 4.01 FRAMESET//EN" or
1884 $pubid eq "-//W3C//DTD HTML 4.01 TRANSITIONAL//EN") {
1885 if (defined $token->{system_identifier}) {
1886 $self->{document}->manakai_compat_mode ('quirks');
1887 } else {
1888 $self->{document}->manakai_compat_mode ('limited quirks');
1889 }
1890 } elsif ($pubid eq "-//W3C//DTD XHTML 1.0 Frameset//EN" or
1891 $pubid eq "-//W3C//DTD XHTML 1.0 Transitional//EN") {
1892 $self->{document}->manakai_compat_mode ('limited quirks');
1893 }
1894 }
1895 if (defined $token->{system_identifier}) {
1896 my $sysid = $token->{system_identifier};
1897 $sysid =~ tr/A-Z/a-z/;
1898 if ($sysid eq "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
1899 $self->{document}->manakai_compat_mode ('quirks');
1900 }
1901 }
1902
1903 ## Go to the root element phase.
1904 !!!next-token;
1905 return;
1906 } elsif ({
1907 'start tag' => 1,
1908 'end tag' => 1,
1909 'end-of-file' => 1,
1910 }->{$token->{type}}) {
1911 !!!parse-error (type => 'no DOCTYPE');
1912 $self->{document}->manakai_compat_mode ('quirks');
1913 ## Go to the root element phase
1914 ## reprocess
1915 return;
1916 } elsif ($token->{type} eq 'character') {
1917 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D
1918 ## Ignore the token
1919
1920 unless (length $token->{data}) {
1921 ## Stay in the phase
1922 !!!next-token;
1923 redo INITIAL;
1924 }
1925 }
1926
1927 !!!parse-error (type => 'no DOCTYPE');
1928 $self->{document}->manakai_compat_mode ('quirks');
1929 ## Go to the root element phase
1930 ## reprocess
1931 return;
1932 } elsif ($token->{type} eq 'comment') {
1933 my $comment = $self->{document}->create_comment ($token->{data});
1934 $self->{document}->append_child ($comment);
1935
1936 ## Stay in the phase.
1937 !!!next-token;
1938 redo INITIAL;
1939 } else {
1940 die "$0: $token->{type}: Unknown token";
1941 }
1942 } # INITIAL
1943 } # _tree_construction_initial
1944
## Root element phase of tree construction.
##
## Consumes tokens until one is found that requires the root element:
##   - DOCTYPE tokens are reported as parse errors and dropped;
##   - comment tokens are appended to the document as comment nodes;
##   - leading white space in a character token is stripped, and the
##     token is dropped entirely if nothing else remains.
## Any other start tag, end tag, end-of-file or non-empty character
## token causes an |html| element to be created, appended to the
## document and pushed onto the stack of open elements.  That token is
## left as the current token so that the caller reprocesses it.
## Dies on a token type this phase does not know.
sub _tree_construction_root_element ($) {
  my $self = shift;

  TOKEN: while (1) {
    my $type = $token->{type};

    if ($type eq 'DOCTYPE') {
      !!!parse-error (type => 'in html:#DOCTYPE');
      ## Ignore the token and stay in the phase
      !!!next-token;
      next TOKEN;
    }

    if ($type eq 'comment') {
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);
      ## Stay in the phase
      !!!next-token;
      next TOKEN;
    }

    if ($type eq 'character') {
      ## Strip leading white space (\x0D is deliberately not listed).
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)// and
          not length $token->{data}) {
        ## Nothing but white space: ignore the token, stay in the phase
        !!!next-token;
        next TOKEN;
      }
      last TOKEN;
    }

    ## ISSUE: There is an issue in the spec
    last TOKEN if $type eq 'start tag' or
                  $type eq 'end tag' or
                  $type eq 'end-of-file';

    die "$0: $token->{type}: Unknown token";
  } # TOKEN

  ## Create the root element; the current token is then reprocessed by
  ## the caller in the main phase.
  my $root_element;
  !!!create-element ($root_element, 'html');
  $self->{document}->append_child ($root_element);
  push @{$self->{open_elements}}, [$root_element, 'html'];
  return;
} # _tree_construction_root_element
1991
## Reset the insertion mode appropriately.
##
## Walks the stack of open elements (|$self->{open_elements}|, a list
## of |[node, local name]| pairs) from the current node towards the
## root and sets |$self->{insertion_mode}| from the first element whose
## local name determines a mode.
##
## When the walk reaches the bottom of the stack and the parser has a
## context node (|$self->{inner_html_node}|) that is neither |td| nor
## |th|, the context node is examined instead of the bottom element.
## This substitution is only made at the bottom of the stack, so the
## elements that are actually open take precedence over the context
## node.  (It used to be made on every iteration, which made the stack
## irrelevant whenever a context node was set.)
##
## Returns nothing meaningful; the result is the |insertion_mode| field.
sub _reset_insertion_mode ($) {
  my $self = shift;

  ## Step 1
  my $last;

  ## Step 2
  my $i = -1;
  my $node = $self->{open_elements}->[$i];

  ## Step 3
  S3: {
    if ($self->{open_elements}->[0]->[0] eq $node->[0]) {
      $last = 1;
      if (defined $self->{inner_html_node}) {
        unless ($self->{inner_html_node}->[1] eq 'td' or
                $self->{inner_html_node}->[1] eq 'th') {
          $node = $self->{inner_html_node};
        }
      }
    }

    ## Step 4..13
    my $new_mode = {
      select => 'in select',
      td => 'in cell',
      th => 'in cell',
      tr => 'in row',
      tbody => 'in table body',
      thead => 'in table head',
      tfoot => 'in table foot',
      caption => 'in caption',
      colgroup => 'in column group',
      table => 'in table',
      head => 'in body', # not in head!
      body => 'in body',
      frameset => 'in frameset',
    }->{$node->[1]};
    if (defined $new_mode) {
      $self->{insertion_mode} = $new_mode;
      return;
    }

    ## Step 14
    if ($node->[1] eq 'html') {
      unless (defined $self->{head_element}) {
        $self->{insertion_mode} = 'before head';
      } else {
        $self->{insertion_mode} = 'after head';
      }
      return;
    }

    ## Step 15
    if ($last) {
      $self->{insertion_mode} = 'in body';
      return;
    }

    ## Step 16
    $i--;
    $node = $self->{open_elements}->[$i];

    ## Step 17
    redo S3;
  } # S3
} # _reset_insertion_mode
2053
2054 sub _tree_construction_main ($) {
2055 my $self = shift;
2056
2057 my $phase = 'main';
2058
2059 my $active_formatting_elements = [];
2060
## Reconstruct the active formatting elements.
##
## |$insert| is a code reference that is called with each newly cloned
## node and is expected to insert it into the tree.
##
## Does nothing if the list of active formatting elements is empty, or
## if its last entry is a marker or is already on the stack of open
## elements.  Otherwise rewinds to the entry just after the nearest
## marker / still-open entry (or to the first entry), and from there to
## the end of the list replaces every entry by a shallow clone that is
## inserted and pushed onto the stack of open elements.
my $reconstruct_active_formatting_elements = sub { # MUST
  my $insert = shift;

  ## True if the given node is on the stack of open elements.
  my $is_open = sub {
    my $candidate = shift;
    for my $open (@{$self->{open_elements}}) {
      return 1 if $candidate eq $open->[0];
    }
    return 0;
  };

  ## Step 1
  return unless @$active_formatting_elements;

  ## Step 3
  my $pos = -1; # negative index, counted from the end of the list
  my $entry = $active_formatting_elements->[$pos];

  ## Step 2
  return if $entry->[0] eq '#marker' or $is_open->($entry->[0]);

  ## Step 4: stop rewinding once there is no entry before |$entry|.
  until ($active_formatting_elements->[0]->[0] eq $entry->[0]) {
    ## Step 5
    $pos--;
    $entry = $active_formatting_elements->[$pos];

    ## Step 6: keep rewinding while the entry is neither a marker nor
    ## an open element.
    if ($entry->[0] eq '#marker' or $is_open->($entry->[0])) {
      ## Step 7
      $pos++;
      $entry = $active_formatting_elements->[$pos];
      last;
    }
  }

  while (1) {
    ## Step 8
    my $clone = [$entry->[0]->clone_node (0), $entry->[1]];

    ## Step 9
    $insert->($clone->[0]);
    push @{$self->{open_elements}}, $clone;

    ## Step 10
    $active_formatting_elements->[$pos] = $clone;

    ## Step 11
    last if $clone->[0] eq $active_formatting_elements->[-1]->[0];

    ## Step 7'
    $pos++;
    $entry = $active_formatting_elements->[$pos];
  }
}; # $reconstruct_active_formatting_elements
2131
## Clear the list of active formatting elements up to the last marker:
## the last marker entry and everything after it are removed.  The list
## is left untouched if it contains no marker.
my $clear_up_to_marker = sub {
  my $idx = $#$active_formatting_elements;
  while ($idx >= 0) {
    if ($active_formatting_elements->[$idx]->[0] eq '#marker') {
      splice @$active_formatting_elements, $idx;
      return;
    }
    $idx--;
  }
}; # $clear_up_to_marker
2140
## Generic CDATA / RCDATA element parsing for the current start tag.
##
## |$content_model_flag| is the content model flag to tokenize the
## element content with ('CDATA' or 'RCDATA'); |$insert| is a code
## reference called with the new element to insert it into the tree.
##
## All character tokens that follow are collected into a single text
## node child of the element.  The content model flag is then restored
## to 'PCDATA'.  A token other than the matching end tag at that point
## is reported as a parse error; in either case it is consumed.
my $parse_rcdata = sub ($$) {
  my ($content_model_flag, $insert) = @_;

  ## Step 1
  my $start_tag_name = $token->{tag_name};
  my $el;
  !!!create-element ($el, $start_tag_name, $token->{attributes});

  ## Step 2
  $insert->($el); # /context node/->append_child ($el)

  ## Step 3
  $self->{content_model_flag} = $content_model_flag; # CDATA or RCDATA
  delete $self->{escape}; # MUST

  ## Step 4
  my @chunks;
  !!!next-token;
  while ($token->{type} eq 'character') { # or until stop tokenizing
    push @chunks, $token->{data};
    !!!next-token;
  }
  my $text = join '', @chunks;

  ## Step 5
  if (length $text) {
    my $text_node = $self->{document}->create_text_node ($text);
    $el->append_child ($text_node);
  }

  ## Step 6
  $self->{content_model_flag} = 'PCDATA';

  ## Step 7: the matching end tag is silently ignored; anything else is
  ## an error.
  unless ($token->{type} eq 'end tag' and
          $token->{tag_name} eq $start_tag_name) {
    !!!parse-error (type => 'in '.$content_model_flag.':#'.$token->{type});
  }
  !!!next-token;
}; # $parse_rcdata
2181
## Handles a |script| start tag.
##
## |$insert| is a code reference called with the |script| element to
## insert it into the tree.
##
## The element is created from the current token, the following
## character tokens are read in the 'CDATA' content model and appended
## to it as text, and the content model flag is restored to 'PCDATA'.
## A token other than a |script| end tag at that point is reported as a
## parse error.  The element is inserted only when the parser has no
## |inner_html_node|.  The token following the content is consumed.
my $script_start_tag = sub ($) {
  my ($insert) = @_;

  my $script_el;
  !!!create-element ($script_el, 'script', $token->{attributes});
  ## TODO: mark as "parser-inserted"

  $self->{content_model_flag} = 'CDATA';
  delete $self->{escape}; # MUST

  ## Gather the script text; stops at a non-character token or when the
  ## tokenizer stops tokenising.
  my @chunks;
  !!!next-token;
  while ($token->{type} eq 'character') {
    push @chunks, $token->{data};
    !!!next-token;
  }
  my $text = join '', @chunks;
  if (length $text) {
    $script_el->manakai_append_text ($text);
  }

  $self->{content_model_flag} = 'PCDATA';

  ## The |script| end tag itself is ignored.
  unless ($token->{type} eq 'end tag' and
          $token->{tag_name} eq 'script') {
    !!!parse-error (type => 'in CDATA:#'.$token->{type});
    ## ISSUE: And ignore?
    ## TODO: mark as "already executed"
  }

  unless (defined $self->{inner_html_node}) {
    ## TODO: $old_insertion_point = current insertion point
    ## TODO: insertion point = just before the next input character

    $insert->($script_el);

    ## TODO: insertion point = $old_insertion_point (might be "undefined")

    ## TODO: if there is a script that will execute as soon as the parser resume, then...
  } else {
    ## TODO: mark as "already executed"
  }

  !!!next-token;
}; # $script_start_tag
2227
## Handles an end tag whose name is that of a formatting element
## ("adoption agency" processing).
##
## |$tag_name| is the local name of the end tag.
##
## Each pass of the |FET| block:
##   1. finds the last entry named |$tag_name| in the list of active
##      formatting elements, not looking past the last marker, and
##      checks that it is on the stack of open elements and in scope;
##   2. looks for the "furthest block": the special or scoping element
##      nearest to the formatting element among those above it on the
##      stack;
##   3. if there is none, pops the stack down to and including the
##      formatting element, drops its entry from the active list and
##      returns;
##   4. otherwise reparents the nodes between the formatting element and
##      the furthest block, moves the furthest block's children into a
##      clone of the formatting element, replaces the formatting element
##      by the clone in both lists, and starts over.
## Every terminating path consumes the current token before returning.
my $formatting_end_tag = sub {
  my $tag_name = shift;

  FET: {
    ## Step 1
    my $formatting_element;
    my $formatting_element_i_in_active;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[1] eq $tag_name) {
        $formatting_element = $active_formatting_elements->[$_];
        $formatting_element_i_in_active = $_;
        last AFE;
      } elsif ($active_formatting_elements->[$_]->[0] eq '#marker') {
        last AFE;
      }
    } # AFE
    unless (defined $formatting_element) {
      !!!parse-error (type => 'unmatched end tag:'.$tag_name);
      ## Ignore the token
      !!!next-token;
      return;
    }
    ## has an element in scope
    my $in_scope = 1;
    my $formatting_element_i_in_open;
    INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if ($node->[0] eq $formatting_element->[0]) {
        if ($in_scope) {
          $formatting_element_i_in_open = $_;
          last INSCOPE;
        } else { # in open elements but not in scope
          !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
          ## Ignore the token
          !!!next-token;
          return;
        }
      } elsif ({
                table => 1, caption => 1, td => 1, th => 1,
                button => 1, marquee => 1, object => 1, html => 1,
               }->{$node->[1]}) {
        $in_scope = 0;
      }
    } # INSCOPE
    unless (defined $formatting_element_i_in_open) {
      !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
      ## Remove the formatting element's own entry.  It is not
      ## necessarily the last entry of the list, so |pop| would remove
      ## the wrong one.
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      !!!next-token; ## TODO: ok?
      return;
    }
    if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
      !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
    }

    ## Step 2
    ## The stack is scanned from the current node downwards, so the
    ## candidate kept is the one closest to the formatting element.
    my $furthest_block;
    my $furthest_block_i_in_open;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if (not $formatting_category->{$node->[1]} and
          #not $phrasing_category->{$node->[1]} and
          ($special_category->{$node->[1]} or
           $scoping_category->{$node->[1]})) {
        $furthest_block = $node;
        $furthest_block_i_in_open = $_;
      } elsif ($node->[0] eq $formatting_element->[0]) {
        last OE;
      }
    } # OE

    ## Step 3
    unless (defined $furthest_block) { # MUST
      splice @{$self->{open_elements}}, $formatting_element_i_in_open;
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      !!!next-token;
      return;
    }

    ## Step 4
    my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1];

    ## Step 5
    my $furthest_block_parent = $furthest_block->[0]->parent_node;
    if (defined $furthest_block_parent) {
      $furthest_block_parent->remove_child ($furthest_block->[0]);
    }

    ## Step 6
    ## NOTE(review): when the formatting element is the first entry of
    ## the list the index below is -1, i.e. the last entry -- confirm
    ## that this is the intended bookmark.
    my $bookmark_prev_el
        = $active_formatting_elements->[$formatting_element_i_in_active - 1]
            ->[0];

    ## Step 7
    my $node = $furthest_block;
    my $node_i_in_open = $furthest_block_i_in_open;
    my $last_node = $furthest_block;
    S7: {
      ## Step 1
      $node_i_in_open--;
      $node = $self->{open_elements}->[$node_i_in_open];

      ## Step 2
      ## A node that is not an active formatting element is dropped from
      ## the stack and the next node is examined.
      my $node_i_in_active;
      S7S2: {
        for (reverse 0..$#$active_formatting_elements) {
          if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
            $node_i_in_active = $_;
            last S7S2;
          }
        }
        splice @{$self->{open_elements}}, $node_i_in_open, 1;
        redo S7;
      } # S7S2

      ## Step 3
      last S7 if $node->[0] eq $formatting_element->[0];

      ## Step 4
      if ($last_node->[0] eq $furthest_block->[0]) {
        $bookmark_prev_el = $node->[0];
      }

      ## Step 5
      if ($node->[0]->has_child_nodes ()) {
        my $clone = [$node->[0]->clone_node (0), $node->[1]];
        $active_formatting_elements->[$node_i_in_active] = $clone;
        $self->{open_elements}->[$node_i_in_open] = $clone;
        $node = $clone;
      }

      ## Step 6
      $node->[0]->append_child ($last_node->[0]);

      ## Step 7
      $last_node = $node;

      ## Step 8
      redo S7;
    } # S7

    ## Step 8
    $common_ancestor_node->[0]->append_child ($last_node->[0]);

    ## Step 9
    my $clone = [$formatting_element->[0]->clone_node (0),
                 $formatting_element->[1]];

    ## Step 10
    ## The child list is copied first because appending moves the nodes.
    my @cn = @{$furthest_block->[0]->child_nodes};
    $clone->[0]->append_child ($_) for @cn;

    ## Step 11
    $furthest_block->[0]->append_child ($clone->[0]);

    ## Step 12
    my $i;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
        splice @$active_formatting_elements, $_, 1;
        $i-- and last AFE if defined $i;
      } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
        $i = $_;
      }
    } # AFE
    splice @$active_formatting_elements, $i + 1, 0, $clone;

    ## Step 13
    undef $i;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
        splice @{$self->{open_elements}}, $_, 1;
        $i-- and last OE if defined $i;
      } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
        $i = $_;
      }
    } # OE
    ## Insert the clone just after the furthest block.  The length
    ## argument must be 0 (as in step 12); a length of 1 would overwrite
    ## the element that follows the furthest block on the stack.
    splice @{$self->{open_elements}}, $i + 1, 0, $clone;

    ## Step 14
    redo FET;
  } # FET
}; # $formatting_end_tag
2410
## Default insertion: appends the given node to the current node, i.e.
## the node of the last entry on the stack of open elements.
my $insert_to_current = sub {
  my ($child) = @_;
  my $current = $self->{open_elements}->[-1];
  $current->[0]->append_child ($child);
}; # $insert_to_current
2414
## Insertion with foster parenting.
##
## If the current node is not a |table|, |tbody|, |tfoot|, |thead| or
## |tr| element, the given node is simply appended to the current node.
## Otherwise the last |table| on the stack of open elements is located:
##   - if it has a parent whose |node_type| is 1, the node is inserted
##     into that parent immediately before the table;
##   - otherwise the node is appended to the element preceding the table
##     on the stack.
## If no |table| is on the stack, the node is appended to the first
## element on the stack.
my $insert_to_foster = sub {
  my ($child) = @_;
  my $stack = $self->{open_elements};
  my $current_name = $stack->[-1]->[1];

  unless ($current_name eq 'table' or $current_name eq 'tbody' or
          $current_name eq 'tfoot' or $current_name eq 'thead' or
          $current_name eq 'tr') {
    return $stack->[-1]->[0]->append_child ($child);
  }

  # MUST
  my ($foster_parent_element, $next_sibling);
  my $idx = $#$stack;
  while ($idx >= 0) {
    if ($stack->[$idx]->[1] eq 'table') {
      my $table = $stack->[$idx]->[0];
      my $parent = $table->parent_node;
      if (defined $parent and $parent->node_type == 1) {
        $foster_parent_element = $parent;
        $next_sibling = $table;
      } else {
        $foster_parent_element = $stack->[$idx - 1]->[0];
      }
      last;
    }
    $idx--;
  }
  $foster_parent_element = $stack->[0]->[0]
      unless defined $foster_parent_element;

  ## An undefined |$next_sibling| is passed through unchanged.
  return $foster_parent_element->insert_before ($child, $next_sibling);
}; # $insert_to_foster
2445
2446 my $in_body = sub {
2447 my $insert = shift;
2448 if ($token->{type} eq 'start tag') {
2449 if ($token->{tag_name} eq 'script') {
2450 ## NOTE: This is an "as if in head" code clone
2451 $script_start_tag->($insert);
2452 return;
2453 } elsif ($token->{tag_name} eq 'style') {
2454 ## NOTE: This is an "as if in head" code clone
2455 $parse_rcdata->('CDATA', $insert);
2456 return;
2457 } elsif ({
2458 base => 1, link => 1, meta => 1,
2459 }->{$token->{tag_name}}) {
2460 ## NOTE: This is an "as if in head" code clone, only "-t" differs
2461 !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2462 pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
2463 !!!next-token;
2464 ## TODO: Extracting |charset| from |meta|.
2465 return;
2466 } elsif ($token->{tag_name} eq 'title') {
2467 !!!parse-error (type => 'in body:title');
2468 ## NOTE: This is an "as if in head" code clone
2469 $parse_rcdata->('RCDATA', $insert);
2470 return;
2471 } elsif ($token->{tag_name} eq 'body') {
2472 !!!parse-error (type => 'in body:body');
2473
2474 if (@{$self->{open_elements}} == 1 or
2475 $self->{open_elements}->[1]->[1] ne 'body') {
2476 ## Ignore the token
2477 } else {
2478 my $body_el = $self->{open_elements}->[1]->[0];
2479 for my $attr_name (keys %{$token->{attributes}}) {
2480 unless ($body_el->has_attribute_ns (undef, $attr_name)) {
2481 $body_el->set_attribute_ns
2482 (undef, [undef, $attr_name],
2483 $token->{attributes}->{$attr_name}->{value});
2484 }
2485 }
2486 }
2487 !!!next-token;
2488 return;
2489 } elsif ({
2490 address => 1, blockquote => 1, center => 1, dir => 1,
2491 div => 1, dl => 1, fieldset => 1, listing => 1,
2492 menu => 1, ol => 1, p => 1, ul => 1,
2493 pre => 1,
2494 }->{$token->{tag_name}}) {
2495 ## has a p element in scope
2496 INSCOPE: for (reverse @{$self->{open_elements}}) {
2497 if ($_->[1] eq 'p') {
2498 !!!back-token;
2499 $token = {type => 'end tag', tag_name => 'p'};
2500 return;
2501 } elsif ({
2502 table => 1, caption => 1, td => 1, th => 1,
2503 button => 1, marquee => 1, object => 1, html => 1,
2504 }->{$_->[1]}) {
2505 last INSCOPE;
2506 }
2507 } # INSCOPE
2508
2509 !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2510 if ($token->{tag_name} eq 'pre') {
2511 !!!next-token;
2512 if ($token->{type} eq 'character') {
2513 $token->{data} =~ s/^\x0A//;
2514 unless (length $token->{data}) {
2515 !!!next-token;
2516 }
2517 }
2518 } else {
2519 !!!next-token;
2520 }
2521 return;
2522 } elsif ($token->{tag_name} eq 'form') {
2523 if (defined $self->{form_element}) {
2524 !!!parse-error (type => 'in form:form');
2525 ## Ignore the token
2526 !!!next-token;
2527 return;
2528 } else {
2529 ## has a p element in scope
2530 INSCOPE: for (reverse @{$self->{open_elements}}) {
2531 if ($_->[1] eq 'p') {
2532 !!!back-token;
2533 $token = {type => 'end tag', tag_name => 'p'};
2534 return;
2535 } elsif ({
2536 table => 1, caption => 1, td => 1, th => 1,
2537 button => 1, marquee => 1, object => 1, html => 1,
2538 }->{$_->[1]}) {
2539 last INSCOPE;
2540 }
2541 } # INSCOPE
2542
2543 !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2544 $self->{form_element} = $self->{open_elements}->[-1]->[0];
2545 !!!next-token;
2546 return;
2547 }
2548 } elsif ($token->{tag_name} eq 'li') {
2549 ## has a p element in scope
2550 INSCOPE: for (reverse @{$self->{open_elements}}) {
2551 if ($_->[1] eq 'p') {
2552 !!!back-token;
2553 $token = {type => 'end tag', tag_name => 'p'};
2554 return;
2555 } elsif ({
2556 table => 1, caption => 1, td => 1, th => 1,
2557 button => 1, marquee => 1, object => 1, html => 1,
2558 }->{$_->[1]}) {
2559 last INSCOPE;
2560 }
2561 } # INSCOPE
2562
2563 ## Step 1
2564 my $i = -1;
2565 my $node = $self->{open_elements}->[$i];
2566 LI: {
2567 ## Step 2
2568 if ($node->[1] eq 'li') {
2569 if ($i != -1) {
2570 !!!parse-error (type => 'end tag missing:'.
2571 $self->{open_elements}->[-1]->[1]);
2572 ## TODO: test
2573 }
2574 splice @{$self->{open_elements}}, $i;
2575 last LI;
2576 }
2577
2578 ## Step 3
2579 if (not $formatting_category->{$node->[1]} and
2580 #not $phrasing_category->{$node->[1]} and
2581 ($special_category->{$node->[1]} or
2582 $scoping_category->{$node->[1]}) and
2583 $node->[1] ne 'address' and $node->[1] ne 'div') {
2584 last LI;
2585 }
2586
2587 ## Step 4
2588 $i--;
2589 $node = $self->{open_elements}->[$i];
2590 redo LI;
2591 } # LI
2592
2593 !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2594 !!!next-token;
2595 return;
2596 } elsif ($token->{tag_name} eq 'dd' or $token->{tag_name} eq 'dt') {
2597 ## has a p element in scope
2598 INSCOPE: for (reverse @{$self->{open_elements}}) {
2599 if ($_->[1] eq 'p') {
2600 !!!back-token;
2601 $token = {type => 'end tag', tag_name => 'p'};
2602 return;
2603 } elsif ({
2604 table => 1, caption => 1, td => 1, th => 1,
2605 button => 1, marquee => 1, object => 1, html => 1,
2606 }->{$_->[1]}) {
2607 last INSCOPE;
2608 }
2609 } # INSCOPE
2610
2611 ## Step 1
2612 my $i = -1;
2613 my $node = $self->{open_elements}->[$i];
2614 LI: {
2615 ## Step 2
2616 if ($node->[1] eq 'dt' or $node->[1] eq 'dd') {
2617 if ($i != -1) {
2618 !!!parse-error (type => 'end tag missing:'.
2619 $self->{open_elements}->[-1]->[1]);
2620 ## TODO: test
2621 }
2622 splice @{$self->{open_elements}}, $i;
2623 last LI;
2624 }
2625
2626 ## Step 3
2627 if (not $formatting_category->{$node->[1]} and
2628 #not $phrasing_category->{$node->[1]} and
2629 ($special_category->{$node->[1]} or
2630 $scoping_category->{$node->[1]}) and
2631 $node->[1] ne 'address' and $node->[1] ne 'div') {
2632 last LI;
2633 }
2634
2635 ## Step 4
2636 $i--;
2637 $node = $self->{open_elements}->[$i];
2638 redo LI;
2639 } # LI
2640
2641 !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2642 !!!next-token;
2643 return;
2644 } elsif ($token->{tag_name} eq 'plaintext') {
2645 ## has a p element in scope
2646 INSCOPE: for (reverse @{$self->{open_elements}}) {
2647 if ($_->[1] eq 'p') {
2648 !!!back-token;
2649 $token = {type => 'end tag', tag_name => 'p'};
2650 return;
2651 } elsif ({
2652 table => 1, caption => 1, td => 1, th => 1,
2653 button => 1, marquee => 1, object => 1, html => 1,
2654 }->{$_->[1]}) {
2655 last INSCOPE;
2656 }
2657 } # INSCOPE
2658
2659 !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2660
2661 $self->{content_model_flag} = 'PLAINTEXT';
2662
2663 !!!next-token;
2664 return;
2665 } elsif ({
2666 h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2667 }->{$token->{tag_name}}) {
2668 ## has a p element in scope
2669 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2670 my $node = $self->{open_elements}->[$_];
2671 if ($node->[1] eq 'p') {
2672 !!!back-token;
2673 $token = {type => 'end tag', tag_name => 'p'};
2674 return;
2675 } elsif ({
2676 table => 1, caption => 1, td => 1, th => 1,
2677 button => 1, marquee => 1, object => 1, html => 1,
2678 }->{$node->[1]}) {
2679 last INSCOPE;
2680 }
2681 } # INSCOPE
2682
2683 ## NOTE: See <http://html5.org/tools/web-apps-tracker?from=925&to=926>
2684 ## has an element in scope
2685 #my $i;
2686 #INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2687 # my $node = $self->{open_elements}->[$_];
2688 # if ({
2689 # h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2690 # }->{$node->[1]}) {
2691 # $i = $_;
2692 # last INSCOPE;
2693 # } elsif ({
2694 # table => 1, caption => 1, td => 1, th => 1,
2695 # button => 1, marquee => 1, object => 1, html => 1,
2696 # }->{$node->[1]}) {
2697 # last INSCOPE;
2698 # }
2699 #} # INSCOPE
2700 #
2701 #if (defined $i) {
2702 # !!! parse-error (type => 'in hn:hn');
2703 # splice @{$self->{open_elements}}, $i;
2704 #}
2705
2706 !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2707
2708 !!!next-token;
2709 return;
2710 } elsif ($token->{tag_name} eq 'a') {
2711 AFE: for my $i (reverse 0..$#$active_formatting_elements) {
2712 my $node = $active_formatting_elements->[$i];
2713 if ($node->[1] eq 'a') {
2714 !!!parse-error (type => 'in a:a');
2715
2716 !!!back-token;
2717 $token = {type => 'end tag', tag_name => 'a'};
2718 $formatting_end_tag->($token->{tag_name});
2719
2720 AFE2: for (reverse 0..$#$active_formatting_elements) {
2721 if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
2722 splice @$active_formatting_elements, $_, 1;
2723 last AFE2;
2724 }
2725 } # AFE2
2726 OE: for (reverse 0..$#{$self->{open_elements}}) {
2727 if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
2728 splice @{$self->{open_elements}}, $_, 1;
2729 last OE;
2730 }
2731 } # OE
2732 last AFE;
2733 } elsif ($node->[0] eq '#marker') {
2734 last AFE;
2735 }
2736 } # AFE
2737
2738 $reconstruct_active_formatting_elements->($insert_to_current);
2739
2740 !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2741 push @$active_formatting_elements, $self->{open_elements}->[-1];
2742
2743 !!!next-token;
2744 return;
2745 } elsif ({
2746 b => 1, big => 1, em => 1, font => 1, i => 1,
2747 s => 1, small => 1, strile => 1,
2748 strong => 1, tt => 1, u => 1,
2749 }->{$token->{tag_name}}) {
2750 $reconstruct_active_formatting_elements->($insert_to_current);
2751
2752 !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2753 push @$active_formatting_elements, $self->{open_elements}->[-1];
2754
2755 !!!next-token;
2756 return;
2757 } elsif ($token->{tag_name} eq 'nobr') {
2758 $reconstruct_active_formatting_elements->($insert_to_current);
2759
2760 ## has a |nobr| element in scope
2761 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2762 my $node = $self->{open_elements}->[$_];
2763 if ($node->[1] eq 'nobr') {
2764 !!!back-token;
2765 $token = {type => 'end tag', tag_name => 'nobr'};
2766 return;
2767 } elsif ({
2768 table => 1, caption => 1, td => 1, th => 1,
2769 button => 1, marquee => 1, object => 1, html => 1,
2770 }->{$node->[1]}) {
2771 last INSCOPE;
2772 }
2773 } # INSCOPE
2774
2775 !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2776 push @$active_formatting_elements, $self->{open_elements}->[-1];
2777
2778 !!!next-token;
2779 return;
2780 } elsif ($token->{tag_name} eq 'button') {
2781 ## has a button element in scope
2782 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2783 my $node = $self->{open_elements}->[$_];
2784 if ($node->[1] eq 'button') {
2785 !!!parse-error (type => 'in button:button');
2786 !!!back-token;
2787 $token = {type => 'end tag', tag_name => 'button'};
2788 return;
2789 } elsif ({
2790 table => 1, caption => 1, td => 1, th => 1,
2791 button => 1, marquee => 1, object => 1, html => 1,
2792 }->{$node->[1]}) {
2793 last INSCOPE;
2794 }
2795 } # INSCOPE
2796
2797 $reconstruct_active_formatting_elements->($insert_to_current);
2798
2799 !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2800 push @$active_formatting_elements, ['#marker', ''];
2801
2802 !!!next-token;
2803 return;
2804 } elsif ($token->{tag_name} eq 'marquee' or
2805 $token->{tag_name} eq 'object') {
2806 $reconstruct_active_formatting_elements->($insert_to_current);
2807
2808 !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2809 push @$active_formatting_elements, ['#marker', ''];
2810
2811 !!!next-token;
2812 return;
2813 } elsif ($token->{tag_name} eq 'xmp') {
2814 $reconstruct_active_formatting_elements->($insert_to_current);
2815 $parse_rcdata->('CDATA', $insert);
2816 return;
2817 } elsif ($token->{tag_name} eq 'table') {
2818 ## has a p element in scope
2819 INSCOPE: for (reverse @{$self->{open_elements}}) {
2820 if ($_->[1] eq 'p') {
2821 !!!back-token;
2822 $token = {type => 'end tag', tag_name => 'p'};
2823 return;
2824 } elsif ({
2825 table => 1, caption => 1, td => 1, th => 1,
2826 button => 1, marquee => 1, object => 1, html => 1,
2827 }->{$_->[1]}) {
2828 last INSCOPE;
2829 }
2830 } # INSCOPE
2831
2832 !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2833
2834 $self->{insertion_mode} = 'in table';
2835
2836 !!!next-token;
2837 return;
2838 } elsif ({
2839 area => 1, basefont => 1, bgsound => 1, br => 1,
2840 embed => 1, img => 1, param => 1, spacer => 1, wbr => 1,
2841 image => 1,
2842 }->{$token->{tag_name}}) {
2843 if ($token->{tag_name} eq 'image') {
2844 !!!parse-error (type => 'image');
2845 $token->{tag_name} = 'img';
2846 }
2847
2848 $reconstruct_active_formatting_elements->($insert_to_current);
2849
2850 !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2851 pop @{$self->{open_elements}};
2852
2853 !!!next-token;
2854 return;
2855 } elsif ($token->{tag_name} eq 'hr') {
2856 ## has a p element in scope
2857 INSCOPE: for (reverse @{$self->{open_elements}}) {
2858 if ($_->[1] eq 'p') {
2859 !!!back-token;
2860 $token = {type => 'end tag', tag_name => 'p'};
2861 return;
2862 } elsif ({
2863 table => 1, caption => 1, td => 1, th => 1,
2864 button => 1, marquee => 1, object => 1, html => 1,
2865 }->{$_->[1]}) {
2866 last INSCOPE;
2867 }
2868 } # INSCOPE
2869
2870 !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2871 pop @{$self->{open_elements}};
2872
2873 !!!next-token;
2874 return;
2875 } elsif ($token->{tag_name} eq 'input') {
2876 $reconstruct_active_formatting_elements->($insert_to_current);
2877
2878 !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2879 ## TODO: associate with $self->{form_element} if defined
2880 pop @{$self->{open_elements}};
2881
2882 !!!next-token;
2883 return;
2884 } elsif ($token->{tag_name} eq 'isindex') {
2885 !!!parse-error (type => 'isindex');
2886
2887 if (defined $self->{form_element}) {
2888 ## Ignore the token
2889 !!!next-token;
2890 return;
2891 } else {
2892 my $at = $token->{attributes};
2893 my $form_attrs;
2894 $form_attrs->{action} = $at->{action} if $at->{action};
2895 my $prompt_attr = $at->{prompt};
2896 $at->{name} = {name => 'name', value => 'isindex'};
2897 delete $at->{action};
2898 delete $at->{prompt};
2899 my @tokens = (
2900 {type => 'start tag', tag_name => 'form',
2901 attributes => $form_attrs},
2902 {type => 'start tag', tag_name => 'hr'},
2903 {type => 'start tag', tag_name => 'p'},
2904 {type => 'start tag', tag_name => 'label'},
2905 );
2906 if ($prompt_attr) {
2907 push @tokens, {type => 'character', data => $prompt_attr->{value}};
2908 } else {
2909 push @tokens, {type => 'character',
2910 data => 'This is a searchable index. Insert your search keywords here: '}; # SHOULD
2911 ## TODO: make this configurable
2912 }
2913 push @tokens,
2914 {type => 'start tag', tag_name => 'input', attributes => $at},
2915 #{type => 'character', data => ''}, # SHOULD
2916 {type => 'end tag', tag_name => 'label'},
2917 {type => 'end tag', tag_name => 'p'},
2918 {type => 'start tag', tag_name => 'hr'},
2919 {type => 'end tag', tag_name => 'form'};
2920 $token = shift @tokens;
2921 !!!back-token (@tokens);
2922 return;
2923 }
2924 } elsif ($token->{tag_name} eq 'textarea') {
2925 my $tag_name = $token->{tag_name};
2926 my $el;
2927 !!!create-element ($el, $token->{tag_name}, $token->{attributes});
2928
2929 ## TODO: $self->{form_element} if defined
2930 $self->{content_model_flag} = 'RCDATA';
2931 delete $self->{escape}; # MUST
2932
2933 $insert->($el);
2934
2935 my $text = '';
2936 !!!next-token;
2937 if ($token->{type} eq 'character') {
2938 $token->{data} =~ s/^\x0A//;
2939 unless (length $token->{data}) {
2940 !!!next-token;
2941 }
2942 }
2943 while ($token->{type} eq 'character') {
2944 $text .= $token->{data};
2945 !!!next-token;
2946 }
2947 if (length $text) {
2948 $el->manakai_append_text ($text);
2949 }
2950
2951 $self->{content_model_flag} = 'PCDATA';
2952
2953 if ($token->{type} eq 'end tag' and
2954 $token->{tag_name} eq $tag_name) {
2955 ## Ignore the token
2956 } else {
2957 !!!parse-error (type => 'in RCDATA:#'.$token->{type});
2958 }
2959 !!!next-token;
2960 return;
2961 } elsif ({
2962 iframe => 1,
2963 noembed => 1,
2964 noframes => 1,
2965 noscript => 0, ## TODO: 1 if scripting is enabled
2966 }->{$token->{tag_name}}) {
2967 $parse_rcdata->('CDATA', $insert);
2968 return;
2969 } elsif ($token->{tag_name} eq 'select') {
2970 $reconstruct_active_formatting_elements->($insert_to_current);
2971
2972 !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2973
2974 $self->{insertion_mode} = 'in select';
2975 !!!next-token;
2976 return;
2977 } elsif ({
2978 caption => 1, col => 1, colgroup => 1, frame => 1,
2979 frameset => 1, head => 1, option => 1, optgroup => 1,
2980 tbody => 1, td => 1, tfoot => 1, th => 1,
2981 thead => 1, tr => 1,
2982 }->{$token->{tag_name}}) {
2983 !!!parse-error (type => 'in body:'.$token->{tag_name});
2984 ## Ignore the token
2985 !!!next-token;
2986 return;
2987
2988 ## ISSUE: An issue on HTML5 new elements in the spec.
2989 } else {
2990 $reconstruct_active_formatting_elements->($insert_to_current);
2991
2992 !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2993
2994 !!!next-token;
2995 return;
2996 }
2997 } elsif ($token->{type} eq 'end tag') {
2998 if ($token->{tag_name} eq 'body') {
2999 if (@{$self->{open_elements}} > 1 and
3000 $self->{open_elements}->[1]->[1] eq 'body') {
3001 for (@{$self->{open_elements}}) {
3002 unless ({
3003 dd => 1, dt => 1, li => 1, p => 1, td => 1,
3004 th => 1, tr => 1, body => 1, html => 1,
3005 }->{$_->[1]}) {
3006 !!!parse-error (type => 'not closed:'.$_->[1]);
3007 }
3008 }
3009
3010 $self->{insertion_mode} = 'after body';
3011 !!!next-token;
3012 return;
3013 } else {
3014 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3015 ## Ignore the token
3016 !!!next-token;
3017 return;
3018 }
3019 } elsif ($token->{tag_name} eq 'html') {
3020 if (@{$self->{open_elements}} > 1 and $self->{open_elements}->[1]->[1] eq 'body') {
3021 ## ISSUE: There is an issue in the spec.
3022 if ($self->{open_elements}->[-1]->[1] ne 'body') {
3023 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[1]->[1]);
3024 }
3025 $self->{insertion_mode} = 'after body';
3026 ## reprocess
3027 return;
3028 } else {
3029 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3030 ## Ignore the token
3031 !!!next-token;
3032 return;
3033 }
3034 } elsif ({
3035 address => 1, blockquote => 1, center => 1, dir => 1,
3036 div => 1, dl => 1, fieldset => 1, listing => 1,
3037 menu => 1, ol => 1, pre => 1, ul => 1,
3038 p => 1,
3039 dd => 1, dt => 1, li => 1,
3040 button => 1, marquee => 1, object => 1,
3041 }->{$token->{tag_name}}) {
3042 ## has an element in scope
3043 my $i;
3044 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3045 my $node = $self->{open_elements}->[$_];
3046 if ($node->[1] eq $token->{tag_name}) {
3047 ## generate implied end tags
3048 if ({
3049 dd => ($token->{tag_name} ne 'dd'),
3050 dt => ($token->{tag_name} ne 'dt'),
3051 li => ($token->{tag_name} ne 'li'),
3052 p => ($token->{tag_name} ne 'p'),
3053 td => 1, th => 1, tr => 1,
3054 }->{$self->{open_elements}->[-1]->[1]}) {
3055 !!!back-token;
3056 $token = {type => 'end tag',
3057 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3058 return;
3059 }
3060 $i = $_;
3061 last INSCOPE unless $token->{tag_name} eq 'p';
3062 } elsif ({
3063 table => 1, caption => 1, td => 1, th => 1,
3064 button => 1, marquee => 1, object => 1, html => 1,
3065 }->{$node->[1]}) {
3066 last INSCOPE;
3067 }
3068 } # INSCOPE
3069
3070 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
3071 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3072 }
3073
3074 splice @{$self->{open_elements}}, $i if defined $i;
3075 $clear_up_to_marker->()
3076 if {
3077 button => 1, marquee => 1, object => 1,
3078 }->{$token->{tag_name}};
3079 !!!next-token;
3080 return;
3081 } elsif ($token->{tag_name} eq 'form') {
3082 ## has an element in scope
3083 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3084 my $node = $self->{open_elements}->[$_];
3085 if ($node->[1] eq $token->{tag_name}) {
3086 ## generate implied end tags
3087 if ({
3088 dd => 1, dt => 1, li => 1, p => 1,
3089 td => 1, th => 1, tr => 1,
3090 }->{$self->{open_elements}->[-1]->[1]}) {
3091 !!!back-token;
3092 $token = {type => 'end tag',
3093 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3094 return;
3095 }
3096 last INSCOPE;
3097 } elsif ({
3098 table => 1, caption => 1, td => 1, th => 1,
3099 button => 1, marquee => 1, object => 1, html => 1,
3100 }->{$node->[1]}) {
3101 last INSCOPE;
3102 }
3103 } # INSCOPE
3104
3105 if ($self->{open_elements}->[-1]->[1] eq $token->{tag_name}) {
3106 pop @{$self->{open_elements}};
3107 } else {
3108 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3109 }
3110
3111 undef $self->{form_element};
3112 !!!next-token;
3113 return;
3114 } elsif ({
3115 h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
3116 }->{$token->{tag_name}}) {
3117 ## has an element in scope
3118 my $i;
3119 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3120 my $node = $self->{open_elements}->[$_];
3121 if ({
3122 h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
3123 }->{$node->[1]}) {
3124 ## generate implied end tags
3125 if ({
3126 dd => 1, dt => 1, li => 1, p => 1,
3127 td => 1, th => 1, tr => 1,
3128 }->{$self->{open_elements}->[-1]->[1]}) {
3129 !!!back-token;
3130 $token = {type => 'end tag',
3131 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3132 return;
3133 }
3134 $i = $_;
3135 last INSCOPE;
3136 } elsif ({
3137 table => 1, caption => 1, td => 1, th => 1,
3138 button => 1, marquee => 1, object => 1, html => 1,
3139 }->{$node->[1]}) {
3140 last INSCOPE;
3141 }
3142 } # INSCOPE
3143
3144 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
3145 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3146 }
3147
3148 splice @{$self->{open_elements}}, $i if defined $i;
3149 !!!next-token;
3150 return;
3151 } elsif ({
3152 a => 1,
3153 b => 1, big => 1, em => 1, font => 1, i => 1,
3154 nobr => 1, s => 1, small => 1, strile => 1,
3155 strong => 1, tt => 1, u => 1,
3156 }->{$token->{tag_name}}) {
3157 $formatting_end_tag->($token->{tag_name});
3158 ## TODO: <http://html5.org/tools/web-apps-tracker?from=883&to=884>
3159 return;
3160 } elsif ({
3161 caption => 1, col => 1, colgroup => 1, frame => 1,
3162 frameset => 1, head => 1, option => 1, optgroup => 1,
3163 tbody => 1, td => 1, tfoot => 1, th => 1,
3164 thead => 1, tr => 1,
3165 area => 1, basefont => 1, bgsound => 1, br => 1,
3166 embed => 1, hr => 1, iframe => 1, image => 1,
3167 img => 1, input => 1, isindex => 1, noembed => 1,
3168 noframes => 1, param => 1, select => 1, spacer => 1,
3169 table => 1, textarea => 1, wbr => 1,
3170 noscript => 0, ## TODO: if scripting is enabled
3171 }->{$token->{tag_name}}) {
3172 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3173 ## Ignore the token
3174 !!!next-token;
3175 return;
3176
3177 ## ISSUE: Issue on HTML5 new elements in spec
3178
3179 } else {
3180 ## Step 1
3181 my $node_i = -1;
3182 my $node = $self->{open_elements}->[$node_i];
3183
3184 ## Step 2
3185 S2: {
3186 if ($node->[1] eq $token->{tag_name}) {
3187 ## Step 1
3188 ## generate implied end tags
3189 if ({
3190 dd => 1, dt => 1, li => 1, p => 1,
3191 td => 1, th => 1, tr => 1,
3192 }->{$self->{open_elements}->[-1]->[1]}) {
3193 !!!back-token;
3194 $token = {type => 'end tag',
3195 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3196 return;
3197 }
3198
3199 ## Step 2
3200 if ($token->{tag_name} ne $self->{open_elements}->[-1]->[1]) {
3201 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3202 }
3203
3204 ## Step 3
3205 splice @{$self->{open_elements}}, $node_i;
3206
3207 !!!next-token;
3208 last S2;
3209 } else {
3210 ## Step 3
3211 if (not $formatting_category->{$node->[1]} and
3212 #not $phrasing_category->{$node->[1]} and
3213 ($special_category->{$node->[1]} or
3214 $scoping_category->{$node->[1]})) {
3215 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3216 ## Ignore the token
3217 !!!next-token;
3218 last S2;
3219 }
3220 }
3221
3222 ## Step 4
3223 $node_i--;
3224 $node = $self->{open_elements}->[$node_i];
3225
3226 ## Step 5;
3227 redo S2;
3228 } # S2
3229 return;
3230 }
3231 }
3232 }; # $in_body
3233
3234 B: {
3235 if ($phase eq 'main') {
3236 if ($token->{type} eq 'DOCTYPE') {
3237 !!!parse-error (type => 'in html:#DOCTYPE');
3238 ## Ignore the token
3239 ## Stay in the phase
3240 !!!next-token;
3241 redo B;
3242 } elsif ($token->{type} eq 'start tag' and
3243 $token->{tag_name} eq 'html') {
3244 ## TODO: unless it is the first start tag token, parse-error
3245 my $top_el = $self->{open_elements}->[0]->[0];
3246 for my $attr_name (keys %{$token->{attributes}}) {
3247 unless ($top_el->has_attribute_ns (undef, $attr_name)) {
3248 $top_el->set_attribute_ns
3249 (undef, [undef, $attr_name],
3250 $token->{attributes}->{$attr_name}->{value});
3251 }
3252 }
3253 !!!next-token;
3254 redo B;
3255 } elsif ($token->{type} eq 'end-of-file') {
3256 ## Generate implied end tags
3257 if ({
3258 dd => 1, dt => 1, li => 1, p => 1, td => 1, th => 1, tr => 1,
3259 }->{$self->{open_elements}->[-1]->[1]}) {
3260 !!!back-token;
3261 $token = {type => 'end tag', tag_name => $self->{open_elements}->[-1]->[1]};
3262 redo B;
3263 }
3264
3265 if (@{$self->{open_elements}} > 2 or
3266 (@{$self->{open_elements}} == 2 and $self->{open_elements}->[1]->[1] ne 'body')) {
3267 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3268 } elsif (defined $self->{inner_html_node} and
3269 @{$self->{open_elements}} > 1 and
3270 $self->{open_elements}->[1]->[1] ne 'body') {
3271 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3272 }
3273
3274 ## Stop parsing
3275 last B;
3276
3277 ## ISSUE: There is an issue in the spec.
3278 } else {
3279 if ($self->{insertion_mode} eq 'before head') {
3280 if ($token->{type} eq 'character') {
3281 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3282 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3283 unless (length $token->{data}) {
3284 !!!next-token;
3285 redo B;
3286 }
3287 }
3288 ## As if <head>
3289 !!!create-element ($self->{head_element}, 'head');
3290 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3291 push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3292 $self->{insertion_mode} = 'in head';
3293 ## reprocess
3294 redo B;
3295 } elsif ($token->{type} eq 'comment') {
3296 my $comment = $self->{document}->create_comment ($token->{data});
3297 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3298 !!!next-token;
3299 redo B;
3300 } elsif ($token->{type} eq 'start tag') {
3301 my $attr = $token->{tag_name} eq 'head' ? $token->{attributes} : {};
3302 !!!create-element ($self->{head_element}, 'head', $attr);
3303 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3304 push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3305 $self->{insertion_mode} = 'in head';
3306 if ($token->{tag_name} eq 'head') {
3307 !!!next-token;
3308 #} elsif ({
3309 # base => 1, link => 1, meta => 1,
3310 # script => 1, style => 1, title => 1,
3311 # }->{$token->{tag_name}}) {
3312 # ## reprocess
3313 } else {
3314 ## reprocess
3315 }
3316 redo B;
3317 } elsif ($token->{type} eq 'end tag') {
3318 if ({head => 1, body => 1, html => 1}->{$token->{tag_name}}) {
3319 ## As if <head>
3320 !!!create-element ($self->{head_element}, 'head');
3321 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3322 push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3323 $self->{insertion_mode} = 'in head';
3324 ## reprocess
3325 redo B;
3326 } else {
3327 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3328 ## Ignore the token ## ISSUE: An issue in the spec.
3329 !!!next-token;
3330 redo B;
3331 }
3332 } else {
3333 die "$0: $token->{type}: Unknown type";
3334 }
3335 } elsif ($self->{insertion_mode} eq 'in head' or
3336 $self->{insertion_mode} eq 'in head noscript' or
3337 $self->{insertion_mode} eq 'after head') {
3338 if ($token->{type} eq 'character') {
3339 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3340 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3341 unless (length $token->{data}) {
3342 !!!next-token;
3343 redo B;
3344 }
3345 }
3346
3347 #
3348 } elsif ($token->{type} eq 'comment') {
3349 my $comment = $self->{document}->create_comment ($token->{data});
3350 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3351 !!!next-token;
3352 redo B;
3353 } elsif ($token->{type} eq 'start tag') {
3354 if ({base => ($self->{insertion_mode} eq 'in head' or
3355 $self->{insertion_mode} eq 'after head'),
3356 link => 1, meta => 1}->{$token->{tag_name}}) {
3357 ## NOTE: There is an "as if in head" code clone.
3358 if ($self->{insertion_mode} eq 'after head') {
3359 !!!parse-error (type => 'after head:'.$token->{tag_name});
3360 push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3361 }
3362 !!!insert-element ($token->{tag_name}, $token->{attributes});
3363 pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
3364 ## TODO: Extracting |charset| from |meta|.
3365 pop @{$self->{open_elements}}
3366 if $self->{insertion_mode} eq 'after head';
3367 !!!next-token;
3368 redo B;
3369 } elsif ($token->{tag_name} eq 'title' and
3370 $self->{insertion_mode} eq 'in head') {
3371 ## NOTE: There is an "as if in head" code clone.
3372 if ($self->{insertion_mode} eq 'after head') {
3373 !!!parse-error (type => 'after head:'.$token->{tag_name});
3374 push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3375 }
3376 $parse_rcdata->('RCDATA', $insert_to_current);
3377 pop @{$self->{open_elements}}
3378 if $self->{insertion_mode} eq 'after head';
3379 redo B;
3380 } elsif ($token->{tag_name} eq 'style') {
3381 ## NOTE: Or (scripting is enabled and tag_name eq 'noscript' and
3382 ## insertion mode 'in head')
3383 ## NOTE: There is an "as if in head" code clone.
3384 if ($self->{insertion_mode} eq 'after head') {
3385 !!!parse-error (type => 'after head:'.$token->{tag_name});
3386 push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3387 }
3388 $parse_rcdata->('CDATA', $insert_to_current);
3389 pop @{$self->{open_elements}}
3390 if $self->{insertion_mode} eq 'after head';
3391 redo B;
3392 } elsif ($token->{tag_name} eq 'noscript') {
3393 if ($self->{insertion_mode} eq 'in head') {
3394 ## NOTE: and scripting is disabled
3395 !!!insert-element ($token->{tag_name}, $token->{attributes});
3396 $self->{insertion_mode} = 'in head noscript';
3397 !!!next-token;
3398 redo B;
3399 } elsif ($self->{insertion_mode} eq 'in head noscript') {
3400 !!!parse-error (type => 'noscript in noscript');
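3401 ## Ignore the token ## NOTE(review): no |!!!next-token| before |redo B| here, unlike the other "Ignore the token" branches; the same token appears to be reprocessed in the same mode -- TODO confirm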
3402 redo B;
3403 } else {
3404 #
3405 }
3406 } elsif ($token->{tag_name} eq 'head' and
3407 $self->{insertion_mode} ne 'after head') {
3408 !!!parse-error (type => 'in head:head'); # or in head noscript
3409 ## Ignore the token
3410 !!!next-token;
3411 redo B;
3412 } elsif ($self->{insertion_mode} ne 'in head noscript' and
3413 $token->{tag_name} eq 'script') {
3414 if ($self->{insertion_mode} eq 'after head') {
3415 !!!parse-error (type => 'after head:'.$token->{tag_name});
3416 push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3417 }
3418 ## NOTE: There is an "as if in head" code clone.
3419 $script_start_tag->($insert_to_current);
3420 pop @{$self->{open_elements}}
3421 if $self->{insertion_mode} eq 'after head';
3422 redo B;
3423 } elsif ($self->{insertion_mode} eq 'after head' and
3424 $token->{tag_name} eq 'body') {
3425 !!!insert-element ('body', $token->{attributes});
3426 $self->{insertion_mode} = 'in body';
3427 !!!next-token;
3428 redo B;
3429 } elsif ($self->{insertion_mode} eq 'after head' and
3430 $token->{tag_name} eq 'frameset') {
3431 !!!insert-element ('frameset', $token->{attributes});
3432 $self->{insertion_mode} = 'in frameset';
3433 !!!next-token;
3434 redo B;
3435 } else {
3436 #
3437 }
3438 } elsif ($token->{type} eq 'end tag') {
3439 if ($self->{insertion_mode} eq 'in head' and
3440 $token->{tag_name} eq 'head') {
3441 pop @{$self->{open_elements}};
3442 $self->{insertion_mode} = 'after head';
3443 !!!next-token;
3444 redo B;
3445 } elsif ($self->{insertion_mode} eq 'in head noscript' and
3446 $token->{tag_name} eq 'noscript') {
3447 pop @{$self->{open_elements}};
3448 $self->{insertion_mode} = 'in head';
3449 !!!next-token;
3450 redo B;
3451 } elsif ($self->{insertion_mode} eq 'in head' and
3452 ($token->{tag_name} eq 'body' or
3453 $token->{tag_name} eq 'html')) {
3454 #
3455 } elsif ($self->{insertion_mode} ne 'after head') {
3456 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3457 ## Ignore the token
3458 !!!next-token;
3459 redo B;
3460 } else {
3461 #
3462 }
3463 } else {
3464 #
3465 }
3466
3467 ## As if </head> or </noscript> or <body>
3468 if ($self->{insertion_mode} eq 'in head') {
3469 pop @{$self->{open_elements}};
3470 $self->{insertion_mode} = 'after head';
3471 } elsif ($self->{insertion_mode} eq 'in head noscript') {
3472 pop @{$self->{open_elements}};
3473 !!!parse-error (type => 'in noscript:'.(defined $token->{tag_name} ? ($token->{type} eq 'end tag' ? '/' : '') . $token->{tag_name} : '#' . $token->{type}));
3474 $self->{insertion_mode} = 'in head';
3475 } else { # 'after head'
3476 !!!insert-element ('body');
3477 $self->{insertion_mode} = 'in body';
3478 }
3479 ## reprocess
3480 redo B;
3481
3482 ## ISSUE: An issue in the spec.
3483 } elsif ($self->{insertion_mode} eq 'in body') {
3484 if ($token->{type} eq 'character') {
3485 ## NOTE: There is a code clone of "character in body".
3486 $reconstruct_active_formatting_elements->($insert_to_current);
3487
3488 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3489
3490 !!!next-token;
3491 redo B;
3492 } elsif ($token->{type} eq 'comment') {
3493 ## NOTE: There is a code clone of "comment in body".
3494 my $comment = $self->{document}->create_comment ($token->{data});
3495 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3496 !!!next-token;
3497 redo B;
3498 } else {
3499 $in_body->($insert_to_current);
3500 redo B;
3501 }
3502 } elsif ($self->{insertion_mode} eq 'in table') {
3503 if ($token->{type} eq 'character') {
3504 ## NOTE: There are "character in table" code clones.
3505 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3506 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3507
3508 unless (length $token->{data}) {
3509 !!!next-token;
3510 redo B;
3511 }
3512 }
3513
3514 !!!parse-error (type => 'in table:#character');
3515
3516 ## As if in body, but insert into foster parent element
3517 ## ISSUE: Spec says that "whenever a node would be inserted
3518 ## into the current node" while characters might not
3519 ## result in a new Text node.
3520 $reconstruct_active_formatting_elements->($insert_to_foster);
3521
3522 if ({
3523 table => 1, tbody => 1, tfoot => 1,
3524 thead => 1, tr => 1,
3525 }->{$self->{open_elements}->[-1]->[1]}) {
3526 # MUST
3527 my $foster_parent_element;
3528 my $next_sibling;
3529 my $prev_sibling;
3530 OE: for (reverse 0..$#{$self->{open_elements}}) {
3531 if ($self->{open_elements}->[$_]->[1] eq 'table') {
3532 my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
3533 if (defined $parent and $parent->node_type == 1) {
3534 $foster_parent_element = $parent;
3535 $next_sibling = $self->{open_elements}->[$_]->[0];
3536 $prev_sibling = $next_sibling->previous_sibling;
3537 } else {
3538 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
3539 $prev_sibling = $foster_parent_element->last_child;
3540 }
3541 last OE;
3542 }
3543 } # OE
3544 $foster_parent_element = $self->{open_elements}->[0]->[0] and
3545 $prev_sibling = $foster_parent_element->last_child
3546 unless defined $foster_parent_element;
3547 if (defined $prev_sibling and
3548 $prev_sibling->node_type == 3) {
3549 $prev_sibling->manakai_append_text ($token->{data});
3550 } else {
3551 $foster_parent_element->insert_before
3552 ($self->{document}->create_text_node ($token->{data}),
3553 $next_sibling);
3554 }
3555 } else {
3556 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3557 }
3558
3559 !!!next-token;
3560 redo B;
3561 } elsif ($token->{type} eq 'comment') {
3562 my $comment = $self->{document}->create_comment ($token->{data});
3563 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3564 !!!next-token;
3565 redo B;
3566 } elsif ($token->{type} eq 'start tag') {
3567 if ({
3568 caption => 1,
3569 colgroup => 1,
3570 tbody => 1, tfoot => 1, thead => 1,
3571 }->{$token->{tag_name}}) {
3572 ## Clear back to table context
3573 while ($self->{open_elements}->[-1]->[1] ne 'table' and
3574 $self->{open_elements}->[-1]->[1] ne 'html') {
3575 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3576 pop @{$self->{open_elements}};
3577 }
3578
3579 push @$active_formatting_elements, ['#marker', '']
3580 if $token->{tag_name} eq 'caption';
3581
3582 !!!insert-element ($token->{tag_name}, $token->{attributes});
3583 $self->{insertion_mode} = {
3584 caption => 'in caption',
3585 colgroup => 'in column group',
3586 tbody => 'in table body',
3587 tfoot => 'in table body',
3588 thead => 'in table body',
3589 }->{$token->{tag_name}};
3590 !!!next-token;
3591 redo B;
3592 } elsif ({
3593 col => 1,
3594 td => 1, th => 1, tr => 1,
3595 }->{$token->{tag_name}}) {
3596 ## Clear back to table context
3597 while ($self->{open_elements}->[-1]->[1] ne 'table' and
3598 $self->{open_elements}->[-1]->[1] ne 'html') {
3599 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3600 pop @{$self->{open_elements}};
3601 }
3602
3603 !!!insert-element ($token->{tag_name} eq 'col' ? 'colgroup' : 'tbody');
3604 $self->{insertion_mode} = $token->{tag_name} eq 'col'
3605 ? 'in column group' : 'in table body';
3606 ## reprocess
3607 redo B;
3608 } elsif ($token->{tag_name} eq 'table') {
3609 ## NOTE: There are code clones for this "table in table"
3610 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3611
3612 ## As if </table>
3613 ## have a table element in table scope
3614 my $i;
3615 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3616 my $node = $self->{open_elements}->[$_];
3617 if ($node->[1] eq 'table') {
3618 $i = $_;
3619 last INSCOPE;
3620 } elsif ({
3621 table => 1, html => 1,
3622 }->{$node->[1]}) {
3623 last INSCOPE;
3624 }
3625 } # INSCOPE
3626 unless (defined $i) {
3627 !!!parse-error (type => 'unmatched end tag:table');
3628 ## Ignore tokens </table><table>
3629 !!!next-token;
3630 redo B;
3631 }
3632
3633 ## generate implied end tags
3634 if ({
3635 dd => 1, dt => 1, li => 1, p => 1,
3636 td => 1, th => 1, tr => 1,
3637 }->{$self->{open_elements}->[-1]->[1]}) {
3638 !!!back-token; # <table>
3639 $token = {type => 'end tag', tag_name => 'table'};
3640 !!!back-token;
3641 $token = {type => 'end tag',
3642 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3643 redo B;
3644 }
3645
3646 if ($self->{open_elements}->[-1]->[1] ne 'table') {
3647 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3648 }
3649
3650 splice @{$self->{open_elements}}, $i;
3651
3652 $self->_reset_insertion_mode;
3653
3654 ## reprocess
3655 redo B;
3656 } else {
3657 #
3658 }
3659 } elsif ($token->{type} eq 'end tag') {
3660 if ($token->{tag_name} eq 'table') {
3661 ## have a table element in table scope
3662 my $i;
3663 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3664 my $node = $self->{open_elements}->[$_];
3665 if ($node->[1] eq $token->{tag_name}) {
3666 $i = $_;
3667 last INSCOPE;
3668 } elsif ({
3669 table => 1, html => 1,
3670 }->{$node->[1]}) {
3671 last INSCOPE;
3672 }
3673 } # INSCOPE
3674 unless (defined $i) {
3675 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3676 ## Ignore the token
3677 !!!next-token;
3678 redo B;
3679 }
3680
3681 ## generate implied end tags
3682 if ({
3683 dd => 1, dt => 1, li => 1, p => 1,
3684 td => 1, th => 1, tr => 1,
3685 }->{$self->{open_elements}->[-1]->[1]}) {
3686 !!!back-token;
3687 $token = {type => 'end tag',
3688 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3689 redo B;
3690 }
3691
3692 if ($self->{open_elements}->[-1]->[1] ne 'table') {
3693 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3694 }
3695
3696 splice @{$self->{open_elements}}, $i;
3697
3698 $self->_reset_insertion_mode;
3699
3700 !!!next-token;
3701 redo B;
3702 } elsif ({
3703 body => 1, caption => 1, col => 1, colgroup => 1,
3704 html => 1, tbody => 1, td => 1, tfoot => 1, th => 1,
3705 thead => 1, tr => 1,
3706 }->{$token->{tag_name}}) {
3707 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3708 ## Ignore the token
3709 !!!next-token;
3710 redo B;
3711 } else {
3712 #
3713 }
3714 } else {
3715 #
3716 }
3717
3718 !!!parse-error (type => 'in table:'.$token->{tag_name});
3719 $in_body->($insert_to_foster);
3720 redo B;
3721 } elsif ($self->{insertion_mode} eq 'in caption') {
3722 if ($token->{type} eq 'character') {
3723 ## NOTE: This is a code clone of "character in body".
3724 $reconstruct_active_formatting_elements->($insert_to_current);
3725
3726 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3727
3728 !!!next-token;
3729 redo B;
3730 } elsif ($token->{type} eq 'comment') {
3731 ## NOTE: This is a code clone of "comment in body".
3732 my $comment = $self->{document}->create_comment ($token->{data});
3733 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3734 !!!next-token;
3735 redo B;
3736 } elsif ($token->{type} eq 'start tag') {
3737 if ({
3738 caption => 1, col => 1, colgroup => 1, tbody => 1,
3739 td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
3740 }->{$token->{tag_name}}) {
3741 !!!parse-error (type => 'not closed:caption');
3742
3743 ## As if </caption>
3744 ## have a table element in table scope
3745 my $i;
3746 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3747 my $node = $self->{open_elements}->[$_];
3748 if ($node->[1] eq 'caption') {
3749 $i = $_;
3750 last INSCOPE;
3751 } elsif ({
3752 table => 1, html => 1,
3753 }->{$node->[1]}) {
3754 last INSCOPE;
3755 }
3756 } # INSCOPE
3757 unless (defined $i) {
3758 !!!parse-error (type => 'unmatched end tag:caption');
3759 ## Ignore the token
3760 !!!next-token;
3761 redo B;
3762 }
3763
3764 ## generate implied end tags
3765 if ({
3766 dd => 1, dt => 1, li => 1, p => 1,
3767 td => 1, th => 1, tr => 1,
3768 }->{$self->{open_elements}->[-1]->[1]}) {
3769 !!!back-token; # <?>
3770 $token = {type => 'end tag', tag_name => 'caption'};
3771 !!!back-token;
3772 $token = {type => 'end tag',
3773 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3774 redo B;
3775 }
3776
3777 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
3778 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3779 }
3780
3781 splice @{$self->{open_elements}}, $i;
3782
3783 $clear_up_to_marker->();
3784
3785 $self->{insertion_mode} = 'in table';
3786
3787 ## reprocess
3788 redo B;
3789 } else {
3790 #
3791 }
3792 } elsif ($token->{type} eq 'end tag') {
3793 if ($token->{tag_name} eq 'caption') {
3794 ## have a table element in table scope
3795 my $i;
3796 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3797 my $node = $self->{open_elements}->[$_];
3798 if ($node->[1] eq $token->{tag_name}) {
3799 $i = $_;
3800 last INSCOPE;
3801 } elsif ({
3802 table => 1, html => 1,
3803 }->{$node->[1]}) {
3804 last INSCOPE;
3805 }
3806 } # INSCOPE
3807 unless (defined $i) {
3808 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3809 ## Ignore the token
3810 !!!next-token;
3811 redo B;
3812 }
3813
3814 ## generate implied end tags
3815 if ({
3816 dd => 1, dt => 1, li => 1, p => 1,
3817 td => 1, th => 1, tr => 1,
3818 }->{$self->{open_elements}->[-1]->[1]}) {
3819 !!!back-token;
3820 $token = {type => 'end tag',
3821 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3822 redo B;
3823 }
3824
3825 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
3826 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3827 }
3828
3829 splice @{$self->{open_elements}}, $i;
3830
3831 $clear_up_to_marker->();
3832
3833 $self->{insertion_mode} = 'in table';
3834
3835 !!!next-token;
3836 redo B;
3837 } elsif ($token->{tag_name} eq 'table') {
3838 !!!parse-error (type => 'not closed:caption');
3839
3840 ## As if </caption>
3841 ## have a table element in table scope
3842 my $i;
3843 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3844 my $node = $self->{open_elements}->[$_];
3845 if ($node->[1] eq 'caption') {
3846 $i = $_;
3847 last INSCOPE;
3848 } elsif ({
3849 table => 1, html => 1,
3850 }->{$node->[1]}) {
3851 last INSCOPE;
3852 }
3853 } # INSCOPE
3854 unless (defined $i) {
3855 !!!parse-error (type => 'unmatched end tag:caption');
3856 ## Ignore the token
3857 !!!next-token;
3858 redo B;
3859 }
3860
3861 ## generate implied end tags
3862 if ({
3863 dd => 1, dt => 1, li => 1, p => 1,
3864 td => 1, th => 1, tr => 1,
3865 }->{$self->{open_elements}->[-1]->[1]}) {
3866 !!!back-token; # </table>
3867 $token = {type => 'end tag', tag_name => 'caption'};
3868 !!!back-token;
3869 $token = {type => 'end tag',
3870 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3871 redo B;
3872 }
3873
3874 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
3875 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3876 }
3877
3878 splice @{$self->{open_elements}}, $i;
3879
3880 $clear_up_to_marker->();
3881
3882 $self->{insertion_mode} = 'in table';
3883
3884 ## reprocess
3885 redo B;
3886 } elsif ({
3887 body => 1, col => 1, colgroup => 1,
3888 html => 1, tbody => 1, td => 1, tfoot => 1,
3889 th => 1, thead => 1, tr => 1,
3890 }->{$token->{tag_name}}) {
3891 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3892 !!!next-token; ## Ignore the token: consume it, since |redo B| would otherwise reprocess the same end tag in the same insertion mode
3893 redo B;
3894 } else {
3895 #
3896 }
3897 } else {
3898 #
3899 }
3900
3901 $in_body->($insert_to_current);
3902 redo B;
3903 } elsif ($self->{insertion_mode} eq 'in column group') {
3904 if ($token->{type} eq 'character') {
3905 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3906 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3907 unless (length $token->{data}) {
3908 !!!next-token;
3909 redo B;
3910 }
3911 }
3912
3913 #
3914 } elsif ($token->{type} eq 'comment') {
3915 my $comment = $self->{document}->create_comment ($token->{data});
3916 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3917 !!!next-token;
3918 redo B;
3919 } elsif ($token->{type} eq 'start tag') {
3920 if ($token->{tag_name} eq 'col') {
3921 !!!insert-element ($token->{tag_name}, $token->{attributes});
3922 pop @{$self->{open_elements}};
3923 !!!next-token;
3924 redo B;
3925 } else {
3926 #
3927 }
3928 } elsif ($token->{type} eq 'end tag') {
3929 if ($token->{tag_name} eq 'colgroup') {
3930 if ($self->{open_elements}->[-1]->[1] eq 'html') {
3931 !!!parse-error (type => 'unmatched end tag:colgroup');
3932 ## Ignore the token
3933 !!!next-token;
3934 redo B;
3935 } else {
3936 pop @{$self->{open_elements}}; # colgroup
3937 $self->{insertion_mode} = 'in table';
3938 !!!next-token;
3939 redo B;
3940 }
3941 } elsif ($token->{tag_name} eq 'col') {
3942 !!!parse-error (type => 'unmatched end tag:col');
3943 ## Ignore the token
3944 !!!next-token;
3945 redo B;
3946 } else {
3947 #
3948 }
3949 } else {
3950 #
3951 }
3952
3953 ## As if </colgroup>
3954 if ($self->{open_elements}->[-1]->[1] eq 'html') {
3955 !!!parse-error (type => 'unmatched end tag:colgroup');
3956 ## Ignore the token
3957 !!!next-token;
3958 redo B;
3959 } else {
3960 pop @{$self->{open_elements}}; # colgroup
3961 $self->{insertion_mode} = 'in table';
3962 ## reprocess
3963 redo B;
3964 }
3965 } elsif ($self->{insertion_mode} eq 'in table body') {
3966 if ($token->{type} eq 'character') {
3967 ## NOTE: This is a "character in table" code clone.
3968 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3969 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3970
3971 unless (length $token->{data}) {
3972 !!!next-token;
3973 redo B;
3974 }
3975 }
3976
3977 !!!parse-error (type => 'in table:#character');
3978
3979 ## As if in body, but insert into foster parent element
3980 ## ISSUE: Spec says that "whenever a node would be inserted
3981 ## into the current node" while characters might not
3982 ## result in a new Text node.
3983 $reconstruct_active_formatting_elements->($insert_to_foster);
3984
3985 if ({
3986 table => 1, tbody => 1, tfoot => 1,
3987 thead => 1, tr => 1,
3988 }->{$self->{open_elements}->[-1]->[1]}) {
3989 # MUST
3990 my $foster_parent_element;
3991 my $next_sibling;
3992 my $prev_sibling;
3993 OE: for (reverse 0..$#{$self->{open_elements}}) {
3994 if ($self->{open_elements}->[$_]->[1] eq 'table') {
3995 my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
3996 if (defined $parent and $parent->node_type == 1) {
3997 $foster_parent_element = $parent;
3998 $next_sibling = $self->{open_elements}->[$_]->[0];
3999 $prev_sibling = $next_sibling->previous_sibling;
4000 } else {
4001 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
4002 $prev_sibling = $foster_parent_element->last_child;
4003 }
4004 last OE;
4005 }
4006 } # OE
4007 $foster_parent_element = $self->{open_elements}->[0]->[0] and
4008 $prev_sibling = $foster_parent_element->last_child
4009 unless defined $foster_parent_element;
4010 if (defined $prev_sibling and
4011 $prev_sibling->node_type == 3) {
4012 $prev_sibling->manakai_append_text ($token->{data});
4013 } else {
4014 $foster_parent_element->insert_before
4015 ($self->{document}->create_text_node ($token->{data}),
4016 $next_sibling);
4017 }
4018 } else {
4019 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4020 }
4021
4022 !!!next-token;
4023 redo B;
4024 } elsif ($token->{type} eq 'comment') {
4025 ## Copied from 'in table'
4026 my $comment = $self->{document}->create_comment ($token->{data});
4027 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4028 !!!next-token;
4029 redo B;
4030 } elsif ($token->{type} eq 'start tag') {
4031 if ({
4032 tr => 1,
4033 th => 1, td => 1,
4034 }->{$token->{tag_name}}) {
4035 unless ($token->{tag_name} eq 'tr') {
4036 !!!parse-error (type => 'missing start tag:tr');
4037 }
4038
4039 ## Clear back to table body context
4040 while (not {
4041 tbody => 1, tfoot => 1, thead => 1, html => 1,
4042 }->{$self->{open_elements}->[-1]->[1]}) {
4043 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4044 pop @{$self->{open_elements}};
4045 }
4046
4047 $self->{insertion_mode} = 'in row';
4048 if ($token->{tag_name} eq 'tr') {
4049 !!!insert-element ($token->{tag_name}, $token->{attributes});
4050 !!!next-token;
4051 } else {
4052 !!!insert-element ('tr');
4053 ## reprocess
4054 }
4055 redo B;
4056 } elsif ({
4057 caption => 1, col => 1, colgroup => 1,
4058 tbody => 1, tfoot => 1, thead => 1,
4059 }->{$token->{tag_name}}) {
4060 ## have an element in table scope
4061 my $i;
4062 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4063 my $node = $self->{open_elements}->[$_];
4064 if ({
4065 tbody => 1, thead => 1, tfoot => 1,
4066 }->{$node->[1]}) {
4067 $i = $_;
4068 last INSCOPE;
4069 } elsif ({
4070 table => 1, html => 1,
4071 }->{$node->[1]}) {
4072 last INSCOPE;
4073 }
4074 } # INSCOPE
4075 unless (defined $i) {
4076 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4077 ## Ignore the token
4078 !!!next-token;
4079 redo B;
4080 }
4081
4082 ## Clear back to table body context
4083 while (not {
4084 tbody => 1, tfoot => 1, thead => 1, html => 1,
4085 }->{$self->{open_elements}->[-1]->[1]}) {
4086 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4087 pop @{$self->{open_elements}};
4088 }
4089
4090 ## As if <{current node}>
4091 ## have an element in table scope
4092 ## true by definition
4093
4094 ## Clear back to table body context
4095 ## nop by definition
4096
4097 pop @{$self->{open_elements}};
4098 $self->{insertion_mode} = 'in table';
4099 ## reprocess
4100 redo B;
4101 } elsif ($token->{tag_name} eq 'table') {
4102 ## NOTE: This is a code clone of "table in table"
4103 !!!parse-error (type => 'not closed:table');
4104
4105 ## As if </table>
4106 ## have a table element in table scope
4107 my $i;
4108 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4109 my $node = $self->{open_elements}->[$_];
4110 if ($node->[1] eq 'table') {
4111 $i = $_;
4112 last INSCOPE;
4113 } elsif ({
4114 table => 1, html => 1,
4115 }->{$node->[1]}) {
4116 last INSCOPE;
4117 }
4118 } # INSCOPE
4119 unless (defined $i) {
4120 !!!parse-error (type => 'unmatched end tag:table');
4121 ## Ignore tokens </table><table>
4122 !!!next-token;
4123 redo B;
4124 }
4125
4126 ## generate implied end tags
4127 if ({
4128 dd => 1, dt => 1, li => 1, p => 1,
4129 td => 1, th => 1, tr => 1,
4130 }->{$self->{open_elements}->[-1]->[1]}) {
4131 !!!back-token; # <table>
4132 $token = {type => 'end tag', tag_name => 'table'};
4133 !!!back-token;
4134 $token = {type => 'end tag',
4135 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4136 redo B;
4137 }
4138
4139 if ($self->{open_elements}->[-1]->[1] ne 'table') {
4140 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4141 }
4142
4143 splice @{$self->{open_elements}}, $i;
4144
4145 $self->_reset_insertion_mode;
4146
4147 ## reprocess
4148 redo B;
4149 } else {
4150 #
4151 }
4152 } elsif ($token->{type} eq 'end tag') {
4153 if ({
4154 tbody => 1, tfoot => 1, thead => 1,
4155 }->{$token->{tag_name}}) {
4156 ## have an element in table scope
4157 my $i;
4158 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4159 my $node = $self->{open_elements}->[$_];
4160 if ($node->[1] eq $token->{tag_name}) {
4161 $i = $_;
4162 last INSCOPE;
4163 } elsif ({
4164 table => 1, html => 1,
4165 }->{$node->[1]}) {
4166 last INSCOPE;
4167 }
4168 } # INSCOPE
4169 unless (defined $i) {
4170 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4171 ## Ignore the token
4172 !!!next-token;
4173 redo B;
4174 }
4175
4176 ## Clear back to table body context
4177 while (not {
4178 tbody => 1, tfoot => 1, thead => 1, html => 1,
4179 }->{$self->{open_elements}->[-1]->[1]}) {
4180 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4181 pop @{$self->{open_elements}};
4182 }
4183
4184 pop @{$self->{open_elements}};
4185 $self->{insertion_mode} = 'in table';
4186 !!!next-token;
4187 redo B;
4188 } elsif ($token->{tag_name} eq 'table') {
4189 ## have an element in table scope
4190 my $i;
4191 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4192 my $node = $self->{open_elements}->[$_];
4193 if ({
4194 tbody => 1, thead => 1, tfoot => 1,
4195 }->{$node->[1]}) {
4196 $i = $_;
4197 last INSCOPE;
4198 } elsif ({
4199 table => 1, html => 1,
4200 }->{$node->[1]}) {
4201 last INSCOPE;
4202 }
4203 } # INSCOPE
4204 unless (defined $i) {
4205 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4206 ## Ignore the token
4207 !!!next-token;
4208 redo B;
4209 }
4210
4211 ## Clear back to table body context
4212 while (not {
4213 tbody => 1, tfoot => 1, thead => 1, html => 1,
4214 }->{$self->{open_elements}->[-1]->[1]}) {
4215 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4216 pop @{$self->{open_elements}};
4217 }
4218
4219 ## As if <{current node}>
4220 ## have an element in table scope
4221 ## true by definition
4222
4223 ## Clear back to table body context
4224 ## nop by definition
4225
4226 pop @{$self->{open_elements}};
4227 $self->{insertion_mode} = 'in table';
4228 ## reprocess
4229 redo B;
4230 } elsif ({
4231 body => 1, caption => 1, col => 1, colgroup => 1,
4232 html => 1, td => 1, th => 1, tr => 1,
4233 }->{$token->{tag_name}}) {
4234 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4235 ## Ignore the token
4236 !!!next-token;
4237 redo B;
4238 } else {
4239 #
4240 }
4241 } else {
4242 #
4243 }
4244
4245 ## As if in table
4246 !!!parse-error (type => 'in table:'.$token->{tag_name});
4247 $in_body->($insert_to_foster);
4248 redo B;
4249 } elsif ($self->{insertion_mode} eq 'in row') {
4250 if ($token->{type} eq 'character') {
4251 ## NOTE: This is a "character in table" code clone.
4252 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4253 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4254
4255 unless (length $token->{data}) {
4256 !!!next-token;
4257 redo B;
4258 }
4259 }
4260
4261 !!!parse-error (type => 'in table:#character');
4262
4263 ## As if in body, but insert into foster parent element
4264 ## ISSUE: Spec says that "whenever a node would be inserted
4265 ## into the current node" while characters might not
4266 ## result in a new Text node.
4267 $reconstruct_active_formatting_elements->($insert_to_foster);
4268
4269 if ({
4270 table => 1, tbody => 1, tfoot => 1,
4271 thead => 1, tr => 1,
4272 }->{$self->{open_elements}->[-1]->[1]}) {
4273 # MUST
4274 my $foster_parent_element;
4275 my $next_sibling;
4276 my $prev_sibling;
4277 OE: for (reverse 0..$#{$self->{open_elements}}) {
4278 if ($self->{open_elements}->[$_]->[1] eq 'table') {
4279 my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
4280 if (defined $parent and $parent->node_type == 1) {
4281 $foster_parent_element = $parent;
4282 $next_sibling = $self->{open_elements}->[$_]->[0];
4283 $prev_sibling = $next_sibling->previous_sibling;
4284 } else {
4285 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
4286 $prev_sibling = $foster_parent_element->last_child;
4287 }
4288 last OE;
4289 }
4290 } # OE
4291 $foster_parent_element = $self->{open_elements}->[0]->[0] and
4292 $prev_sibling = $foster_parent_element->last_child
4293 unless defined $foster_parent_element;
4294 if (defined $prev_sibling and
4295 $prev_sibling->node_type == 3) {
4296 $prev_sibling->manakai_append_text ($token->{data});
4297 } else {
4298 $foster_parent_element->insert_before
4299 ($self->{document}->create_text_node ($token->{data}),
4300 $next_sibling);
4301 }
4302 } else {
4303 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4304 }
4305
4306 !!!next-token;
4307 redo B;
4308 } elsif ($token->{type} eq 'comment') {
4309 ## Copied from 'in table'
4310 my $comment = $self->{document}->create_comment ($token->{data});
4311 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4312 !!!next-token;
4313 redo B;
4314 } elsif ($token->{type} eq 'start tag') {
4315 if ($token->{tag_name} eq 'th' or
4316 $token->{tag_name} eq 'td') {
4317 ## Clear back to table row context
4318 while (not {
4319 tr => 1, html => 1,
4320 }->{$self->{open_elements}->[-1]->[1]}) {
4321 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4322 pop @{$self->{open_elements}};
4323 }
4324
4325 !!!insert-element ($token->{tag_name}, $token->{attributes});
4326 $self->{insertion_mode} = 'in cell';
4327
4328 push @$active_formatting_elements, ['#marker', ''];
4329
4330 !!!next-token;
4331 redo B;
4332 } elsif ({
4333 caption => 1, col => 1, colgroup => 1,
4334 tbody => 1, tfoot => 1, thead => 1, tr => 1,
4335 }->{$token->{tag_name}}) {
4336 ## As if </tr>
4337 ## have an element in table scope
4338 my $i;
4339 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4340 my $node = $self->{open_elements}->[$_];
4341 if ($node->[1] eq 'tr') {
4342 $i = $_;
4343 last INSCOPE;
4344 } elsif ({
4345 table => 1, html => 1,
4346 }->{$node->[1]}) {
4347 last INSCOPE;
4348 }
4349 } # INSCOPE
4350 unless (defined $i) {
4351 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4352 ## Ignore the token (no |tr| element was found in table scope)
4353 !!!next-token;
4354 redo B;
4355 }
4356
4357 ## Clear back to table row context
4358 while (not {
4359 tr => 1, html => 1,
4360 }->{$self->{open_elements}->[-1]->[1]}) {
4361 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4362 pop @{$self->{open_elements}};
4363 }
4364
4365 pop @{$self->{open_elements}}; # tr
4366 $self->{insertion_mode} = 'in table body';
4367 ## reprocess
4368 redo B;
4369 } elsif ($token->{tag_name} eq 'table') {
4370 ## NOTE: This is a code clone of "table in table"
4371 !!!parse-error (type => 'not closed:table');
4372
4373 ## As if </table>
4374 ## have a table element in table scope
4375 my $i;
4376 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4377 my $node = $self->{open_elements}->[$_];
4378 if ($node->[1] eq 'table') {
4379 $i = $_;
4380 last INSCOPE;
4381 } elsif ({
4382 table => 1, html => 1,
4383 }->{$node->[1]}) {
4384 last INSCOPE;
4385 }
4386 } # INSCOPE
4387 unless (defined $i) {
4388 !!!parse-error (type => 'unmatched end tag:table');
4389 ## Ignore tokens </table><table>
4390 !!!next-token;
4391 redo B;
4392 }
4393
4394 ## generate implied end tags
4395 if ({
4396 dd => 1, dt => 1, li => 1, p => 1,
4397 td => 1, th => 1, tr => 1,
4398 }->{$self->{open_elements}->[-1]->[1]}) {
4399 !!!back-token; # <table>
4400 $token = {type => 'end tag', tag_name => 'table'};
4401 !!!back-token;
4402 $token = {type => 'end tag',
4403 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4404 redo B;
4405 }
4406
4407 if ($self->{open_elements}->[-1]->[1] ne 'table') {
4408 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4409 }
4410
4411 splice @{$self->{open_elements}}, $i;
4412
4413 $self->_reset_insertion_mode;
4414
4415 ## reprocess
4416 redo B;
4417 } else {
4418 #
4419 }
4420 } elsif ($token->{type} eq 'end tag') {
4421 if ($token->{tag_name} eq 'tr') {
4422 ## have an element in table scope
4423 my $i;
4424 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4425 my $node = $self->{open_elements}->[$_];
4426 if ($node->[1] eq $token->{tag_name}) {
4427 $i = $_;
4428 last INSCOPE;
4429 } elsif ({
4430 table => 1, html => 1,
4431 }->{$node->[1]}) {
4432 last INSCOPE;
4433 }
4434 } # INSCOPE
4435 unless (defined $i) {
4436 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4437 ## Ignore the token
4438 !!!next-token;
4439 redo B;
4440 }
4441
4442 ## Clear back to table row context
4443 while (not {
4444 tr => 1, html => 1,
4445 }->{$self->{open_elements}->[-1]->[1]}) {
4446 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4447 pop @{$self->{open_elements}};
4448 }
4449
4450 pop @{$self->{open_elements}}; # tr
4451 $self->{insertion_mode} = 'in table body';
4452 !!!next-token;
4453 redo B;
4454 } elsif ($token->{tag_name} eq 'table') {
4455 ## As if </tr>
4456 ## have an element in table scope
4457 my $i;
4458 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4459 my $node = $self->{open_elements}->[$_];
4460 if ($node->[1] eq 'tr') {
4461 $i = $_;
4462 last INSCOPE;
4463 } elsif ({
4464 table => 1, html => 1,
4465 }->{$node->[1]}) {
4466 last INSCOPE;
4467 }
4468 } # INSCOPE
4469 unless (defined $i) {
4470 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4471 ## Ignore the token (no |tr| element was found in table scope)
4472 !!!next-token;
4473 redo B;
4474 }
4475
4476 ## Clear back to table row context
4477 while (not {
4478 tr => 1, html => 1,
4479 }->{$self->{open_elements}->[-1]->[1]}) {
4480 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4481 pop @{$self->{open_elements}};
4482 }
4483
4484 pop @{$self->{open_elements}}; # tr
4485 $self->{insertion_mode} = 'in table body';
4486 ## reprocess
4487 redo B;
4488 } elsif ({
4489 tbody => 1, tfoot => 1, thead => 1,
4490 }->{$token->{tag_name}}) {
4491 ## have an element in table scope
4492 my $i;
4493 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4494 my $node = $self->{open_elements}->[$_];
4495 if ($node->[1] eq $token->{tag_name}) {
4496 $i = $_;
4497 last INSCOPE;
4498 } elsif ({
4499 table => 1, html => 1,
4500 }->{$node->[1]}) {
4501 last INSCOPE;
4502 }
4503 } # INSCOPE
4504 unless (defined $i) {
4505 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4506 ## Ignore the token
4507 !!!next-token;
4508 redo B;
4509 }
4510
4511 ## As if </tr>
4512 ## have an element in table scope (|$i| from the check above is reset and reused)
4513 undef $i;
4514 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4515 my $node = $self->{open_elements}->[$_];
4516 if ($node->[1] eq 'tr') {
4517 $i = $_;
4518 last INSCOPE;
4519 } elsif ({
4520 table => 1, html => 1,
4521 }->{$node->[1]}) {
4522 last INSCOPE;
4523 }
4524 } # INSCOPE
4525 unless (defined $i) {
4526 !!!parse-error (type => 'unmatched end tag:tr');
4527 ## Ignore the token
4528 !!!next-token;
4529 redo B;
4530 }
4531
4532 ## Clear back to table row context
4533 while (not {
4534 tr => 1, html => 1,
4535 }->{$self->{open_elements}->[-1]->[1]}) {
4536 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4537 pop @{$self->{open_elements}};
4538 }
4539
4540 pop @{$self->{open_elements}}; # tr
4541 $self->{insertion_mode} = 'in table body';
4542 ## reprocess
4543 redo B;
4544 } elsif ({
4545 body => 1, caption => 1, col => 1,
4546 colgroup => 1, html => 1, td => 1, th => 1,
4547 }->{$token->{tag_name}}) {
4548 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4549 ## Ignore the token
4550 !!!next-token;
4551 redo B;
4552 } else {
4553 #
4554 }
4555 } else {
4556 #
4557 }
4558
4559 ## As if in table
4560 !!!parse-error (type => 'in table:'.$token->{tag_name});
4561 $in_body->($insert_to_foster);
4562 redo B;
4563 } elsif ($self->{insertion_mode} eq 'in cell') {
4564 if ($token->{type} eq 'character') {
4565 ## NOTE: This is a code clone of "character in body".
4566 $reconstruct_active_formatting_elements->($insert_to_current);
4567
4568 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4569
4570 !!!next-token;
4571 redo B;
4572 } elsif ($token->{type} eq 'comment') {
4573 ## NOTE: This is a code clone of "comment in body".
4574 my $comment = $self->{document}->create_comment ($token->{data});
4575 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4576 !!!next-token;
4577 redo B;
4578 } elsif ($token->{type} eq 'start tag') {
4579 if ({
4580 caption => 1, col => 1, colgroup => 1,
4581 tbody => 1, td => 1, tfoot => 1, th => 1,
4582 thead => 1, tr => 1,
4583 }->{$token->{tag_name}}) {
4584 ## have an element in table scope
4585 my $tn;
4586 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4587 my $node = $self->{open_elements}->[$_];
4588 if ($node->[1] eq 'td' or $node->[1] eq 'th') {
4589 $tn = $node->[1];
4590 last INSCOPE;
4591 } elsif ({
4592 table => 1, html => 1,
4593 }->{$node->[1]}) {
4594 last INSCOPE;
4595 }
4596 } # INSCOPE
4597 unless (defined $tn) {
4598 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4599 ## Ignore the token
4600 !!!next-token;
4601 redo B;
4602 }
4603
4604 ## Close the cell
4605 !!!back-token; # <?>
4606 $token = {type => 'end tag', tag_name => $tn};
4607 redo B;
4608 } else {
4609 #
4610 }
4611 } elsif ($token->{type} eq 'end tag') {
4612 if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
4613 ## have an element in table scope
4614 my $i;
4615 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4616 my $node = $self->{open_elements}->[$_];
4617 if ($node->[1] eq $token->{tag_name}) {
4618 $i = $_;
4619 last INSCOPE;
4620 } elsif ({
4621 table => 1, html => 1,
4622 }->{$node->[1]}) {
4623 last INSCOPE;
4624 }
4625 } # INSCOPE
4626 unless (defined $i) {
4627 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4628 ## Ignore the token
4629 !!!next-token;
4630 redo B;
4631 }
4632
4633 ## generate implied end tags
4634 if ({
4635 dd => 1, dt => 1, li => 1, p => 1,
4636 td => ($token->{tag_name} eq 'th'),
4637 th => ($token->{tag_name} eq 'td'),
4638 tr => 1,
4639 }->{$self->{open_elements}->[-1]->[1]}) {
4640 !!!back-token;
4641 $token = {type => 'end tag',
4642 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4643 redo B;
4644 }
4645
4646 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
4647 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4648 }
4649
4650 splice @{$self->{open_elements}}, $i;
4651
4652 $clear_up_to_marker->();
4653
4654 $self->{insertion_mode} = 'in row';
4655
4656 !!!next-token;
4657 redo B;
4658 } elsif ({
4659 body => 1, caption => 1, col => 1,
4660 colgroup => 1, html => 1,
4661 }->{$token->{tag_name}}) {
4662 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4663 ## Ignore the token
4664 !!!next-token;
4665 redo B;
4666 } elsif ({
4667 table => 1, tbody => 1, tfoot => 1,
4668 thead => 1, tr => 1,
4669 }->{$token->{tag_name}}) {
4670 ## have an element in table scope
4671 my $i;
4672 my $tn;
4673 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4674 my $node = $self->{open_elements}->[$_];
4675 if ($node->[1] eq $token->{tag_name}) {
4676 $i = $_;
4677 last INSCOPE;
4678 } elsif ($node->[1] eq 'td' or $node->[1] eq 'th') {
4679 $tn = $node->[1];
4680 ## NOTE: There is exactly one |td| or |th| element
4681 ## in scope in the stack of open elements by definition.
4682 } elsif ({
4683 table => 1, html => 1,
4684 }->{$node->[1]}) {
4685 last INSCOPE;
4686 }
4687 } # INSCOPE
4688 unless (defined $i) {
4689 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4690 ## Ignore the token
4691 !!!next-token;
4692 redo B;
4693 }
4694
4695 ## Close the cell
4696 !!!back-token; # </?>
4697 $token = {type => 'end tag', tag_name => $tn};
4698 redo B;
4699 } else {
4700 #
4701 }
4702 } else {
4703 #
4704 }
4705
4706 $in_body->($insert_to_current);
4707 redo B;
4708 } elsif ($self->{insertion_mode} eq 'in select') {
4709 if ($token->{type} eq 'character') {
4710 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4711 !!!next-token;
4712 redo B;
4713 } elsif ($token->{type} eq 'comment') {
4714 my $comment = $self->{document}->create_comment ($token->{data});
4715 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4716 !!!next-token;
4717 redo B;
4718 } elsif ($token->{type} eq 'start tag') {
4719 if ($token->{tag_name} eq 'option') {
4720 if ($self->{open_elements}->[-1]->[1] eq 'option') {
4721 ## As if </option>
4722 pop @{$self->{open_elements}};
4723 }
4724
4725 !!!insert-element ($token->{tag_name}, $token->{attributes});
4726 !!!next-token;
4727 redo B;
4728 } elsif ($token->{tag_name} eq 'optgroup') {
4729 if ($self->{open_elements}->[-1]->[1] eq 'option') {
4730 ## As if </option>
4731 pop @{$self->{open_elements}};
4732 }
4733
4734 if ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
4735 ## As if </optgroup>
4736 pop @{$self->{open_elements}};
4737 }
4738
4739 !!!insert-element ($token->{tag_name}, $token->{attributes});
4740 !!!next-token;
4741 redo B;
4742 } elsif ($token->{tag_name} eq 'select') {
4743 !!!parse-error (type => 'not closed:select');
4744 ## As if </select> instead
4745 ## have an element in table scope
4746 my $i;
4747 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4748 my $node = $self->{open_elements}->[$_];
4749 if ($node->[1] eq $token->{tag_name}) {
4750 $i = $_;
4751 last INSCOPE;
4752 } elsif ({
4753 table => 1, html => 1,
4754 }->{$node->[1]}) {
4755 last INSCOPE;
4756 }
4757 } # INSCOPE
4758 unless (defined $i) {
4759 !!!parse-error (type => 'unmatched end tag:select');
4760 ## Ignore the token
4761 !!!next-token;
4762 redo B;
4763 }
4764
4765 splice @{$self->{open_elements}}, $i;
4766
4767 $self->_reset_insertion_mode;
4768
4769 !!!next-token;
4770 redo B;
4771 } else {
4772 #
4773 }
4774 } elsif ($token->{type} eq 'end tag') {
4775 if ($token->{tag_name} eq 'optgroup') {
4776 if ($self->{open_elements}->[-1]->[1] eq 'option' and
4777 $self->{open_elements}->[-2]->[1] eq 'optgroup') {
4778 ## As if </option>
4779 splice @{$self->{open_elements}}, -2;
4780 } elsif ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
4781 pop @{$self->{open_elements}};
4782 } else {
4783 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4784 ## Ignore the token
4785 }
4786 !!!next-token;
4787 redo B;
4788 } elsif ($token->{tag_name} eq 'option') {
4789 if ($self->{open_elements}->[-1]->[1] eq 'option') {
4790 pop @{$self->{open_elements}};
4791 } else {
4792 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4793 ## Ignore the token
4794 }
4795 !!!next-token;
4796 redo B;
4797 } elsif ($token->{tag_name} eq 'select') {
4798 ## have an element in table scope
4799 my $i;
4800 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4801 my $node = $self->{open_elements}->[$_];
4802 if ($node->[1] eq $token->{tag_name}) {
4803 $i = $_;
4804 last INSCOPE;
4805 } elsif ({
4806 table => 1, html => 1,
4807 }->{$node->[1]}) {
4808 last INSCOPE;
4809 }
4810 } # INSCOPE
4811 unless (defined $i) {
4812 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4813 ## Ignore the token
4814 !!!next-token;
4815 redo B;
4816 }
4817
4818 splice @{$self->{open_elements}}, $i;
4819
4820 $self->_reset_insertion_mode;
4821
4822 !!!next-token;
4823 redo B;
4824 } elsif ({
4825 caption => 1, table => 1, tbody => 1,
4826 tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
4827 }->{$token->{tag_name}}) {
4828 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4829
4830 ## have an element in table scope
4831 my $i;
4832 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4833 my $node = $self->{open_elements}->[$_];
4834 if ($node->[1] eq $token->{tag_name}) {
4835 $i = $_;
4836 last INSCOPE;
4837 } elsif ({
4838 table => 1, html => 1,
4839 }->{$node->[1]}) {
4840 last INSCOPE;
4841 }
4842 } # INSCOPE
4843 unless (defined $i) {
4844 ## Ignore the token
4845 !!!next-token;
4846 redo B;
4847 }
4848
4849 ## As if </select>
4850 ## have an element in table scope
4851 undef $i;
4852 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4853 my $node = $self->{open_elements}->[$_];
4854 if ($node->[1] eq 'select') {
4855 $i = $_;
4856 last INSCOPE;
4857 } elsif ({
4858 table => 1, html => 1,
4859 }->{$node->[1]}) {
4860 last INSCOPE;
4861 }
4862 } # INSCOPE
4863 unless (defined $i) {
4864 !!!parse-error (type => 'unmatched end tag:select');
4865 ## Ignore the </select> token
4866 !!!next-token; ## TODO: ok?
4867 redo B;
4868 }
4869
4870 splice @{$self->{open_elements}}, $i;
4871
4872 $self->_reset_insertion_mode;
4873
4874 ## reprocess
4875 redo B;
4876 } else {
4877 #
4878 }
4879 } else {
4880 #
4881 }
4882
4883 !!!parse-error (type => 'in select:'.$token->{tag_name});
4884 ## Ignore the token
4885 !!!next-token;
4886 redo B;
4887 } elsif ($self->{insertion_mode} eq 'after body') {
4888 if ($token->{type} eq 'character') {
4889 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4890 ## As if in body: append the leading white space captured in $1, not the remaining data
4891 $reconstruct_active_formatting_elements->($insert_to_current);
4892
4893 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4894
4895 unless (length $token->{data}) {
4896 !!!next-token;
4897 redo B;
4898 }
4899 }
4900
4901 #
4902 !!!parse-error (type => 'after body:#'.$token->{type});
4903 } elsif ($token->{type} eq 'comment') {
4904 my $comment = $self->{document}->create_comment ($token->{data});
4905 $self->{open_elements}->[0]->[0]->append_child ($comment);
4906 !!!next-token;
4907 redo B;
4908 } elsif ($token->{type} eq 'start tag') {
4909 !!!parse-error (type => 'after body:'.$token->{tag_name});
4910 #
4911 } elsif ($token->{type} eq 'end tag') {
4912 if ($token->{tag_name} eq 'html') {
4913 if (defined $self->{inner_html_node}) {
4914 !!!parse-error (type => 'unmatched end tag:html');
4915 ## Ignore the token
4916 !!!next-token;
4917 redo B;
4918 } else {
4919 $phase = 'trailing end';
4920 !!!next-token;
4921 redo B;
4922 }
4923 } else {
4924 !!!parse-error (type => 'after body:/'.$token->{tag_name});
4925 }
4926 } else {
4927 !!!parse-error (type => 'after body:#'.$token->{type});
4928 }
4929
4930 $self->{insertion_mode} = 'in body';
4931 ## reprocess
4932 redo B;
4933 } elsif ($self->{insertion_mode} eq 'in frameset') {
4934 if ($token->{type} eq 'character') {
4935 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4936 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4937 ## Only the white space prefix ($1) is appended to the current node; any remaining data falls through to the parse error below
4938 unless (length $token->{data}) {
4939 !!!next-token;
4940 redo B;
4941 }
4942 }
4943
4944 #
4945 } elsif ($token->{type} eq 'comment') {
4946 my $comment = $self->{document}->create_comment ($token->{data});
4947 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4948 !!!next-token;
4949 redo B;
4950 } elsif ($token->{type} eq 'start tag') {
4951 if ($token->{tag_name} eq 'frameset') {
4952 !!!insert-element ($token->{tag_name}, $token->{attributes});
4953 !!!next-token;
4954 redo B;
4955 } elsif ($token->{tag_name} eq 'frame') {
4956 !!!insert-element ($token->{tag_name}, $token->{attributes});
4957 pop @{$self->{open_elements}};
4958 !!!next-token;
4959 redo B;
4960 } elsif ($token->{tag_name} eq 'noframes') {
4961 $in_body->($insert_to_current);
4962 redo B;
4963 } else {
4964 #
4965 }
4966 } elsif ($token->{type} eq 'end tag') {
4967 if ($token->{tag_name} eq 'frameset') {
4968 if ($self->{open_elements}->[-1]->[1] eq 'html' and
4969 @{$self->{open_elements}} == 1) {
4970 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4971 ## Ignore the token
4972 !!!next-token;
4973 } else {
4974 pop @{$self->{open_elements}};
4975 !!!next-token;
4976 }
4977
4978 ## if not inner_html and
4979 if ($self->{open_elements}->[-1]->[1] ne 'frameset') {
4980 $self->{insertion_mode} = 'after frameset';
4981 }
4982 redo B;
4983 } else {
4984 #
4985 }
4986 } else {
4987 #
4988 }
4989
4990 if (defined $token->{tag_name}) {
4991 !!!parse-error (type => 'in frameset:'.$token->{tag_name});
4992 } else {
4993 !!!parse-error (type => 'in frameset:#'.$token->{type});
4994 }
4995 ## Ignore the token
4996 !!!next-token;
4997 redo B;
4998 } elsif ($self->{insertion_mode} eq 'after frameset') {
4999 if ($token->{type} eq 'character') {
5000 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5001 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
5002
5003 unless (length $token->{data}) {
5004 !!!next-token;
5005 redo B;
5006 }
5007 }
5008
5009 #
5010 } elsif ($token->{type} eq 'comment') {
5011 my $comment = $self->{document}->create_comment ($token->{data});
5012 $self->{open_elements}->[-1]->[0]->append_child ($comment);
5013 !!!next-token;
5014 redo B;
5015 } elsif ($token->{type} eq 'start tag') {
5016 if ($token->{tag_name} eq 'noframes') {
5017 $in_body->($insert_to_current);
5018 redo B;
5019 } else {
5020 #
5021 }
5022 } elsif ($token->{type} eq 'end tag') {
5023 if ($token->{tag_name} eq 'html') {
5024 $phase = 'trailing end';
5025 !!!next-token;
5026 redo B;
5027 } else {
5028 #
5029 }
5030 } else {
5031 #
5032 }
5033
5034 if (defined $token->{tag_name}) {
5035 !!!parse-error (type => 'after frameset:'.$token->{tag_name});
5036 } else {
5037 !!!parse-error (type => 'after frameset:#'.$token->{type});
5038 }
5039 ## Ignore the token
5040 !!!next-token;
5041 redo B;
5042
5043 ## ISSUE: An issue in spec there
5044 } else {
5045 die "$0: $self->{insertion_mode}: Unknown insertion mode";
5046 }
5047 }
5048 } elsif ($phase eq 'trailing end') {
5049 ## states in the main stage is preserved yet # MUST
5050
5051 if ($token->{type} eq 'DOCTYPE') {
5052 !!!parse-error (type => 'after html:#DOCTYPE');
5053 ## Ignore the token
5054 !!!next-token;
5055 redo B;
5056 } elsif ($token->{type} eq 'comment') {
5057 my $comment = $self->{document}->create_comment ($token->{data});
5058 $self->{document}->append_child ($comment);
5059 !!!next-token;
5060 redo B;
5061 } elsif ($token->{type} eq 'character') {
5062 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5063 my $data = $1;
5064 ## As if in the main phase.
5065 ## NOTE: The insertion mode in the main phase
5066 ## just before the phase has been changed to the trailing
5067 ## end phase is either "after body" or "after frameset".
5068 $reconstruct_active_formatting_elements->($insert_to_current)
5069 if $phase eq 'main';
5070
5071 $self->{open_elements}->[-1]->[0]->manakai_append_text ($data);
5072
5073 unless (length $token->{data}) {
5074 !!!next-token;
5075 redo B;
5076 }
5077 }
5078
5079 !!!parse-error (type => 'after html:#character');
5080 $phase = 'main';
5081 ## reprocess
5082 redo B;
5083 } elsif ($token->{type} eq 'start tag' or
5084 $token->{type} eq 'end tag') {
5085 !!!parse-error (type => 'after html:'.$token->{tag_name});
5086 $phase = 'main';
5087 ## reprocess
5088 redo B;
5089 } elsif ($token->{type} eq 'end-of-file') {
5090 ## Stop parsing
5091 last B;
5092 } else {
5093 die "$0: $token->{type}: Unknown token";
5094 }
5095 }
5096 } # B
5097
5098 ## Stop parsing # MUST
5099
5100 ## TODO: script stuffs
5101 } # _tree_construct_main
5102
## Replaces the content of |$node| with the result of parsing an HTML
## string.
##
##   $class->set_inner_html ($node, $string, $onerror);
##
##   $node    - A Document node (node type 9) or an Element node (node
##              type 1).  Any other node type makes this method |die|.
##   $string  - The markup to parse.  It is accessed through a reference
##              to the caller's argument, so it is not copied.
##   $onerror - Optional code reference that receives parse errors.  In
##              the element case, if it is omitted, errors are reported
##              by |warn|.
sub set_inner_html ($$$) {
  my $class = shift;
  my $node = shift;
  my $s = \$_[0];
  my $onerror = $_[1];

  my $node_type = $node->node_type;

  if ($node_type == 9) {
    # MUST

    ## Step 1 # MUST
    ## TODO: If the document has an active parser, ...
    ## ISSUE: There is an issue in the spec.

    ## Step 2 # MUST
    ## Snapshot the child list before removing anything from it.
    my @old_children = @{$node->child_nodes};
    $node->remove_child ($_) for @old_children;

    ## Step 3, 4, 5 # MUST
    return $class->parse_string ($$s => $node, $onerror);
  }

  unless ($node_type == 1) {
    die "$0: |set_inner_html| is not defined for node of type $node_type";
  }

  ## Element case.
  ## TODO: If non-html element

  ## NOTE: Most of this code is copied from |parse_string|

  ## Step 1 # MUST
  ## The fragment is parsed into a fresh, temporary HTML document.
  my $this_doc = $node->owner_document;
  my $doc = $this_doc->implementation->create_document;
  $doc->manakai_is_html (1);
  my $p = $class->new;
  $p->{document} = $doc;

  ## Step 9 # MUST
  ## Input stream state shared by the closures below.
  my $i = 0;
  my $line = 1;
  my $column = 0;
  $p->{set_next_input_character} = sub {
    my $self = shift;

    ## Shift the current character into the history of previous
    ## input characters, dropping the oldest entry.
    pop @{$self->{prev_input_character}};
    unshift @{$self->{prev_input_character}}, $self->{next_input_character};

    ## End of input is represented by -1.
    if ($i >= length $$s) {
      $self->{next_input_character} = -1;
      return;
    }

    my $c = ord substr $$s, $i++, 1;
    $self->{next_input_character} = $c;
    $column++;

    if ($c == 0x000A) { # LF
      $line++;
      $column = 0;
    } elsif ($c == 0x000D) { # CR
      ## CR and CR LF are both reported as a single LF.
      $i++ if substr ($$s, $i, 1) eq "\x0A";
      $self->{next_input_character} = 0x000A; # LF # MUST
      $line++;
      $column = 0;
    } elsif ($c > 0x10FFFF) {
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    } elsif ($c == 0x0000) { # NULL
      !!!parse-error (type => 'NULL');
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    }
  };
  $p->{prev_input_character} = [-1, -1, -1];
  $p->{next_input_character} = -1;

  ## Error reporting: fall back to |warn| if no handler was given, and
  ## always add the current line and column to the report.
  my $report = $onerror || sub {
    my (%opt) = @_;
    warn "Parse error ($opt{type}) at line $opt{line} column $opt{column}\n";
  };
  $p->{parse_error} = sub {
    $report->(@_, line => $line, column => $column);
  };

  $p->_initialize_tokenizer;
  $p->_initialize_tree_constructor;

  ## Step 2
  ## The initial content model flag depends on the context element.
  my $node_ln = $node->local_name;
  my %flag_for = (
    title => 'RCDATA',
    textarea => 'RCDATA',
    style => 'CDATA',
    script => 'CDATA',
    xmp => 'CDATA',
    iframe => 'CDATA',
    noembed => 'CDATA',
    noframes => 'CDATA',
    noscript => 'CDATA',
    plaintext => 'PLAINTEXT',
  );
  $p->{content_model_flag} = $flag_for{$node_ln} || 'PCDATA';
  ## ISSUE: What is "the name of the element"? local name?

  $p->{inner_html_node} = [$node, $node_ln];

  ## Step 4
  my $root = $doc->create_element_ns
      ('http://www.w3.org/1999/xhtml', [undef, 'html']);

  ## Step 5 # MUST
  $doc->append_child ($root);

  ## Step 6 # MUST
  push @{$p->{open_elements}}, [$root, 'html'];

  undef $p->{head_element};

  ## Step 7 # MUST
  $p->_reset_insertion_mode;

  ## Step 8 # MUST
  ## The nearest XHTML-namespace |form| among |$node| and its ancestors
  ## becomes the form element pointer.
  for (my $ancestor = $node;
       defined $ancestor;
       $ancestor = $ancestor->parent_node) {
    next unless $ancestor->node_type == 1;
    my $ns = $ancestor->namespace_uri;
    next unless defined $ns and $ns eq 'http://www.w3.org/1999/xhtml';
    next unless $ancestor->local_name eq 'form'; ## TODO: case?
    $p->{form_element} = $ancestor;
    last;
  }

  ## Step 3 # MUST
  ## Step 10 # MUST
  {
    ## The macro below is expanded in terms of |$self|.
    my $self = $p;
    !!!next-token;
  }
  $p->_tree_construction_main;

  ## Step 11 # MUST
  my @old_children = @{$node->child_nodes};
  $node->remove_child ($_) for @old_children;
  ## ISSUE: mutation events? read-only?

  ## Step 12 # MUST
  ## Move the parsed nodes from the temporary root into |$node|.
  my @new_children = @{$root->child_nodes};
  for my $new_child (@new_children) {
    $this_doc->adopt_node ($new_child);
    $node->append_child ($new_child);
  }
  ## ISSUE: mutation events?

  return $p->_terminate_tree_constructor;
} # set_inner_html
5256
5257 } # tree construction stage
5258
## Serializes the children of |$node| as an HTML fragment.
##
##   my $string_ref = $class->get_inner_html ($node, $on_error);
##
##   $node     - The node whose children are serialized.  The node
##               itself is not part of the output.
##   $on_error - Optional code reference.  It is called with each
##               child node whose node type is not handled here.
##
## Returns a reference to the serialized string.
sub get_inner_html ($$$) {
  my (undef, $node, $on_error) = @_;

  ## Elements whose text content is emitted literally, without
  ## escaping.  The same table is used for the ancestors of |$node|
  ## and for the descendants visited below, so that a text node is
  ## serialized the same way wherever the serialization starts.
  my $literal_text_element = {
    style => 1, script => 1, xmp => 1, iframe => 1,
    noembed => 1, noframes => 1, noscript => 1,
    plaintext => 1,
  };

  ## Step 1
  my $s = '';

  ## Text is literal from the start if |$node| or one of its ancestors
  ## is an HTML literal-text element.
  my $in_cdata;
  my $parent = $node;
  while (defined $parent) {
    if ($parent->node_type == 1) {
      ## The namespace URI can be undefined (null namespace).
      my $nsuri = $parent->namespace_uri;
      if (defined $nsuri and
          $nsuri eq 'http://www.w3.org/1999/xhtml' and
          $literal_text_element->{$parent->local_name}) { ## TODO: case thingy
        $in_cdata = 1;
        last;
      }
    }
    $parent = $parent->parent_node;
  }

  ## Step 2
  ## |@node| is a work queue in document order.  Besides nodes it can
  ## contain plain strings: an end tag to append, or the marker
  ## |cdata-out| that ends a literal-text section.
  my @node = @{$node->child_nodes};
  C: while (@node) {
    my $child = shift @node;
    unless (ref $child) {
      if ($child eq 'cdata-out') {
        $in_cdata = 0;
      } else {
        $s .= $child; # end tag
      }
      next C;
    }

    my $nt = $child->node_type;
    if ($nt == 1) { # Element
      my $tag_name = $child->tag_name; ## TODO: manakai_tag_name
      $s .= '<' . $tag_name;
      ## NOTE: Non-HTML case:
      ## <http://permalink.gmane.org/gmane.org.w3c.whatwg.discuss/11191>

      my @attrs = @{$child->attributes}; # sort order MUST be stable
      for my $attr (@attrs) { # order is implementation dependent
        my $attr_name = $attr->name; ## TODO: manakai_name
        $s .= ' ' . $attr_name . '="';
        my $attr_value = $attr->value;
        ## escape
        $attr_value =~ s/&/&amp;/g;
        $attr_value =~ s/</&lt;/g;
        $attr_value =~ s/>/&gt;/g;
        $attr_value =~ s/"/&quot;/g;
        $s .= $attr_value . '"';
      }
      $s .= '>';

      ## These elements have neither content nor an end tag.
      next C if {
        area => 1, base => 1, basefont => 1, bgsound => 1,
        br => 1, col => 1, embed => 1, frame => 1, hr => 1,
        img => 1, input => 1, link => 1, meta => 1, param => 1,
        spacer => 1, wbr => 1,
      }->{$tag_name};

      $s .= "\x0A" if $tag_name eq 'pre' or $tag_name eq 'textarea';

      ## Entering a literal-text element: the marker is queued behind
      ## the element's children and end tag (which are put in front
      ## of it just below), so escaping resumes after the end tag.
      if (not $in_cdata and $literal_text_element->{$tag_name}) {
        unshift @node, 'cdata-out';
        $in_cdata = 1;
      }

      unshift @node, @{$child->child_nodes}, '</' . $tag_name . '>';
    } elsif ($nt == 3 or $nt == 4) { # Text, CDATASection
      if ($in_cdata) {
        $s .= $child->data;
      } else {
        my $value = $child->data;
        $value =~ s/&/&amp;/g;
        $value =~ s/</&lt;/g;
        $value =~ s/>/&gt;/g;
        $value =~ s/"/&quot;/g;
        $s .= $value;
      }
    } elsif ($nt == 8) { # Comment
      $s .= '<!--' . $child->data . '-->';
    } elsif ($nt == 10) { # DocumentType
      $s .= '<!DOCTYPE ' . $child->name . '>';
    } elsif ($nt == 5) { # entrefs
      ## Expand the reference in place.  The children go to the front
      ## of the queue so that they are serialized at the position of
      ## the reference, not after everything that is still pending.
      unshift @node, @{$child->child_nodes};
    } else {
      $on_error->($child) if defined $on_error;
    }
    ## ISSUE: This code does not support PIs.
  } # C

  ## Step 3
  return \$s;
} # get_inner_html
5358
5359 1;
5360 # $Date: 2007/06/24 06:20:37 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24