/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Contents of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.176 - (show annotations) (download) (as text)
Sun Sep 14 07:19:47 2008 UTC (17 years, 4 months ago) by wakaba
Branch: MAIN
Changes since 1.175: +3 -3 lines
File MIME type: application/x-wais-source
++ whatpm/Whatpm/ChangeLog	14 Sep 2008 07:19:17 -0000
	* HTML.pm.src: Make a "bare ero" error for unknown
	entities point the "&" character.

2008-09-14  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/Charset/ChangeLog	14 Sep 2008 07:19:40 -0000
	* DecodeHandle.pm: Merge the ShiftJIS class into the Encode class.

2008-09-14  Wakaba  <wakaba@suika.fam.cx>

1 package Whatpm::HTML;
2 use strict;
3 our $VERSION=do{my @r=(q$Revision: 1.175 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 use Error qw(:try);
5
6 ## ISSUE:
7 ## var doc = implementation.createDocument (null, null, null);
8 ## doc.write ('');
9 ## alert (doc.compatMode);
10
11 require IO::Handle;
12
## Namespace URIs referenced by the tables below (|$el_category_f|,
## |$foreign_attr_xname|).
13 my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
14 my $MML_NS = q<http://www.w3.org/1998/Math/MathML>;
15 my $SVG_NS = q<http://www.w3.org/2000/svg>;
16 my $XLINK_NS = q<http://www.w3.org/1999/xlink>;
17 my $XML_NS = q<http://www.w3.org/XML/1998/namespace>;
18 my $XMLNS_NS = q<http://www.w3.org/2000/xmlns/>;
19
## Element category flags. Each constant occupies a distinct single bit,
## so categories can be combined with |\|| (as done in |$el_category| and
## in the composite masks defined after this list) and tested with |&|.
## The empty prototype |()| makes each sub eligible for inlining as a
## constant.
20 sub A_EL () { 0b1 }
21 sub ADDRESS_EL () { 0b10 }
22 sub BODY_EL () { 0b100 }
23 sub BUTTON_EL () { 0b1000 }
24 sub CAPTION_EL () { 0b10000 }
25 sub DD_EL () { 0b100000 }
26 sub DIV_EL () { 0b1000000 }
27 sub DT_EL () { 0b10000000 }
28 sub FORM_EL () { 0b100000000 }
29 sub FORMATTING_EL () { 0b1000000000 }
30 sub FRAMESET_EL () { 0b10000000000 }
31 sub HEADING_EL () { 0b100000000000 }
32 sub HTML_EL () { 0b1000000000000 }
33 sub LI_EL () { 0b10000000000000 }
34 sub NOBR_EL () { 0b100000000000000 }
35 sub OPTION_EL () { 0b1000000000000000 }
36 sub OPTGROUP_EL () { 0b10000000000000000 }
37 sub P_EL () { 0b100000000000000000 }
38 sub SELECT_EL () { 0b1000000000000000000 }
39 sub TABLE_EL () { 0b10000000000000000000 }
40 sub TABLE_CELL_EL () { 0b100000000000000000000 }
41 sub TABLE_ROW_EL () { 0b1000000000000000000000 }
42 sub TABLE_ROW_GROUP_EL () { 0b10000000000000000000000 }
43 sub MISC_SCOPING_EL () { 0b100000000000000000000000 }
44 sub MISC_SPECIAL_EL () { 0b1000000000000000000000000 }
45 sub FOREIGN_EL () { 0b10000000000000000000000000 }
46 sub FOREIGN_FLOW_CONTENT_EL () { 0b100000000000000000000000000 }
47 sub MML_AXML_EL () { 0b1000000000000000000000000000 }
48 sub RUBY_EL () { 0b10000000000000000000000000000 }
49 sub RUBY_COMPONENT_EL () { 0b100000000000000000000000000000 }
50
## Composite mask: the table, table row and table row group categories.
51 sub TABLE_ROWS_EL () {
52 TABLE_EL |
53 TABLE_ROW_EL |
54 TABLE_ROW_GROUP_EL
55 }
56
57 ## NOTE: Used in "generate implied end tags" algorithm.
58 ## NOTE: There is a code where a modified version of END_TAG_OPTIONAL_EL
59 ## is used in "generate implied end tags" implementation (search for the
60 ## function mae).
## Composite mask: the dd, dt, li, p and ruby component categories
## (|rp| and |rt| are the elements mapped to RUBY_COMPONENT_EL in
## |$el_category|).
61 sub END_TAG_OPTIONAL_EL () {
62 DD_EL |
63 DT_EL |
64 LI_EL |
65 P_EL |
66 RUBY_COMPONENT_EL
67 }
68
69 ## NOTE: Used in </body> and EOF algorithms.
## Composite mask: dd, dt, li and p, plus body, html, table cell, table
## row and table row group categories.
## NOTE(review): Unlike END_TAG_OPTIONAL_EL, this mask does not include
## RUBY_COMPONENT_EL -- confirm whether that difference is intended.
70 sub ALL_END_TAG_OPTIONAL_EL () {
71 DD_EL |
72 DT_EL |
73 LI_EL |
74 P_EL |
75
76 BODY_EL |
77 HTML_EL |
78 TABLE_CELL_EL |
79 TABLE_ROW_EL |
80 TABLE_ROW_GROUP_EL
81 }
82
## Composite mask: button, caption, html, table, table cell and the
## miscellaneous scoping category (|applet|, |marquee| and |object| in
## |$el_category|).
## NOTE(review): Presumably the set of elements that terminate an
## "in scope" search of the stack of open elements -- confirm at the
## use sites.
83 sub SCOPING_EL () {
84 BUTTON_EL |
85 CAPTION_EL |
86 HTML_EL |
87 TABLE_EL |
88 TABLE_CELL_EL |
89 MISC_SCOPING_EL
90 }
91
## Composite mask: the html and table categories.
## NOTE(review): Presumably the boundary set for "in table scope"
## checks -- confirm at the use sites.
92 sub TABLE_SCOPING_EL () {
93 HTML_EL |
94 TABLE_EL
95 }
96
## Composite mask: the html and table row group categories.
## NOTE(review): Presumably the boundary set used when clearing the
## stack back to a table body context -- confirm at the use sites.
97 sub TABLE_ROWS_SCOPING_EL () {
98 HTML_EL |
99 TABLE_ROW_GROUP_EL
100 }
101
## Composite mask: the html and table row categories.
## NOTE(review): Presumably the boundary set used when clearing the
## stack back to a table row context -- confirm at the use sites.
102 sub TABLE_ROW_SCOPING_EL () {
103 HTML_EL |
104 TABLE_ROW_EL
105 }
106
## Composite mask: address, body, div, the optional-end-tag categories
## dd/dt/li/p, form, frameset, heading, option, optgroup, select, table
## row, table row group and the miscellaneous special category.
## NOTE(review): Presumably corresponds to the "special" element
## category of the HTML5 tree construction algorithm -- confirm.
107 sub SPECIAL_EL () {
108 ADDRESS_EL |
109 BODY_EL |
110 DIV_EL |
111
112 DD_EL |
113 DT_EL |
114 LI_EL |
115 P_EL |
116
117 FORM_EL |
118 FRAMESET_EL |
119 HEADING_EL |
120 OPTION_EL |
121 OPTGROUP_EL |
122 SELECT_EL |
123 TABLE_ROW_EL |
124 TABLE_ROW_GROUP_EL |
125 MISC_SPECIAL_EL
126 }
127
## Maps a lowercase element name to its category flags (one of the
## |*_EL| bit constants, or a bit-or of two of them for |a| and |nobr|).
## Element names that are not listed have no entry in this table.
128 my $el_category = {
129 a => A_EL | FORMATTING_EL,
130 address => ADDRESS_EL,
131 applet => MISC_SCOPING_EL,
132 area => MISC_SPECIAL_EL,
133 b => FORMATTING_EL,
134 base => MISC_SPECIAL_EL,
135 basefont => MISC_SPECIAL_EL,
136 bgsound => MISC_SPECIAL_EL,
137 big => FORMATTING_EL,
138 blockquote => MISC_SPECIAL_EL,
139 body => BODY_EL,
140 br => MISC_SPECIAL_EL,
141 button => BUTTON_EL,
142 caption => CAPTION_EL,
143 center => MISC_SPECIAL_EL,
144 col => MISC_SPECIAL_EL,
145 colgroup => MISC_SPECIAL_EL,
146 dd => DD_EL,
147 dir => MISC_SPECIAL_EL,
148 div => DIV_EL,
149 dl => MISC_SPECIAL_EL,
150 dt => DT_EL,
151 em => FORMATTING_EL,
152 embed => MISC_SPECIAL_EL,
153 fieldset => MISC_SPECIAL_EL,
154 font => FORMATTING_EL,
155 form => FORM_EL,
156 frame => MISC_SPECIAL_EL,
157 frameset => FRAMESET_EL,
158 h1 => HEADING_EL,
159 h2 => HEADING_EL,
160 h3 => HEADING_EL,
161 h4 => HEADING_EL,
162 h5 => HEADING_EL,
163 h6 => HEADING_EL,
164 head => MISC_SPECIAL_EL,
165 hr => MISC_SPECIAL_EL,
166 html => HTML_EL,
167 i => FORMATTING_EL,
168 iframe => MISC_SPECIAL_EL,
169 img => MISC_SPECIAL_EL,
170 input => MISC_SPECIAL_EL,
171 isindex => MISC_SPECIAL_EL,
172 li => LI_EL,
173 link => MISC_SPECIAL_EL,
174 listing => MISC_SPECIAL_EL,
175 marquee => MISC_SCOPING_EL,
176 menu => MISC_SPECIAL_EL,
177 meta => MISC_SPECIAL_EL,
178 nobr => NOBR_EL | FORMATTING_EL,
179 noembed => MISC_SPECIAL_EL,
180 noframes => MISC_SPECIAL_EL,
181 noscript => MISC_SPECIAL_EL,
182 object => MISC_SCOPING_EL,
183 ol => MISC_SPECIAL_EL,
184 optgroup => OPTGROUP_EL,
185 option => OPTION_EL,
186 p => P_EL,
187 param => MISC_SPECIAL_EL,
188 plaintext => MISC_SPECIAL_EL,
189 pre => MISC_SPECIAL_EL,
190 rp => RUBY_COMPONENT_EL,
191 rt => RUBY_COMPONENT_EL,
192 ruby => RUBY_EL,
193 s => FORMATTING_EL,
194 script => MISC_SPECIAL_EL,
195 select => SELECT_EL,
196 small => FORMATTING_EL,
197 spacer => MISC_SPECIAL_EL,
198 strike => FORMATTING_EL,
199 strong => FORMATTING_EL,
200 style => MISC_SPECIAL_EL,
201 table => TABLE_EL,
202 tbody => TABLE_ROW_GROUP_EL,
203 td => TABLE_CELL_EL,
204 textarea => MISC_SPECIAL_EL,
205 tfoot => TABLE_ROW_GROUP_EL,
206 th => TABLE_CELL_EL,
207 thead => TABLE_ROW_GROUP_EL,
208 title => MISC_SPECIAL_EL,
209 tr => TABLE_ROW_EL,
210 tt => FORMATTING_EL,
211 u => FORMATTING_EL,
212 ul => MISC_SPECIAL_EL,
213 wbr => MISC_SPECIAL_EL,
214 };
215
## Category flags for foreign (non-HTML) elements: a two-level table
## keyed first by namespace URI (MathML or SVG) and then by element
## local name. Note that the SVG key |foreignObject| is mixed-case.
216 my $el_category_f = {
217 $MML_NS => {
218 'annotation-xml' => MML_AXML_EL,
219 mi => FOREIGN_FLOW_CONTENT_EL,
220 mo => FOREIGN_FLOW_CONTENT_EL,
221 mn => FOREIGN_FLOW_CONTENT_EL,
222 ms => FOREIGN_FLOW_CONTENT_EL,
223 mtext => FOREIGN_FLOW_CONTENT_EL,
224 },
225 $SVG_NS => {
226 foreignObject => FOREIGN_FLOW_CONTENT_EL,
227 desc => FOREIGN_FLOW_CONTENT_EL,
228 title => FOREIGN_FLOW_CONTENT_EL,
229 },
230 ## NOTE: In addition, FOREIGN_EL is set to non-HTML elements.
231 };
232
## Maps an all-lowercase attribute name to its mixed-case spelling.
## NOTE(review): Presumably used to restore the canonical case of SVG
## attribute names after the tokenizer has lowercased them -- confirm
## at the use site.
233 my $svg_attr_name = {
234 attributename => 'attributeName',
235 attributetype => 'attributeType',
236 basefrequency => 'baseFrequency',
237 baseprofile => 'baseProfile',
238 calcmode => 'calcMode',
239 clippathunits => 'clipPathUnits',
240 contentscripttype => 'contentScriptType',
241 contentstyletype => 'contentStyleType',
242 diffuseconstant => 'diffuseConstant',
243 edgemode => 'edgeMode',
244 externalresourcesrequired => 'externalResourcesRequired',
245 filterres => 'filterRes',
246 filterunits => 'filterUnits',
247 glyphref => 'glyphRef',
248 gradienttransform => 'gradientTransform',
249 gradientunits => 'gradientUnits',
250 kernelmatrix => 'kernelMatrix',
251 kernelunitlength => 'kernelUnitLength',
252 keypoints => 'keyPoints',
253 keysplines => 'keySplines',
254 keytimes => 'keyTimes',
255 lengthadjust => 'lengthAdjust',
256 limitingconeangle => 'limitingConeAngle',
257 markerheight => 'markerHeight',
258 markerunits => 'markerUnits',
259 markerwidth => 'markerWidth',
260 maskcontentunits => 'maskContentUnits',
261 maskunits => 'maskUnits',
262 numoctaves => 'numOctaves',
263 pathlength => 'pathLength',
264 patterncontentunits => 'patternContentUnits',
265 patterntransform => 'patternTransform',
266 patternunits => 'patternUnits',
267 pointsatx => 'pointsAtX',
268 pointsaty => 'pointsAtY',
269 pointsatz => 'pointsAtZ',
270 preservealpha => 'preserveAlpha',
271 preserveaspectratio => 'preserveAspectRatio',
272 primitiveunits => 'primitiveUnits',
273 refx => 'refX',
274 refy => 'refY',
275 repeatcount => 'repeatCount',
276 repeatdur => 'repeatDur',
277 requiredextensions => 'requiredExtensions',
278 requiredfeatures => 'requiredFeatures',
279 specularconstant => 'specularConstant',
280 specularexponent => 'specularExponent',
281 spreadmethod => 'spreadMethod',
282 startoffset => 'startOffset',
283 stddeviation => 'stdDeviation',
284 stitchtiles => 'stitchTiles',
285 surfacescale => 'surfaceScale',
286 systemlanguage => 'systemLanguage',
287 tablevalues => 'tableValues',
288 targetx => 'targetX',
289 targety => 'targetY',
290 textlength => 'textLength',
291 viewbox => 'viewBox',
292 viewtarget => 'viewTarget',
293 xchannelselector => 'xChannelSelector',
294 ychannelselector => 'yChannelSelector',
295 zoomandpan => 'zoomAndPan',
296 };
297
## Maps a qualified attribute name to |[namespace URI, [prefix, local
## name]]|. The prefix is |undef| for the bare |xmlns| attribute.
## NOTE(review): Presumably applied to attributes of foreign (MathML /
## SVG) elements only -- confirm at the use site.
298 my $foreign_attr_xname = {
299 'xlink:actuate' => [$XLINK_NS, ['xlink', 'actuate']],
300 'xlink:arcrole' => [$XLINK_NS, ['xlink', 'arcrole']],
301 'xlink:href' => [$XLINK_NS, ['xlink', 'href']],
302 'xlink:role' => [$XLINK_NS, ['xlink', 'role']],
303 'xlink:show' => [$XLINK_NS, ['xlink', 'show']],
304 'xlink:title' => [$XLINK_NS, ['xlink', 'title']],
305 'xlink:type' => [$XLINK_NS, ['xlink', 'type']],
306 'xml:base' => [$XML_NS, ['xml', 'base']],
307 'xml:lang' => [$XML_NS, ['xml', 'lang']],
308 'xml:space' => [$XML_NS, ['xml', 'space']],
309 'xmlns' => [$XMLNS_NS, [undef, 'xmlns']],
310 'xmlns:xlink' => [$XMLNS_NS, ['xmlns', 'xlink']],
311 };
312
313 ## ISSUE: xmlns:xlink="non-xlink-ns" is not an error.
314
## Maps each code point in the range 0x80-0x9F to a replacement Unicode
## code point. The values are the Windows-1252 assignments for the same
## byte values; positions 0x81, 0x8D, 0x8F, 0x90 and 0x9D map to U+FFFD
## (REPLACEMENT CHARACTER).
## NOTE(review): Presumably consulted when a numeric character
## reference resolves to a code point in this range -- confirm at the
## use site.
315 my $c1_entity_char = {
316 0x80 => 0x20AC,
317 0x81 => 0xFFFD,
318 0x82 => 0x201A,
319 0x83 => 0x0192,
320 0x84 => 0x201E,
321 0x85 => 0x2026,
322 0x86 => 0x2020,
323 0x87 => 0x2021,
324 0x88 => 0x02C6,
325 0x89 => 0x2030,
326 0x8A => 0x0160,
327 0x8B => 0x2039,
328 0x8C => 0x0152,
329 0x8D => 0xFFFD,
330 0x8E => 0x017D,
331 0x8F => 0xFFFD,
332 0x90 => 0xFFFD,
333 0x91 => 0x2018,
334 0x92 => 0x2019,
335 0x93 => 0x201C,
336 0x94 => 0x201D,
337 0x95 => 0x2022,
338 0x96 => 0x2013,
339 0x97 => 0x2014,
340 0x98 => 0x02DC,
341 0x99 => 0x2122,
342 0x9A => 0x0161,
343 0x9B => 0x203A,
344 0x9C => 0x0153,
345 0x9D => 0xFFFD,
346 0x9E => 0x017E,
347 0x9F => 0x0178,
348 }; # $c1_entity_char
349
## Parses a string of bytes.
##
## Arguments: the invocant, the charset name (may be |undef|), the byte
## string or a reference to it, followed by any further arguments, which
## are handed to |parse_byte_stream| unchanged.
##
## The string is opened as an in-memory read handle and the work is
## delegated to |parse_byte_stream|, whose return value is returned.
350 sub parse_byte_string ($$$$;$) {
351 my $self = shift;
352 my $charset_name = shift;
## NOTE(review): The result of |open| is not checked; if it fails,
## |$input| is passed on as an unopened handle.
353 open my $input, '<', ref $_[0] ? $_[0] : \($_[0]);
354 return $self->parse_byte_stream ($charset_name, $input, @_[1..$#_]);
355 } # parse_byte_string
356
## Parses a stream of bytes.
##
## Arguments (see the commented-out list on the first line of the body):
## the invocant (an instance, or a class name on which |new| is called),
## the charset name (may be |undef|), the byte stream handle, the
## document, an optional error handler and an optional wrapper factory
## that receives the character stream and returns the object to read
## from.
##
## The method picks a character encoding (the SNIFFING block), builds a
## decoding handle for it, installs |$self->{change_encoding}| and then
## calls |parse_char_stream|. If a |Whatpm::HTML::RestartParser|
## exception is caught, |parse_char_stream| is called once more with
## the character stream that |change_encoding| has set up.
##
## Returns what |parse_char_stream| returns.
357 sub parse_byte_stream ($$$$;$$) {
358 # my ($self, $charset_name, $byte_stream, $doc, $onerror, $get_wrapper) = @_;
359 my $self = ref $_[0] ? shift : shift->new;
360 my $charset_name = shift;
361 my $byte_stream = $_[0];
362
## Default error handler: emit a warning carrying the error type only.
363 my $onerror = $_[2] || sub {
364 my (%opt) = @_;
365 warn "Parse error ($opt{type})\n";
366 };
367 $self->{parse_error} = $onerror; # updated later by parse_char_string
368
## Default wrapper: use the handle as is.
369 my $get_wrapper = $_[3] || sub ($) {
370 return $_[0]; # $_[0] = byte stream handle, returned = arg to char handle
371 };
372
373 ## HTML5 encoding sniffing algorithm
374 require Message::Charset::Info;
375 my $charset;
376 my $buffer;
377 my ($char_stream, $e_status);
378
379 SNIFFING: {
380 ## NOTE: By setting |allow_fallback| option true when the
381 ## |get_decode_handle| method is invoked, we ignore what the HTML5
382 ## spec requires, i.e. unsupported encoding should be ignored.
383 ## TODO: We should not do this unless the parser is invoked
384 ## in the conformance checking mode, in which this behavior
385 ## would be useful.
386
387 ## Step 1
388 if (defined $charset_name) {
389 $charset = Message::Charset::Info->get_by_html_name ($charset_name);
390 ## TODO: Is this ok? Transfer protocol's parameter should be
391 ## interpreted in its semantics?
392
393 ## ISSUE: Unsupported encoding is not ignored according to the spec.
394 ($char_stream, $e_status) = $charset->get_decode_handle
395 ($byte_stream, allow_error_reporting => 1,
396 allow_fallback => 1);
397 if ($char_stream) {
398 $self->{confident} = 1;
399 last SNIFFING;
400 } else {
401 ## TODO: unsupported error
402 }
403 }
404
405 ## Step 2
## Read up to 1024 bytes from the stream for the checks below.
406 my $byte_buffer = '';
407 for (1..1024) {
408 my $char = $byte_stream->getc;
409 last unless defined $char;
410 $byte_buffer .= $char;
411 } ## TODO: timeout
412
413 ## Step 3
## A leading byte order mark selects UTF-16BE, UTF-16LE or UTF-8.
414 if ($byte_buffer =~ /^\xFE\xFF/) {
415 $charset = Message::Charset::Info->get_by_html_name ('utf-16be');
416 ($char_stream, $e_status) = $charset->get_decode_handle
417 ($byte_stream, allow_error_reporting => 1,
418 allow_fallback => 1, byte_buffer => \$byte_buffer);
419 $self->{confident} = 1;
420 last SNIFFING;
421 } elsif ($byte_buffer =~ /^\xFF\xFE/) {
422 $charset = Message::Charset::Info->get_by_html_name ('utf-16le');
423 ($char_stream, $e_status) = $charset->get_decode_handle
424 ($byte_stream, allow_error_reporting => 1,
425 allow_fallback => 1, byte_buffer => \$byte_buffer);
426 $self->{confident} = 1;
427 last SNIFFING;
428 } elsif ($byte_buffer =~ /^\xEF\xBB\xBF/) {
429 $charset = Message::Charset::Info->get_by_html_name ('utf-8');
430 ($char_stream, $e_status) = $charset->get_decode_handle
431 ($byte_stream, allow_error_reporting => 1,
432 allow_fallback => 1, byte_buffer => \$byte_buffer);
433 $self->{confident} = 1;
434 last SNIFFING;
435 }
436
437 ## Step 4
438 ## TODO: <meta charset>
439
440 ## Step 5
441 ## TODO: from history
442
443 ## Step 6
444 require Whatpm::Charset::UniversalCharDet;
445 $charset_name = Whatpm::Charset::UniversalCharDet->detect_byte_string
446 ($byte_buffer);
447 if (defined $charset_name) {
448 $charset = Message::Charset::Info->get_by_html_name ($charset_name);
449
450 ## ISSUE: Unsupported encoding is not ignored according to the spec.
451 require Whatpm::Charset::DecodeHandle;
452 $buffer = Whatpm::Charset::DecodeHandle::ByteBuffer->new
453 ($byte_stream);
454 ($char_stream, $e_status) = $charset->get_decode_handle
455 ($buffer, allow_error_reporting => 1,
456 allow_fallback => 1, byte_buffer => \$byte_buffer);
457 if ($char_stream) {
458 $buffer->{buffer} = $byte_buffer;
459 !!!parse-error (type => 'sniffing:chardet',
460 text => $charset_name,
461 level => $self->{level}->{info},
462 layer => 'encode',
463 line => 1, column => 1);
464 $self->{confident} = 0;
465 last SNIFFING;
466 }
467 }
468
469 ## Step 7: default
470 ## TODO: Make this configurable.
471 $charset = Message::Charset::Info->get_by_html_name ('windows-1252');
472 ## NOTE: We choose |windows-1252| here, since |utf-8| should be
473 ## detectable in the step 6.
474 require Whatpm::Charset::DecodeHandle;
475 $buffer = Whatpm::Charset::DecodeHandle::ByteBuffer->new
476 ($byte_stream);
477 ($char_stream, $e_status)
478 = $charset->get_decode_handle ($buffer,
479 allow_error_reporting => 1,
480 allow_fallback => 1,
481 byte_buffer => \$byte_buffer);
482 $buffer->{buffer} = $byte_buffer;
483 !!!parse-error (type => 'sniffing:default',
484 text => 'windows-1252',
485 level => $self->{level}->{info},
486 line => 1, column => 1,
487 layer => 'encode');
488 $self->{confident} = 0;
489 } # SNIFFING
490
## Record the encoding name and report when the decoder is a fallback
## implementation or one that does not report errors.
491 if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) {
492 $self->{input_encoding} = $charset->get_iana_name; ## TODO: Should we set actual charset decoder's encoding name?
493 !!!parse-error (type => 'chardecode:fallback',
494 #text => $self->{input_encoding},
495 level => $self->{level}->{uncertain},
496 line => 1, column => 1,
497 layer => 'encode');
498 } elsif (not ($e_status &
499 Message::Charset::Info::ERROR_REPORTING_ENCODING_IMPL())) {
500 $self->{input_encoding} = $charset->get_iana_name;
501 !!!parse-error (type => 'chardecode:no error',
502 text => $self->{input_encoding},
503 level => $self->{level}->{uncertain},
504 line => 1, column => 1,
505 layer => 'encode');
506 } else {
507 $self->{input_encoding} = $charset->get_iana_name;
508 }
509
## Callback taking (parser, charset name, token). It overwrites the
## enclosing |$charset_name|, |$charset|, |$char_stream| and |$e_status|
## and, unless the new encoding matches the current one, throws
## |Whatpm::HTML::RestartParser|.
## NOTE(review): |$buffer| is only assigned in steps 6 and 7 above. If
## sniffing ended in step 1 or step 3 it appears to be still undefined
## when |$buffer->{buffer}| is evaluated here -- confirm that this is
## intended.
510 $self->{change_encoding} = sub {
511 my $self = shift;
512 $charset_name = shift;
513 my $token = shift;
514
515 $charset = Message::Charset::Info->get_by_html_name ($charset_name);
516 ($char_stream, $e_status) = $charset->get_decode_handle
517 ($byte_stream, allow_error_reporting => 1, allow_fallback => 1,
518 byte_buffer => \ $buffer->{buffer});
519
520 if ($char_stream) { # if supported
521 ## "Change the encoding" algorithm:
522
523 ## Step 1
524 if ($charset->{category} &
525 Message::Charset::Info::CHARSET_CATEGORY_UTF16 ()) {
526 $charset = Message::Charset::Info->get_by_html_name ('utf-8');
527 ($char_stream, $e_status) = $charset->get_decode_handle
528 ($byte_stream,
529 byte_buffer => \ $buffer->{buffer});
530 }
531 $charset_name = $charset->get_iana_name;
532
533 ## Step 2
534 if (defined $self->{input_encoding} and
535 $self->{input_encoding} eq $charset_name) {
536 !!!parse-error (type => 'charset label:matching',
537 text => $charset_name,
538 level => $self->{level}->{info});
539 $self->{confident} = 1;
540 return;
541 }
542
543 !!!parse-error (type => 'charset label detected',
544 text => $self->{input_encoding},
545 value => $charset_name,
546 level => $self->{level}->{warn},
547 token => $token);
548
549 ## Step 3
550 # if (can) {
551 ## change the encoding on the fly.
552 #$self->{confident} = 1;
553 #return;
554 # }
555
556 ## Step 4
557 throw Whatpm::HTML::RestartParser ();
558 }
559 }; # $self->{change_encoding}
560
## Error handler installed on the character stream: reports the error
## at the parser's current position and, when an |octets| reference is
## supplied, overwrites the referenced value with U+FFFD.
561 my $char_onerror = sub {
562 my (undef, $type, %opt) = @_;
563 !!!parse-error (layer => 'encode',
564 line => $self->{line}, column => $self->{column} + 1,
565 %opt, type => $type);
566 if ($opt{octets}) {
567 ${$opt{octets}} = "\x{FFFD}"; # replacement character
568 }
569 };
570
571 my $wrapped_char_stream = $get_wrapper->($char_stream);
572 $wrapped_char_stream->onerror ($char_onerror);
573
## |@args| is what remains of |@_| after the byte stream: the document,
## the error handler and the wrapper factory.
574 my @args = @_; shift @args; # $s
575 my $return;
576 try {
577 $return = $self->parse_char_stream ($wrapped_char_stream, @args);
578 } catch Whatpm::HTML::RestartParser with {
579 ## NOTE: Invoked after {change_encoding}.
580
581 if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) {
582 $self->{input_encoding} = $charset->get_iana_name; ## TODO: Should we set actual charset decoder's encoding name?
583 !!!parse-error (type => 'chardecode:fallback',
584 level => $self->{level}->{uncertain},
585 #text => $self->{input_encoding},
586 line => 1, column => 1,
587 layer => 'encode');
588 } elsif (not ($e_status &
589 Message::Charset::Info::ERROR_REPORTING_ENCODING_IMPL())) {
590 $self->{input_encoding} = $charset->get_iana_name;
591 !!!parse-error (type => 'chardecode:no error',
592 text => $self->{input_encoding},
593 level => $self->{level}->{uncertain},
594 line => 1, column => 1,
595 layer => 'encode');
596 } else {
597 $self->{input_encoding} = $charset->get_iana_name;
598 }
599 $self->{confident} = 1;
600
601 $wrapped_char_stream = $get_wrapper->($char_stream);
602 $wrapped_char_stream->onerror ($char_onerror);
603
## This second run is outside the |try| block, so a further
## |RestartParser| exception would propagate to the caller.
604 $return = $self->parse_char_stream ($wrapped_char_stream, @args);
605 };
606 return $return;
607 } # parse_byte_stream
608
609 ## NOTE: HTML5 spec says that the encoding layer MUST NOT strip BOM
610 ## and the HTML layer MUST ignore it. However, we do strip the BOM in
611 ## the encoding layer and the HTML layer does not ignore any U+FEFF,
612 ## because the core part of our HTML parser expects a string of characters,
613 ## not a string of bytes or code units or anything which might contain a BOM.
614 ## Therefore, any parser interface that accepts a string of bytes,
615 ## such as |parse_byte_string| in this module, must ensure that it does
616 ## strip the BOM and never strip any ZWNBSP.
617
## Parses a string of characters.
##
## Arguments (see the commented-out list on the first line of the body):
## the invocant, the string or a reference to it, the document, an
## optional error handler and an optional wrapper factory.
##
## The string is wrapped in a |Whatpm::Charset::DecodeHandle::CharString|
## object; if a wrapper factory is given, its result for that object is
## used as the input instead. The work is then delegated to
## |parse_char_stream|, whose return value is returned.
618 sub parse_char_string ($$$;$$) {
619 #my ($self, $s, $doc, $onerror, $get_wrapper) = @_;
620 my $self = shift;
621 my $s = ref $_[0] ? $_[0] : \($_[0]);
622 require Whatpm::Charset::DecodeHandle;
623 my $input = Whatpm::Charset::DecodeHandle::CharString->new ($s);
624 if ($_[3]) {
625 $input = $_[3]->($input);
626 }
627 return $self->parse_char_stream ($input, @_[1..$#_]);
628 } # parse_char_string
629 *parse_string = \&parse_char_string; ## NOTE: Alias for backward compatibility.
630
## Parses a stream of characters into a document.
##
## Arguments: the invocant (an instance, or a class name on which |new|
## is called), the input object (it must provide |getc| and
## |manakai_read_until|), the document, and an optional error handler.
##
## The document's existing child nodes are removed. The method installs
## the |set_next_char|, |read_until| and |parse_error| callbacks on the
## parser, runs the tokenizer and tree constructor, and returns the
## document.
631 sub parse_char_stream ($$$;$) {
632 my $self = ref $_[0] ? shift : shift->new;
633 my $input = $_[0];
634 $self->{document} = $_[1];
635 @{$self->{document}->child_nodes} = ();
636
637 ## NOTE: |set_inner_html| copies most of this method's code
638
639 $self->{confident} = 1 unless exists $self->{confident};
640 $self->{document}->input_encoding ($self->{input_encoding})
641 if defined $self->{input_encoding};
642
## NOTE(review): |$i| does not appear to be used anywhere in this
## method -- confirm that none of the |!!!| macros refers to it.
643 my $i = 0;
644 $self->{line_prev} = $self->{line} = 1;
645 $self->{column_prev} = $self->{column} = 0;
## Reads one character into |$self->{next_char}| as a code point (-1 at
## end of input), keeps the three previous values in |prev_char| and
## maintains the line/column counters. CR and CR LF are turned into a
## single LF; code points above 0x10FFFF and U+0000 are replaced by
## U+FFFD; the code points matched by the last branch are reported as
## 'control char' errors but left unchanged.
646 $self->{set_next_char} = sub {
647 my $self = shift;
648
649 pop @{$self->{prev_char}};
650 unshift @{$self->{prev_char}}, $self->{next_char};
651
652 my $char;
653 if (defined $self->{next_next_char}) {
654 $char = $self->{next_next_char};
655 delete $self->{next_next_char};
656 } else {
657 $char = $input->getc;
658 }
## -1 is a true value, so the |return| is always reached at end of input.
659 $self->{next_char} = -1 and return unless defined $char;
660 $self->{next_char} = ord $char;
661
662 ($self->{line_prev}, $self->{column_prev})
663 = ($self->{line}, $self->{column});
664 $self->{column}++;
665
666 if ($self->{next_char} == 0x000A) { # LF
667 !!!cp ('j1');
668 $self->{line}++;
669 $self->{column} = 0;
670 } elsif ($self->{next_char} == 0x000D) { # CR
671 !!!cp ('j2');
672 ## TODO: support for abort/streaming
## Look one character ahead: an LF directly after the CR is dropped,
## anything else is kept in |next_next_char| for the next call.
673 my $next = $input->getc;
674 if (defined $next and $next ne "\x0A") {
675 $self->{next_next_char} = $next;
676 }
677 $self->{next_char} = 0x000A; # LF # MUST
678 $self->{line}++;
679 $self->{column} = 0;
680 } elsif ($self->{next_char} > 0x10FFFF) {
681 !!!cp ('j3');
682 $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
683 } elsif ($self->{next_char} == 0x0000) { # NULL
684 !!!cp ('j4');
685 !!!parse-error (type => 'NULL');
686 $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
## NOTE(review): The anonymous hash in the condition below is built
## anew each time evaluation reaches it; consider hoisting it out of
## the closure.
687 } elsif ($self->{next_char} <= 0x0008 or
688 (0x000E <= $self->{next_char} and $self->{next_char} <= 0x001F) or
689 (0x007F <= $self->{next_char} and $self->{next_char} <= 0x009F) or
690 (0xD800 <= $self->{next_char} and $self->{next_char} <= 0xDFFF) or
691 (0xFDD0 <= $self->{next_char} and $self->{next_char} <= 0xFDDF) or
692 ## ISSUE: U+FDE0-U+FDEF are not excluded
693 {
694 0xFFFE => 1, 0xFFFF => 1, 0x1FFFE => 1, 0x1FFFF => 1,
695 0x2FFFE => 1, 0x2FFFF => 1, 0x3FFFE => 1, 0x3FFFF => 1,
696 0x4FFFE => 1, 0x4FFFF => 1, 0x5FFFE => 1, 0x5FFFF => 1,
697 0x6FFFE => 1, 0x6FFFF => 1, 0x7FFFE => 1, 0x7FFFF => 1,
698 0x8FFFE => 1, 0x8FFFF => 1, 0x9FFFE => 1, 0x9FFFF => 1,
699 0xAFFFE => 1, 0xAFFFF => 1, 0xBFFFE => 1, 0xBFFFF => 1,
700 0xCFFFE => 1, 0xCFFFF => 1, 0xDFFFE => 1, 0xDFFFF => 1,
701 0xEFFFE => 1, 0xEFFFF => 1, 0xFFFFE => 1, 0xFFFFF => 1,
702 0x10FFFE => 1, 0x10FFFF => 1,
703 }->{$self->{next_char}}) {
704 !!!cp ('j5');
705 if ($self->{next_char} < 0x10000) {
706 !!!parse-error (type => 'control char',
707 text => (sprintf 'U+%04X', $self->{next_char}));
708 } else {
709 !!!parse-error (type => 'control char',
710 text => (sprintf 'U-%08X', $self->{next_char}));
711 }
712 }
713 };
714 $self->{prev_char} = [-1, -1, -1];
715 $self->{next_char} = -1;
716
## Bulk read: asks the input to read characters matching the pattern
## below (none of the caller's "specials" and none of the listed
## code points) into the caller's scalar. Returns 0 without reading
## when a looked-ahead character is pending. On a non-zero count the
## column counters are advanced and |prev_char| / |next_char| are reset.
717 $self->{read_until} = sub {
718 #my ($scalar, $specials_range, $offset) = @_;
719 my $specials_range = $_[1];
720 return 0 if defined $self->{next_next_char};
721 my $count = $input->manakai_read_until
722 ($_[0],
723 qr/(?![$specials_range\x{FDD0}-\x{FDDF}\x{FFFE}\x{FFFF}\x{1FFFE}\x{1FFFF}\x{2FFFE}\x{2FFFF}\x{3FFFE}\x{3FFFF}\x{4FFFE}\x{4FFFF}\x{5FFFE}\x{5FFFF}\x{6FFFE}\x{6FFFF}\x{7FFFE}\x{7FFFF}\x{8FFFE}\x{8FFFF}\x{9FFFE}\x{9FFFF}\x{AFFFE}\x{AFFFF}\x{BFFFE}\x{BFFFF}\x{CFFFE}\x{CFFFF}\x{DFFFE}\x{DFFFF}\x{EFFFE}\x{EFFFF}\x{FFFFE}\x{FFFFF}])[\x20-\x7E\xA0-\x{D7FF}\x{E000}-\x{10FFFD}]/,
724 $_[2]);
725 if ($count) {
726 $self->{column} += $count;
727 $self->{column_prev} += $count;
728 $self->{prev_char} = [-1, -1, -1];
729 $self->{next_char} = -1;
730 }
731 return $count;
732 }; # $self->{read_until}
733
## Default error handler: warn with the position taken from the token
## if one is given, otherwise from the |line| / |column| options.
734 my $onerror = $_[2] || sub {
735 my (%opt) = @_;
736 my $line = $opt{token} ? $opt{token}->{line} : $opt{line};
737 my $column = $opt{token} ? $opt{token}->{column} : $opt{column};
738 warn "Parse error ($opt{type}) at line $line column $column\n";
739 };
## The current line and column are supplied first, so values given by
## the caller for the same keys take precedence.
740 $self->{parse_error} = sub {
741 $onerror->(line => $self->{line}, column => $self->{column}, @_);
742 };
743
744 $self->_initialize_tokenizer;
745 $self->_initialize_tree_constructor;
746 $self->_construct_tree;
747 $self->_terminate_tree_constructor;
748
749 delete $self->{parse_error}; # remove loop
750
751 return $self->{document};
752 } # parse_char_stream
753
## Constructor. Returns a new parser object blessed into the given class.
##
## The object carries the |level| table, which maps the error level
## names must / should / warn / info / uncertain to one-letter codes,
## and placeholder callbacks: |set_next_char| sets |next_char| to -1,
## while |parse_error|, |change_encoding| and
## |application_cache_selection| do nothing.
754 sub new ($) {
755 my $class = shift;
756 my $self = bless {
757 level => {must => 'm',
758 should => 's',
759 warn => 'w',
760 info => 'i',
761 uncertain => 'u'},
762 }, $class;
## NOTE(review): This closure captures |$self|, so the object refers
## to itself until |set_next_char| is replaced (as |parse_char_stream|
## does) -- confirm that this cycle is acceptable.
763 $self->{set_next_char} = sub {
764 $self->{next_char} = -1;
765 };
766 $self->{parse_error} = sub {
767 #
768 };
769 $self->{change_encoding} = sub {
770 # if ($_[0] is a supported encoding) {
771 # run "change the encoding" algorithm;
772 # throw Whatpm::HTML::RestartParser (charset => $new_encoding);
773 # }
774 };
775 $self->{application_cache_selection} = sub {
776 #
777 };
778 return $self;
779 } # new
780
## Content model flags (one bit each) and the four content models built
## from them. PLAINTEXT has no flag set; the others are combinations of
## the |CM_*| bits, so a model can be tested with e.g.
## |$self->{content_model} & CM_ENTITY|.
781 sub CM_ENTITY () { 0b001 } # & markup in data
782 sub CM_LIMITED_MARKUP () { 0b010 } # < markup in data (limited)
783 sub CM_FULL_MARKUP () { 0b100 } # < markup in data (any)
784
785 sub PLAINTEXT_CONTENT_MODEL () { 0 }
786 sub CDATA_CONTENT_MODEL () { CM_LIMITED_MARKUP }
787 sub RCDATA_CONTENT_MODEL () { CM_ENTITY | CM_LIMITED_MARKUP }
788 sub PCDATA_CONTENT_MODEL () { CM_ENTITY | CM_FULL_MARKUP }
789
## Tokenizer states, stored in |$self->{state}|. These are plain
## sequential numbers, not bit flags. The numbers 1 and 12 are not in
## use; their definitions are commented out.
790 sub DATA_STATE () { 0 }
791 #sub ENTITY_DATA_STATE () { 1 }
792 sub TAG_OPEN_STATE () { 2 }
793 sub CLOSE_TAG_OPEN_STATE () { 3 }
794 sub TAG_NAME_STATE () { 4 }
795 sub BEFORE_ATTRIBUTE_NAME_STATE () { 5 }
796 sub ATTRIBUTE_NAME_STATE () { 6 }
797 sub AFTER_ATTRIBUTE_NAME_STATE () { 7 }
798 sub BEFORE_ATTRIBUTE_VALUE_STATE () { 8 }
799 sub ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE () { 9 }
800 sub ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE () { 10 }
801 sub ATTRIBUTE_VALUE_UNQUOTED_STATE () { 11 }
802 #sub ENTITY_IN_ATTRIBUTE_VALUE_STATE () { 12 }
803 sub MARKUP_DECLARATION_OPEN_STATE () { 13 }
804 sub COMMENT_START_STATE () { 14 }
805 sub COMMENT_START_DASH_STATE () { 15 }
806 sub COMMENT_STATE () { 16 }
807 sub COMMENT_END_STATE () { 17 }
808 sub COMMENT_END_DASH_STATE () { 18 }
809 sub BOGUS_COMMENT_STATE () { 19 }
810 sub DOCTYPE_STATE () { 20 }
811 sub BEFORE_DOCTYPE_NAME_STATE () { 21 }
812 sub DOCTYPE_NAME_STATE () { 22 }
813 sub AFTER_DOCTYPE_NAME_STATE () { 23 }
814 sub BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE () { 24 }
815 sub DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE () { 25 }
816 sub DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE () { 26 }
817 sub AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE () { 27 }
818 sub BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE () { 28 }
819 sub DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE () { 29 }
820 sub DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE () { 30 }
821 sub AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE () { 31 }
822 sub BOGUS_DOCTYPE_STATE () { 32 }
823 sub AFTER_ATTRIBUTE_VALUE_QUOTED_STATE () { 33 }
824 sub SELF_CLOSING_START_TAG_STATE () { 34 }
825 sub CDATA_SECTION_STATE () { 35 }
826 sub MD_HYPHEN_STATE () { 36 } # "markup declaration open state" in the spec
827 sub MD_DOCTYPE_STATE () { 37 } # "markup declaration open state" in the spec
828 sub MD_CDATA_STATE () { 38 } # "markup declaration open state" in the spec
829 sub CDATA_PCDATA_CLOSE_TAG_STATE () { 39 } # "close tag open state" in the spec
830 sub CDATA_SECTION_MSE1_STATE () { 40 } # "CDATA section state" in the spec
831 sub CDATA_SECTION_MSE2_STATE () { 41 } # "CDATA section state" in the spec
832 sub PUBLIC_STATE () { 42 } # "after DOCTYPE name state" in the spec
833 sub SYSTEM_STATE () { 43 } # "after DOCTYPE name state" in the spec
834 ## NOTE: "Entity data state", "entity in attribute value state", and
835 ## "consume a character reference" algorithm are jointly implemented
836 ## using the following six states:
837 sub ENTITY_STATE () { 44 }
838 sub ENTITY_HASH_STATE () { 45 }
839 sub NCR_NUM_STATE () { 46 }
840 sub HEXREF_X_STATE () { 47 }
841 sub HEXREF_HEX_STATE () { 48 }
842 sub ENTITY_NAME_STATE () { 49 }
843
## Token types: the possible values of a token's |{type}| member.
844 sub DOCTYPE_TOKEN () { 1 }
845 sub COMMENT_TOKEN () { 2 }
846 sub START_TAG_TOKEN () { 3 }
847 sub END_TAG_TOKEN () { 4 }
848 sub END_OF_FILE_TOKEN () { 5 }
849 sub CHARACTER_TOKEN () { 6 }
850
## Insertion modes. The |*_IMS| constants are group flags (one bit
## each, from bit 2 upwards); an individual |*_IM| mode is a bit-or of
## one or more group flags and, where needed, a value in the two lowest
## bits to tell the members of a group apart. This allows a whole group
## to be tested with |&|.
851 sub AFTER_HTML_IMS () { 0b100 }
852 sub HEAD_IMS () { 0b1000 }
853 sub BODY_IMS () { 0b10000 }
854 sub BODY_TABLE_IMS () { 0b100000 }
855 sub TABLE_IMS () { 0b1000000 }
856 sub ROW_IMS () { 0b10000000 }
857 sub BODY_AFTER_IMS () { 0b100000000 }
858 sub FRAME_IMS () { 0b1000000000 }
859 sub SELECT_IMS () { 0b10000000000 }
860 sub IN_FOREIGN_CONTENT_IM () { 0b100000000000 }
861 ## NOTE: "in foreign content" insertion mode is special; it is combined
862 ## with the secondary insertion mode. In this parser, they are stored
863 ## together in the bit-or'ed form.
864
865 ## NOTE: "initial" and "before html" insertion modes have no constants.
866
867 ## NOTE: "after after body" insertion mode.
868 sub AFTER_HTML_BODY_IM () { AFTER_HTML_IMS | BODY_AFTER_IMS }
869
870 ## NOTE: "after after frameset" insertion mode.
871 sub AFTER_HTML_FRAMESET_IM () { AFTER_HTML_IMS | FRAME_IMS }
872
873 sub IN_HEAD_IM () { HEAD_IMS | 0b00 }
874 sub IN_HEAD_NOSCRIPT_IM () { HEAD_IMS | 0b01 }
875 sub AFTER_HEAD_IM () { HEAD_IMS | 0b10 }
876 sub BEFORE_HEAD_IM () { HEAD_IMS | 0b11 }
877 sub IN_BODY_IM () { BODY_IMS }
878 sub IN_CELL_IM () { BODY_IMS | BODY_TABLE_IMS | 0b01 }
879 sub IN_CAPTION_IM () { BODY_IMS | BODY_TABLE_IMS | 0b10 }
880 sub IN_ROW_IM () { TABLE_IMS | ROW_IMS | 0b01 }
881 sub IN_TABLE_BODY_IM () { TABLE_IMS | ROW_IMS | 0b10 }
882 sub IN_TABLE_IM () { TABLE_IMS }
883 sub AFTER_BODY_IM () { BODY_AFTER_IMS }
884 sub IN_FRAMESET_IM () { FRAME_IMS | 0b01 }
885 sub AFTER_FRAMESET_IM () { FRAME_IMS | 0b10 }
886 sub IN_SELECT_IM () { SELECT_IMS | 0b01 }
887 sub IN_SELECT_IN_TABLE_IM () { SELECT_IMS | 0b10 }
## NOTE: This is the only mode defined here without a group flag.
888 sub IN_COLUMN_GROUP_IM () { 0b10 }
889
890 ## Implementations MUST act as if they used the state machine in the spec.
891
## Resets the tokenizer: sets the state to |DATA_STATE| and the content
## model to PCDATA, clears the current token, the current attribute,
## the last emitted start tag name and the self-closing flag, reads the
## first input character and empties the queue of pending tokens
## (|$self->{token}|).
892 sub _initialize_tokenizer ($) {
893 my $self = shift;
894 $self->{state} = DATA_STATE; # MUST
895 #$self->{state_keyword}; # initialized when used
896 #$self->{entity__value}; # initialized when used
897 #$self->{entity__match}; # initialized when used
898 $self->{content_model} = PCDATA_CONTENT_MODEL; # be
899 undef $self->{current_token};
900 undef $self->{current_attribute};
901 undef $self->{last_emitted_start_tag_name};
902 #$self->{prev_state}; # initialized when used
903 delete $self->{self_closing};
904 # $self->{next_char}
905 !!!next-input-character;
906 $self->{token} = [];
907 # $self->{escape}
908 } # _initialize_tokenizer
909
910 ## A token has:
911 ## ->{type} == DOCTYPE_TOKEN, START_TAG_TOKEN, END_TAG_TOKEN, COMMENT_TOKEN,
912 ## CHARACTER_TOKEN, or END_OF_FILE_TOKEN
913 ## ->{name} (DOCTYPE_TOKEN)
914 ## ->{tag_name} (START_TAG_TOKEN, END_TAG_TOKEN)
915 ## ->{public_identifier} (DOCTYPE_TOKEN)
916 ## ->{system_identifier} (DOCTYPE_TOKEN)
917 ## ->{quirks} == 1 or 0 (DOCTYPE_TOKEN): "force-quirks" flag
918 ## ->{attributes} isa HASH (START_TAG_TOKEN, END_TAG_TOKEN)
919 ## ->{name}
920 ## ->{value}
921 ## ->{has_reference} == 1 or 0
922 ## ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN)
923 ## NOTE: The "self-closing flag" is held as |$self->{self_closing}|.
924 ## |->{self_closing}| is used to save the value of |$self->{self_closing}|
925 ## while the token is pushed back to the stack.
926
927 ## Emitted token MUST immediately be handled by the tree construction state.
928
929 ## Before each step, UA MAY check to see if either one of the scripts in
930 ## "list of scripts that will execute as soon as possible" or the first
931 ## script in the "list of scripts that will execute asynchronously",
932 ## has completed loading. If one has, then it MUST be executed
933 ## and removed from the list.
934
935 ## TODO: Polytheistic slash SHOULD NOT be used. (Applied only to atheists.)
936 ## (This requirement was dropped from HTML5 spec, unfortunately.)
937
938 sub _get_next_token ($) {
939 my $self = shift;
940
941 if ($self->{self_closing}) {
942 !!!parse-error (type => 'nestc', token => $self->{current_token});
943 ## NOTE: The |self_closing| flag is only set by start tag token.
944 ## In addition, when a start tag token is emitted, it is always set to
945 ## |current_token|.
946 delete $self->{self_closing};
947 }
948
949 if (@{$self->{token}}) {
950 $self->{self_closing} = $self->{token}->[0]->{self_closing};
951 return shift @{$self->{token}};
952 }
953
954 A: {
955 if ($self->{state} == DATA_STATE) {
956 if ($self->{next_char} == 0x0026) { # &
957 if ($self->{content_model} & CM_ENTITY and # PCDATA | RCDATA
958 not $self->{escape}) {
959 !!!cp (1);
960 ## NOTE: In the spec, the tokenizer is switched to the
961 ## "entity data state". In this implementation, the tokenizer
962 ## is switched to the |ENTITY_STATE|, which is an implementation
963 ## of the "consume a character reference" algorithm.
964 $self->{entity_additional} = -1;
965 $self->{prev_state} = DATA_STATE;
966 $self->{state} = ENTITY_STATE;
967 !!!next-input-character;
968 redo A;
969 } else {
970 !!!cp (2);
971 #
972 }
973 } elsif ($self->{next_char} == 0x002D) { # -
974 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
975 unless ($self->{escape}) {
976 if ($self->{prev_char}->[0] == 0x002D and # -
977 $self->{prev_char}->[1] == 0x0021 and # !
978 $self->{prev_char}->[2] == 0x003C) { # <
979 !!!cp (3);
980 $self->{escape} = 1;
981 } else {
982 !!!cp (4);
983 }
984 } else {
985 !!!cp (5);
986 }
987 }
988
989 #
990 } elsif ($self->{next_char} == 0x003C) { # <
991 if ($self->{content_model} & CM_FULL_MARKUP or # PCDATA
992 (($self->{content_model} & CM_LIMITED_MARKUP) and # CDATA | RCDATA
993 not $self->{escape})) {
994 !!!cp (6);
995 $self->{state} = TAG_OPEN_STATE;
996 !!!next-input-character;
997 redo A;
998 } else {
999 !!!cp (7);
1000 #
1001 }
1002 } elsif ($self->{next_char} == 0x003E) { # >
1003 if ($self->{escape} and
1004 ($self->{content_model} & CM_LIMITED_MARKUP)) { # RCDATA | CDATA
1005 if ($self->{prev_char}->[0] == 0x002D and # -
1006 $self->{prev_char}->[1] == 0x002D) { # -
1007 !!!cp (8);
1008 delete $self->{escape};
1009 } else {
1010 !!!cp (9);
1011 }
1012 } else {
1013 !!!cp (10);
1014 }
1015
1016 #
1017 } elsif ($self->{next_char} == -1) {
1018 !!!cp (11);
1019 !!!emit ({type => END_OF_FILE_TOKEN,
1020 line => $self->{line}, column => $self->{column}});
1021 last A; ## TODO: ok?
1022 } else {
1023 !!!cp (12);
1024 }
1025 # Anything else
1026 my $token = {type => CHARACTER_TOKEN,
1027 data => chr $self->{next_char},
1028 line => $self->{line}, column => $self->{column},
1029 };
1030 $self->{read_until}->($token->{data}, q[-!<>&], length $token->{data});
1031
1032 ## Stay in the data state
1033 !!!next-input-character;
1034
1035 !!!emit ($token);
1036
1037 redo A;
1038 } elsif ($self->{state} == TAG_OPEN_STATE) {
1039 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
1040 if ($self->{next_char} == 0x002F) { # /
1041 !!!cp (15);
1042 !!!next-input-character;
1043 $self->{state} = CLOSE_TAG_OPEN_STATE;
1044 redo A;
1045 } else {
1046 !!!cp (16);
1047 ## reconsume
1048 $self->{state} = DATA_STATE;
1049
1050 !!!emit ({type => CHARACTER_TOKEN, data => '<',
1051 line => $self->{line_prev},
1052 column => $self->{column_prev},
1053 });
1054
1055 redo A;
1056 }
1057 } elsif ($self->{content_model} & CM_FULL_MARKUP) { # PCDATA
1058 if ($self->{next_char} == 0x0021) { # !
1059 !!!cp (17);
1060 $self->{state} = MARKUP_DECLARATION_OPEN_STATE;
1061 !!!next-input-character;
1062 redo A;
1063 } elsif ($self->{next_char} == 0x002F) { # /
1064 !!!cp (18);
1065 $self->{state} = CLOSE_TAG_OPEN_STATE;
1066 !!!next-input-character;
1067 redo A;
1068 } elsif (0x0041 <= $self->{next_char} and
1069 $self->{next_char} <= 0x005A) { # A..Z
1070 !!!cp (19);
1071 $self->{current_token}
1072 = {type => START_TAG_TOKEN,
1073 tag_name => chr ($self->{next_char} + 0x0020),
1074 line => $self->{line_prev},
1075 column => $self->{column_prev}};
1076 $self->{state} = TAG_NAME_STATE;
1077 !!!next-input-character;
1078 redo A;
1079 } elsif (0x0061 <= $self->{next_char} and
1080 $self->{next_char} <= 0x007A) { # a..z
1081 !!!cp (20);
1082 $self->{current_token} = {type => START_TAG_TOKEN,
1083 tag_name => chr ($self->{next_char}),
1084 line => $self->{line_prev},
1085 column => $self->{column_prev}};
1086 $self->{state} = TAG_NAME_STATE;
1087 !!!next-input-character;
1088 redo A;
1089 } elsif ($self->{next_char} == 0x003E) { # >
1090 !!!cp (21);
1091 !!!parse-error (type => 'empty start tag',
1092 line => $self->{line_prev},
1093 column => $self->{column_prev});
1094 $self->{state} = DATA_STATE;
1095 !!!next-input-character;
1096
1097 !!!emit ({type => CHARACTER_TOKEN, data => '<>',
1098 line => $self->{line_prev},
1099 column => $self->{column_prev},
1100 });
1101
1102 redo A;
1103 } elsif ($self->{next_char} == 0x003F) { # ?
1104 !!!cp (22);
1105 !!!parse-error (type => 'pio',
1106 line => $self->{line_prev},
1107 column => $self->{column_prev});
1108 $self->{state} = BOGUS_COMMENT_STATE;
1109 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
1110 line => $self->{line_prev},
1111 column => $self->{column_prev},
1112 };
1113 ## $self->{next_char} is intentionally left as is
1114 redo A;
1115 } else {
1116 !!!cp (23);
1117 !!!parse-error (type => 'bare stago',
1118 line => $self->{line_prev},
1119 column => $self->{column_prev});
1120 $self->{state} = DATA_STATE;
1121 ## reconsume
1122
1123 !!!emit ({type => CHARACTER_TOKEN, data => '<',
1124 line => $self->{line_prev},
1125 column => $self->{column_prev},
1126 });
1127
1128 redo A;
1129 }
1130 } else {
1131 die "$0: $self->{content_model} in tag open";
1132 }
1133 } elsif ($self->{state} == CLOSE_TAG_OPEN_STATE) {
1134 ## NOTE: The "close tag open state" in the spec is implemented as
1135 ## |CLOSE_TAG_OPEN_STATE| and |CDATA_PCDATA_CLOSE_TAG_STATE|.
1136
1137 my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</"
1138 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
1139 if (defined $self->{last_emitted_start_tag_name}) {
1140 $self->{state} = CDATA_PCDATA_CLOSE_TAG_STATE;
1141 $self->{state_keyword} = '';
1142 ## Reconsume.
1143 redo A;
1144 } else {
1145 ## No start tag token has ever been emitted
1146 ## NOTE: See <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>.
1147 !!!cp (28);
1148 $self->{state} = DATA_STATE;
1149 ## Reconsume.
1150 !!!emit ({type => CHARACTER_TOKEN, data => '</',
1151 line => $l, column => $c,
1152 });
1153 redo A;
1154 }
1155 }
1156
1157 if (0x0041 <= $self->{next_char} and
1158 $self->{next_char} <= 0x005A) { # A..Z
1159 !!!cp (29);
1160 $self->{current_token}
1161 = {type => END_TAG_TOKEN,
1162 tag_name => chr ($self->{next_char} + 0x0020),
1163 line => $l, column => $c};
1164 $self->{state} = TAG_NAME_STATE;
1165 !!!next-input-character;
1166 redo A;
1167 } elsif (0x0061 <= $self->{next_char} and
1168 $self->{next_char} <= 0x007A) { # a..z
1169 !!!cp (30);
1170 $self->{current_token} = {type => END_TAG_TOKEN,
1171 tag_name => chr ($self->{next_char}),
1172 line => $l, column => $c};
1173 $self->{state} = TAG_NAME_STATE;
1174 !!!next-input-character;
1175 redo A;
1176 } elsif ($self->{next_char} == 0x003E) { # >
1177 !!!cp (31);
1178 !!!parse-error (type => 'empty end tag',
1179 line => $self->{line_prev}, ## "<" in "</>"
1180 column => $self->{column_prev} - 1);
1181 $self->{state} = DATA_STATE;
1182 !!!next-input-character;
1183 redo A;
1184 } elsif ($self->{next_char} == -1) {
1185 !!!cp (32);
1186 !!!parse-error (type => 'bare etago');
1187 $self->{state} = DATA_STATE;
1188 # reconsume
1189
1190 !!!emit ({type => CHARACTER_TOKEN, data => '</',
1191 line => $l, column => $c,
1192 });
1193
1194 redo A;
1195 } else {
1196 !!!cp (33);
1197 !!!parse-error (type => 'bogus end tag');
1198 $self->{state} = BOGUS_COMMENT_STATE;
1199 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
1200 line => $self->{line_prev}, # "<" of "</"
1201 column => $self->{column_prev} - 1,
1202 };
1203 ## NOTE: $self->{next_char} is intentionally left as is.
1204 ## Although the "anything else" case of the spec does not explicitly
1205 ## state that the next input character is to be reconsumed,
1206 ## it will be included in the |data| of the comment token
1207 ## generated from the bogus end tag, as defined in the
1208 ## "bogus comment state" entry.
1209 redo A;
1210 }
1211 } elsif ($self->{state} == CDATA_PCDATA_CLOSE_TAG_STATE) {
1212 my $ch = substr $self->{last_emitted_start_tag_name}, length $self->{state_keyword}, 1;
1213 if (length $ch) {
1214 my $CH = $ch;
1215 $ch =~ tr/a-z/A-Z/;
1216 my $nch = chr $self->{next_char};
1217 if ($nch eq $ch or $nch eq $CH) {
1218 !!!cp (24);
1219 ## Stay in the state.
1220 $self->{state_keyword} .= $nch;
1221 !!!next-input-character;
1222 redo A;
1223 } else {
1224 !!!cp (25);
1225 $self->{state} = DATA_STATE;
1226 ## Reconsume.
1227 !!!emit ({type => CHARACTER_TOKEN,
1228 data => '</' . $self->{state_keyword},
1229 line => $self->{line_prev},
1230 column => $self->{column_prev} - 1 - length $self->{state_keyword},
1231 });
1232 redo A;
1233 }
1234 } else { # after "<{tag-name}"
1235 unless ({
1236 0x0009 => 1, # HT
1237 0x000A => 1, # LF
1238 0x000B => 1, # VT
1239 0x000C => 1, # FF
1240 0x0020 => 1, # SP
1241 0x003E => 1, # >
1242 0x002F => 1, # /
1243 -1 => 1, # EOF
1244 }->{$self->{next_char}}) {
1245 !!!cp (26);
1246 ## Reconsume.
1247 $self->{state} = DATA_STATE;
1248 !!!emit ({type => CHARACTER_TOKEN,
1249 data => '</' . $self->{state_keyword},
1250 line => $self->{line_prev},
1251 column => $self->{column_prev} - 1 - length $self->{state_keyword},
1252 });
1253 redo A;
1254 } else {
1255 !!!cp (27);
1256 $self->{current_token}
1257 = {type => END_TAG_TOKEN,
1258 tag_name => $self->{last_emitted_start_tag_name},
1259 line => $self->{line_prev},
1260 column => $self->{column_prev} - 1 - length $self->{state_keyword}};
1261 $self->{state} = TAG_NAME_STATE;
1262 ## Reconsume.
1263 redo A;
1264 }
1265 }
1266 } elsif ($self->{state} == TAG_NAME_STATE) {
1267 if ($self->{next_char} == 0x0009 or # HT
1268 $self->{next_char} == 0x000A or # LF
1269 $self->{next_char} == 0x000B or # VT
1270 $self->{next_char} == 0x000C or # FF
1271 $self->{next_char} == 0x0020) { # SP
1272 !!!cp (34);
1273 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1274 !!!next-input-character;
1275 redo A;
1276 } elsif ($self->{next_char} == 0x003E) { # >
1277 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1278 !!!cp (35);
1279 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1280 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1281 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1282 #if ($self->{current_token}->{attributes}) {
1283 # ## NOTE: This should never be reached.
1284 # !!! cp (36);
1285 # !!! parse-error (type => 'end tag attribute');
1286 #} else {
1287 !!!cp (37);
1288 #}
1289 } else {
1290 die "$0: $self->{current_token}->{type}: Unknown token type";
1291 }
1292 $self->{state} = DATA_STATE;
1293 !!!next-input-character;
1294
1295 !!!emit ($self->{current_token}); # start tag or end tag
1296
1297 redo A;
1298 } elsif (0x0041 <= $self->{next_char} and
1299 $self->{next_char} <= 0x005A) { # A..Z
1300 !!!cp (38);
1301 $self->{current_token}->{tag_name} .= chr ($self->{next_char} + 0x0020);
1302 # start tag or end tag
1303 ## Stay in this state
1304 !!!next-input-character;
1305 redo A;
1306 } elsif ($self->{next_char} == -1) {
1307 !!!parse-error (type => 'unclosed tag');
1308 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1309 !!!cp (39);
1310 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1311 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1312 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1313 #if ($self->{current_token}->{attributes}) {
1314 # ## NOTE: This state should never be reached.
1315 # !!! cp (40);
1316 # !!! parse-error (type => 'end tag attribute');
1317 #} else {
1318 !!!cp (41);
1319 #}
1320 } else {
1321 die "$0: $self->{current_token}->{type}: Unknown token type";
1322 }
1323 $self->{state} = DATA_STATE;
1324 # reconsume
1325
1326 !!!emit ($self->{current_token}); # start tag or end tag
1327
1328 redo A;
1329 } elsif ($self->{next_char} == 0x002F) { # /
1330 !!!cp (42);
1331 $self->{state} = SELF_CLOSING_START_TAG_STATE;
1332 !!!next-input-character;
1333 redo A;
1334 } else {
1335 !!!cp (44);
1336 $self->{current_token}->{tag_name} .= chr $self->{next_char};
1337 # start tag or end tag
1338 ## Stay in the state
1339 !!!next-input-character;
1340 redo A;
1341 }
1342 } elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) {
1343 if ($self->{next_char} == 0x0009 or # HT
1344 $self->{next_char} == 0x000A or # LF
1345 $self->{next_char} == 0x000B or # VT
1346 $self->{next_char} == 0x000C or # FF
1347 $self->{next_char} == 0x0020) { # SP
1348 !!!cp (45);
1349 ## Stay in the state
1350 !!!next-input-character;
1351 redo A;
1352 } elsif ($self->{next_char} == 0x003E) { # >
1353 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1354 !!!cp (46);
1355 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1356 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1357 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1358 if ($self->{current_token}->{attributes}) {
1359 !!!cp (47);
1360 !!!parse-error (type => 'end tag attribute');
1361 } else {
1362 !!!cp (48);
1363 }
1364 } else {
1365 die "$0: $self->{current_token}->{type}: Unknown token type";
1366 }
1367 $self->{state} = DATA_STATE;
1368 !!!next-input-character;
1369
1370 !!!emit ($self->{current_token}); # start tag or end tag
1371
1372 redo A;
1373 } elsif (0x0041 <= $self->{next_char} and
1374 $self->{next_char} <= 0x005A) { # A..Z
1375 !!!cp (49);
1376 $self->{current_attribute}
1377 = {name => chr ($self->{next_char} + 0x0020),
1378 value => '',
1379 line => $self->{line}, column => $self->{column}};
1380 $self->{state} = ATTRIBUTE_NAME_STATE;
1381 !!!next-input-character;
1382 redo A;
1383 } elsif ($self->{next_char} == 0x002F) { # /
1384 !!!cp (50);
1385 $self->{state} = SELF_CLOSING_START_TAG_STATE;
1386 !!!next-input-character;
1387 redo A;
1388 } elsif ($self->{next_char} == -1) {
1389 !!!parse-error (type => 'unclosed tag');
1390 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1391 !!!cp (52);
1392 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1393 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1394 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1395 if ($self->{current_token}->{attributes}) {
1396 !!!cp (53);
1397 !!!parse-error (type => 'end tag attribute');
1398 } else {
1399 !!!cp (54);
1400 }
1401 } else {
1402 die "$0: $self->{current_token}->{type}: Unknown token type";
1403 }
1404 $self->{state} = DATA_STATE;
1405 # reconsume
1406
1407 !!!emit ($self->{current_token}); # start tag or end tag
1408
1409 redo A;
1410 } else {
1411 if ({
1412 0x0022 => 1, # "
1413 0x0027 => 1, # '
1414 0x003D => 1, # =
1415 }->{$self->{next_char}}) {
1416 !!!cp (55);
1417 !!!parse-error (type => 'bad attribute name');
1418 } else {
1419 !!!cp (56);
1420 }
1421 $self->{current_attribute}
1422 = {name => chr ($self->{next_char}),
1423 value => '',
1424 line => $self->{line}, column => $self->{column}};
1425 $self->{state} = ATTRIBUTE_NAME_STATE;
1426 !!!next-input-character;
1427 redo A;
1428 }
1429 } elsif ($self->{state} == ATTRIBUTE_NAME_STATE) {
1430 my $before_leave = sub {
1431 if (exists $self->{current_token}->{attributes} # start tag or end tag
1432 ->{$self->{current_attribute}->{name}}) { # MUST
1433 !!!cp (57);
1434 !!!parse-error (type => 'duplicate attribute', text => $self->{current_attribute}->{name}, line => $self->{current_attribute}->{line}, column => $self->{current_attribute}->{column});
1435 ## Discard $self->{current_attribute} # MUST
1436 } else {
1437 !!!cp (58);
1438 $self->{current_token}->{attributes}->{$self->{current_attribute}->{name}}
1439 = $self->{current_attribute};
1440 }
1441 }; # $before_leave
1442
1443 if ($self->{next_char} == 0x0009 or # HT
1444 $self->{next_char} == 0x000A or # LF
1445 $self->{next_char} == 0x000B or # VT
1446 $self->{next_char} == 0x000C or # FF
1447 $self->{next_char} == 0x0020) { # SP
1448 !!!cp (59);
1449 $before_leave->();
1450 $self->{state} = AFTER_ATTRIBUTE_NAME_STATE;
1451 !!!next-input-character;
1452 redo A;
1453 } elsif ($self->{next_char} == 0x003D) { # =
1454 !!!cp (60);
1455 $before_leave->();
1456 $self->{state} = BEFORE_ATTRIBUTE_VALUE_STATE;
1457 !!!next-input-character;
1458 redo A;
1459 } elsif ($self->{next_char} == 0x003E) { # >
1460 $before_leave->();
1461 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1462 !!!cp (61);
1463 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1464 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1465 !!!cp (62);
1466 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1467 if ($self->{current_token}->{attributes}) {
1468 !!!parse-error (type => 'end tag attribute');
1469 }
1470 } else {
1471 die "$0: $self->{current_token}->{type}: Unknown token type";
1472 }
1473 $self->{state} = DATA_STATE;
1474 !!!next-input-character;
1475
1476 !!!emit ($self->{current_token}); # start tag or end tag
1477
1478 redo A;
1479 } elsif (0x0041 <= $self->{next_char} and
1480 $self->{next_char} <= 0x005A) { # A..Z
1481 !!!cp (63);
1482 $self->{current_attribute}->{name} .= chr ($self->{next_char} + 0x0020);
1483 ## Stay in the state
1484 !!!next-input-character;
1485 redo A;
1486 } elsif ($self->{next_char} == 0x002F) { # /
1487 !!!cp (64);
1488 $before_leave->();
1489 $self->{state} = SELF_CLOSING_START_TAG_STATE;
1490 !!!next-input-character;
1491 redo A;
1492 } elsif ($self->{next_char} == -1) {
1493 !!!parse-error (type => 'unclosed tag');
1494 $before_leave->();
1495 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1496 !!!cp (66);
1497 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1498 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1499 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1500 if ($self->{current_token}->{attributes}) {
1501 !!!cp (67);
1502 !!!parse-error (type => 'end tag attribute');
1503 } else {
1504 ## NOTE: This state should never be reached.
1505 !!!cp (68);
1506 }
1507 } else {
1508 die "$0: $self->{current_token}->{type}: Unknown token type";
1509 }
1510 $self->{state} = DATA_STATE;
1511 # reconsume
1512
1513 !!!emit ($self->{current_token}); # start tag or end tag
1514
1515 redo A;
1516 } else {
1517 if ($self->{next_char} == 0x0022 or # "
1518 $self->{next_char} == 0x0027) { # '
1519 !!!cp (69);
1520 !!!parse-error (type => 'bad attribute name');
1521 } else {
1522 !!!cp (70);
1523 }
1524 $self->{current_attribute}->{name} .= chr ($self->{next_char});
1525 ## Stay in the state
1526 !!!next-input-character;
1527 redo A;
1528 }
1529 } elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) {
1530 if ($self->{next_char} == 0x0009 or # HT
1531 $self->{next_char} == 0x000A or # LF
1532 $self->{next_char} == 0x000B or # VT
1533 $self->{next_char} == 0x000C or # FF
1534 $self->{next_char} == 0x0020) { # SP
1535 !!!cp (71);
1536 ## Stay in the state
1537 !!!next-input-character;
1538 redo A;
1539 } elsif ($self->{next_char} == 0x003D) { # =
1540 !!!cp (72);
1541 $self->{state} = BEFORE_ATTRIBUTE_VALUE_STATE;
1542 !!!next-input-character;
1543 redo A;
1544 } elsif ($self->{next_char} == 0x003E) { # >
1545 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1546 !!!cp (73);
1547 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1548 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1549 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1550 if ($self->{current_token}->{attributes}) {
1551 !!!cp (74);
1552 !!!parse-error (type => 'end tag attribute');
1553 } else {
1554 ## NOTE: This state should never be reached.
1555 !!!cp (75);
1556 }
1557 } else {
1558 die "$0: $self->{current_token}->{type}: Unknown token type";
1559 }
1560 $self->{state} = DATA_STATE;
1561 !!!next-input-character;
1562
1563 !!!emit ($self->{current_token}); # start tag or end tag
1564
1565 redo A;
1566 } elsif (0x0041 <= $self->{next_char} and
1567 $self->{next_char} <= 0x005A) { # A..Z
1568 !!!cp (76);
1569 $self->{current_attribute}
1570 = {name => chr ($self->{next_char} + 0x0020),
1571 value => '',
1572 line => $self->{line}, column => $self->{column}};
1573 $self->{state} = ATTRIBUTE_NAME_STATE;
1574 !!!next-input-character;
1575 redo A;
1576 } elsif ($self->{next_char} == 0x002F) { # /
1577 !!!cp (77);
1578 $self->{state} = SELF_CLOSING_START_TAG_STATE;
1579 !!!next-input-character;
1580 redo A;
1581 } elsif ($self->{next_char} == -1) {
1582 !!!parse-error (type => 'unclosed tag');
1583 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1584 !!!cp (79);
1585 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1586 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1587 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1588 if ($self->{current_token}->{attributes}) {
1589 !!!cp (80);
1590 !!!parse-error (type => 'end tag attribute');
1591 } else {
1592 ## NOTE: This state should never be reached.
1593 !!!cp (81);
1594 }
1595 } else {
1596 die "$0: $self->{current_token}->{type}: Unknown token type";
1597 }
1598 $self->{state} = DATA_STATE;
1599 # reconsume
1600
1601 !!!emit ($self->{current_token}); # start tag or end tag
1602
1603 redo A;
1604 } else {
1605 if ($self->{next_char} == 0x0022 or # "
1606 $self->{next_char} == 0x0027) { # '
1607 !!!cp (78);
1608 !!!parse-error (type => 'bad attribute name');
1609 } else {
1610 !!!cp (82);
1611 }
1612 $self->{current_attribute}
1613 = {name => chr ($self->{next_char}),
1614 value => '',
1615 line => $self->{line}, column => $self->{column}};
1616 $self->{state} = ATTRIBUTE_NAME_STATE;
1617 !!!next-input-character;
1618 redo A;
1619 }
1620 } elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) {
1621 if ($self->{next_char} == 0x0009 or # HT
1622 $self->{next_char} == 0x000A or # LF
1623 $self->{next_char} == 0x000B or # VT
1624 $self->{next_char} == 0x000C or # FF
1625 $self->{next_char} == 0x0020) { # SP
1626 !!!cp (83);
1627 ## Stay in the state
1628 !!!next-input-character;
1629 redo A;
1630 } elsif ($self->{next_char} == 0x0022) { # "
1631 !!!cp (84);
1632 $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
1633 !!!next-input-character;
1634 redo A;
1635 } elsif ($self->{next_char} == 0x0026) { # &
1636 !!!cp (85);
1637 $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
1638 ## reconsume
1639 redo A;
1640 } elsif ($self->{next_char} == 0x0027) { # '
1641 !!!cp (86);
1642 $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
1643 !!!next-input-character;
1644 redo A;
1645 } elsif ($self->{next_char} == 0x003E) { # >
1646 !!!parse-error (type => 'empty unquoted attribute value');
1647 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1648 !!!cp (87);
1649 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1650 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1651 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1652 if ($self->{current_token}->{attributes}) {
1653 !!!cp (88);
1654 !!!parse-error (type => 'end tag attribute');
1655 } else {
1656 ## NOTE: This state should never be reached.
1657 !!!cp (89);
1658 }
1659 } else {
1660 die "$0: $self->{current_token}->{type}: Unknown token type";
1661 }
1662 $self->{state} = DATA_STATE;
1663 !!!next-input-character;
1664
1665 !!!emit ($self->{current_token}); # start tag or end tag
1666
1667 redo A;
1668 } elsif ($self->{next_char} == -1) {
1669 !!!parse-error (type => 'unclosed tag');
1670 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1671 !!!cp (90);
1672 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1673 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1674 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1675 if ($self->{current_token}->{attributes}) {
1676 !!!cp (91);
1677 !!!parse-error (type => 'end tag attribute');
1678 } else {
1679 ## NOTE: This state should never be reached.
1680 !!!cp (92);
1681 }
1682 } else {
1683 die "$0: $self->{current_token}->{type}: Unknown token type";
1684 }
1685 $self->{state} = DATA_STATE;
1686 ## reconsume
1687
1688 !!!emit ($self->{current_token}); # start tag or end tag
1689
1690 redo A;
1691 } else {
1692 if ($self->{next_char} == 0x003D) { # =
1693 !!!cp (93);
1694 !!!parse-error (type => 'bad attribute value');
1695 } else {
1696 !!!cp (94);
1697 }
1698 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1699 $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
1700 !!!next-input-character;
1701 redo A;
1702 }
1703 } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {
1704 if ($self->{next_char} == 0x0022) { # "
1705 !!!cp (95);
1706 $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1707 !!!next-input-character;
1708 redo A;
1709 } elsif ($self->{next_char} == 0x0026) { # &
1710 !!!cp (96);
1711 ## NOTE: In the spec, the tokenizer is switched to the
1712 ## "entity in attribute value state". In this implementation, the
1713 ## tokenizer is switched to the |ENTITY_STATE|, which is an
1714 ## implementation of the "consume a character reference" algorithm.
1715 $self->{prev_state} = $self->{state};
1716 $self->{entity_additional} = 0x0022; # "
1717 $self->{state} = ENTITY_STATE;
1718 !!!next-input-character;
1719 redo A;
1720 } elsif ($self->{next_char} == -1) {
1721 !!!parse-error (type => 'unclosed attribute value');
1722 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1723 !!!cp (97);
1724 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1725 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1726 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1727 if ($self->{current_token}->{attributes}) {
1728 !!!cp (98);
1729 !!!parse-error (type => 'end tag attribute');
1730 } else {
1731 ## NOTE: This state should never be reached.
1732 !!!cp (99);
1733 }
1734 } else {
1735 die "$0: $self->{current_token}->{type}: Unknown token type";
1736 }
1737 $self->{state} = DATA_STATE;
1738 ## reconsume
1739
1740 !!!emit ($self->{current_token}); # start tag or end tag
1741
1742 redo A;
1743 } else {
1744 !!!cp (100);
1745 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1746 $self->{read_until}->($self->{current_attribute}->{value},
1747 q["&],
1748 length $self->{current_attribute}->{value});
1749
1750 ## Stay in the state
1751 !!!next-input-character;
1752 redo A;
1753 }
1754 } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {
1755 if ($self->{next_char} == 0x0027) { # '
1756 !!!cp (101);
1757 $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1758 !!!next-input-character;
1759 redo A;
1760 } elsif ($self->{next_char} == 0x0026) { # &
1761 !!!cp (102);
1762 ## NOTE: In the spec, the tokenizer is switched to the
1763 ## "entity in attribute value state". In this implementation, the
1764 ## tokenizer is switched to the |ENTITY_STATE|, which is an
1765 ## implementation of the "consume a character reference" algorithm.
1766 $self->{entity_additional} = 0x0027; # '
1767 $self->{prev_state} = $self->{state};
1768 $self->{state} = ENTITY_STATE;
1769 !!!next-input-character;
1770 redo A;
1771 } elsif ($self->{next_char} == -1) {
1772 !!!parse-error (type => 'unclosed attribute value');
1773 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1774 !!!cp (103);
1775 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1776 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1777 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1778 if ($self->{current_token}->{attributes}) {
1779 !!!cp (104);
1780 !!!parse-error (type => 'end tag attribute');
1781 } else {
1782 ## NOTE: This state should never be reached.
1783 !!!cp (105);
1784 }
1785 } else {
1786 die "$0: $self->{current_token}->{type}: Unknown token type";
1787 }
1788 $self->{state} = DATA_STATE;
1789 ## reconsume
1790
1791 !!!emit ($self->{current_token}); # start tag or end tag
1792
1793 redo A;
1794 } else {
1795 !!!cp (106);
1796 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1797 $self->{read_until}->($self->{current_attribute}->{value},
1798 q['&],
1799 length $self->{current_attribute}->{value});
1800
1801 ## Stay in the state
1802 !!!next-input-character;
1803 redo A;
1804 }
1805 } elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) {
1806 if ($self->{next_char} == 0x0009 or # HT
1807 $self->{next_char} == 0x000A or # LF
1808 $self->{next_char} == 0x000B or # VT
1809 $self->{next_char} == 0x000C or # FF
1810 $self->{next_char} == 0x0020) { # SP
1811 !!!cp (107);
1812 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1813 !!!next-input-character;
1814 redo A;
1815 } elsif ($self->{next_char} == 0x0026) { # &
1816 !!!cp (108);
1817 ## NOTE: In the spec, the tokenizer is switched to the
1818 ## "entity in attribute value state". In this implementation, the
1819 ## tokenizer is switched to the |ENTITY_STATE|, which is an
1820 ## implementation of the "consume a character reference" algorithm.
1821 $self->{entity_additional} = -1;
1822 $self->{prev_state} = $self->{state};
1823 $self->{state} = ENTITY_STATE;
1824 !!!next-input-character;
1825 redo A;
1826 } elsif ($self->{next_char} == 0x003E) { # >
1827 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1828 !!!cp (109);
1829 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1830 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1831 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1832 if ($self->{current_token}->{attributes}) {
1833 !!!cp (110);
1834 !!!parse-error (type => 'end tag attribute');
1835 } else {
1836 ## NOTE: This state should never be reached.
1837 !!!cp (111);
1838 }
1839 } else {
1840 die "$0: $self->{current_token}->{type}: Unknown token type";
1841 }
1842 $self->{state} = DATA_STATE;
1843 !!!next-input-character;
1844
1845 !!!emit ($self->{current_token}); # start tag or end tag
1846
1847 redo A;
1848 } elsif ($self->{next_char} == -1) {
1849 !!!parse-error (type => 'unclosed tag');
1850 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1851 !!!cp (112);
1852 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1853 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1854 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1855 if ($self->{current_token}->{attributes}) {
1856 !!!cp (113);
1857 !!!parse-error (type => 'end tag attribute');
1858 } else {
1859 ## NOTE: This state should never be reached.
1860 !!!cp (114);
1861 }
1862 } else {
1863 die "$0: $self->{current_token}->{type}: Unknown token type";
1864 }
1865 $self->{state} = DATA_STATE;
1866 ## reconsume
1867
1868 !!!emit ($self->{current_token}); # start tag or end tag
1869
1870 redo A;
1871 } else {
1872 if ({
1873 0x0022 => 1, # "
1874 0x0027 => 1, # '
1875 0x003D => 1, # =
1876 }->{$self->{next_char}}) {
1877 !!!cp (115);
1878 !!!parse-error (type => 'bad attribute value');
1879 } else {
1880 !!!cp (116);
1881 }
1882 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1883 $self->{read_until}->($self->{current_attribute}->{value},
1884 q["'=& >],
1885 length $self->{current_attribute}->{value});
1886
1887 ## Stay in the state
1888 !!!next-input-character;
1889 redo A;
1890 }
1891 } elsif ($self->{state} == AFTER_ATTRIBUTE_VALUE_QUOTED_STATE) {
1892 if ($self->{next_char} == 0x0009 or # HT
1893 $self->{next_char} == 0x000A or # LF
1894 $self->{next_char} == 0x000B or # VT
1895 $self->{next_char} == 0x000C or # FF
1896 $self->{next_char} == 0x0020) { # SP
1897 !!!cp (118);
1898 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1899 !!!next-input-character;
1900 redo A;
1901 } elsif ($self->{next_char} == 0x003E) { # >
1902 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1903 !!!cp (119);
1904 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1905 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1906 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1907 if ($self->{current_token}->{attributes}) {
1908 !!!cp (120);
1909 !!!parse-error (type => 'end tag attribute');
1910 } else {
1911 ## NOTE: This state should never be reached.
1912 !!!cp (121);
1913 }
1914 } else {
1915 die "$0: $self->{current_token}->{type}: Unknown token type";
1916 }
1917 $self->{state} = DATA_STATE;
1918 !!!next-input-character;
1919
1920 !!!emit ($self->{current_token}); # start tag or end tag
1921
1922 redo A;
1923 } elsif ($self->{next_char} == 0x002F) { # /
1924 !!!cp (122);
1925 $self->{state} = SELF_CLOSING_START_TAG_STATE;
1926 !!!next-input-character;
1927 redo A;
1928 } elsif ($self->{next_char} == -1) {
1929 !!!parse-error (type => 'unclosed tag');
1930 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1931 !!!cp (122.3);
1932 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1933 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1934 if ($self->{current_token}->{attributes}) {
1935 !!!cp (122.1);
1936 !!!parse-error (type => 'end tag attribute');
1937 } else {
1938 ## NOTE: This state should never be reached.
1939 !!!cp (122.2);
1940 }
1941 } else {
1942 die "$0: $self->{current_token}->{type}: Unknown token type";
1943 }
1944 $self->{state} = DATA_STATE;
1945 ## Reconsume.
1946 !!!emit ($self->{current_token}); # start tag or end tag
1947 redo A;
1948 } else {
1949 !!!cp ('124.1');
1950 !!!parse-error (type => 'no space between attributes');
1951 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1952 ## reconsume
1953 redo A;
1954 }
1955 } elsif ($self->{state} == SELF_CLOSING_START_TAG_STATE) {
1956 if ($self->{next_char} == 0x003E) { # >
1957 if ($self->{current_token}->{type} == END_TAG_TOKEN) {
1958 !!!cp ('124.2');
1959 !!!parse-error (type => 'nestc', token => $self->{current_token});
1960 ## TODO: Different type than slash in start tag
1961 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1962 if ($self->{current_token}->{attributes}) {
1963 !!!cp ('124.4');
1964 !!!parse-error (type => 'end tag attribute');
1965 } else {
1966 !!!cp ('124.5');
1967 }
1968 ## TODO: Test |<title></title/>|
1969 } else {
1970 !!!cp ('124.3');
1971 $self->{self_closing} = 1;
1972 }
1973
1974 $self->{state} = DATA_STATE;
1975 !!!next-input-character;
1976
1977 !!!emit ($self->{current_token}); # start tag or end tag
1978
1979 redo A;
1980 } elsif ($self->{next_char} == -1) {
1981 !!!parse-error (type => 'unclosed tag');
1982 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1983 !!!cp (124.7);
1984 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1985 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1986 if ($self->{current_token}->{attributes}) {
1987 !!!cp (124.5);
1988 !!!parse-error (type => 'end tag attribute');
1989 } else {
1990 ## NOTE: This state should never be reached.
1991 !!!cp (124.6);
1992 }
1993 } else {
1994 die "$0: $self->{current_token}->{type}: Unknown token type";
1995 }
1996 $self->{state} = DATA_STATE;
1997 ## Reconsume.
1998 !!!emit ($self->{current_token}); # start tag or end tag
1999 redo A;
2000 } else {
2001 !!!cp ('124.4');
2002 !!!parse-error (type => 'nestc');
2003 ## TODO: This error type is wrong.
2004 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
2005 ## Reconsume.
2006 redo A;
2007 }
2008 } elsif ($self->{state} == BOGUS_COMMENT_STATE) {
2009 ## (only happens in the PCDATA state)
2010
2011 ## NOTE: Unlike the spec's "bogus comment state", this implementation
2012 ## consumes characters on a one-by-one basis.
2013
2014 if ($self->{next_char} == 0x003E) { # >
2015 !!!cp (124);
2016 $self->{state} = DATA_STATE;
2017 !!!next-input-character;
2018
2019 !!!emit ($self->{current_token}); # comment
2020 redo A;
2021 } elsif ($self->{next_char} == -1) {
2022 !!!cp (125);
2023 $self->{state} = DATA_STATE;
2024 ## reconsume
2025
2026 !!!emit ($self->{current_token}); # comment
2027 redo A;
2028 } else {
2029 !!!cp (126);
2030 $self->{current_token}->{data} .= chr ($self->{next_char}); # comment
2031 $self->{read_until}->($self->{current_token}->{data},
2032 q[>],
2033 length $self->{current_token}->{data});
2034
2035 ## Stay in the state.
2036 !!!next-input-character;
2037 redo A;
2038 }
2039 } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {
2040 ## (only happens in the PCDATA state)
2041
2042 if ($self->{next_char} == 0x002D) { # -
2043 !!!cp (133);
2044 $self->{state} = MD_HYPHEN_STATE;
2045 !!!next-input-character;
2046 redo A;
2047 } elsif ($self->{next_char} == 0x0044 or # D
2048 $self->{next_char} == 0x0064) { # d
2049 ## ASCII case-insensitive.
2050 !!!cp (130);
2051 $self->{state} = MD_DOCTYPE_STATE;
2052 $self->{state_keyword} = chr $self->{next_char};
2053 !!!next-input-character;
2054 redo A;
2055 } elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM and
2056 $self->{open_elements}->[-1]->[1] & FOREIGN_EL and
2057 $self->{next_char} == 0x005B) { # [
2058 !!!cp (135.4);
2059 $self->{state} = MD_CDATA_STATE;
2060 $self->{state_keyword} = '[';
2061 !!!next-input-character;
2062 redo A;
2063 } else {
2064 !!!cp (136);
2065 }
2066
2067 !!!parse-error (type => 'bogus comment',
2068 line => $self->{line_prev},
2069 column => $self->{column_prev} - 1);
2070 ## Reconsume.
2071 $self->{state} = BOGUS_COMMENT_STATE;
2072 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
2073 line => $self->{line_prev},
2074 column => $self->{column_prev} - 1,
2075 };
2076 redo A;
2077 } elsif ($self->{state} == MD_HYPHEN_STATE) {
2078 if ($self->{next_char} == 0x002D) { # -
2079 !!!cp (127);
2080 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
2081 line => $self->{line_prev},
2082 column => $self->{column_prev} - 2,
2083 };
2084 $self->{state} = COMMENT_START_STATE;
2085 !!!next-input-character;
2086 redo A;
2087 } else {
2088 !!!cp (128);
2089 !!!parse-error (type => 'bogus comment',
2090 line => $self->{line_prev},
2091 column => $self->{column_prev} - 2);
2092 $self->{state} = BOGUS_COMMENT_STATE;
2093 ## Reconsume.
2094 $self->{current_token} = {type => COMMENT_TOKEN,
2095 data => '-',
2096 line => $self->{line_prev},
2097 column => $self->{column_prev} - 2,
2098 };
2099 redo A;
2100 }
2101 } elsif ($self->{state} == MD_DOCTYPE_STATE) {
2102 ## ASCII case-insensitive.
2103 if ($self->{next_char} == [
2104 undef,
2105 0x004F, # O
2106 0x0043, # C
2107 0x0054, # T
2108 0x0059, # Y
2109 0x0050, # P
2110 ]->[length $self->{state_keyword}] or
2111 $self->{next_char} == [
2112 undef,
2113 0x006F, # o
2114 0x0063, # c
2115 0x0074, # t
2116 0x0079, # y
2117 0x0070, # p
2118 ]->[length $self->{state_keyword}]) {
2119 !!!cp (131);
2120 ## Stay in the state.
2121 $self->{state_keyword} .= chr $self->{next_char};
2122 !!!next-input-character;
2123 redo A;
2124 } elsif ((length $self->{state_keyword}) == 6 and
2125 ($self->{next_char} == 0x0045 or # E
2126 $self->{next_char} == 0x0065)) { # e
2127 !!!cp (129);
2128 $self->{state} = DOCTYPE_STATE;
2129 $self->{current_token} = {type => DOCTYPE_TOKEN,
2130 quirks => 1,
2131 line => $self->{line_prev},
2132 column => $self->{column_prev} - 7,
2133 };
2134 !!!next-input-character;
2135 redo A;
2136 } else {
2137 !!!cp (132);
2138 !!!parse-error (type => 'bogus comment',
2139 line => $self->{line_prev},
2140 column => $self->{column_prev} - 1 - length $self->{state_keyword});
2141 $self->{state} = BOGUS_COMMENT_STATE;
2142 ## Reconsume.
2143 $self->{current_token} = {type => COMMENT_TOKEN,
2144 data => $self->{state_keyword},
2145 line => $self->{line_prev},
2146 column => $self->{column_prev} - 1 - length $self->{state_keyword},
2147 };
2148 redo A;
2149 }
2150 } elsif ($self->{state} == MD_CDATA_STATE) {
2151 if ($self->{next_char} == {
2152 '[' => 0x0043, # C
2153 '[C' => 0x0044, # D
2154 '[CD' => 0x0041, # A
2155 '[CDA' => 0x0054, # T
2156 '[CDAT' => 0x0041, # A
2157 }->{$self->{state_keyword}}) {
2158 !!!cp (135.1);
2159 ## Stay in the state.
2160 $self->{state_keyword} .= chr $self->{next_char};
2161 !!!next-input-character;
2162 redo A;
2163 } elsif ($self->{state_keyword} eq '[CDATA' and
2164 $self->{next_char} == 0x005B) { # [
2165 !!!cp (135.2);
2166 $self->{current_token} = {type => CHARACTER_TOKEN,
2167 data => '',
2168 line => $self->{line_prev},
2169 column => $self->{column_prev} - 7};
2170 $self->{state} = CDATA_SECTION_STATE;
2171 !!!next-input-character;
2172 redo A;
2173 } else {
2174 !!!cp (135.3);
2175 !!!parse-error (type => 'bogus comment',
2176 line => $self->{line_prev},
2177 column => $self->{column_prev} - 1 - length $self->{state_keyword});
2178 $self->{state} = BOGUS_COMMENT_STATE;
2179 ## Reconsume.
2180 $self->{current_token} = {type => COMMENT_TOKEN,
2181 data => $self->{state_keyword},
2182 line => $self->{line_prev},
2183 column => $self->{column_prev} - 1 - length $self->{state_keyword},
2184 };
2185 redo A;
2186 }
2187 } elsif ($self->{state} == COMMENT_START_STATE) {
2188 if ($self->{next_char} == 0x002D) { # -
2189 !!!cp (137);
2190 $self->{state} = COMMENT_START_DASH_STATE;
2191 !!!next-input-character;
2192 redo A;
2193 } elsif ($self->{next_char} == 0x003E) { # >
2194 !!!cp (138);
2195 !!!parse-error (type => 'bogus comment');
2196 $self->{state} = DATA_STATE;
2197 !!!next-input-character;
2198
2199 !!!emit ($self->{current_token}); # comment
2200
2201 redo A;
2202 } elsif ($self->{next_char} == -1) {
2203 !!!cp (139);
2204 !!!parse-error (type => 'unclosed comment');
2205 $self->{state} = DATA_STATE;
2206 ## reconsume
2207
2208 !!!emit ($self->{current_token}); # comment
2209
2210 redo A;
2211 } else {
2212 !!!cp (140);
2213 $self->{current_token}->{data} # comment
2214 .= chr ($self->{next_char});
2215 $self->{state} = COMMENT_STATE;
2216 !!!next-input-character;
2217 redo A;
2218 }
2219 } elsif ($self->{state} == COMMENT_START_DASH_STATE) {
2220 if ($self->{next_char} == 0x002D) { # -
2221 !!!cp (141);
2222 $self->{state} = COMMENT_END_STATE;
2223 !!!next-input-character;
2224 redo A;
2225 } elsif ($self->{next_char} == 0x003E) { # >
2226 !!!cp (142);
2227 !!!parse-error (type => 'bogus comment');
2228 $self->{state} = DATA_STATE;
2229 !!!next-input-character;
2230
2231 !!!emit ($self->{current_token}); # comment
2232
2233 redo A;
2234 } elsif ($self->{next_char} == -1) {
2235 !!!cp (143);
2236 !!!parse-error (type => 'unclosed comment');
2237 $self->{state} = DATA_STATE;
2238 ## reconsume
2239
2240 !!!emit ($self->{current_token}); # comment
2241
2242 redo A;
2243 } else {
2244 !!!cp (144);
2245 $self->{current_token}->{data} # comment
2246 .= '-' . chr ($self->{next_char});
2247 $self->{state} = COMMENT_STATE;
2248 !!!next-input-character;
2249 redo A;
2250 }
2251 } elsif ($self->{state} == COMMENT_STATE) {
2252 if ($self->{next_char} == 0x002D) { # -
2253 !!!cp (145);
2254 $self->{state} = COMMENT_END_DASH_STATE;
2255 !!!next-input-character;
2256 redo A;
2257 } elsif ($self->{next_char} == -1) {
2258 !!!cp (146);
2259 !!!parse-error (type => 'unclosed comment');
2260 $self->{state} = DATA_STATE;
2261 ## reconsume
2262
2263 !!!emit ($self->{current_token}); # comment
2264
2265 redo A;
2266 } else {
2267 !!!cp (147);
2268 $self->{current_token}->{data} .= chr ($self->{next_char}); # comment
2269 $self->{read_until}->($self->{current_token}->{data},
2270 q[-],
2271 length $self->{current_token}->{data});
2272
2273 ## Stay in the state
2274 !!!next-input-character;
2275 redo A;
2276 }
2277 } elsif ($self->{state} == COMMENT_END_DASH_STATE) {
2278 if ($self->{next_char} == 0x002D) { # -
2279 !!!cp (148);
2280 $self->{state} = COMMENT_END_STATE;
2281 !!!next-input-character;
2282 redo A;
2283 } elsif ($self->{next_char} == -1) {
2284 !!!cp (149);
2285 !!!parse-error (type => 'unclosed comment');
2286 $self->{state} = DATA_STATE;
2287 ## reconsume
2288
2289 !!!emit ($self->{current_token}); # comment
2290
2291 redo A;
2292 } else {
2293 !!!cp (150);
2294 $self->{current_token}->{data} .= '-' . chr ($self->{next_char}); # comment
2295 $self->{state} = COMMENT_STATE;
2296 !!!next-input-character;
2297 redo A;
2298 }
2299 } elsif ($self->{state} == COMMENT_END_STATE) {
2300 if ($self->{next_char} == 0x003E) { # >
2301 !!!cp (151);
2302 $self->{state} = DATA_STATE;
2303 !!!next-input-character;
2304
2305 !!!emit ($self->{current_token}); # comment
2306
2307 redo A;
2308 } elsif ($self->{next_char} == 0x002D) { # -
2309 !!!cp (152);
2310 !!!parse-error (type => 'dash in comment',
2311 line => $self->{line_prev},
2312 column => $self->{column_prev});
2313 $self->{current_token}->{data} .= '-'; # comment
2314 ## Stay in the state
2315 !!!next-input-character;
2316 redo A;
2317 } elsif ($self->{next_char} == -1) {
2318 !!!cp (153);
2319 !!!parse-error (type => 'unclosed comment');
2320 $self->{state} = DATA_STATE;
2321 ## reconsume
2322
2323 !!!emit ($self->{current_token}); # comment
2324
2325 redo A;
2326 } else {
2327 !!!cp (154);
2328 !!!parse-error (type => 'dash in comment',
2329 line => $self->{line_prev},
2330 column => $self->{column_prev});
2331 $self->{current_token}->{data} .= '--' . chr ($self->{next_char}); # comment
2332 $self->{state} = COMMENT_STATE;
2333 !!!next-input-character;
2334 redo A;
2335 }
2336 } elsif ($self->{state} == DOCTYPE_STATE) {
2337 if ($self->{next_char} == 0x0009 or # HT
2338 $self->{next_char} == 0x000A or # LF
2339 $self->{next_char} == 0x000B or # VT
2340 $self->{next_char} == 0x000C or # FF
2341 $self->{next_char} == 0x0020) { # SP
2342 !!!cp (155);
2343 $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
2344 !!!next-input-character;
2345 redo A;
2346 } else {
2347 !!!cp (156);
2348 !!!parse-error (type => 'no space before DOCTYPE name');
2349 $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
2350 ## reconsume
2351 redo A;
2352 }
2353 } elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) {
2354 if ($self->{next_char} == 0x0009 or # HT
2355 $self->{next_char} == 0x000A or # LF
2356 $self->{next_char} == 0x000B or # VT
2357 $self->{next_char} == 0x000C or # FF
2358 $self->{next_char} == 0x0020) { # SP
2359 !!!cp (157);
2360 ## Stay in the state
2361 !!!next-input-character;
2362 redo A;
2363 } elsif ($self->{next_char} == 0x003E) { # >
2364 !!!cp (158);
2365 !!!parse-error (type => 'no DOCTYPE name');
2366 $self->{state} = DATA_STATE;
2367 !!!next-input-character;
2368
2369 !!!emit ($self->{current_token}); # DOCTYPE (quirks)
2370
2371 redo A;
2372 } elsif ($self->{next_char} == -1) {
2373 !!!cp (159);
2374 !!!parse-error (type => 'no DOCTYPE name');
2375 $self->{state} = DATA_STATE;
2376 ## reconsume
2377
2378 !!!emit ($self->{current_token}); # DOCTYPE (quirks)
2379
2380 redo A;
2381 } else {
2382 !!!cp (160);
2383 $self->{current_token}->{name} = chr $self->{next_char};
2384 delete $self->{current_token}->{quirks};
2385 ## ISSUE: "Set the token's name name to the" in the spec
2386 $self->{state} = DOCTYPE_NAME_STATE;
2387 !!!next-input-character;
2388 redo A;
2389 }
2390 } elsif ($self->{state} == DOCTYPE_NAME_STATE) {
2391 ## ISSUE: Redundant "First," in the spec.
2392 if ($self->{next_char} == 0x0009 or # HT
2393 $self->{next_char} == 0x000A or # LF
2394 $self->{next_char} == 0x000B or # VT
2395 $self->{next_char} == 0x000C or # FF
2396 $self->{next_char} == 0x0020) { # SP
2397 !!!cp (161);
2398 $self->{state} = AFTER_DOCTYPE_NAME_STATE;
2399 !!!next-input-character;
2400 redo A;
2401 } elsif ($self->{next_char} == 0x003E) { # >
2402 !!!cp (162);
2403 $self->{state} = DATA_STATE;
2404 !!!next-input-character;
2405
2406 !!!emit ($self->{current_token}); # DOCTYPE
2407
2408 redo A;
2409 } elsif ($self->{next_char} == -1) {
2410 !!!cp (163);
2411 !!!parse-error (type => 'unclosed DOCTYPE');
2412 $self->{state} = DATA_STATE;
2413 ## reconsume
2414
2415 $self->{current_token}->{quirks} = 1;
2416 !!!emit ($self->{current_token}); # DOCTYPE
2417
2418 redo A;
2419 } else {
2420 !!!cp (164);
2421 $self->{current_token}->{name}
2422 .= chr ($self->{next_char}); # DOCTYPE
2423 ## Stay in the state
2424 !!!next-input-character;
2425 redo A;
2426 }
2427 } elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) {
2428 if ($self->{next_char} == 0x0009 or # HT
2429 $self->{next_char} == 0x000A or # LF
2430 $self->{next_char} == 0x000B or # VT
2431 $self->{next_char} == 0x000C or # FF
2432 $self->{next_char} == 0x0020) { # SP
2433 !!!cp (165);
2434 ## Stay in the state
2435 !!!next-input-character;
2436 redo A;
2437 } elsif ($self->{next_char} == 0x003E) { # >
2438 !!!cp (166);
2439 $self->{state} = DATA_STATE;
2440 !!!next-input-character;
2441
2442 !!!emit ($self->{current_token}); # DOCTYPE
2443
2444 redo A;
2445 } elsif ($self->{next_char} == -1) {
2446 !!!cp (167);
2447 !!!parse-error (type => 'unclosed DOCTYPE');
2448 $self->{state} = DATA_STATE;
2449 ## reconsume
2450
2451 $self->{current_token}->{quirks} = 1;
2452 !!!emit ($self->{current_token}); # DOCTYPE
2453
2454 redo A;
2455 } elsif ($self->{next_char} == 0x0050 or # P
2456 $self->{next_char} == 0x0070) { # p
2457 $self->{state} = PUBLIC_STATE;
2458 $self->{state_keyword} = chr $self->{next_char};
2459 !!!next-input-character;
2460 redo A;
2461 } elsif ($self->{next_char} == 0x0053 or # S
2462 $self->{next_char} == 0x0073) { # s
2463 $self->{state} = SYSTEM_STATE;
2464 $self->{state_keyword} = chr $self->{next_char};
2465 !!!next-input-character;
2466 redo A;
2467 } else {
2468 !!!cp (180);
2469 !!!parse-error (type => 'string after DOCTYPE name');
2470 $self->{current_token}->{quirks} = 1;
2471
2472 $self->{state} = BOGUS_DOCTYPE_STATE;
2473 !!!next-input-character;
2474 redo A;
2475 }
2476 } elsif ($self->{state} == PUBLIC_STATE) {
2477 ## ASCII case-insensitive
2478 if ($self->{next_char} == [
2479 undef,
2480 0x0055, # U
2481 0x0042, # B
2482 0x004C, # L
2483 0x0049, # I
2484 ]->[length $self->{state_keyword}] or
2485 $self->{next_char} == [
2486 undef,
2487 0x0075, # u
2488 0x0062, # b
2489 0x006C, # l
2490 0x0069, # i
2491 ]->[length $self->{state_keyword}]) {
2492 !!!cp (175);
2493 ## Stay in the state.
2494 $self->{state_keyword} .= chr $self->{next_char};
2495 !!!next-input-character;
2496 redo A;
2497 } elsif ((length $self->{state_keyword}) == 5 and
2498 ($self->{next_char} == 0x0043 or # C
2499 $self->{next_char} == 0x0063)) { # c
2500 !!!cp (168);
2501 $self->{state} = BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
2502 !!!next-input-character;
2503 redo A;
2504 } else {
2505 !!!cp (169);
2506 !!!parse-error (type => 'string after DOCTYPE name',
2507 line => $self->{line_prev},
2508 column => $self->{column_prev} + 1 - length $self->{state_keyword});
2509 $self->{current_token}->{quirks} = 1;
2510
2511 $self->{state} = BOGUS_DOCTYPE_STATE;
2512 ## Reconsume.
2513 redo A;
2514 }
2515 } elsif ($self->{state} == SYSTEM_STATE) {
2516 ## ASCII case-insensitive
2517 if ($self->{next_char} == [
2518 undef,
2519 0x0059, # Y
2520 0x0053, # S
2521 0x0054, # T
2522 0x0045, # E
2523 ]->[length $self->{state_keyword}] or
2524 $self->{next_char} == [
2525 undef,
2526 0x0079, # y
2527 0x0073, # s
2528 0x0074, # t
2529 0x0065, # e
2530 ]->[length $self->{state_keyword}]) {
2531 !!!cp (170);
2532 ## Stay in the state.
2533 $self->{state_keyword} .= chr $self->{next_char};
2534 !!!next-input-character;
2535 redo A;
2536 } elsif ((length $self->{state_keyword}) == 5 and
2537 ($self->{next_char} == 0x004D or # M
2538 $self->{next_char} == 0x006D)) { # m
2539 !!!cp (171);
2540 $self->{state} = BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
2541 !!!next-input-character;
2542 redo A;
2543 } else {
2544 !!!cp (172);
2545 !!!parse-error (type => 'string after DOCTYPE name',
2546 line => $self->{line_prev},
2547 column => $self->{column_prev} + 1 - length $self->{state_keyword});
2548 $self->{current_token}->{quirks} = 1;
2549
2550 $self->{state} = BOGUS_DOCTYPE_STATE;
2551 ## Reconsume.
2552 redo A;
2553 }
2554 } elsif ($self->{state} == BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {
2555 if ({
2556 0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2557 #0x000D => 1, # HT, LF, VT, FF, SP, CR
2558 }->{$self->{next_char}}) {
2559 !!!cp (181);
2560 ## Stay in the state
2561 !!!next-input-character;
2562 redo A;
2563 } elsif ($self->{next_char} eq 0x0022) { # "
2564 !!!cp (182);
2565 $self->{current_token}->{public_identifier} = ''; # DOCTYPE
2566 $self->{state} = DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE;
2567 !!!next-input-character;
2568 redo A;
2569 } elsif ($self->{next_char} eq 0x0027) { # '
2570 !!!cp (183);
2571 $self->{current_token}->{public_identifier} = ''; # DOCTYPE
2572 $self->{state} = DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE;
2573 !!!next-input-character;
2574 redo A;
2575 } elsif ($self->{next_char} eq 0x003E) { # >
2576 !!!cp (184);
2577 !!!parse-error (type => 'no PUBLIC literal');
2578
2579 $self->{state} = DATA_STATE;
2580 !!!next-input-character;
2581
2582 $self->{current_token}->{quirks} = 1;
2583 !!!emit ($self->{current_token}); # DOCTYPE
2584
2585 redo A;
2586 } elsif ($self->{next_char} == -1) {
2587 !!!cp (185);
2588 !!!parse-error (type => 'unclosed DOCTYPE');
2589
2590 $self->{state} = DATA_STATE;
2591 ## reconsume
2592
2593 $self->{current_token}->{quirks} = 1;
2594 !!!emit ($self->{current_token}); # DOCTYPE
2595
2596 redo A;
2597 } else {
2598 !!!cp (186);
2599 !!!parse-error (type => 'string after PUBLIC');
2600 $self->{current_token}->{quirks} = 1;
2601
2602 $self->{state} = BOGUS_DOCTYPE_STATE;
2603 !!!next-input-character;
2604 redo A;
2605 }
2606 } elsif ($self->{state} == DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE) {
2607 if ($self->{next_char} == 0x0022) { # "
2608 !!!cp (187);
2609 $self->{state} = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
2610 !!!next-input-character;
2611 redo A;
2612 } elsif ($self->{next_char} == 0x003E) { # >
2613 !!!cp (188);
2614 !!!parse-error (type => 'unclosed PUBLIC literal');
2615
2616 $self->{state} = DATA_STATE;
2617 !!!next-input-character;
2618
2619 $self->{current_token}->{quirks} = 1;
2620 !!!emit ($self->{current_token}); # DOCTYPE
2621
2622 redo A;
2623 } elsif ($self->{next_char} == -1) {
2624 !!!cp (189);
2625 !!!parse-error (type => 'unclosed PUBLIC literal');
2626
2627 $self->{state} = DATA_STATE;
2628 ## reconsume
2629
2630 $self->{current_token}->{quirks} = 1;
2631 !!!emit ($self->{current_token}); # DOCTYPE
2632
2633 redo A;
2634 } else {
2635 !!!cp (190);
2636 $self->{current_token}->{public_identifier} # DOCTYPE
2637 .= chr $self->{next_char};
2638 $self->{read_until}->($self->{current_token}->{public_identifier},
2639 q[">],
2640 length $self->{current_token}->{public_identifier});
2641
2642 ## Stay in the state
2643 !!!next-input-character;
2644 redo A;
2645 }
2646 } elsif ($self->{state} == DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE) {
2647 if ($self->{next_char} == 0x0027) { # '
2648 !!!cp (191);
2649 $self->{state} = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
2650 !!!next-input-character;
2651 redo A;
2652 } elsif ($self->{next_char} == 0x003E) { # >
2653 !!!cp (192);
2654 !!!parse-error (type => 'unclosed PUBLIC literal');
2655
2656 $self->{state} = DATA_STATE;
2657 !!!next-input-character;
2658
2659 $self->{current_token}->{quirks} = 1;
2660 !!!emit ($self->{current_token}); # DOCTYPE
2661
2662 redo A;
2663 } elsif ($self->{next_char} == -1) {
2664 !!!cp (193);
2665 !!!parse-error (type => 'unclosed PUBLIC literal');
2666
2667 $self->{state} = DATA_STATE;
2668 ## reconsume
2669
2670 $self->{current_token}->{quirks} = 1;
2671 !!!emit ($self->{current_token}); # DOCTYPE
2672
2673 redo A;
2674 } else {
2675 !!!cp (194);
2676 $self->{current_token}->{public_identifier} # DOCTYPE
2677 .= chr $self->{next_char};
2678 $self->{read_until}->($self->{current_token}->{public_identifier},
2679 q['>],
2680 length $self->{current_token}->{public_identifier});
2681
2682 ## Stay in the state
2683 !!!next-input-character;
2684 redo A;
2685 }
2686 } elsif ($self->{state} == AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {
2687 if ({
2688 0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2689 #0x000D => 1, # HT, LF, VT, FF, SP, CR
2690 }->{$self->{next_char}}) {
2691 !!!cp (195);
2692 ## Stay in the state
2693 !!!next-input-character;
2694 redo A;
2695 } elsif ($self->{next_char} == 0x0022) { # "
2696 !!!cp (196);
2697 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2698 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
2699 !!!next-input-character;
2700 redo A;
2701 } elsif ($self->{next_char} == 0x0027) { # '
2702 !!!cp (197);
2703 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2704 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
2705 !!!next-input-character;
2706 redo A;
2707 } elsif ($self->{next_char} == 0x003E) { # >
2708 !!!cp (198);
2709 $self->{state} = DATA_STATE;
2710 !!!next-input-character;
2711
2712 !!!emit ($self->{current_token}); # DOCTYPE
2713
2714 redo A;
2715 } elsif ($self->{next_char} == -1) {
2716 !!!cp (199);
2717 !!!parse-error (type => 'unclosed DOCTYPE');
2718
2719 $self->{state} = DATA_STATE;
2720 ## reconsume
2721
2722 $self->{current_token}->{quirks} = 1;
2723 !!!emit ($self->{current_token}); # DOCTYPE
2724
2725 redo A;
2726 } else {
2727 !!!cp (200);
2728 !!!parse-error (type => 'string after PUBLIC literal');
2729 $self->{current_token}->{quirks} = 1;
2730
2731 $self->{state} = BOGUS_DOCTYPE_STATE;
2732 !!!next-input-character;
2733 redo A;
2734 }
2735 } elsif ($self->{state} == BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
2736 if ({
2737 0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2738 #0x000D => 1, # HT, LF, VT, FF, SP, CR
2739 }->{$self->{next_char}}) {
2740 !!!cp (201);
2741 ## Stay in the state
2742 !!!next-input-character;
2743 redo A;
2744 } elsif ($self->{next_char} == 0x0022) { # "
2745 !!!cp (202);
2746 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2747 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
2748 !!!next-input-character;
2749 redo A;
2750 } elsif ($self->{next_char} == 0x0027) { # '
2751 !!!cp (203);
2752 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2753 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
2754 !!!next-input-character;
2755 redo A;
2756 } elsif ($self->{next_char} == 0x003E) { # >
2757 !!!cp (204);
2758 !!!parse-error (type => 'no SYSTEM literal');
2759 $self->{state} = DATA_STATE;
2760 !!!next-input-character;
2761
2762 $self->{current_token}->{quirks} = 1;
2763 !!!emit ($self->{current_token}); # DOCTYPE
2764
2765 redo A;
2766 } elsif ($self->{next_char} == -1) {
2767 !!!cp (205);
2768 !!!parse-error (type => 'unclosed DOCTYPE');
2769
2770 $self->{state} = DATA_STATE;
2771 ## reconsume
2772
2773 $self->{current_token}->{quirks} = 1;
2774 !!!emit ($self->{current_token}); # DOCTYPE
2775
2776 redo A;
2777 } else {
2778 !!!cp (206);
2779 !!!parse-error (type => 'string after SYSTEM');
2780 $self->{current_token}->{quirks} = 1;
2781
2782 $self->{state} = BOGUS_DOCTYPE_STATE;
2783 !!!next-input-character;
2784 redo A;
2785 }
2786 } elsif ($self->{state} == DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE) {
2787 if ($self->{next_char} == 0x0022) { # "
2788 !!!cp (207);
2789 $self->{state} = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
2790 !!!next-input-character;
2791 redo A;
2792 } elsif ($self->{next_char} == 0x003E) { # >
2793 !!!cp (208);
2794 !!!parse-error (type => 'unclosed SYSTEM literal');
2795
2796 $self->{state} = DATA_STATE;
2797 !!!next-input-character;
2798
2799 $self->{current_token}->{quirks} = 1;
2800 !!!emit ($self->{current_token}); # DOCTYPE
2801
2802 redo A;
2803 } elsif ($self->{next_char} == -1) {
2804 !!!cp (209);
2805 !!!parse-error (type => 'unclosed SYSTEM literal');
2806
2807 $self->{state} = DATA_STATE;
2808 ## reconsume
2809
2810 $self->{current_token}->{quirks} = 1;
2811 !!!emit ($self->{current_token}); # DOCTYPE
2812
2813 redo A;
2814 } else {
2815 !!!cp (210);
2816 $self->{current_token}->{system_identifier} # DOCTYPE
2817 .= chr $self->{next_char};
2818 $self->{read_until}->($self->{current_token}->{system_identifier},
2819 q[">],
2820 length $self->{current_token}->{system_identifier});
2821
2822 ## Stay in the state
2823 !!!next-input-character;
2824 redo A;
2825 }
2826 } elsif ($self->{state} == DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE) {
2827 if ($self->{next_char} == 0x0027) { # '
2828 !!!cp (211);
2829 $self->{state} = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
2830 !!!next-input-character;
2831 redo A;
2832 } elsif ($self->{next_char} == 0x003E) { # >
2833 !!!cp (212);
2834 !!!parse-error (type => 'unclosed SYSTEM literal');
2835
2836 $self->{state} = DATA_STATE;
2837 !!!next-input-character;
2838
2839 $self->{current_token}->{quirks} = 1;
2840 !!!emit ($self->{current_token}); # DOCTYPE
2841
2842 redo A;
2843 } elsif ($self->{next_char} == -1) {
2844 !!!cp (213);
2845 !!!parse-error (type => 'unclosed SYSTEM literal');
2846
2847 $self->{state} = DATA_STATE;
2848 ## reconsume
2849
2850 $self->{current_token}->{quirks} = 1;
2851 !!!emit ($self->{current_token}); # DOCTYPE
2852
2853 redo A;
2854 } else {
2855 !!!cp (214);
2856 $self->{current_token}->{system_identifier} # DOCTYPE
2857 .= chr $self->{next_char};
2858 $self->{read_until}->($self->{current_token}->{system_identifier},
2859 q['>],
2860 length $self->{current_token}->{system_identifier});
2861
2862 ## Stay in the state
2863 !!!next-input-character;
2864 redo A;
2865 }
2866 } elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
2867 if ({
2868 0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2869 #0x000D => 1, # HT, LF, VT, FF, SP, CR
2870 }->{$self->{next_char}}) {
2871 !!!cp (215);
2872 ## Stay in the state
2873 !!!next-input-character;
2874 redo A;
2875 } elsif ($self->{next_char} == 0x003E) { # >
2876 !!!cp (216);
2877 $self->{state} = DATA_STATE;
2878 !!!next-input-character;
2879
2880 !!!emit ($self->{current_token}); # DOCTYPE
2881
2882 redo A;
2883 } elsif ($self->{next_char} == -1) {
2884 !!!cp (217);
2885 !!!parse-error (type => 'unclosed DOCTYPE');
2886 $self->{state} = DATA_STATE;
2887 ## reconsume
2888
2889 $self->{current_token}->{quirks} = 1;
2890 !!!emit ($self->{current_token}); # DOCTYPE
2891
2892 redo A;
2893 } else {
2894 !!!cp (218);
2895 !!!parse-error (type => 'string after SYSTEM literal');
2896 #$self->{current_token}->{quirks} = 1;
2897
2898 $self->{state} = BOGUS_DOCTYPE_STATE;
2899 !!!next-input-character;
2900 redo A;
2901 }
2902 } elsif ($self->{state} == BOGUS_DOCTYPE_STATE) {
2903 if ($self->{next_char} == 0x003E) { # >
2904 !!!cp (219);
2905 $self->{state} = DATA_STATE;
2906 !!!next-input-character;
2907
2908 !!!emit ($self->{current_token}); # DOCTYPE
2909
2910 redo A;
2911 } elsif ($self->{next_char} == -1) {
2912 !!!cp (220);
2913 !!!parse-error (type => 'unclosed DOCTYPE');
2914 $self->{state} = DATA_STATE;
2915 ## reconsume
2916
2917 !!!emit ($self->{current_token}); # DOCTYPE
2918
2919 redo A;
2920 } else {
2921 !!!cp (221);
2922 my $s = '';
2923 $self->{read_until}->($s, q[>], 0);
2924
2925 ## Stay in the state
2926 !!!next-input-character;
2927 redo A;
2928 }
2929 } elsif ($self->{state} == CDATA_SECTION_STATE) {
2930 ## NOTE: "CDATA section state" in the spec is jointly implemented
2931 ## by three states, |CDATA_SECTION_STATE|, |CDATA_SECTION_MSE1_STATE|,
2932 ## and |CDATA_SECTION_MSE2_STATE|.
2933
2934 if ($self->{next_char} == 0x005D) { # ]
2935 !!!cp (221.1);
2936 $self->{state} = CDATA_SECTION_MSE1_STATE;
2937 !!!next-input-character;
2938 redo A;
2939 } elsif ($self->{next_char} == -1) {
2940 $self->{state} = DATA_STATE;
2941 !!!next-input-character;
2942 if (length $self->{current_token}->{data}) { # character
2943 !!!cp (221.2);
2944 !!!emit ($self->{current_token}); # character
2945 } else {
2946 !!!cp (221.3);
2947 ## No token to emit. $self->{current_token} is discarded.
2948 }
2949 redo A;
2950 } else {
2951 !!!cp (221.4);
2952 $self->{current_token}->{data} .= chr $self->{next_char};
2953 $self->{read_until}->($self->{current_token}->{data},
2954 q<]>,
2955 length $self->{current_token}->{data});
2956
2957 ## Stay in the state.
2958 !!!next-input-character;
2959 redo A;
2960 }
2961
2962 ## ISSUE: "text tokens" in spec.
2963 } elsif ($self->{state} == CDATA_SECTION_MSE1_STATE) {
2964 if ($self->{next_char} == 0x005D) { # ]
2965 !!!cp (221.5);
2966 $self->{state} = CDATA_SECTION_MSE2_STATE;
2967 !!!next-input-character;
2968 redo A;
2969 } else {
2970 !!!cp (221.6);
2971 $self->{current_token}->{data} .= ']';
2972 $self->{state} = CDATA_SECTION_STATE;
2973 ## Reconsume.
2974 redo A;
2975 }
2976 } elsif ($self->{state} == CDATA_SECTION_MSE2_STATE) {
2977 if ($self->{next_char} == 0x003E) { # >
2978 $self->{state} = DATA_STATE;
2979 !!!next-input-character;
2980 if (length $self->{current_token}->{data}) { # character
2981 !!!cp (221.7);
2982 !!!emit ($self->{current_token}); # character
2983 } else {
2984 !!!cp (221.8);
2985 ## No token to emit. $self->{current_token} is discarded.
2986 }
2987 redo A;
2988 } elsif ($self->{next_char} == 0x005D) { # ]
2989 !!!cp (221.9); # character
2990 $self->{current_token}->{data} .= ']'; ## Add first "]" of "]]]".
2991 ## Stay in the state.
2992 !!!next-input-character;
2993 redo A;
2994 } else {
2995 !!!cp (221.11);
2996 $self->{current_token}->{data} .= ']]'; # character
2997 $self->{state} = CDATA_SECTION_STATE;
2998 ## Reconsume.
2999 redo A;
3000 }
3001 } elsif ($self->{state} == ENTITY_STATE) {
3002 if ({
3003 0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, # HT, LF, VT, FF,
3004 0x0020 => 1, 0x003C => 1, 0x0026 => 1, -1 => 1, # SP, <, &
3005 $self->{entity_additional} => 1,
3006 }->{$self->{next_char}}) {
3007 !!!cp (1001);
3008 ## Don't consume
3009 ## No error
3010 ## Return nothing.
3011 #
3012 } elsif ($self->{next_char} == 0x0023) { # #
3013 !!!cp (999);
3014 $self->{state} = ENTITY_HASH_STATE;
3015 $self->{state_keyword} = '#';
3016 !!!next-input-character;
3017 redo A;
3018 } elsif ((0x0041 <= $self->{next_char} and
3019 $self->{next_char} <= 0x005A) or # A..Z
3020 (0x0061 <= $self->{next_char} and
3021 $self->{next_char} <= 0x007A)) { # a..z
3022 !!!cp (998);
3023 require Whatpm::_NamedEntityList;
3024 $self->{state} = ENTITY_NAME_STATE;
3025 $self->{state_keyword} = chr $self->{next_char};
3026 $self->{entity__value} = $self->{state_keyword};
3027 $self->{entity__match} = 0;
3028 !!!next-input-character;
3029 redo A;
3030 } else {
3031 !!!cp (1027);
3032 !!!parse-error (type => 'bare ero');
3033 ## Return nothing.
3034 #
3035 }
3036
3037 ## NOTE: No character is consumed by the "consume a character
3038 ## reference" algorithm. In other word, there is an "&" character
3039 ## that does not introduce a character reference, which would be
3040 ## appended to the parent element or the attribute value in later
3041 ## process of the tokenizer.
3042
3043 if ($self->{prev_state} == DATA_STATE) {
3044 !!!cp (997);
3045 $self->{state} = $self->{prev_state};
3046 ## Reconsume.
3047 !!!emit ({type => CHARACTER_TOKEN, data => '&',
3048 line => $self->{line_prev},
3049 column => $self->{column_prev},
3050 });
3051 redo A;
3052 } else {
3053 !!!cp (996);
3054 $self->{current_attribute}->{value} .= '&';
3055 $self->{state} = $self->{prev_state};
3056 ## Reconsume.
3057 redo A;
3058 }
3059 } elsif ($self->{state} == ENTITY_HASH_STATE) {
3060 if ($self->{next_char} == 0x0078 or # x
3061 $self->{next_char} == 0x0058) { # X
3062 !!!cp (995);
3063 $self->{state} = HEXREF_X_STATE;
3064 $self->{state_keyword} .= chr $self->{next_char};
3065 !!!next-input-character;
3066 redo A;
3067 } elsif (0x0030 <= $self->{next_char} and
3068 $self->{next_char} <= 0x0039) { # 0..9
3069 !!!cp (994);
3070 $self->{state} = NCR_NUM_STATE;
3071 $self->{state_keyword} = $self->{next_char} - 0x0030;
3072 !!!next-input-character;
3073 redo A;
3074 } else {
3075 !!!parse-error (type => 'bare nero',
3076 line => $self->{line_prev},
3077 column => $self->{column_prev} - 1);
3078
3079 ## NOTE: According to the spec algorithm, nothing is returned,
3080 ## and then "&#" is appended to the parent element or the attribute
3081 ## value in the later processing.
3082
3083 if ($self->{prev_state} == DATA_STATE) {
3084 !!!cp (1019);
3085 $self->{state} = $self->{prev_state};
3086 ## Reconsume.
3087 !!!emit ({type => CHARACTER_TOKEN,
3088 data => '&#',
3089 line => $self->{line_prev},
3090 column => $self->{column_prev} - 1,
3091 });
3092 redo A;
3093 } else {
3094 !!!cp (993);
3095 $self->{current_attribute}->{value} .= '&#';
3096 $self->{state} = $self->{prev_state};
3097 ## Reconsume.
3098 redo A;
3099 }
3100 }
3101 } elsif ($self->{state} == NCR_NUM_STATE) {
3102 if (0x0030 <= $self->{next_char} and
3103 $self->{next_char} <= 0x0039) { # 0..9
3104 !!!cp (1012);
3105 $self->{state_keyword} *= 10;
3106 $self->{state_keyword} += $self->{next_char} - 0x0030;
3107
3108 ## Stay in the state.
3109 !!!next-input-character;
3110 redo A;
3111 } elsif ($self->{next_char} == 0x003B) { # ;
3112 !!!cp (1013);
3113 !!!next-input-character;
3114 #
3115 } else {
3116 !!!cp (1014);
3117 !!!parse-error (type => 'no refc');
3118 ## Reconsume.
3119 #
3120 }
3121
3122 my $code = $self->{state_keyword};
3123 my $l = $self->{line_prev};
3124 my $c = $self->{column_prev};
3125 if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) {
3126 !!!cp (1015);
3127 !!!parse-error (type => 'invalid character reference',
3128 text => (sprintf 'U+%04X', $code),
3129 line => $l, column => $c);
3130 $code = 0xFFFD;
3131 } elsif ($code > 0x10FFFF) {
3132 !!!cp (1016);
3133 !!!parse-error (type => 'invalid character reference',
3134 text => (sprintf 'U-%08X', $code),
3135 line => $l, column => $c);
3136 $code = 0xFFFD;
3137 } elsif ($code == 0x000D) {
3138 !!!cp (1017);
3139 !!!parse-error (type => 'CR character reference',
3140 line => $l, column => $c);
3141 $code = 0x000A;
3142 } elsif (0x80 <= $code and $code <= 0x9F) {
3143 !!!cp (1018);
3144 !!!parse-error (type => 'C1 character reference',
3145 text => (sprintf 'U+%04X', $code),
3146 line => $l, column => $c);
3147 $code = $c1_entity_char->{$code};
3148 }
3149
3150 if ($self->{prev_state} == DATA_STATE) {
3151 !!!cp (992);
3152 $self->{state} = $self->{prev_state};
3153 ## Reconsume.
3154 !!!emit ({type => CHARACTER_TOKEN, data => chr $code,
3155 line => $l, column => $c,
3156 });
3157 redo A;
3158 } else {
3159 !!!cp (991);
3160 $self->{current_attribute}->{value} .= chr $code;
3161 $self->{current_attribute}->{has_reference} = 1;
3162 $self->{state} = $self->{prev_state};
3163 ## Reconsume.
3164 redo A;
3165 }
3166 } elsif ($self->{state} == HEXREF_X_STATE) {
3167 if ((0x0030 <= $self->{next_char} and $self->{next_char} <= 0x0039) or
3168 (0x0041 <= $self->{next_char} and $self->{next_char} <= 0x0046) or
3169 (0x0061 <= $self->{next_char} and $self->{next_char} <= 0x0066)) {
3170 # 0..9, A..F, a..f
3171 !!!cp (990);
3172 $self->{state} = HEXREF_HEX_STATE;
3173 $self->{state_keyword} = 0;
3174 ## Reconsume.
3175 redo A;
3176 } else {
3177 !!!parse-error (type => 'bare hcro',
3178 line => $self->{line_prev},
3179 column => $self->{column_prev} - 2);
3180
3181 ## NOTE: According to the spec algorithm, nothing is returned,
3182 ## and then "&#" followed by "X" or "x" is appended to the parent
3183 ## element or the attribute value in the later processing.
3184
3185 if ($self->{prev_state} == DATA_STATE) {
3186 !!!cp (1005);
3187 $self->{state} = $self->{prev_state};
3188 ## Reconsume.
3189 !!!emit ({type => CHARACTER_TOKEN,
3190 data => '&' . $self->{state_keyword},
3191 line => $self->{line_prev},
3192 column => $self->{column_prev} - length $self->{state_keyword},
3193 });
3194 redo A;
3195 } else {
3196 !!!cp (989);
3197 $self->{current_attribute}->{value} .= '&' . $self->{state_keyword};
3198 $self->{state} = $self->{prev_state};
3199 ## Reconsume.
3200 redo A;
3201 }
3202 }
3203 } elsif ($self->{state} == HEXREF_HEX_STATE) {
3204 if (0x0030 <= $self->{next_char} and $self->{next_char} <= 0x0039) {
3205 # 0..9
3206 !!!cp (1002);
3207 $self->{state_keyword} *= 0x10;
3208 $self->{state_keyword} += $self->{next_char} - 0x0030;
3209 ## Stay in the state.
3210 !!!next-input-character;
3211 redo A;
3212 } elsif (0x0061 <= $self->{next_char} and
3213 $self->{next_char} <= 0x0066) { # a..f
3214 !!!cp (1003);
3215 $self->{state_keyword} *= 0x10;
3216 $self->{state_keyword} += $self->{next_char} - 0x0060 + 9;
3217 ## Stay in the state.
3218 !!!next-input-character;
3219 redo A;
3220 } elsif (0x0041 <= $self->{next_char} and
3221 $self->{next_char} <= 0x0046) { # A..F
3222 !!!cp (1004);
3223 $self->{state_keyword} *= 0x10;
3224 $self->{state_keyword} += $self->{next_char} - 0x0040 + 9;
3225 ## Stay in the state.
3226 !!!next-input-character;
3227 redo A;
3228 } elsif ($self->{next_char} == 0x003B) { # ;
3229 !!!cp (1006);
3230 !!!next-input-character;
3231 #
3232 } else {
3233 !!!cp (1007);
3234 !!!parse-error (type => 'no refc',
3235 line => $self->{line},
3236 column => $self->{column});
3237 ## Reconsume.
3238 #
3239 }
3240
3241 my $code = $self->{state_keyword};
3242 my $l = $self->{line_prev};
3243 my $c = $self->{column_prev};
3244 if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) {
3245 !!!cp (1008);
3246 !!!parse-error (type => 'invalid character reference',
3247 text => (sprintf 'U+%04X', $code),
3248 line => $l, column => $c);
3249 $code = 0xFFFD;
3250 } elsif ($code > 0x10FFFF) {
3251 !!!cp (1009);
3252 !!!parse-error (type => 'invalid character reference',
3253 text => (sprintf 'U-%08X', $code),
3254 line => $l, column => $c);
3255 $code = 0xFFFD;
3256 } elsif ($code == 0x000D) {
3257 !!!cp (1010);
3258 !!!parse-error (type => 'CR character reference', line => $l, column => $c);
3259 $code = 0x000A;
3260 } elsif (0x80 <= $code and $code <= 0x9F) {
3261 !!!cp (1011);
3262 !!!parse-error (type => 'C1 character reference', text => (sprintf 'U+%04X', $code), line => $l, column => $c);
3263 $code = $c1_entity_char->{$code};
3264 }
3265
3266 if ($self->{prev_state} == DATA_STATE) {
3267 !!!cp (988);
3268 $self->{state} = $self->{prev_state};
3269 ## Reconsume.
3270 !!!emit ({type => CHARACTER_TOKEN, data => chr $code,
3271 line => $l, column => $c,
3272 });
3273 redo A;
3274 } else {
3275 !!!cp (987);
3276 $self->{current_attribute}->{value} .= chr $code;
3277 $self->{current_attribute}->{has_reference} = 1;
3278 $self->{state} = $self->{prev_state};
3279 ## Reconsume.
3280 redo A;
3281 }
3282 } elsif ($self->{state} == ENTITY_NAME_STATE) {
3283 if (length $self->{state_keyword} < 30 and
3284 ## NOTE: Some number greater than the maximum length of entity name
3285 ((0x0041 <= $self->{next_char} and # a
3286 $self->{next_char} <= 0x005A) or # x
3287 (0x0061 <= $self->{next_char} and # a
3288 $self->{next_char} <= 0x007A) or # z
3289 (0x0030 <= $self->{next_char} and # 0
3290 $self->{next_char} <= 0x0039) or # 9
3291 $self->{next_char} == 0x003B)) { # ;
3292 our $EntityChar;
3293 $self->{state_keyword} .= chr $self->{next_char};
3294 if (defined $EntityChar->{$self->{state_keyword}}) {
3295 if ($self->{next_char} == 0x003B) { # ;
3296 !!!cp (1020);
3297 $self->{entity__value} = $EntityChar->{$self->{state_keyword}};
3298 $self->{entity__match} = 1;
3299 !!!next-input-character;
3300 #
3301 } else {
3302 !!!cp (1021);
3303 $self->{entity__value} = $EntityChar->{$self->{state_keyword}};
3304 $self->{entity__match} = -1;
3305 ## Stay in the state.
3306 !!!next-input-character;
3307 redo A;
3308 }
3309 } else {
3310 !!!cp (1022);
3311 $self->{entity__value} .= chr $self->{next_char};
3312 $self->{entity__match} *= 2;
3313 ## Stay in the state.
3314 !!!next-input-character;
3315 redo A;
3316 }
3317 }
3318
3319 my $data;
3320 my $has_ref;
3321 if ($self->{entity__match} > 0) {
3322 !!!cp (1023);
3323 $data = $self->{entity__value};
3324 $has_ref = 1;
3325 #
3326 } elsif ($self->{entity__match} < 0) {
3327 !!!parse-error (type => 'no refc');
3328 if ($self->{prev_state} != DATA_STATE and # in attribute
3329 $self->{entity__match} < -1) {
3330 !!!cp (1024);
3331 $data = '&' . $self->{state_keyword};
3332 #
3333 } else {
3334 !!!cp (1025);
3335 $data = $self->{entity__value};
3336 $has_ref = 1;
3337 #
3338 }
3339 } else {
3340 !!!cp (1026);
3341 !!!parse-error (type => 'bare ero',
3342 line => $self->{line_prev},
3343 column => $self->{column_prev} - length $self->{state_keyword});
3344 $data = '&' . $self->{state_keyword};
3345 #
3346 }
3347
3348 ## NOTE: In these cases, when a character reference is found,
3349 ## it is consumed and a character token is returned, or, otherwise,
3350 ## nothing is consumed and returned, according to the spec algorithm.
3351 ## In this implementation, anything that has been examined by the
3352 ## tokenizer is appended to the parent element or the attribute value
3353 ## as string, either literal string when no character reference or
3354 ## entity-replaced string otherwise, in this stage, since any characters
3355 ## that would not be consumed are appended in the data state or in an
3356 ## appropriate attribute value state anyway.
3357
3358 if ($self->{prev_state} == DATA_STATE) {
3359 !!!cp (986);
3360 $self->{state} = $self->{prev_state};
3361 ## Reconsume.
3362 !!!emit ({type => CHARACTER_TOKEN,
3363 data => $data,
3364 line => $self->{line_prev},
3365 column => $self->{column_prev} + 1 - length $self->{state_keyword},
3366 });
3367 redo A;
3368 } else {
3369 !!!cp (985);
3370 $self->{current_attribute}->{value} .= $data;
3371 $self->{current_attribute}->{has_reference} = 1 if $has_ref;
3372 $self->{state} = $self->{prev_state};
3373 ## Reconsume.
3374 redo A;
3375 }
3376 } else {
3377 die "$0: $self->{state}: Unknown state";
3378 }
3379 } # A
3380
3381 die "$0: _get_next_token: unexpected case";
3382 } # _get_next_token
3383
sub _initialize_tree_constructor ($) {
  my ($self) = @_;

  ## Prepare the document for the tree construction stage.
  ##
  ## NOTE: The caller MUST have set |$self->{document}| before invoking
  ## this method.
  my $doc = $self->{document};

  ## Strict error checking is switched off here;
  ## |_terminate_tree_constructor| switches it back on.
  $doc->strict_error_checking (0);

  ## TODO: Turn mutation events off # MUST
  ## TODO: Turn loose Document option (manakai extension) on

  $doc->manakai_is_html (1); # MUST

  ## Set the |manakai_source_line| and |manakai_source_column| user data
  ## entries of the document to 1.
  $doc->set_user_data (manakai_source_line => 1);
  $doc->set_user_data (manakai_source_column => 1);
} # _initialize_tree_constructor
3394
sub _terminate_tree_constructor ($) {
  my ($self) = @_;

  ## Counterpart of |_initialize_tree_constructor|: turn the document's
  ## strict error checking back on.
  my $doc = $self->{document};
  $doc->strict_error_checking (1);

  ## TODO: Turn mutation events on
} # _terminate_tree_constructor
3400
3401 ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
3402
3403 { # tree construction stage
3404 my $token;
3405
sub _construct_tree ($) {
  my $self = shift;

  ## Entry point of the tree construction stage: runs the "initial"
  ## and "before html" steps and then hands over to the main loop.

  ## It is not defined when an interactive UA makes |$self->{document}|
  ## available to the user, nor when it begins to accept user input.

  ## Appending a character means: collect it together with all
  ## directly following characters and insert a single Text node
  ## holding their concatenation. # MUST

  !!!next-token;

  ## Reset the per-parse tree construction state.
  $self->{open_elements} = [];
  $self->{$_} = undef for qw(form_element head_element inner_html_node);

  ## NOTE: The "initial" insertion mode.
  $self->_tree_construction_initial; # MUST

  ## NOTE: The "before html" insertion mode.
  $self->_tree_construction_root_element;
  $self->{insertion_mode} = BEFORE_HEAD_IM;

  ## NOTE: The "before head" insertion mode and so on.
  $self->_tree_construction_main;
} # _construct_tree
3434
sub _tree_construction_initial ($) {
  my $self = shift;

  ## NOTE: "initial" insertion mode
  ##
  ## Processes tokens (the |$token| lexical shared by the tree
  ## construction stage) until the "before html" insertion mode has to
  ## take over:
  ##
  ##   - Whitespace-only character tokens and comments are handled here
  ##     and the loop continues with the next token.
  ##   - A DOCTYPE token creates the document type node, selects the
  ##     document's compat mode and is consumed.
  ##   - Any other token reports a "no DOCTYPE" error, puts the document
  ##     in quirks mode and is left to be reprocessed by the caller.
  ##
  ## Returns nothing.  Dies on an unknown token type.

  INITIAL: {
    if ($token->{type} == DOCTYPE_TOKEN) {
      ## NOTE: Conformance checkers MAY, instead of reporting "not HTML5"
      ## error, switch to a conformance checking mode for another
      ## language.
      my $doctype_name = $token->{name};
      $doctype_name = '' unless defined $doctype_name;
      $doctype_name =~ tr/a-z/A-Z/; # ASCII case-insensitive
      if (not defined $token->{name} or # <!DOCTYPE>
          defined $token->{system_identifier}) {
        !!!cp ('t1');
        !!!parse-error (type => 'not HTML5', token => $token);
      } elsif ($doctype_name ne 'HTML') {
        !!!cp ('t2');
        !!!parse-error (type => 'not HTML5', token => $token);
      } elsif (defined $token->{public_identifier}) {
        if ($token->{public_identifier} eq 'XSLT-compat') {
          !!!cp ('t1.2');
          !!!parse-error (type => 'XSLT-compat', token => $token,
                          level => $self->{level}->{should});
        } else {
          !!!parse-error (type => 'not HTML5', token => $token);
        }
      } else {
        !!!cp ('t3');
        #
      }

      my $doctype = $self->{document}->create_document_type_definition
          ($token->{name}); ## ISSUE: If name is missing (e.g. <!DOCTYPE>)?
      ## NOTE: Default value for both |public_id| and |system_id| attributes
      ## are empty strings, so that we don't set any value in missing cases.
      $doctype->public_id ($token->{public_identifier})
          if defined $token->{public_identifier};
      $doctype->system_id ($token->{system_identifier})
          if defined $token->{system_identifier};
      ## NOTE: Other DocumentType attributes are null or empty lists.
      ## ISSUE: internalSubset = null??
      $self->{document}->append_child ($doctype);

      if ($token->{quirks} or $doctype_name ne 'HTML') {
        !!!cp ('t4');
        $self->{document}->manakai_compat_mode ('quirks');
      } elsif (defined $token->{public_identifier}) {
        my $pubid = $token->{public_identifier};
        $pubid =~ tr/a-z/A-Z/; # ASCII case-insensitive

        ## Public identifier prefixes (already uppercased) that select
        ## the quirks mode.
        ## NOTE(review): "EXPERIMETNAL" below looks like a deliberate
        ## legacy spelling kept for compatibility -- confirm against
        ## the spec before "correcting" it.
        my $prefix = [
          "+//SILMARIL//DTD HTML PRO V0R11 19970101//",
          "-//ADVASOFT LTD//DTD HTML 3.0 ASWEDIT + EXTENSIONS//",
          "-//AS//DTD HTML 3.0 ASWEDIT + EXTENSIONS//",
          "-//IETF//DTD HTML 2.0 LEVEL 1//",
          "-//IETF//DTD HTML 2.0 LEVEL 2//",
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 1//",
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 2//",
          "-//IETF//DTD HTML 2.0 STRICT//",
          "-//IETF//DTD HTML 2.0//",
          "-//IETF//DTD HTML 2.1E//",
          "-//IETF//DTD HTML 3.0//",
          "-//IETF//DTD HTML 3.2 FINAL//",
          "-//IETF//DTD HTML 3.2//",
          "-//IETF//DTD HTML 3//",
          "-//IETF//DTD HTML LEVEL 0//",
          "-//IETF//DTD HTML LEVEL 1//",
          "-//IETF//DTD HTML LEVEL 2//",
          "-//IETF//DTD HTML LEVEL 3//",
          "-//IETF//DTD HTML STRICT LEVEL 0//",
          "-//IETF//DTD HTML STRICT LEVEL 1//",
          "-//IETF//DTD HTML STRICT LEVEL 2//",
          "-//IETF//DTD HTML STRICT LEVEL 3//",
          "-//IETF//DTD HTML STRICT//",
          "-//IETF//DTD HTML//",
          "-//METRIUS//DTD METRIUS PRESENTATIONAL//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML STRICT//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 TABLES//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML STRICT//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 TABLES//",
          "-//NETSCAPE COMM. CORP.//DTD HTML//",
          "-//NETSCAPE COMM. CORP.//DTD STRICT HTML//",
          "-//O'REILLY AND ASSOCIATES//DTD HTML 2.0//",
          "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED 1.0//",
          "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED RELAXED 1.0//",
          "-//SOFTQUAD SOFTWARE//DTD HOTMETAL PRO 6.0::19990601::EXTENSIONS TO HTML 4.0//",
          "-//SOFTQUAD//DTD HOTMETAL PRO 4.0::19971010::EXTENSIONS TO HTML 4.0//",
          "-//SPYGLASS//DTD HTML 2.0 EXTENDED//",
          "-//SQ//DTD HTML 2.0 HOTMETAL + EXTENSIONS//",
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA HTML//",
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA STRICT HTML//",
          "-//W3C//DTD HTML 3 1995-03-24//",
          "-//W3C//DTD HTML 3.2 DRAFT//",
          "-//W3C//DTD HTML 3.2 FINAL//",
          "-//W3C//DTD HTML 3.2//",
          "-//W3C//DTD HTML 3.2S DRAFT//",
          "-//W3C//DTD HTML 4.0 FRAMESET//",
          "-//W3C//DTD HTML 4.0 TRANSITIONAL//",
          "-//W3C//DTD HTML EXPERIMETNAL 19960712//",
          "-//W3C//DTD HTML EXPERIMENTAL 970421//",
          "-//W3C//DTD W3 HTML//",
          "-//W3O//DTD W3 HTML 3.0//",
          "-//WEBTECHS//DTD MOZILLA HTML 2.0//",
          "-//WEBTECHS//DTD MOZILLA HTML//",
        ]; # $prefix

        ## The public identifier itself (not the table reference) has
        ## to be tested against each prefix.
        my $match;
        for (@$prefix) {
          if (substr ($pubid, 0, length $_) eq $_) {
            $match = 1;
            last;
          }
        }

        if ($match or
            $pubid eq "-//W3O//DTD W3 HTML STRICT 3.0//EN//" or
            $pubid eq "-/W3C/DTD HTML 4.0 TRANSITIONAL/EN" or
            $pubid eq "HTML") {
          !!!cp ('t5');
          $self->{document}->manakai_compat_mode ('quirks');
        } elsif (index ($pubid, "-//W3C//DTD HTML 4.01 FRAMESET//") == 0 or
                 index ($pubid, "-//W3C//DTD HTML 4.01 TRANSITIONAL//") == 0) {
          ## HTML 4.01 Frameset/Transitional: limited quirks when a
          ## system identifier is given, quirks when it is missing.
          if (defined $token->{system_identifier}) {
            !!!cp ('t6');
            $self->{document}->manakai_compat_mode ('limited quirks');
          } else {
            !!!cp ('t7');
            $self->{document}->manakai_compat_mode ('quirks');
          }
        } elsif (index ($pubid, "-//W3C//DTD XHTML 1.0 FRAMESET//") == 0 or
                 index ($pubid, "-//W3C//DTD XHTML 1.0 TRANSITIONAL//") == 0) {
          !!!cp ('t8');
          $self->{document}->manakai_compat_mode ('limited quirks');
        } else {
          !!!cp ('t9');
        }
      } else {
        !!!cp ('t10');
      }

      if (defined $token->{system_identifier}) {
        my $sysid = $token->{system_identifier};
        $sysid =~ tr/A-Z/a-z/;
        if ($sysid eq "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
          ## NOTE: Ensure that |PUBLIC "(limited quirks)" "(quirks)"| is
          ## marked as quirks.
          $self->{document}->manakai_compat_mode ('quirks');
          !!!cp ('t11');
        } else {
          !!!cp ('t12');
        }
      } else {
        !!!cp ('t13');
      }

      ## Go to the "before html" insertion mode.
      !!!next-token;
      return;
    } elsif ({
              START_TAG_TOKEN, 1,
              END_TAG_TOKEN, 1,
              END_OF_FILE_TOKEN, 1,
             }->{$token->{type}}) {
      !!!cp ('t14');
      !!!parse-error (type => 'no DOCTYPE', token => $token);
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the "before html" insertion mode.
      ## reprocess
      !!!ack-later;
      return;
    } elsif ($token->{type} == CHARACTER_TOKEN) {
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D
        ## Ignore the token

        unless (length $token->{data}) {
          !!!cp ('t15');
          ## Stay in the insertion mode.
          !!!next-token;
          redo INITIAL;
        } else {
          !!!cp ('t16');
        }
      } else {
        !!!cp ('t17');
      }

      !!!parse-error (type => 'no DOCTYPE', token => $token);
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the "before html" insertion mode.
      ## reprocess
      return;
    } elsif ($token->{type} == COMMENT_TOKEN) {
      !!!cp ('t18');
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);

      ## Stay in the insertion mode.
      !!!next-token;
      redo INITIAL;
    } else {
      die "$0: $token->{type}: Unknown token type";
    }
  } # INITIAL

  die "$0: _tree_construction_initial: This should be never reached";
} # _tree_construction_initial
3641
sub _tree_construction_root_element ($) {
  my $self = shift;

  ## NOTE: "before html" insertion mode.
  ##
  ## Processes tokens (the |$token| lexical shared by the tree
  ## construction stage) until the root |html| element has been
  ## created, appended to the document and pushed onto
  ## |$self->{open_elements}|:
  ##
  ##   - DOCTYPE tokens are reported and ignored; comments are appended
  ##     to the document; leading whitespace is dropped.
  ##   - An |html| start tag creates the root element from the token
  ##     and is consumed.
  ##   - Any other token creates an attribute-less root element and is
  ##     left to be reprocessed by the caller.
  ##
  ## |$self->{application_cache_selection}| is invoked exactly once,
  ## after the root element is in place, with the |manifest| attribute
  ## value or |undef|.
  ##
  ## Returns nothing.  Dies on an unknown token type.

  B: {
    if ($token->{type} == DOCTYPE_TOKEN) {
      !!!cp ('t19');
      !!!parse-error (type => 'in html:#DOCTYPE', token => $token);
      ## Ignore the token
      ## Stay in the insertion mode.
      !!!next-token;
      redo B;
    } elsif ($token->{type} == COMMENT_TOKEN) {
      !!!cp ('t20');
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);
      ## Stay in the insertion mode.
      !!!next-token;
      redo B;
    } elsif ($token->{type} == CHARACTER_TOKEN) {
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D
        ## Ignore the token.

        unless (length $token->{data}) {
          !!!cp ('t21');
          ## Stay in the insertion mode.
          !!!next-token;
          redo B;
        } else {
          !!!cp ('t22');
        }
      } else {
        !!!cp ('t23');
      }

      ## NOTE: The application cache selection callback is not invoked
      ## here; the shared "anything else" steps below invoke it once
      ## the root element exists.

      #
    } elsif ($token->{type} == START_TAG_TOKEN) {
      if ($token->{tag_name} eq 'html') {
        my $root_element;
        !!!create-element ($root_element, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
        $self->{document}->append_child ($root_element);
        push @{$self->{open_elements}},
            [$root_element, $el_category->{html}];

        if ($token->{attributes}->{manifest}) {
          !!!cp ('t24');
          $self->{application_cache_selection}
              ->($token->{attributes}->{manifest}->{value});
          ## ISSUE: Spec is unclear on relative references.
          ## According to Hixie (#whatwg 2008-03-19), it should be
          ## resolved against the base URI of the document in HTML
          ## or xml:base of the element in XHTML.
        } else {
          !!!cp ('t25');
          $self->{application_cache_selection}->(undef);
        }

        !!!nack ('t25c');

        !!!next-token;
        return; ## Go to the "before head" insertion mode.
      } else {
        !!!cp ('t25.1');
        #
      }
    } elsif ({
              END_TAG_TOKEN, 1,
              END_OF_FILE_TOKEN, 1,
             }->{$token->{type}}) {
      !!!cp ('t26');
      #
    } else {
      die "$0: $token->{type}: Unknown token type";
    }

    ## "Anything else": create the root element implicitly.
    my $root_element;
    !!!create-element ($root_element, $HTML_NS, 'html',, $token);
    $self->{document}->append_child ($root_element);
    push @{$self->{open_elements}}, [$root_element, $el_category->{html}];

    $self->{application_cache_selection}->(undef);

    ## NOTE: Reprocess the token.
    !!!ack-later;
    return; ## Go to the "before head" insertion mode.

    ## ISSUE: There is an issue in the spec
  } # B

  die "$0: _tree_construction_root_element: This should never be reached";
} # _tree_construction_root_element
3736
sub _reset_insertion_mode ($) {
  my $self = shift;

  ## Reset the insertion mode appropriately: walk
  ## |$self->{open_elements}| from the last entry towards the first
  ## and set |$self->{insertion_mode}| according to the first entry
  ## that determines a mode.  Each entry is an array reference of
  ## |[node, element category flags]|.
  ##
  ## Returns nothing.  Dies if the first entry of the stack is reached
  ## while |$self->{inner_html_node}| is not defined.

  ## Insertion modes selected by the local name of an HTML element.
  my $mode_by_local_name = {
    select => IN_SELECT_IM,
    ## NOTE: |option| and |optgroup| do not set
    ## insertion mode to "in select" by themselves.
    tr => IN_ROW_IM,
    tbody => IN_TABLE_BODY_IM,
    thead => IN_TABLE_BODY_IM,
    tfoot => IN_TABLE_BODY_IM,
    caption => IN_CAPTION_IM,
    colgroup => IN_COLUMN_GROUP_IM,
    table => IN_TABLE_IM,
    head => IN_BODY_IM, # not in head!
    body => IN_BODY_IM,
    frameset => IN_FRAMESET_IM,
  };

  ## Step 1
  my $last;

  ## Step 2
  my $i = -1;
  my $node = $self->{open_elements}->[$i];

  ## Step 3
  S3: {
    if ($self->{open_elements}->[0]->[0] eq $node->[0]) {
      $last = 1;
      if (defined $self->{inner_html_node}) {
        !!!cp ('t28');
        $node = $self->{inner_html_node};
      } else {
        die "_reset_insertion_mode: t27";
      }
    }

    ## Step 4..14
    my $new_mode;
    if ($node->[1] & FOREIGN_EL) {
      !!!cp ('t28.1');
      ## NOTE: Strictly speaking, the line below only applies to MathML and
      ## SVG elements.  Currently the HTML syntax supports only MathML and
      ## SVG elements as foreigners.
      $new_mode = IN_BODY_IM | IN_FOREIGN_CONTENT_IM;
    } elsif ($node->[1] & TABLE_CELL_EL) {
      if ($last) {
        !!!cp ('t28.2');
        #
      } else {
        !!!cp ('t28.3');
        $new_mode = IN_CELL_IM;
      }
    } else {
      !!!cp ('t28.4');
      $new_mode = $mode_by_local_name->{$node->[0]->manakai_local_name};
    }

    ## Assign and return as two separate statements so that returning
    ## never depends on the truth value of the mode constant.
    if (defined $new_mode) {
      $self->{insertion_mode} = $new_mode;
      return;
    }

    ## Step 15
    if ($node->[1] & HTML_EL) {
      unless (defined $self->{head_element}) {
        !!!cp ('t29');
        $self->{insertion_mode} = BEFORE_HEAD_IM;
      } else {
        ## ISSUE: Can this state be reached?
        !!!cp ('t30');
        $self->{insertion_mode} = AFTER_HEAD_IM;
      }
      return;
    } else {
      !!!cp ('t31');
    }

    ## Step 16
    if ($last) {
      $self->{insertion_mode} = IN_BODY_IM;
      return;
    }

    ## Step 17
    $i--;
    $node = $self->{open_elements}->[$i];

    ## Step 18
    redo S3;
  } # S3

  die "$0: _reset_insertion_mode: This line should never be reached";
} # _reset_insertion_mode
3823
3824 sub _tree_construction_main ($) {
3825 my $self = shift;
3826
3827 my $active_formatting_elements = [];
3828
my $reconstruct_active_formatting_elements = sub { # MUST
  ## Reconstruct the active formatting elements.  The only argument is
  ## a code reference that is called with each newly cloned element
  ## node to insert it into the tree.
  my ($insert_node) = @_;

  ## Whether the given node (or '#marker') is found in the stack of
  ## open elements.
  my $is_open = sub {
    my $candidate = shift;
    for (@{$self->{open_elements}}) {
      return 1 if $candidate eq $_->[0];
    }
    return 0;
  };

  ## Step 1
  return unless @$active_formatting_elements;

  ## Step 3
  my $pos = -1;
  my $entry = $active_formatting_elements->[$pos];

  ## Step 2
  return if $entry->[0] eq '#marker';
  if ($is_open->($entry->[0])) {
    !!!cp ('t32');
    return;
  }

  S4: {
    ## Step 4
    last S4 if $active_formatting_elements->[0]->[0] eq $entry->[0];

    ## Step 5
    $pos--;
    $entry = $active_formatting_elements->[$pos];

    ## Step 6
    if ($entry->[0] eq '#marker') {
      !!!cp ('t33_1');
      #
    } elsif ($is_open->($entry->[0])) {
      !!!cp ('t33');
      !!!cp ('t34');
      #
    } else {
      ## NOTE: <!DOCTYPE HTML><p><b><i><u></p> <p>X
      !!!cp ('t35');
      redo S4;
    }

    ## Step 7
    $pos++;
    $entry = $active_formatting_elements->[$pos];
  } # S4

  S7: {
    ## Step 8
    my $clone = [$entry->[0]->clone_node (0), $entry->[1]];

    ## Step 9
    $insert_node->($clone->[0]);
    push @{$self->{open_elements}}, $clone;

    ## Step 10
    $active_formatting_elements->[$pos] = $self->{open_elements}->[-1];

    ## Step 11
    if ($clone->[0] eq $active_formatting_elements->[-1]->[0]) {
      ## The last entry of the list has been processed.
      !!!cp ('t37');
    } else {
      !!!cp ('t36');
      ## Step 7'
      $pos++;
      $entry = $active_formatting_elements->[$pos];

      redo S7;
    }
  } # S7
}; # $reconstruct_active_formatting_elements
3908
my $clear_up_to_marker = sub {
  ## Truncate the list of active formatting elements at its last
  ## '#marker' entry: the marker and everything after it are removed.
  ## The list is left untouched if it contains no marker.
  my $idx = $#$active_formatting_elements;
  while ($idx >= 0) {
    if ($active_formatting_elements->[$idx]->[0] eq '#marker') {
      !!!cp ('t38');
      splice @$active_formatting_elements, $idx;
      return;
    }
    $idx--;
  }

  !!!cp ('t39');
}; # $clear_up_to_marker
3920
3921 my $insert;
3922
my $parse_rcdata = sub ($) {
  ## Handle an element whose content is CDATA or RCDATA.  The current
  ## |$token| is the start tag; the only argument is the content model
  ## flag (|CDATA_CONTENT_MODEL| or |RCDATA_CONTENT_MODEL|) to use
  ## while the content of the element is tokenized.
  my $content_model_flag = shift;

  ## Step 1
  my $start_tag_name = $token->{tag_name};
  my $el;
  !!!create-element ($el, $HTML_NS, $start_tag_name, $token->{attributes}, $token);

  ## Step 2
  $insert->($el);

  ## Step 3
  $self->{content_model} = $content_model_flag; # CDATA or RCDATA
  delete $self->{escape}; # MUST

  ## Step 4
  my $content = '';
  !!!nack ('t40.1');
  !!!next-token;
  while ($token->{type} == CHARACTER_TOKEN) { # or until stop tokenizing
    !!!cp ('t40');
    $content .= $token->{data};
    !!!next-token;
  }

  ## Step 5
  if (length $content) {
    !!!cp ('t41');
    my $text_node = $self->{document}->create_text_node ($content);
    $el->append_child ($text_node);
  }

  ## Step 6
  $self->{content_model} = PCDATA_CONTENT_MODEL;

  ## Step 7
  my $is_matching_end_tag = ($token->{type} == END_TAG_TOKEN and
                             $token->{tag_name} eq $start_tag_name);
  if ($is_matching_end_tag) {
    !!!cp ('t42');
    ## Ignore the token
  } elsif ($content_model_flag == CDATA_CONTENT_MODEL) {
    ## NOTE: An end-of-file token.
    !!!cp ('t43');
    !!!parse-error (type => 'in CDATA:#eof', token => $token);
  } elsif ($content_model_flag == RCDATA_CONTENT_MODEL) {
    ## NOTE: An end-of-file token.
    !!!cp ('t44');
    !!!parse-error (type => 'in RCDATA:#eof', token => $token);
  } else {
    die "$0: $content_model_flag in parse_rcdata";
  }
  !!!next-token;
}; # $parse_rcdata
3977
## Handles a |script| start tag.  Creates the |script| element from the
## current token, reads its content in the CDATA content model, appends
## the collected text to the element, and consumes the terminating
## token.  The element is inserted with |$insert| only when the parser
## is not working on an |inner_html_node|.
my $script_start_tag = sub () {
  my $script_el;
  !!!create-element ($script_el, $HTML_NS, 'script', $token->{attributes}, $token);
  ## TODO: mark as "parser-inserted"

  $self->{content_model} = CDATA_CONTENT_MODEL;
  delete $self->{escape}; # MUST

  ## Gather the data of consecutive character tokens; stop if a
  ## non-character token is seen or the tokenizer stops tokenising.
  my @chunks;
  !!!nack ('t45.1');
  !!!next-token;
  until ($token->{type} != CHARACTER_TOKEN) {
    !!!cp ('t45');
    push @chunks, $token->{data};
    !!!next-token;
  }
  my $script_text = join '', @chunks;
  if (length $script_text) {
    !!!cp ('t46');
    $script_el->manakai_append_text ($script_text);
  }

  $self->{content_model} = PCDATA_CONTENT_MODEL;

  my $is_script_end_tag = ($token->{type} == END_TAG_TOKEN and
                           $token->{tag_name} eq 'script');
  if ($is_script_end_tag) {
    !!!cp ('t47');
    ## Ignore the token
  } else {
    !!!cp ('t48');
    !!!parse-error (type => 'in CDATA:#eof', token => $token);
    ## ISSUE: And ignore?
    ## TODO: mark as "already executed"
  }

  if (not defined $self->{inner_html_node}) {
    !!!cp ('t50');
    ## TODO: $old_insertion_point = current insertion point
    ## TODO: insertion point = just before the next input character

    $insert->($script_el);

    ## TODO: insertion point = $old_insertion_point (might be "undefined")

    ## TODO: if there is a script that will execute as soon as the parser resume, then...
  } else {
    !!!cp ('t49');
    ## TODO: mark as "already executed"
  }

  !!!next-token;
}; # $script_start_tag
4029
4030 ## NOTE: $open_tables->[-1]->[0] is the "current table" element node.
4031 ## NOTE: $open_tables->[-1]->[1] is the "tainted" flag.
4032 my $open_tables = [[$self->{open_elements}->[0]->[0]]];
4033
## Processes an end tag of a formatting element by running the adoption
## agency algorithm (AAA).
##
## Argument: the end tag token (a hash reference; its |tag_name| names
## the formatting element to be closed).
##
## The algorithm is repeated (|redo FET|) until one of its exit
## conditions is met.  Every exit path fetches the next token before
## returning, so the caller can simply continue with the main loop.
##
## Entries of |$self->{open_elements}| and |$active_formatting_elements|
## are array references whose item 0 is the node (or the string
## '#marker' in the active formatting list) and whose item 1 is the
## element category bit set.
my $formatting_end_tag = sub {
  my $end_tag_token = shift;
  my $tag_name = $end_tag_token->{tag_name};

  ## NOTE: The adoption agency algorithm (AAA).

  FET: {
    ## Step 1
    ## Find the newest entry with the given tag name that is after the
    ## last marker in the list of active formatting elements.
    my $formatting_element;
    my $formatting_element_i_in_active;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq '#marker') {
        !!!cp ('t52');
        last AFE;
      } elsif ($active_formatting_elements->[$_]->[0]->manakai_local_name
                   eq $tag_name) {
        !!!cp ('t51');
        $formatting_element = $active_formatting_elements->[$_];
        $formatting_element_i_in_active = $_;
        last AFE;
      }
    } # AFE
    unless (defined $formatting_element) {
      !!!cp ('t53');
      !!!parse-error (type => 'unmatched end tag', text => $tag_name, token => $end_tag_token);
      ## Ignore the token
      !!!next-token;
      return;
    }
    ## has an element in scope
    ## Walk the stack of open elements from the current node upwards;
    ## once a scoping element has been passed, the formatting element
    ## is no longer in scope.
    my $in_scope = 1;
    my $formatting_element_i_in_open;
    INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if ($node->[0] eq $formatting_element->[0]) {
        if ($in_scope) {
          !!!cp ('t54');
          $formatting_element_i_in_open = $_;
          last INSCOPE;
        } else { # in open elements but not in scope
          !!!cp ('t55');
          ## The reported name is taken from the token passed in, like
          ## the other errors raised by this closure.
          !!!parse-error (type => 'unmatched end tag',
                          text => $tag_name,
                          token => $end_tag_token);
          ## Ignore the token
          !!!next-token;
          return;
        }
      } elsif ($node->[1] & SCOPING_EL) {
        !!!cp ('t56');
        $in_scope = 0;
      }
    } # INSCOPE
    unless (defined $formatting_element_i_in_open) {
      !!!cp ('t57');
      !!!parse-error (type => 'unmatched end tag',
                      text => $tag_name,
                      token => $end_tag_token);
      ## The formatting element is in the list of active formatting
      ## elements but not in the stack of open elements: remove that
      ## very entry from the list.  It is not necessarily the last
      ## entry (e.g. |<p><b><i></p></b>|), so it has to be removed by
      ## its index rather than by |pop|.
      splice @$active_formatting_elements,
          $formatting_element_i_in_active, 1; # $formatting_element
      !!!next-token; ## TODO: ok?
      return;
    }
    if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
      ## The formatting element is not the current node.
      !!!cp ('t58');
      !!!parse-error (type => 'not closed',
                      text => $self->{open_elements}->[-1]->[0]
                          ->manakai_local_name,
                      token => $end_tag_token);
    }

    ## Step 2
    ## The stack is scanned from the current node upwards and the
    ## candidate is overwritten on every match, so the element that is
    ## finally kept is the matching one nearest to the formatting
    ## element.
    my $furthest_block;
    my $furthest_block_i_in_open;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if (not ($node->[1] & FORMATTING_EL) and
          #not $phrasing_category->{$node->[1]} and
          ($node->[1] & SPECIAL_EL or
           $node->[1] & SCOPING_EL)) { ## Scoping is redundant, maybe
        !!!cp ('t59');
        $furthest_block = $node;
        $furthest_block_i_in_open = $_;
      } elsif ($node->[0] eq $formatting_element->[0]) {
        !!!cp ('t60');
        last OE;
      }
    } # OE

    ## Step 3
    ## No furthest block: pop the stack up to and including the
    ## formatting element, drop it from the active list and finish.
    unless (defined $furthest_block) { # MUST
      !!!cp ('t61');
      splice @{$self->{open_elements}}, $formatting_element_i_in_open;
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      !!!next-token;
      return;
    }

    ## Step 4
    my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1];

    ## Step 5
    my $furthest_block_parent = $furthest_block->[0]->parent_node;
    if (defined $furthest_block_parent) {
      !!!cp ('t62');
      $furthest_block_parent->remove_child ($furthest_block->[0]);
    }

    ## Step 6
    ## The bookmark is remembered as the node of the entry that
    ## precedes the formatting element in the active formatting list.
    my $bookmark_prev_el
        = $active_formatting_elements->[$formatting_element_i_in_active - 1]
            ->[0];

    ## Step 7
    my $node = $furthest_block;
    my $node_i_in_open = $furthest_block_i_in_open;
    my $last_node = $furthest_block;
    S7: {
      ## Step 1
      $node_i_in_open--;
      $node = $self->{open_elements}->[$node_i_in_open];

      ## Step 2
      ## A node that is not in the list of active formatting elements
      ## is removed from the stack of open elements and the inner loop
      ## restarts with the node above it.
      my $node_i_in_active;
      S7S2: {
        for (reverse 0..$#$active_formatting_elements) {
          if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
            !!!cp ('t63');
            $node_i_in_active = $_;
            last S7S2;
          }
        }
        splice @{$self->{open_elements}}, $node_i_in_open, 1;
        redo S7;
      } # S7S2

      ## Step 3
      last S7 if $node->[0] eq $formatting_element->[0];

      ## Step 4
      if ($last_node->[0] eq $furthest_block->[0]) {
        !!!cp ('t64');
        $bookmark_prev_el = $node->[0];
      }

      ## Step 5
      ## A node that already has children is replaced by a shallow
      ## clone in both lists.
      if ($node->[0]->has_child_nodes ()) {
        !!!cp ('t65');
        my $clone = [$node->[0]->clone_node (0), $node->[1]];
        $active_formatting_elements->[$node_i_in_active] = $clone;
        $self->{open_elements}->[$node_i_in_open] = $clone;
        $node = $clone;
      }

      ## Step 6
      $node->[0]->append_child ($last_node->[0]);

      ## Step 7
      $last_node = $node;

      ## Step 8
      redo S7;
    } # S7

    ## Step 8
    ## If the common ancestor is a table-related element, the last node
    ## is foster parented; otherwise it is appended to the common
    ## ancestor.
    if ($common_ancestor_node->[1] & TABLE_ROWS_EL) {
      my $foster_parent_element;
      my $next_sibling;
      OE: for (reverse 0..$#{$self->{open_elements}}) {
        if ($self->{open_elements}->[$_]->[1] & TABLE_EL) {
          my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
          if (defined $parent and $parent->node_type == 1) {
            !!!cp ('t65.1');
            $foster_parent_element = $parent;
            $next_sibling = $self->{open_elements}->[$_]->[0];
          } else {
            !!!cp ('t65.2');
            $foster_parent_element
                = $self->{open_elements}->[$_ - 1]->[0];
          }
          last OE;
        }
      } # OE
      $foster_parent_element = $self->{open_elements}->[0]->[0]
          unless defined $foster_parent_element;
      $foster_parent_element->insert_before ($last_node->[0], $next_sibling);
      $open_tables->[-1]->[1] = 1; # tainted
    } else {
      !!!cp ('t65.3');
      $common_ancestor_node->[0]->append_child ($last_node->[0]);
    }

    ## Step 9
    my $clone = [$formatting_element->[0]->clone_node (0),
                 $formatting_element->[1]];

    ## Step 10
    ## Copy the child list first, since moving the children changes it.
    my @cn = @{$furthest_block->[0]->child_nodes};
    $clone->[0]->append_child ($_) for @cn;

    ## Step 11
    $furthest_block->[0]->append_child ($clone->[0]);

    ## Step 12
    ## Remove the formatting element from the list of active formatting
    ## elements and insert the clone just after the bookmark.  |$i| is
    ## the index of the bookmark, adjusted when an entry before it is
    ## removed.
    my $i;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
        !!!cp ('t66');
        splice @$active_formatting_elements, $_, 1;
        $i-- and last AFE if defined $i;
      } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
        !!!cp ('t67');
        $i = $_;
      }
    } # AFE
    splice @$active_formatting_elements, $i + 1, 0, $clone;

    ## Step 13
    ## Remove the formatting element from the stack of open elements
    ## and insert the clone immediately below the furthest block.
    undef $i;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
        !!!cp ('t68');
        splice @{$self->{open_elements}}, $_, 1;
        $i-- and last OE if defined $i;
      } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
        !!!cp ('t69');
        $i = $_;
      }
    } # OE
    ## The clone is inserted (length 0), not substituted for the entry
    ## that follows the furthest block; any elements below the furthest
    ## block have to stay in the stack (e.g. the |p| in
    ## |<a><div><p></a>|) so that the next round can process them.
    splice @{$self->{open_elements}}, $i + 1, 0, $clone;

    ## Step 14
    redo FET;
  } # FET
}; # $formatting_end_tag
4268
## Default insertion routine: appends the given node to the current
## node, i.e. the last entry of the stack of open elements.  It is also
## installed as the initial value of |$insert|.
$insert = my $insert_to_current = sub {
  my $current_node = $self->{open_elements}->[-1]->[0];
  $current_node->append_child ($_[0]);
}; # $insert_to_current
4272
## Insertion routine with foster parenting.  When the current node is a
## table-related element (|TABLE_ROWS_EL|), the given node is inserted
## into the foster parent element instead and the current table is
## flagged as tainted; otherwise the node is simply appended to the
## current node.
##
## Argument: the node to insert.
my $insert_to_foster = sub {
  my $child = shift;
  my $open_elements = $self->{open_elements};

  ## Ordinary case: no foster parenting is necessary.
  unless ($open_elements->[-1]->[1] & TABLE_ROWS_EL) {
    !!!cp ('t72');
    return $open_elements->[-1]->[0]->append_child ($child);
  }

  # MUST
  ## Look for the last |table| in the stack of open elements.  If it
  ## has an element parent, insert just before the table in that
  ## parent; otherwise use the element preceding the table in the
  ## stack.
  my $foster_parent_element;
  my $next_sibling;
  OE: for my $index (reverse 0 .. $#$open_elements) {
    my $entry = $open_elements->[$index];
    next OE unless $entry->[1] & TABLE_EL;

    my $parent = $entry->[0]->parent_node;
    if (defined $parent and $parent->node_type == 1) {
      !!!cp ('t70');
      $foster_parent_element = $parent;
      $next_sibling = $entry->[0];
    } else {
      !!!cp ('t71');
      $foster_parent_element = $open_elements->[$index - 1]->[0];
    }
    last OE;
  } # OE

  ## Fall back to the first element of the stack when no table was
  ## found.
  unless (defined $foster_parent_element) {
    $foster_parent_element = $open_elements->[0]->[0];
  }
  $foster_parent_element->insert_before ($child, $next_sibling);
  $open_tables->[-1]->[1] = 1; # tainted
}; # $insert_to_foster
4304
4305 B: while (1) {
4306 if ($token->{type} == DOCTYPE_TOKEN) {
4307 !!!cp ('t73');
4308 !!!parse-error (type => 'in html:#DOCTYPE', token => $token);
4309 ## Ignore the token
4310 ## Stay in the phase
4311 !!!next-token;
4312 next B;
4313 } elsif ($token->{type} == START_TAG_TOKEN and
4314 $token->{tag_name} eq 'html') {
4315 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
4316 !!!cp ('t79');
4317 !!!parse-error (type => 'after html', text => 'html', token => $token);
4318 $self->{insertion_mode} = AFTER_BODY_IM;
4319 } elsif ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
4320 !!!cp ('t80');
4321 !!!parse-error (type => 'after html', text => 'html', token => $token);
4322 $self->{insertion_mode} = AFTER_FRAMESET_IM;
4323 } else {
4324 !!!cp ('t81');
4325 }
4326
4327 !!!cp ('t82');
4328 !!!parse-error (type => 'not first start tag', token => $token);
4329 my $top_el = $self->{open_elements}->[0]->[0];
4330 for my $attr_name (keys %{$token->{attributes}}) {
4331 unless ($top_el->has_attribute_ns (undef, $attr_name)) {
4332 !!!cp ('t84');
4333 $top_el->set_attribute_ns
4334 (undef, [undef, $attr_name],
4335 $token->{attributes}->{$attr_name}->{value});
4336 }
4337 }
4338 !!!nack ('t84.1');
4339 !!!next-token;
4340 next B;
4341 } elsif ($token->{type} == COMMENT_TOKEN) {
4342 my $comment = $self->{document}->create_comment ($token->{data});
4343 if ($self->{insertion_mode} & AFTER_HTML_IMS) {
4344 !!!cp ('t85');
4345 $self->{document}->append_child ($comment);
4346 } elsif ($self->{insertion_mode} == AFTER_BODY_IM) {
4347 !!!cp ('t86');
4348 $self->{open_elements}->[0]->[0]->append_child ($comment);
4349 } else {
4350 !!!cp ('t87');
4351 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4352 }
4353 !!!next-token;
4354 next B;
4355 } elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) {
4356 if ($token->{type} == CHARACTER_TOKEN) {
4357 !!!cp ('t87.1');
4358 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4359 !!!next-token;
4360 next B;
4361 } elsif ($token->{type} == START_TAG_TOKEN) {
4362 if ((not {mglyph => 1, malignmark => 1}->{$token->{tag_name}} and
4363 $self->{open_elements}->[-1]->[1] & FOREIGN_FLOW_CONTENT_EL) or
4364 not ($self->{open_elements}->[-1]->[1] & FOREIGN_EL) or
4365 ($token->{tag_name} eq 'svg' and
4366 $self->{open_elements}->[-1]->[1] & MML_AXML_EL)) {
4367 ## NOTE: "using the rules for secondary insertion mode" then "continue"
4368 !!!cp ('t87.2');
4369 #
4370 } elsif ({
4371 b => 1, big => 1, blockquote => 1, body => 1, br => 1,
4372 center => 1, code => 1, dd => 1, div => 1, dl => 1, dt => 1,
4373 em => 1, embed => 1, font => 1, h1 => 1, h2 => 1, h3 => 1,
4374 h4 => 1, h5 => 1, h6 => 1, head => 1, hr => 1, i => 1,
4375 img => 1, li => 1, listing => 1, menu => 1, meta => 1,
4376 nobr => 1, ol => 1, p => 1, pre => 1, ruby => 1, s => 1,
4377 small => 1, span => 1, strong => 1, strike => 1, sub => 1,
4378 sup => 1, table => 1, tt => 1, u => 1, ul => 1, var => 1,
4379 }->{$token->{tag_name}}) {
4380 !!!cp ('t87.2');
4381 !!!parse-error (type => 'not closed',
4382 text => $self->{open_elements}->[-1]->[0]
4383 ->manakai_local_name,
4384 token => $token);
4385
4386 pop @{$self->{open_elements}}
4387 while $self->{open_elements}->[-1]->[1] & FOREIGN_EL;
4388
4389 $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
4390 ## Reprocess.
4391 next B;
4392 } else {
4393 my $nsuri = $self->{open_elements}->[-1]->[0]->namespace_uri;
4394 my $tag_name = $token->{tag_name};
4395 if ($nsuri eq $SVG_NS) {
4396 $tag_name = {
4397 altglyph => 'altGlyph',
4398 altglyphdef => 'altGlyphDef',
4399 altglyphitem => 'altGlyphItem',
4400 animatecolor => 'animateColor',
4401 animatemotion => 'animateMotion',
4402 animatetransform => 'animateTransform',
4403 clippath => 'clipPath',
4404 feblend => 'feBlend',
4405 fecolormatrix => 'feColorMatrix',
4406 fecomponenttransfer => 'feComponentTransfer',
4407 fecomposite => 'feComposite',
4408 feconvolvematrix => 'feConvolveMatrix',
4409 fediffuselighting => 'feDiffuseLighting',
4410 fedisplacementmap => 'feDisplacementMap',
4411 fedistantlight => 'feDistantLight',
4412 feflood => 'feFlood',
4413 fefunca => 'feFuncA',
4414 fefuncb => 'feFuncB',
4415 fefuncg => 'feFuncG',
4416 fefuncr => 'feFuncR',
4417 fegaussianblur => 'feGaussianBlur',
4418 feimage => 'feImage',
4419 femerge => 'feMerge',
4420 femergenode => 'feMergeNode',
4421 femorphology => 'feMorphology',
4422 feoffset => 'feOffset',
4423 fepointlight => 'fePointLight',
4424 fespecularlighting => 'feSpecularLighting',
4425 fespotlight => 'feSpotLight',
4426 fetile => 'feTile',
4427 feturbulence => 'feTurbulence',
4428 foreignobject => 'foreignObject',
4429 glyphref => 'glyphRef',
4430 lineargradient => 'linearGradient',
4431 radialgradient => 'radialGradient',
4432 #solidcolor => 'solidColor', ## NOTE: Commented in spec (SVG1.2)
4433 textpath => 'textPath',
4434 }->{$tag_name} || $tag_name;
4435 }
4436
4437 ## "adjust SVG attributes" (SVG only) - done in insert-element-f
4438
4439 ## "adjust foreign attributes" - done in insert-element-f
4440
4441 !!!insert-element-f ($nsuri, $tag_name, $token->{attributes}, $token);
4442
4443 if ($self->{self_closing}) {
4444 pop @{$self->{open_elements}};
4445 !!!ack ('t87.3');
4446 } else {
4447 !!!cp ('t87.4');
4448 }
4449
4450 !!!next-token;
4451 next B;
4452 }
4453 } elsif ($token->{type} == END_TAG_TOKEN) {
4454 ## NOTE: "using the rules for secondary insertion mode" then "continue"
4455 !!!cp ('t87.5');
4456 #
4457 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4458 !!!cp ('t87.6');
4459 !!!parse-error (type => 'not closed',
4460 text => $self->{open_elements}->[-1]->[0]
4461 ->manakai_local_name,
4462 token => $token);
4463
4464 pop @{$self->{open_elements}}
4465 while $self->{open_elements}->[-1]->[1] & FOREIGN_EL;
4466
4467 $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
4468 ## Reprocess.
4469 next B;
4470 } else {
4471 die "$0: $token->{type}: Unknown token type";
4472 }
4473 }
4474
4475 if ($self->{insertion_mode} & HEAD_IMS) {
4476 if ($token->{type} == CHARACTER_TOKEN) {
4477 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4478 unless ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4479 !!!cp ('t88.2');
4480 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4481 } else {
4482 !!!cp ('t88.1');
4483 ## Ignore the token.
4484 !!!next-token;
4485 next B;
4486 }
4487 unless (length $token->{data}) {
4488 !!!cp ('t88');
4489 !!!next-token;
4490 next B;
4491 }
4492 }
4493
4494 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4495 !!!cp ('t89');
4496 ## As if <head>
4497 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
4498 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4499 push @{$self->{open_elements}},
4500 [$self->{head_element}, $el_category->{head}];
4501
4502 ## Reprocess in the "in head" insertion mode...
4503 pop @{$self->{open_elements}};
4504
4505 ## Reprocess in the "after head" insertion mode...
4506 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4507 !!!cp ('t90');
4508 ## As if </noscript>
4509 pop @{$self->{open_elements}};
4510 !!!parse-error (type => 'in noscript:#text', token => $token);
4511
4512 ## Reprocess in the "in head" insertion mode...
4513 ## As if </head>
4514 pop @{$self->{open_elements}};
4515
4516 ## Reprocess in the "after head" insertion mode...
4517 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4518 !!!cp ('t91');
4519 pop @{$self->{open_elements}};
4520
4521 ## Reprocess in the "after head" insertion mode...
4522 } else {
4523 !!!cp ('t92');
4524 }
4525
4526 ## "after head" insertion mode
4527 ## As if <body>
4528 !!!insert-element ('body',, $token);
4529 $self->{insertion_mode} = IN_BODY_IM;
4530 ## reprocess
4531 next B;
4532 } elsif ($token->{type} == START_TAG_TOKEN) {
4533 if ($token->{tag_name} eq 'head') {
4534 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4535 !!!cp ('t93');
4536 !!!create-element ($self->{head_element}, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
4537 $self->{open_elements}->[-1]->[0]->append_child
4538 ($self->{head_element});
4539 push @{$self->{open_elements}},
4540 [$self->{head_element}, $el_category->{head}];
4541 $self->{insertion_mode} = IN_HEAD_IM;
4542 !!!nack ('t93.1');
4543 !!!next-token;
4544 next B;
4545 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4546 !!!cp ('t93.2');
4547 !!!parse-error (type => 'after head', text => 'head',
4548 token => $token);
4549 ## Ignore the token
4550 !!!nack ('t93.3');
4551 !!!next-token;
4552 next B;
4553 } else {
4554 !!!cp ('t95');
4555 !!!parse-error (type => 'in head:head',
4556 token => $token); # or in head noscript
4557 ## Ignore the token
4558 !!!nack ('t95.1');
4559 !!!next-token;
4560 next B;
4561 }
4562 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4563 !!!cp ('t96');
4564 ## As if <head>
4565 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
4566 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4567 push @{$self->{open_elements}},
4568 [$self->{head_element}, $el_category->{head}];
4569
4570 $self->{insertion_mode} = IN_HEAD_IM;
4571 ## Reprocess in the "in head" insertion mode...
4572 } else {
4573 !!!cp ('t97');
4574 }
4575
4576 if ($token->{tag_name} eq 'base') {
4577 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4578 !!!cp ('t98');
4579 ## As if </noscript>
4580 pop @{$self->{open_elements}};
4581 !!!parse-error (type => 'in noscript', text => 'base',
4582 token => $token);
4583
4584 $self->{insertion_mode} = IN_HEAD_IM;
4585 ## Reprocess in the "in head" insertion mode...
4586 } else {
4587 !!!cp ('t99');
4588 }
4589
4590 ## NOTE: There is a "as if in head" code clone.
4591 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
4592 !!!cp ('t100');
4593 !!!parse-error (type => 'after head',
4594 text => $token->{tag_name}, token => $token);
4595 push @{$self->{open_elements}},
4596 [$self->{head_element}, $el_category->{head}];
4597 } else {
4598 !!!cp ('t101');
4599 }
4600 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4601 pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
4602 pop @{$self->{open_elements}} # <head>
4603 if $self->{insertion_mode} == AFTER_HEAD_IM;
4604 !!!nack ('t101.1');
4605 !!!next-token;
4606 next B;
4607 } elsif ($token->{tag_name} eq 'link') {
4608 ## NOTE: There is a "as if in head" code clone.
4609 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
4610 !!!cp ('t102');
4611 !!!parse-error (type => 'after head',
4612 text => $token->{tag_name}, token => $token);
4613 push @{$self->{open_elements}},
4614 [$self->{head_element}, $el_category->{head}];
4615 } else {
4616 !!!cp ('t103');
4617 }
4618 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4619 pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
4620 pop @{$self->{open_elements}} # <head>
4621 if $self->{insertion_mode} == AFTER_HEAD_IM;
4622 !!!ack ('t103.1');
4623 !!!next-token;
4624 next B;
4625 } elsif ($token->{tag_name} eq 'meta') {
4626 ## NOTE: There is a "as if in head" code clone.
4627 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
4628 !!!cp ('t104');
4629 !!!parse-error (type => 'after head',
4630 text => $token->{tag_name}, token => $token);
4631 push @{$self->{open_elements}},
4632 [$self->{head_element}, $el_category->{head}];
4633 } else {
4634 !!!cp ('t105');
4635 }
4636 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4637 my $meta_el = pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
4638
4639 unless ($self->{confident}) {
4640 if ($token->{attributes}->{charset}) {
4641 !!!cp ('t106');
4642 ## NOTE: Whether the encoding is supported or not is handled
4643 ## in the {change_encoding} callback.
4644 $self->{change_encoding}
4645 ->($self, $token->{attributes}->{charset}->{value},
4646 $token);
4647
4648 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
4649 ->set_user_data (manakai_has_reference =>
4650 $token->{attributes}->{charset}
4651 ->{has_reference});
4652 } elsif ($token->{attributes}->{content}) {
4653 if ($token->{attributes}->{content}->{value}
4654 =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
4655 [\x09-\x0D\x20]*=
4656 [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
4657 ([^"'\x09-\x0D\x20][^\x09-\x0D\x20\x3B]*))/x) {
4658 !!!cp ('t107');
4659 ## NOTE: Whether the encoding is supported or not is handled
4660 ## in the {change_encoding} callback.
4661 $self->{change_encoding}
4662 ->($self, defined $1 ? $1 : defined $2 ? $2 : $3,
4663 $token);
4664 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
4665 ->set_user_data (manakai_has_reference =>
4666 $token->{attributes}->{content}
4667 ->{has_reference});
4668 } else {
4669 !!!cp ('t108');
4670 }
4671 }
4672 } else {
4673 if ($token->{attributes}->{charset}) {
4674 !!!cp ('t109');
4675 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
4676 ->set_user_data (manakai_has_reference =>
4677 $token->{attributes}->{charset}
4678 ->{has_reference});
4679 }
4680 if ($token->{attributes}->{content}) {
4681 !!!cp ('t110');
4682 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
4683 ->set_user_data (manakai_has_reference =>
4684 $token->{attributes}->{content}
4685 ->{has_reference});
4686 }
4687 }
4688
4689 pop @{$self->{open_elements}} # <head>
4690 if $self->{insertion_mode} == AFTER_HEAD_IM;
4691 !!!ack ('t110.1');
4692 !!!next-token;
4693 next B;
4694 } elsif ($token->{tag_name} eq 'title') {
4695 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4696 !!!cp ('t111');
4697 ## As if </noscript>
4698 pop @{$self->{open_elements}};
4699 !!!parse-error (type => 'in noscript', text => 'title',
4700 token => $token);
4701
4702 $self->{insertion_mode} = IN_HEAD_IM;
4703 ## Reprocess in the "in head" insertion mode...
4704 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4705 !!!cp ('t112');
4706 !!!parse-error (type => 'after head',
4707 text => $token->{tag_name}, token => $token);
4708 push @{$self->{open_elements}},
4709 [$self->{head_element}, $el_category->{head}];
4710 } else {
4711 !!!cp ('t113');
4712 }
4713
4714 ## NOTE: There is a "as if in head" code clone.
4715 my $parent = defined $self->{head_element} ? $self->{head_element}
4716 : $self->{open_elements}->[-1]->[0];
4717 $parse_rcdata->(RCDATA_CONTENT_MODEL);
4718 pop @{$self->{open_elements}} # <head>
4719 if $self->{insertion_mode} == AFTER_HEAD_IM;
4720 next B;
4721 } elsif ($token->{tag_name} eq 'style' or
4722 $token->{tag_name} eq 'noframes') {
4723 ## NOTE: Or (scripting is enabled and tag_name eq 'noscript' and
4724 ## insertion mode IN_HEAD_IM)
4725 ## NOTE: There is a "as if in head" code clone.
4726 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
4727 !!!cp ('t114');
4728 !!!parse-error (type => 'after head',
4729 text => $token->{tag_name}, token => $token);
4730 push @{$self->{open_elements}},
4731 [$self->{head_element}, $el_category->{head}];
4732 } else {
4733 !!!cp ('t115');
4734 }
4735 $parse_rcdata->(CDATA_CONTENT_MODEL);
4736 pop @{$self->{open_elements}} # <head>
4737 if $self->{insertion_mode} == AFTER_HEAD_IM;
4738 next B;
4739 } elsif ($token->{tag_name} eq 'noscript') {
4740 if ($self->{insertion_mode} == IN_HEAD_IM) {
4741 !!!cp ('t116');
4742 ## NOTE: and scripting is disabled
4743 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4744 $self->{insertion_mode} = IN_HEAD_NOSCRIPT_IM;
4745 !!!nack ('t116.1');
4746 !!!next-token;
4747 next B;
4748 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4749 !!!cp ('t117');
4750 !!!parse-error (type => 'in noscript', text => 'noscript',
4751 token => $token);
4752 ## Ignore the token
4753 !!!nack ('t117.1');
4754 !!!next-token;
4755 next B;
4756 } else {
4757 !!!cp ('t118');
4758 #
4759 }
4760 } elsif ($token->{tag_name} eq 'script') {
4761 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4762 !!!cp ('t119');
4763 ## As if </noscript>
4764 pop @{$self->{open_elements}};
4765 !!!parse-error (type => 'in noscript', text => 'script',
4766 token => $token);
4767
4768 $self->{insertion_mode} = IN_HEAD_IM;
4769 ## Reprocess in the "in head" insertion mode...
4770 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4771 !!!cp ('t120');
4772 !!!parse-error (type => 'after head',
4773 text => $token->{tag_name}, token => $token);
4774 push @{$self->{open_elements}},
4775 [$self->{head_element}, $el_category->{head}];
4776 } else {
4777 !!!cp ('t121');
4778 }
4779
4780 ## NOTE: There is a "as if in head" code clone.
4781 $script_start_tag->();
4782 pop @{$self->{open_elements}} # <head>
4783 if $self->{insertion_mode} == AFTER_HEAD_IM;
4784 next B;
4785 } elsif ($token->{tag_name} eq 'body' or
4786 $token->{tag_name} eq 'frameset') {
4787 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4788 !!!cp ('t122');
4789 ## As if </noscript>
4790 pop @{$self->{open_elements}};
4791 !!!parse-error (type => 'in noscript',
4792 text => $token->{tag_name}, token => $token);
4793
4794 ## Reprocess in the "in head" insertion mode...
4795 ## As if </head>
4796 pop @{$self->{open_elements}};
4797
4798 ## Reprocess in the "after head" insertion mode...
4799 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4800 !!!cp ('t124');
4801 pop @{$self->{open_elements}};
4802
4803 ## Reprocess in the "after head" insertion mode...
4804 } else {
4805 !!!cp ('t125');
4806 }
4807
4808 ## "after head" insertion mode
4809 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4810 if ($token->{tag_name} eq 'body') {
4811 !!!cp ('t126');
4812 $self->{insertion_mode} = IN_BODY_IM;
4813 } elsif ($token->{tag_name} eq 'frameset') {
4814 !!!cp ('t127');
4815 $self->{insertion_mode} = IN_FRAMESET_IM;
4816 } else {
4817 die "$0: tag name: $self->{tag_name}";
4818 }
4819 !!!nack ('t127.1');
4820 !!!next-token;
4821 next B;
4822 } else {
4823 !!!cp ('t128');
4824 #
4825 }
4826
4827 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4828 !!!cp ('t129');
4829 ## As if </noscript>
4830 pop @{$self->{open_elements}};
4831 !!!parse-error (type => 'in noscript:/',
4832 text => $token->{tag_name}, token => $token);
4833
4834 ## Reprocess in the "in head" insertion mode...
4835 ## As if </head>
4836 pop @{$self->{open_elements}};
4837
4838 ## Reprocess in the "after head" insertion mode...
4839 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4840 !!!cp ('t130');
4841 ## As if </head>
4842 pop @{$self->{open_elements}};
4843
4844 ## Reprocess in the "after head" insertion mode...
4845 } else {
4846 !!!cp ('t131');
4847 }
4848
4849 ## "after head" insertion mode
4850 ## As if <body>
4851 !!!insert-element ('body',, $token);
4852 $self->{insertion_mode} = IN_BODY_IM;
4853 ## reprocess
4854 !!!ack-later;
4855 next B;
4856 } elsif ($token->{type} == END_TAG_TOKEN) {
4857 if ($token->{tag_name} eq 'head') {
4858 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4859 !!!cp ('t132');
4860 ## As if <head>
4861 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
4862 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4863 push @{$self->{open_elements}},
4864 [$self->{head_element}, $el_category->{head}];
4865
4866 ## Reprocess in the "in head" insertion mode...
4867 pop @{$self->{open_elements}};
4868 $self->{insertion_mode} = AFTER_HEAD_IM;
4869 !!!next-token;
4870 next B;
4871 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4872 !!!cp ('t133');
4873 ## As if </noscript>
4874 pop @{$self->{open_elements}};
4875 !!!parse-error (type => 'in noscript:/',
4876 text => 'head', token => $token);
4877
4878 ## Reprocess in the "in head" insertion mode...
4879 pop @{$self->{open_elements}};
4880 $self->{insertion_mode} = AFTER_HEAD_IM;
4881 !!!next-token;
4882 next B;
4883 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4884 !!!cp ('t134');
4885 pop @{$self->{open_elements}};
4886 $self->{insertion_mode} = AFTER_HEAD_IM;
4887 !!!next-token;
4888 next B;
4889 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4890 !!!cp ('t134.1');
4891 !!!parse-error (type => 'unmatched end tag', text => 'head',
4892 token => $token);
4893 ## Ignore the token
4894 !!!next-token;
4895 next B;
4896 } else {
4897 die "$0: $self->{insertion_mode}: Unknown insertion mode";
4898 }
4899 } elsif ($token->{tag_name} eq 'noscript') {
4900 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4901 !!!cp ('t136');
4902 pop @{$self->{open_elements}};
4903 $self->{insertion_mode} = IN_HEAD_IM;
4904 !!!next-token;
4905 next B;
4906 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM or
4907 $self->{insertion_mode} == AFTER_HEAD_IM) {
4908 !!!cp ('t137');
4909 !!!parse-error (type => 'unmatched end tag',
4910 text => 'noscript', token => $token);
4911 ## Ignore the token ## ISSUE: An issue in the spec.
4912 !!!next-token;
4913 next B;
4914 } else {
4915 !!!cp ('t138');
4916 #
4917 }
4918 } elsif ({
4919 body => 1, html => 1,
4920 }->{$token->{tag_name}}) {
4921 if ($self->{insertion_mode} == BEFORE_HEAD_IM or
4922 $self->{insertion_mode} == IN_HEAD_IM or
4923 $self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4924 !!!cp ('t140');
4925 !!!parse-error (type => 'unmatched end tag',
4926 text => $token->{tag_name}, token => $token);
4927 ## Ignore the token
4928 !!!next-token;
4929 next B;
4930 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4931 !!!cp ('t140.1');
4932 !!!parse-error (type => 'unmatched end tag',
4933 text => $token->{tag_name}, token => $token);
4934 ## Ignore the token
4935 !!!next-token;
4936 next B;
4937 } else {
4938 die "$0: $self->{insertion_mode}: Unknown insertion mode";
4939 }
4940 } elsif ($token->{tag_name} eq 'p') {
4941 !!!cp ('t142');
4942 !!!parse-error (type => 'unmatched end tag',
4943 text => $token->{tag_name}, token => $token);
4944 ## Ignore the token
4945 !!!next-token;
4946 next B;
4947 } elsif ($token->{tag_name} eq 'br') {
4948 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4949 !!!cp ('t142.2');
4950 ## (before head) as if <head>, (in head) as if </head>
4951 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
4952 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4953 $self->{insertion_mode} = AFTER_HEAD_IM;
4954
4955 ## Reprocess in the "after head" insertion mode...
4956 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4957 !!!cp ('t143.2');
4958 ## As if </head>
4959 pop @{$self->{open_elements}};
4960 $self->{insertion_mode} = AFTER_HEAD_IM;
4961
4962 ## Reprocess in the "after head" insertion mode...
4963 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4964 !!!cp ('t143.3');
4965 ## ISSUE: Two parse errors for <head><noscript></br>
4966 !!!parse-error (type => 'unmatched end tag',
4967 text => 'br', token => $token);
4968 ## As if </noscript>
4969 pop @{$self->{open_elements}};
4970 $self->{insertion_mode} = IN_HEAD_IM;
4971
4972 ## Reprocess in the "in head" insertion mode...
4973 ## As if </head>
4974 pop @{$self->{open_elements}};
4975 $self->{insertion_mode} = AFTER_HEAD_IM;
4976
4977 ## Reprocess in the "after head" insertion mode...
4978 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4979 !!!cp ('t143.4');
4980 #
4981 } else {
4982 die "$0: $self->{insertion_mode}: Unknown insertion mode";
4983 }
4984
4985 ## ISSUE: does not agree with IE7 - it doesn't ignore </br>.
4986 !!!parse-error (type => 'unmatched end tag',
4987 text => 'br', token => $token);
4988 ## Ignore the token
4989 !!!next-token;
4990 next B;
4991 } else {
4992 !!!cp ('t145');
4993 !!!parse-error (type => 'unmatched end tag',
4994 text => $token->{tag_name}, token => $token);
4995 ## Ignore the token
4996 !!!next-token;
4997 next B;
4998 }
4999
5000 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
5001 !!!cp ('t146');
5002 ## As if </noscript>
5003 pop @{$self->{open_elements}};
5004 !!!parse-error (type => 'in noscript:/',
5005 text => $token->{tag_name}, token => $token);
5006
5007 ## Reprocess in the "in head" insertion mode...
5008 ## As if </head>
5009 pop @{$self->{open_elements}};
5010
5011 ## Reprocess in the "after head" insertion mode...
5012 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
5013 !!!cp ('t147');
5014 ## As if </head>
5015 pop @{$self->{open_elements}};
5016
5017 ## Reprocess in the "after head" insertion mode...
5018 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
5019 ## ISSUE: This case cannot be reached?
5020 !!!cp ('t148');
5021 !!!parse-error (type => 'unmatched end tag',
5022 text => $token->{tag_name}, token => $token);
5023 ## Ignore the token ## ISSUE: An issue in the spec.
5024 !!!next-token;
5025 next B;
5026 } else {
5027 !!!cp ('t149');
5028 }
5029
5030 ## "after head" insertion mode
5031 ## As if <body>
5032 !!!insert-element ('body',, $token);
5033 $self->{insertion_mode} = IN_BODY_IM;
5034 ## reprocess
5035 next B;
5036 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
5037 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
5038 !!!cp ('t149.1');
5039
5040 ## NOTE: As if <head>
5041 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
5042 $self->{open_elements}->[-1]->[0]->append_child
5043 ($self->{head_element});
5044 #push @{$self->{open_elements}},
5045 # [$self->{head_element}, $el_category->{head}];
5046 #$self->{insertion_mode} = IN_HEAD_IM;
5047 ## NOTE: Reprocess.
5048
5049 ## NOTE: As if </head>
5050 #pop @{$self->{open_elements}};
5051 #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
5052 ## NOTE: Reprocess.
5053
5054 #
5055 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
5056 !!!cp ('t149.2');
5057
5058 ## NOTE: As if </head>
5059 pop @{$self->{open_elements}};
5060 #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
5061 ## NOTE: Reprocess.
5062
5063 #
5064 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
5065 !!!cp ('t149.3');
5066
5067 !!!parse-error (type => 'in noscript:#eof', token => $token);
5068
5069 ## As if </noscript>
5070 pop @{$self->{open_elements}};
5071 #$self->{insertion_mode} = IN_HEAD_IM;
5072 ## NOTE: Reprocess.
5073
5074 ## NOTE: As if </head>
5075 pop @{$self->{open_elements}};
5076 #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
5077 ## NOTE: Reprocess.
5078
5079 #
5080 } else {
5081 !!!cp ('t149.4');
5082 #
5083 }
5084
5085 ## NOTE: As if <body>
5086 !!!insert-element ('body',, $token);
5087 $self->{insertion_mode} = IN_BODY_IM;
5088 ## NOTE: Reprocess.
5089 next B;
5090 } else {
5091 die "$0: $token->{type}: Unknown token type";
5092 }
5093
5094 ## ISSUE: An issue in the spec.
5095 } elsif ($self->{insertion_mode} & BODY_IMS) {
5096 if ($token->{type} == CHARACTER_TOKEN) {
5097 !!!cp ('t150');
5098 ## NOTE: There is a code clone of "character in body".
5099 $reconstruct_active_formatting_elements->($insert_to_current);
5100
5101 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
5102
5103 !!!next-token;
5104 next B;
5105 } elsif ($token->{type} == START_TAG_TOKEN) {
5106 if ({
5107 caption => 1, col => 1, colgroup => 1, tbody => 1,
5108 td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
5109 }->{$token->{tag_name}}) {
5110 if ($self->{insertion_mode} == IN_CELL_IM) {
5111 ## have an element in table scope
5112 for (reverse 0..$#{$self->{open_elements}}) {
5113 my $node = $self->{open_elements}->[$_];
5114 if ($node->[1] & TABLE_CELL_EL) {
5115 !!!cp ('t151');
5116
5117 ## Close the cell
5118 !!!back-token; # <x>
5119 $token = {type => END_TAG_TOKEN,
5120 tag_name => $node->[0]->manakai_local_name,
5121 line => $token->{line},
5122 column => $token->{column}};
5123 next B;
5124 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5125 !!!cp ('t152');
5126 ## ISSUE: This case can never be reached, maybe.
5127 last;
5128 }
5129 }
5130
5131 !!!cp ('t153');
5132 !!!parse-error (type => 'start tag not allowed',
5133 text => $token->{tag_name}, token => $token);
5134 ## Ignore the token
5135 !!!nack ('t153.1');
5136 !!!next-token;
5137 next B;
5138 } elsif ($self->{insertion_mode} == IN_CAPTION_IM) {
5139 !!!parse-error (type => 'not closed', text => 'caption',
5140 token => $token);
5141
5142 ## NOTE: As if </caption>.
5143 ## have a caption element in table scope
5144 my $i;
5145 INSCOPE: {
5146 for (reverse 0..$#{$self->{open_elements}}) {
5147 my $node = $self->{open_elements}->[$_];
5148 if ($node->[1] & CAPTION_EL) {
5149 !!!cp ('t155');
5150 $i = $_;
5151 last INSCOPE;
5152 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5153 !!!cp ('t156');
5154 last;
5155 }
5156 }
5157
5158 !!!cp ('t157');
5159 !!!parse-error (type => 'start tag not allowed',
5160 text => $token->{tag_name}, token => $token);
5161 ## Ignore the token
5162 !!!nack ('t157.1');
5163 !!!next-token;
5164 next B;
5165 } # INSCOPE
5166
5167 ## generate implied end tags
5168 while ($self->{open_elements}->[-1]->[1]
5169 & END_TAG_OPTIONAL_EL) {
5170 !!!cp ('t158');
5171 pop @{$self->{open_elements}};
5172 }
5173
5174 unless ($self->{open_elements}->[-1]->[1] & CAPTION_EL) {
5175 !!!cp ('t159');
5176 !!!parse-error (type => 'not closed',
5177 text => $self->{open_elements}->[-1]->[0]
5178 ->manakai_local_name,
5179 token => $token);
5180 } else {
5181 !!!cp ('t160');
5182 }
5183
5184 splice @{$self->{open_elements}}, $i;
5185
5186 $clear_up_to_marker->();
5187
5188 $self->{insertion_mode} = IN_TABLE_IM;
5189
5190 ## reprocess
5191 !!!ack-later;
5192 next B;
5193 } else {
5194 !!!cp ('t161');
5195 #
5196 }
5197 } else {
5198 !!!cp ('t162');
5199 #
5200 }
5201 } elsif ($token->{type} == END_TAG_TOKEN) {
5202 if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
5203 if ($self->{insertion_mode} == IN_CELL_IM) {
5204 ## have an element in table scope
5205 my $i;
5206 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5207 my $node = $self->{open_elements}->[$_];
5208 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
5209 !!!cp ('t163');
5210 $i = $_;
5211 last INSCOPE;
5212 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5213 !!!cp ('t164');
5214 last INSCOPE;
5215 }
5216 } # INSCOPE
5217 unless (defined $i) {
5218 !!!cp ('t165');
5219 !!!parse-error (type => 'unmatched end tag',
5220 text => $token->{tag_name},
5221 token => $token);
5222 ## Ignore the token
5223 !!!next-token;
5224 next B;
5225 }
5226
5227 ## generate implied end tags
5228 while ($self->{open_elements}->[-1]->[1]
5229 & END_TAG_OPTIONAL_EL) {
5230 !!!cp ('t166');
5231 pop @{$self->{open_elements}};
5232 }
5233
5234 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
5235 ne $token->{tag_name}) {
5236 !!!cp ('t167');
5237 !!!parse-error (type => 'not closed',
5238 text => $self->{open_elements}->[-1]->[0]
5239 ->manakai_local_name,
5240 token => $token);
5241 } else {
5242 !!!cp ('t168');
5243 }
5244
5245 splice @{$self->{open_elements}}, $i;
5246
5247 $clear_up_to_marker->();
5248
5249 $self->{insertion_mode} = IN_ROW_IM;
5250
5251 !!!next-token;
5252 next B;
5253 } elsif ($self->{insertion_mode} == IN_CAPTION_IM) {
5254 !!!cp ('t169');
5255 !!!parse-error (type => 'unmatched end tag',
5256 text => $token->{tag_name}, token => $token);
5257 ## Ignore the token
5258 !!!next-token;
5259 next B;
5260 } else {
5261 !!!cp ('t170');
5262 #
5263 }
5264 } elsif ($token->{tag_name} eq 'caption') {
5265 if ($self->{insertion_mode} == IN_CAPTION_IM) {
5266 ## have a caption element in table scope
5267 my $i;
5268 INSCOPE: {
5269 for (reverse 0..$#{$self->{open_elements}}) {
5270 my $node = $self->{open_elements}->[$_];
5271 if ($node->[1] & CAPTION_EL) {
5272 !!!cp ('t171');
5273 $i = $_;
5274 last INSCOPE;
5275 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5276 !!!cp ('t172');
5277 last;
5278 }
5279 }
5280
5281 !!!cp ('t173');
5282 !!!parse-error (type => 'unmatched end tag',
5283 text => $token->{tag_name}, token => $token);
5284 ## Ignore the token
5285 !!!next-token;
5286 next B;
5287 } # INSCOPE
5288
5289 ## generate implied end tags
5290 while ($self->{open_elements}->[-1]->[1]
5291 & END_TAG_OPTIONAL_EL) {
5292 !!!cp ('t174');
5293 pop @{$self->{open_elements}};
5294 }
5295
5296 unless ($self->{open_elements}->[-1]->[1] & CAPTION_EL) {
5297 !!!cp ('t175');
5298 !!!parse-error (type => 'not closed',
5299 text => $self->{open_elements}->[-1]->[0]
5300 ->manakai_local_name,
5301 token => $token);
5302 } else {
5303 !!!cp ('t176');
5304 }
5305
5306 splice @{$self->{open_elements}}, $i;
5307
5308 $clear_up_to_marker->();
5309
5310 $self->{insertion_mode} = IN_TABLE_IM;
5311
5312 !!!next-token;
5313 next B;
5314 } elsif ($self->{insertion_mode} == IN_CELL_IM) {
5315 !!!cp ('t177');
5316 !!!parse-error (type => 'unmatched end tag',
5317 text => $token->{tag_name}, token => $token);
5318 ## Ignore the token
5319 !!!next-token;
5320 next B;
5321 } else {
5322 !!!cp ('t178');
5323 #
5324 }
5325 } elsif ({
5326 table => 1, tbody => 1, tfoot => 1,
5327 thead => 1, tr => 1,
5328 }->{$token->{tag_name}} and
5329 $self->{insertion_mode} == IN_CELL_IM) {
5330 ## have an element in table scope
5331 my $i;
5332 my $tn;
5333 INSCOPE: {
5334 for (reverse 0..$#{$self->{open_elements}}) {
5335 my $node = $self->{open_elements}->[$_];
5336 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
5337 !!!cp ('t179');
5338 $i = $_;
5339
5340 ## Close the cell
5341 !!!back-token; # </x>
5342 $token = {type => END_TAG_TOKEN, tag_name => $tn,
5343 line => $token->{line},
5344 column => $token->{column}};
5345 next B;
5346 } elsif ($node->[1] & TABLE_CELL_EL) {
5347 !!!cp ('t180');
5348 $tn = $node->[0]->manakai_local_name;
5349 ## NOTE: There is exactly one |td| or |th| element
5350 ## in scope in the stack of open elements by definition.
5351 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5352 ## ISSUE: Can this be reached?
5353 !!!cp ('t181');
5354 last;
5355 }
5356 }
5357
5358 !!!cp ('t182');
5359 !!!parse-error (type => 'unmatched end tag',
5360 text => $token->{tag_name}, token => $token);
5361 ## Ignore the token
5362 !!!next-token;
5363 next B;
5364 } # INSCOPE
5365 } elsif ($token->{tag_name} eq 'table' and
5366 $self->{insertion_mode} == IN_CAPTION_IM) {
5367 !!!parse-error (type => 'not closed', text => 'caption',
5368 token => $token);
5369
5370 ## As if </caption>
5371 ## have a caption element in table scope
5372 my $i;
5373 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5374 my $node = $self->{open_elements}->[$_];
5375 if ($node->[1] & CAPTION_EL) {
5376 !!!cp ('t184');
5377 $i = $_;
5378 last INSCOPE;
5379 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5380 !!!cp ('t185');
5381 last INSCOPE;
5382 }
5383 } # INSCOPE
5384 unless (defined $i) {
5385 !!!cp ('t186');
5386 !!!parse-error (type => 'unmatched end tag',
5387 text => 'caption', token => $token);
5388 ## Ignore the token
5389 !!!next-token;
5390 next B;
5391 }
5392
5393 ## generate implied end tags
5394 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
5395 !!!cp ('t187');
5396 pop @{$self->{open_elements}};
5397 }
5398
5399 unless ($self->{open_elements}->[-1]->[1] & CAPTION_EL) {
5400 !!!cp ('t188');
5401 !!!parse-error (type => 'not closed',
5402 text => $self->{open_elements}->[-1]->[0]
5403 ->manakai_local_name,
5404 token => $token);
5405 } else {
5406 !!!cp ('t189');
5407 }
5408
5409 splice @{$self->{open_elements}}, $i;
5410
5411 $clear_up_to_marker->();
5412
5413 $self->{insertion_mode} = IN_TABLE_IM;
5414
5415 ## reprocess
5416 next B;
5417 } elsif ({
5418 body => 1, col => 1, colgroup => 1, html => 1,
5419 }->{$token->{tag_name}}) {
5420 if ($self->{insertion_mode} & BODY_TABLE_IMS) {
5421 !!!cp ('t190');
5422 !!!parse-error (type => 'unmatched end tag',
5423 text => $token->{tag_name}, token => $token);
5424 ## Ignore the token
5425 !!!next-token;
5426 next B;
5427 } else {
5428 !!!cp ('t191');
5429 #
5430 }
5431 } elsif ({
5432 tbody => 1, tfoot => 1,
5433 thead => 1, tr => 1,
5434 }->{$token->{tag_name}} and
5435 $self->{insertion_mode} == IN_CAPTION_IM) {
5436 !!!cp ('t192');
5437 !!!parse-error (type => 'unmatched end tag',
5438 text => $token->{tag_name}, token => $token);
5439 ## Ignore the token
5440 !!!next-token;
5441 next B;
5442 } else {
5443 !!!cp ('t193');
5444 #
5445 }
5446 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
5447 for my $entry (@{$self->{open_elements}}) {
5448 unless ($entry->[1] & ALL_END_TAG_OPTIONAL_EL) {
5449 !!!cp ('t75');
5450 !!!parse-error (type => 'in body:#eof', token => $token);
5451 last;
5452 }
5453 }
5454
5455 ## Stop parsing.
5456 last B;
5457 } else {
5458 die "$0: $token->{type}: Unknown token type";
5459 }
5460
5461 $insert = $insert_to_current;
5462 #
5463 } elsif ($self->{insertion_mode} & TABLE_IMS) {
5464 if ($token->{type} == CHARACTER_TOKEN) {
5465 if (not $open_tables->[-1]->[1] and # tainted
5466 $token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5467 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5468
5469 unless (length $token->{data}) {
5470 !!!cp ('t194');
5471 !!!next-token;
5472 next B;
5473 } else {
5474 !!!cp ('t195');
5475 }
5476 }
5477
5478 !!!parse-error (type => 'in table:#text', token => $token);
5479
5480 ## As if in body, but insert into foster parent element
5481 ## ISSUE: Spec says that "whenever a node would be inserted
5482 ## into the current node" while characters might not
5483 ## result in a new Text node.
5484 $reconstruct_active_formatting_elements->($insert_to_foster);
5485
5486 if ($self->{open_elements}->[-1]->[1] & TABLE_ROWS_EL) {
5487 # MUST
5488 my $foster_parent_element;
5489 my $next_sibling;
5490 my $prev_sibling;
5491 OE: for (reverse 0..$#{$self->{open_elements}}) {
5492 if ($self->{open_elements}->[$_]->[1] & TABLE_EL) {
5493 my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
5494 if (defined $parent and $parent->node_type == 1) {
5495 !!!cp ('t196');
5496 $foster_parent_element = $parent;
5497 $next_sibling = $self->{open_elements}->[$_]->[0];
5498 $prev_sibling = $next_sibling->previous_sibling;
5499 } else {
5500 !!!cp ('t197');
5501 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
5502 $prev_sibling = $foster_parent_element->last_child;
5503 }
5504 last OE;
5505 }
5506 } # OE
5507 $foster_parent_element = $self->{open_elements}->[0]->[0] and
5508 $prev_sibling = $foster_parent_element->last_child
5509 unless defined $foster_parent_element;
5510 if (defined $prev_sibling and
5511 $prev_sibling->node_type == 3) {
5512 !!!cp ('t198');
5513 $prev_sibling->manakai_append_text ($token->{data});
5514 } else {
5515 !!!cp ('t199');
5516 $foster_parent_element->insert_before
5517 ($self->{document}->create_text_node ($token->{data}),
5518 $next_sibling);
5519 }
5520 $open_tables->[-1]->[1] = 1; # tainted
5521 } else {
5522 !!!cp ('t200');
5523 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
5524 }
5525
5526 !!!next-token;
5527 next B;
5528 } elsif ($token->{type} == START_TAG_TOKEN) {
5529 if ({
5530 tr => ($self->{insertion_mode} != IN_ROW_IM),
5531 th => 1, td => 1,
5532 }->{$token->{tag_name}}) {
5533 if ($self->{insertion_mode} == IN_TABLE_IM) {
5534 ## Clear back to table context
5535 while (not ($self->{open_elements}->[-1]->[1]
5536 & TABLE_SCOPING_EL)) {
5537 !!!cp ('t201');
5538 pop @{$self->{open_elements}};
5539 }
5540
5541 !!!insert-element ('tbody',, $token);
5542 $self->{insertion_mode} = IN_TABLE_BODY_IM;
5543 ## reprocess in the "in table body" insertion mode...
5544 }
5545
5546 if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
5547 unless ($token->{tag_name} eq 'tr') {
5548 !!!cp ('t202');
5549 !!!parse-error (type => 'missing start tag:tr', token => $token);
5550 }
5551
5552 ## Clear back to table body context
5553 while (not ($self->{open_elements}->[-1]->[1]
5554 & TABLE_ROWS_SCOPING_EL)) {
5555 !!!cp ('t203');
5556 ## ISSUE: Can this case be reached?
5557 pop @{$self->{open_elements}};
5558 }
5559
5560 $self->{insertion_mode} = IN_ROW_IM;
5561 if ($token->{tag_name} eq 'tr') {
5562 !!!cp ('t204');
5563 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5564 !!!nack ('t204');
5565 !!!next-token;
5566 next B;
5567 } else {
5568 !!!cp ('t205');
5569 !!!insert-element ('tr',, $token);
5570 ## reprocess in the "in row" insertion mode
5571 }
5572 } else {
5573 !!!cp ('t206');
5574 }
5575
5576 ## Clear back to table row context
5577 while (not ($self->{open_elements}->[-1]->[1]
5578 & TABLE_ROW_SCOPING_EL)) {
5579 !!!cp ('t207');
5580 pop @{$self->{open_elements}};
5581 }
5582
5583 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5584 $self->{insertion_mode} = IN_CELL_IM;
5585
5586 push @$active_formatting_elements, ['#marker', ''];
5587
5588 !!!nack ('t207.1');
5589 !!!next-token;
5590 next B;
5591 } elsif ({
5592 caption => 1, col => 1, colgroup => 1,
5593 tbody => 1, tfoot => 1, thead => 1,
5594 tr => 1, # $self->{insertion_mode} == IN_ROW_IM
5595 }->{$token->{tag_name}}) {
5596 if ($self->{insertion_mode} == IN_ROW_IM) {
5597 ## As if </tr>
5598 ## have an element in table scope
5599 my $i;
5600 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5601 my $node = $self->{open_elements}->[$_];
5602 if ($node->[1] & TABLE_ROW_EL) {
5603 !!!cp ('t208');
5604 $i = $_;
5605 last INSCOPE;
5606 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5607 !!!cp ('t209');
5608 last INSCOPE;
5609 }
5610 } # INSCOPE
5611 unless (defined $i) {
5612 !!!cp ('t210');
5613 ## TODO: This type is wrong.
5614 !!!parse-error (type => 'unmacthed end tag',
5615 text => $token->{tag_name}, token => $token);
5616 ## Ignore the token
5617 !!!nack ('t210.1');
5618 !!!next-token;
5619 next B;
5620 }
5621
5622 ## Clear back to table row context
5623 while (not ($self->{open_elements}->[-1]->[1]
5624 & TABLE_ROW_SCOPING_EL)) {
5625 !!!cp ('t211');
5626 ## ISSUE: Can this case be reached?
5627 pop @{$self->{open_elements}};
5628 }
5629
5630 pop @{$self->{open_elements}}; # tr
5631 $self->{insertion_mode} = IN_TABLE_BODY_IM;
5632 if ($token->{tag_name} eq 'tr') {
5633 !!!cp ('t212');
5634 ## reprocess
5635 !!!ack-later;
5636 next B;
5637 } else {
5638 !!!cp ('t213');
5639 ## reprocess in the "in table body" insertion mode...
5640 }
5641 }
5642
5643 if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
5644 ## have an element in table scope
5645 my $i;
5646 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5647 my $node = $self->{open_elements}->[$_];
5648 if ($node->[1] & TABLE_ROW_GROUP_EL) {
5649 !!!cp ('t214');
5650 $i = $_;
5651 last INSCOPE;
5652 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5653 !!!cp ('t215');
5654 last INSCOPE;
5655 }
5656 } # INSCOPE
5657 unless (defined $i) {
5658 !!!cp ('t216');
5659 ## TODO: This error type is wrong.
5660 !!!parse-error (type => 'unmatched end tag',
5661 text => $token->{tag_name}, token => $token);
5662 ## Ignore the token
5663 !!!nack ('t216.1');
5664 !!!next-token;
5665 next B;
5666 }
5667
5668 ## Clear back to table body context
5669 while (not ($self->{open_elements}->[-1]->[1]
5670 & TABLE_ROWS_SCOPING_EL)) {
5671 !!!cp ('t217');
5672 ## ISSUE: Can this state be reached?
5673 pop @{$self->{open_elements}};
5674 }
5675
5676 ## As if <{current node}>
5677 ## have an element in table scope
5678 ## true by definition
5679
5680 ## Clear back to table body context
5681 ## nop by definition
5682
5683 pop @{$self->{open_elements}};
5684 $self->{insertion_mode} = IN_TABLE_IM;
5685 ## reprocess in "in table" insertion mode...
5686 } else {
5687 !!!cp ('t218');
5688 }
5689
5690 if ($token->{tag_name} eq 'col') {
5691 ## Clear back to table context
5692 while (not ($self->{open_elements}->[-1]->[1]
5693 & TABLE_SCOPING_EL)) {
5694 !!!cp ('t219');
5695 ## ISSUE: Can this state be reached?
5696 pop @{$self->{open_elements}};
5697 }
5698
5699 !!!insert-element ('colgroup',, $token);
5700 $self->{insertion_mode} = IN_COLUMN_GROUP_IM;
5701 ## reprocess
5702 !!!ack-later;
5703 next B;
5704 } elsif ({
5705 caption => 1,
5706 colgroup => 1,
5707 tbody => 1, tfoot => 1, thead => 1,
5708 }->{$token->{tag_name}}) {
5709 ## Clear back to table context
5710 while (not ($self->{open_elements}->[-1]->[1]
5711 & TABLE_SCOPING_EL)) {
5712 !!!cp ('t220');
5713 ## ISSUE: Can this state be reached?
5714 pop @{$self->{open_elements}};
5715 }
5716
5717 push @$active_formatting_elements, ['#marker', '']
5718 if $token->{tag_name} eq 'caption';
5719
5720 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5721 $self->{insertion_mode} = {
5722 caption => IN_CAPTION_IM,
5723 colgroup => IN_COLUMN_GROUP_IM,
5724 tbody => IN_TABLE_BODY_IM,
5725 tfoot => IN_TABLE_BODY_IM,
5726 thead => IN_TABLE_BODY_IM,
5727 }->{$token->{tag_name}};
5728 !!!next-token;
5729 !!!nack ('t220.1');
5730 next B;
5731 } else {
5732 die "$0: in table: <>: $token->{tag_name}";
5733 }
5734 } elsif ($token->{tag_name} eq 'table') {
5735 !!!parse-error (type => 'not closed',
5736 text => $self->{open_elements}->[-1]->[0]
5737 ->manakai_local_name,
5738 token => $token);
5739
5740 ## As if </table>
5741 ## have a table element in table scope
5742 my $i;
5743 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5744 my $node = $self->{open_elements}->[$_];
5745 if ($node->[1] & TABLE_EL) {
5746 !!!cp ('t221');
5747 $i = $_;
5748 last INSCOPE;
5749 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5750 !!!cp ('t222');
5751 last INSCOPE;
5752 }
5753 } # INSCOPE
5754 unless (defined $i) {
5755 !!!cp ('t223');
5756 ## TODO: The following is wrong, maybe.
5757 !!!parse-error (type => 'unmatched end tag', text => 'table',
5758 token => $token);
5759 ## Ignore tokens </table><table>
5760 !!!nack ('t223.1');
5761 !!!next-token;
5762 next B;
5763 }
5764
5765 ## TODO: The following steps are removed from the latest spec.
5766 ## generate implied end tags
5767 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
5768 !!!cp ('t224');
5769 pop @{$self->{open_elements}};
5770 }
5771
5772 unless ($self->{open_elements}->[-1]->[1] & TABLE_EL) {
5773 !!!cp ('t225');
5774 ## NOTE: |<table><tr><table>|
5775 !!!parse-error (type => 'not closed',
5776 text => $self->{open_elements}->[-1]->[0]
5777 ->manakai_local_name,
5778 token => $token);
5779 } else {
5780 !!!cp ('t226');
5781 }
5782
5783 splice @{$self->{open_elements}}, $i;
5784 pop @{$open_tables};
5785
5786 $self->_reset_insertion_mode;
5787
5788 ## reprocess
5789 !!!ack-later;
5790 next B;
5791 } elsif ($token->{tag_name} eq 'style') {
5792 if (not $open_tables->[-1]->[1]) { # tainted
5793 !!!cp ('t227.8');
5794 ## NOTE: This is a "as if in head" code clone.
5795 $parse_rcdata->(CDATA_CONTENT_MODEL);
5796 next B;
5797 } else {
5798 !!!cp ('t227.7');
5799 #
5800 }
5801 } elsif ($token->{tag_name} eq 'script') {
5802 if (not $open_tables->[-1]->[1]) { # tainted
5803 !!!cp ('t227.6');
5804 ## NOTE: This is a "as if in head" code clone.
5805 $script_start_tag->();
5806 next B;
5807 } else {
5808 !!!cp ('t227.5');
5809 #
5810 }
5811 } elsif ($token->{tag_name} eq 'input') {
5812 if (not $open_tables->[-1]->[1]) { # tainted
5813 if ($token->{attributes}->{type}) { ## TODO: case
5814 my $type = lc $token->{attributes}->{type}->{value};
5815 if ($type eq 'hidden') {
5816 !!!cp ('t227.3');
5817 !!!parse-error (type => 'in table',
5818 text => $token->{tag_name}, token => $token);
5819
5820 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5821
5822 ## TODO: form element pointer
5823
5824 pop @{$self->{open_elements}};
5825
5826 !!!next-token;
5827 !!!ack ('t227.2.1');
5828 next B;
5829 } else {
5830 !!!cp ('t227.2');
5831 #
5832 }
5833 } else {
5834 !!!cp ('t227.1');
5835 #
5836 }
5837 } else {
5838 !!!cp ('t227.4');
5839 #
5840 }
5841 } else {
5842 !!!cp ('t227');
5843 #
5844 }
5845
5846 !!!parse-error (type => 'in table', text => $token->{tag_name},
5847 token => $token);
5848
5849 $insert = $insert_to_foster;
5850 #
5851 } elsif ($token->{type} == END_TAG_TOKEN) {
5852 if ($token->{tag_name} eq 'tr' and
5853 $self->{insertion_mode} == IN_ROW_IM) {
5854 ## have an element in table scope
5855 my $i;
5856 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5857 my $node = $self->{open_elements}->[$_];
5858 if ($node->[1] & TABLE_ROW_EL) {
5859 !!!cp ('t228');
5860 $i = $_;
5861 last INSCOPE;
5862 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5863 !!!cp ('t229');
5864 last INSCOPE;
5865 }
5866 } # INSCOPE
5867 unless (defined $i) {
5868 !!!cp ('t230');
5869 !!!parse-error (type => 'unmatched end tag',
5870 text => $token->{tag_name}, token => $token);
5871 ## Ignore the token
5872 !!!nack ('t230.1');
5873 !!!next-token;
5874 next B;
5875 } else {
5876 !!!cp ('t232');
5877 }
5878
5879 ## Clear back to table row context
5880 while (not ($self->{open_elements}->[-1]->[1]
5881 & TABLE_ROW_SCOPING_EL)) {
5882 !!!cp ('t231');
5883 ## ISSUE: Can this state be reached?
5884 pop @{$self->{open_elements}};
5885 }
5886
5887 pop @{$self->{open_elements}}; # tr
5888 $self->{insertion_mode} = IN_TABLE_BODY_IM;
5889 !!!next-token;
5890 !!!nack ('t231.1');
5891 next B;
5892 } elsif ($token->{tag_name} eq 'table') {
5893 if ($self->{insertion_mode} == IN_ROW_IM) {
5894 ## As if </tr>
5895 ## have an element in table scope
5896 my $i;
5897 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5898 my $node = $self->{open_elements}->[$_];
5899 if ($node->[1] & TABLE_ROW_EL) {
5900 !!!cp ('t233');
5901 $i = $_;
5902 last INSCOPE;
5903 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5904 !!!cp ('t234');
5905 last INSCOPE;
5906 }
5907 } # INSCOPE
5908 unless (defined $i) {
5909 !!!cp ('t235');
5910 ## TODO: The following is wrong.
5911 !!!parse-error (type => 'unmatched end tag',
5912 text => $token->{type}, token => $token);
5913 ## Ignore the token
5914 !!!nack ('t236.1');
5915 !!!next-token;
5916 next B;
5917 }
5918
5919 ## Clear back to table row context
5920 while (not ($self->{open_elements}->[-1]->[1]
5921 & TABLE_ROW_SCOPING_EL)) {
5922 !!!cp ('t236');
5923 ## ISSUE: Can this state be reached?
5924 pop @{$self->{open_elements}};
5925 }
5926
5927 pop @{$self->{open_elements}}; # tr
5928 $self->{insertion_mode} = IN_TABLE_BODY_IM;
5929 ## reprocess in the "in table body" insertion mode...
5930 }
5931
5932 if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
5933 ## have an element in table scope
5934 my $i;
5935 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5936 my $node = $self->{open_elements}->[$_];
5937 if ($node->[1] & TABLE_ROW_GROUP_EL) {
5938 !!!cp ('t237');
5939 $i = $_;
5940 last INSCOPE;
5941 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5942 !!!cp ('t238');
5943 last INSCOPE;
5944 }
5945 } # INSCOPE
5946 unless (defined $i) {
5947 !!!cp ('t239');
5948 !!!parse-error (type => 'unmatched end tag',
5949 text => $token->{tag_name}, token => $token);
5950 ## Ignore the token
5951 !!!nack ('t239.1');
5952 !!!next-token;
5953 next B;
5954 }
5955
5956 ## Clear back to table body context
5957 while (not ($self->{open_elements}->[-1]->[1]
5958 & TABLE_ROWS_SCOPING_EL)) {
5959 !!!cp ('t240');
5960 pop @{$self->{open_elements}};
5961 }
5962
5963 ## As if <{current node}>
5964 ## have an element in table scope
5965 ## true by definition
5966
5967 ## Clear back to table body context
5968 ## nop by definition
5969
5970 pop @{$self->{open_elements}};
5971 $self->{insertion_mode} = IN_TABLE_IM;
5972 ## reprocess in the "in table" insertion mode...
5973 }
5974
5975 ## NOTE: </table> in the "in table" insertion mode.
5976 ## When you edit the code fragment below, please ensure that
5977 ## the code for <table> in the "in table" insertion mode
5978 ## is synced with it.
5979
5980 ## have a table element in table scope
5981 my $i;
5982 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5983 my $node = $self->{open_elements}->[$_];
5984 if ($node->[1] & TABLE_EL) {
5985 !!!cp ('t241');
5986 $i = $_;
5987 last INSCOPE;
5988 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5989 !!!cp ('t242');
5990 last INSCOPE;
5991 }
5992 } # INSCOPE
5993 unless (defined $i) {
5994 !!!cp ('t243');
5995 !!!parse-error (type => 'unmatched end tag',
5996 text => $token->{tag_name}, token => $token);
5997 ## Ignore the token
5998 !!!nack ('t243.1');
5999 !!!next-token;
6000 next B;
6001 }
6002
6003 splice @{$self->{open_elements}}, $i;
6004 pop @{$open_tables};
6005
6006 $self->_reset_insertion_mode;
6007
6008 !!!next-token;
6009 next B;
6010 } elsif ({
6011 tbody => 1, tfoot => 1, thead => 1,
6012 }->{$token->{tag_name}} and
6013 $self->{insertion_mode} & ROW_IMS) {
6014 if ($self->{insertion_mode} == IN_ROW_IM) {
6015 ## have an element in table scope
6016 my $i;
6017 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6018 my $node = $self->{open_elements}->[$_];
6019 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
6020 !!!cp ('t247');
6021 $i = $_;
6022 last INSCOPE;
6023 } elsif ($node->[1] & TABLE_SCOPING_EL) {
6024 !!!cp ('t248');
6025 last INSCOPE;
6026 }
6027 } # INSCOPE
6028 unless (defined $i) {
6029 !!!cp ('t249');
6030 !!!parse-error (type => 'unmatched end tag',
6031 text => $token->{tag_name}, token => $token);
6032 ## Ignore the token
6033 !!!nack ('t249.1');
6034 !!!next-token;
6035 next B;
6036 }
6037
6038 ## As if </tr>
6039 ## have an element in table scope
6040 my $i;
6041 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6042 my $node = $self->{open_elements}->[$_];
6043 if ($node->[1] & TABLE_ROW_EL) {
6044 !!!cp ('t250');
6045 $i = $_;
6046 last INSCOPE;
6047 } elsif ($node->[1] & TABLE_SCOPING_EL) {
6048 !!!cp ('t251');
6049 last INSCOPE;
6050 }
6051 } # INSCOPE
6052 unless (defined $i) {
6053 !!!cp ('t252');
6054 !!!parse-error (type => 'unmatched end tag',
6055 text => 'tr', token => $token);
6056 ## Ignore the token
6057 !!!nack ('t252.1');
6058 !!!next-token;
6059 next B;
6060 }
6061
6062 ## Clear back to table row context
6063 while (not ($self->{open_elements}->[-1]->[1]
6064 & TABLE_ROW_SCOPING_EL)) {
6065 !!!cp ('t253');
6066 ## ISSUE: Can this case be reached?
6067 pop @{$self->{open_elements}};
6068 }
6069
6070 pop @{$self->{open_elements}}; # tr
6071 $self->{insertion_mode} = IN_TABLE_BODY_IM;
6072 ## reprocess in the "in table body" insertion mode...
6073 }
6074
6075 ## have an element in table scope
6076 my $i;
6077 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6078 my $node = $self->{open_elements}->[$_];
6079 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
6080 !!!cp ('t254');
6081 $i = $_;
6082 last INSCOPE;
6083 } elsif ($node->[1] & TABLE_SCOPING_EL) {
6084 !!!cp ('t255');
6085 last INSCOPE;
6086 }
6087 } # INSCOPE
6088 unless (defined $i) {
6089 !!!cp ('t256');
6090 !!!parse-error (type => 'unmatched end tag',
6091 text => $token->{tag_name}, token => $token);
6092 ## Ignore the token
6093 !!!nack ('t256.1');
6094 !!!next-token;
6095 next B;
6096 }
6097
6098 ## Clear back to table body context
6099 while (not ($self->{open_elements}->[-1]->[1]
6100 & TABLE_ROWS_SCOPING_EL)) {
6101 !!!cp ('t257');
6102 ## ISSUE: Can this case be reached?
6103 pop @{$self->{open_elements}};
6104 }
6105
6106 pop @{$self->{open_elements}};
6107 $self->{insertion_mode} = IN_TABLE_IM;
6108 !!!nack ('t257.1');
6109 !!!next-token;
6110 next B;
6111 } elsif ({
6112 body => 1, caption => 1, col => 1, colgroup => 1,
6113 html => 1, td => 1, th => 1,
6114 tr => 1, # $self->{insertion_mode} == IN_ROW_IM
6115 tbody => 1, tfoot => 1, thead => 1, # $self->{insertion_mode} == IN_TABLE_IM
6116 }->{$token->{tag_name}}) {
6117 !!!cp ('t258');
6118 !!!parse-error (type => 'unmatched end tag',
6119 text => $token->{tag_name}, token => $token);
6120 ## Ignore the token
6121 !!!nack ('t258.1');
6122 !!!next-token;
6123 next B;
6124 } else {
6125 !!!cp ('t259');
6126 !!!parse-error (type => 'in table:/',
6127 text => $token->{tag_name}, token => $token);
6128
6129 $insert = $insert_to_foster;
6130 #
6131 }
6132 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
6133 unless ($self->{open_elements}->[-1]->[1] & HTML_EL and
6134 @{$self->{open_elements}} == 1) { # redundant, maybe
6135 !!!parse-error (type => 'in body:#eof', token => $token);
6136 !!!cp ('t259.1');
6137 #
6138 } else {
6139 !!!cp ('t259.2');
6140 #
6141 }
6142
6143 ## Stop parsing
6144 last B;
6145 } else {
6146 die "$0: $token->{type}: Unknown token type";
6147 }
6148 } elsif ($self->{insertion_mode} == IN_COLUMN_GROUP_IM) {
6149 if ($token->{type} == CHARACTER_TOKEN) {
6150 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
6151 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
6152 unless (length $token->{data}) {
6153 !!!cp ('t260');
6154 !!!next-token;
6155 next B;
6156 }
6157 }
6158
6159 !!!cp ('t261');
6160 #
6161 } elsif ($token->{type} == START_TAG_TOKEN) {
6162 if ($token->{tag_name} eq 'col') {
6163 !!!cp ('t262');
6164 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
6165 pop @{$self->{open_elements}};
6166 !!!ack ('t262.1');
6167 !!!next-token;
6168 next B;
6169 } else {
6170 !!!cp ('t263');
6171 #
6172 }
6173 } elsif ($token->{type} == END_TAG_TOKEN) {
6174 if ($token->{tag_name} eq 'colgroup') {
6175 if ($self->{open_elements}->[-1]->[1] & HTML_EL) {
6176 !!!cp ('t264');
6177 !!!parse-error (type => 'unmatched end tag',
6178 text => 'colgroup', token => $token);
6179 ## Ignore the token
6180 !!!next-token;
6181 next B;
6182 } else {
6183 !!!cp ('t265');
6184 pop @{$self->{open_elements}}; # colgroup
6185 $self->{insertion_mode} = IN_TABLE_IM;
6186 !!!next-token;
6187 next B;
6188 }
6189 } elsif ($token->{tag_name} eq 'col') {
6190 !!!cp ('t266');
6191 !!!parse-error (type => 'unmatched end tag',
6192 text => 'col', token => $token);
6193 ## Ignore the token
6194 !!!next-token;
6195 next B;
6196 } else {
6197 !!!cp ('t267');
6198 #
6199 }
6200 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
6201 if ($self->{open_elements}->[-1]->[1] & HTML_EL and
6202 @{$self->{open_elements}} == 1) { # redundant, maybe
6203 !!!cp ('t270.2');
6204 ## Stop parsing.
6205 last B;
6206 } else {
6207 ## NOTE: As if </colgroup>.
6208 !!!cp ('t270.1');
6209 pop @{$self->{open_elements}}; # colgroup
6210 $self->{insertion_mode} = IN_TABLE_IM;
6211 ## Reprocess.
6212 next B;
6213 }
6214 } else {
6215 die "$0: $token->{type}: Unknown token type";
6216 }
6217
6218 ## As if </colgroup>
6219 if ($self->{open_elements}->[-1]->[1] & HTML_EL) {
6220 !!!cp ('t269');
6221 ## TODO: Wrong error type?
6222 !!!parse-error (type => 'unmatched end tag',
6223 text => 'colgroup', token => $token);
6224 ## Ignore the token
6225 !!!nack ('t269.1');
6226 !!!next-token;
6227 next B;
6228 } else {
6229 !!!cp ('t270');
6230 pop @{$self->{open_elements}}; # colgroup
6231 $self->{insertion_mode} = IN_TABLE_IM;
6232 !!!ack-later;
6233 ## reprocess
6234 next B;
6235 }
6236 } elsif ($self->{insertion_mode} & SELECT_IMS) {
6237 if ($token->{type} == CHARACTER_TOKEN) {
6238 !!!cp ('t271');
6239 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
6240 !!!next-token;
6241 next B;
6242 } elsif ($token->{type} == START_TAG_TOKEN) {
6243 if ($token->{tag_name} eq 'option') {
6244 if ($self->{open_elements}->[-1]->[1] & OPTION_EL) {
6245 !!!cp ('t272');
6246 ## As if </option>
6247 pop @{$self->{open_elements}};
6248 } else {
6249 !!!cp ('t273');
6250 }
6251
6252 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
6253 !!!nack ('t273.1');
6254 !!!next-token;
6255 next B;
6256 } elsif ($token->{tag_name} eq 'optgroup') {
6257 if ($self->{open_elements}->[-1]->[1] & OPTION_EL) {
6258 !!!cp ('t274');
6259 ## As if </option>
6260 pop @{$self->{open_elements}};
6261 } else {
6262 !!!cp ('t275');
6263 }
6264
6265 if ($self->{open_elements}->[-1]->[1] & OPTGROUP_EL) {
6266 !!!cp ('t276');
6267 ## As if </optgroup>
6268 pop @{$self->{open_elements}};
6269 } else {
6270 !!!cp ('t277');
6271 }
6272
6273 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
6274 !!!nack ('t277.1');
6275 !!!next-token;
6276 next B;
6277 } elsif ({
6278 select => 1, input => 1, textarea => 1,
6279 }->{$token->{tag_name}} or
6280 ($self->{insertion_mode} == IN_SELECT_IN_TABLE_IM and
6281 {
6282 caption => 1, table => 1,
6283 tbody => 1, tfoot => 1, thead => 1,
6284 tr => 1, td => 1, th => 1,
6285 }->{$token->{tag_name}})) {
6286 ## TODO: The type below is not good - <select> is replaced by </select>
6287 !!!parse-error (type => 'not closed', text => 'select',
6288 token => $token);
6289 ## NOTE: As if the token were </select> (<select> case) or
6290 ## as if there were </select> (otherwise).
6291 ## have an element in table scope
6292 my $i;
6293 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6294 my $node = $self->{open_elements}->[$_];
6295 if ($node->[1] & SELECT_EL) {
6296 !!!cp ('t278');
6297 $i = $_;
6298 last INSCOPE;
6299 } elsif ($node->[1] & TABLE_SCOPING_EL) {
6300 !!!cp ('t279');
6301 last INSCOPE;
6302 }
6303 } # INSCOPE
6304 unless (defined $i) {
6305 !!!cp ('t280');
6306 !!!parse-error (type => 'unmatched end tag',
6307 text => 'select', token => $token);
6308 ## Ignore the token
6309 !!!nack ('t280.1');
6310 !!!next-token;
6311 next B;
6312 }
6313
6314 !!!cp ('t281');
6315 splice @{$self->{open_elements}}, $i;
6316
6317 $self->_reset_insertion_mode;
6318
6319 if ($token->{tag_name} eq 'select') {
6320 !!!nack ('t281.2');
6321 !!!next-token;
6322 next B;
6323 } else {
6324 !!!cp ('t281.1');
6325 !!!ack-later;
6326 ## Reprocess the token.
6327 next B;
6328 }
6329 } else {
6330 !!!cp ('t282');
6331 !!!parse-error (type => 'in select',
6332 text => $token->{tag_name}, token => $token);
6333 ## Ignore the token
6334 !!!nack ('t282.1');
6335 !!!next-token;
6336 next B;
6337 }
6338 } elsif ($token->{type} == END_TAG_TOKEN) {
6339 if ($token->{tag_name} eq 'optgroup') {
6340 if ($self->{open_elements}->[-1]->[1] & OPTION_EL and
6341 $self->{open_elements}->[-2]->[1] & OPTGROUP_EL) {
6342 !!!cp ('t283');
6343 ## As if </option>
6344 splice @{$self->{open_elements}}, -2;
6345 } elsif ($self->{open_elements}->[-1]->[1] & OPTGROUP_EL) {
6346 !!!cp ('t284');
6347 pop @{$self->{open_elements}};
6348 } else {
6349 !!!cp ('t285');
6350 !!!parse-error (type => 'unmatched end tag',
6351 text => $token->{tag_name}, token => $token);
6352 ## Ignore the token
6353 }
6354 !!!nack ('t285.1');
6355 !!!next-token;
6356 next B;
6357 } elsif ($token->{tag_name} eq 'option') {
6358 if ($self->{open_elements}->[-1]->[1] & OPTION_EL) {
6359 !!!cp ('t286');
6360 pop @{$self->{open_elements}};
6361 } else {
6362 !!!cp ('t287');
6363 !!!parse-error (type => 'unmatched end tag',
6364 text => $token->{tag_name}, token => $token);
6365 ## Ignore the token
6366 }
6367 !!!nack ('t287.1');
6368 !!!next-token;
6369 next B;
6370 } elsif ($token->{tag_name} eq 'select') {
6371 ## have an element in table scope
6372 my $i;
6373 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6374 my $node = $self->{open_elements}->[$_];
6375 if ($node->[1] & SELECT_EL) {
6376 !!!cp ('t288');
6377 $i = $_;
6378 last INSCOPE;
6379 } elsif ($node->[1] & TABLE_SCOPING_EL) {
6380 !!!cp ('t289');
6381 last INSCOPE;
6382 }
6383 } # INSCOPE
6384 unless (defined $i) {
6385 !!!cp ('t290');
6386 !!!parse-error (type => 'unmatched end tag',
6387 text => $token->{tag_name}, token => $token);
6388 ## Ignore the token
6389 !!!nack ('t290.1');
6390 !!!next-token;
6391 next B;
6392 }
6393
6394 !!!cp ('t291');
6395 splice @{$self->{open_elements}}, $i;
6396
6397 $self->_reset_insertion_mode;
6398
6399 !!!nack ('t291.1');
6400 !!!next-token;
6401 next B;
6402 } elsif ($self->{insertion_mode} == IN_SELECT_IN_TABLE_IM and
6403 {
6404 caption => 1, table => 1, tbody => 1,
6405 tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
6406 }->{$token->{tag_name}}) {
6407 ## TODO: The following is wrong?
6408 !!!parse-error (type => 'unmatched end tag',
6409 text => $token->{tag_name}, token => $token);
6410
6411 ## have an element in table scope
6412 my $i;
6413 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6414 my $node = $self->{open_elements}->[$_];
6415 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
6416 !!!cp ('t292');
6417 $i = $_;
6418 last INSCOPE;
6419 } elsif ($node->[1] & TABLE_SCOPING_EL) {
6420 !!!cp ('t293');
6421 last INSCOPE;
6422 }
6423 } # INSCOPE
6424 unless (defined $i) {
6425 !!!cp ('t294');
6426 ## Ignore the token
6427 !!!nack ('t294.1');
6428 !!!next-token;
6429 next B;
6430 }
6431
6432 ## As if </select>
6433 ## have an element in table scope
6434 undef $i;
6435 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6436 my $node = $self->{open_elements}->[$_];
6437 if ($node->[1] & SELECT_EL) {
6438 !!!cp ('t295');
6439 $i = $_;
6440 last INSCOPE;
6441 } elsif ($node->[1] & TABLE_SCOPING_EL) {
6442 ## ISSUE: Can this state be reached?
6443 !!!cp ('t296');
6444 last INSCOPE;
6445 }
6446 } # INSCOPE
6447 unless (defined $i) {
6448 !!!cp ('t297');
6449 ## TODO: The following error type is correct?
6450 !!!parse-error (type => 'unmatched end tag',
6451 text => 'select', token => $token);
6452 ## Ignore the </select> token
6453 !!!nack ('t297.1');
6454 !!!next-token; ## TODO: ok?
6455 next B;
6456 }
6457
6458 !!!cp ('t298');
6459 splice @{$self->{open_elements}}, $i;
6460
6461 $self->_reset_insertion_mode;
6462
6463 !!!ack-later;
6464 ## reprocess
6465 next B;
6466 } else {
6467 !!!cp ('t299');
6468 !!!parse-error (type => 'in select:/',
6469 text => $token->{tag_name}, token => $token);
6470 ## Ignore the token
6471 !!!nack ('t299.3');
6472 !!!next-token;
6473 next B;
6474 }
6475 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
6476 unless ($self->{open_elements}->[-1]->[1] & HTML_EL and
6477 @{$self->{open_elements}} == 1) { # redundant, maybe
6478 !!!cp ('t299.1');
6479 !!!parse-error (type => 'in body:#eof', token => $token);
6480 } else {
6481 !!!cp ('t299.2');
6482 }
6483
6484 ## Stop parsing.
6485 last B;
6486 } else {
6487 die "$0: $token->{type}: Unknown token type";
6488 }
6489 } elsif ($self->{insertion_mode} & BODY_AFTER_IMS) {
6490 if ($token->{type} == CHARACTER_TOKEN) {
6491 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
6492 my $data = $1;
6493 ## As if in body
6494 $reconstruct_active_formatting_elements->($insert_to_current);
6495
6496 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
6497
6498 unless (length $token->{data}) {
6499 !!!cp ('t300');
6500 !!!next-token;
6501 next B;
6502 }
6503 }
6504
6505 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
6506 !!!cp ('t301');
6507 !!!parse-error (type => 'after html:#text', token => $token);
6508
6509 ## Reprocess in the "after body" insertion mode.
6510 } else {
6511 !!!cp ('t302');
6512 }
6513
6514 ## "after body" insertion mode
6515 !!!parse-error (type => 'after body:#text', token => $token);
6516
6517 $self->{insertion_mode} = IN_BODY_IM;
6518 ## reprocess
6519 next B;
6520 } elsif ($token->{type} == START_TAG_TOKEN) {
6521 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
6522 !!!cp ('t303');
6523 !!!parse-error (type => 'after html',
6524 text => $token->{tag_name}, token => $token);
6525
6526 ## Reprocess in the "after body" insertion mode.
6527 } else {
6528 !!!cp ('t304');
6529 }
6530
6531 ## "after body" insertion mode
6532 !!!parse-error (type => 'after body',
6533 text => $token->{tag_name}, token => $token);
6534
6535 $self->{insertion_mode} = IN_BODY_IM;
6536 !!!ack-later;
6537 ## reprocess
6538 next B;
6539 } elsif ($token->{type} == END_TAG_TOKEN) {
6540 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
6541 !!!cp ('t305');
6542 !!!parse-error (type => 'after html:/',
6543 text => $token->{tag_name}, token => $token);
6544
6545 $self->{insertion_mode} = AFTER_BODY_IM;
6546 ## Reprocess in the "after body" insertion mode.
6547 } else {
6548 !!!cp ('t306');
6549 }
6550
6551 ## "after body" insertion mode
6552 if ($token->{tag_name} eq 'html') {
6553 if (defined $self->{inner_html_node}) {
6554 !!!cp ('t307');
6555 !!!parse-error (type => 'unmatched end tag',
6556 text => 'html', token => $token);
6557 ## Ignore the token
6558 !!!next-token;
6559 next B;
6560 } else {
6561 !!!cp ('t308');
6562 $self->{insertion_mode} = AFTER_HTML_BODY_IM;
6563 !!!next-token;
6564 next B;
6565 }
6566 } else {
6567 !!!cp ('t309');
6568 !!!parse-error (type => 'after body:/',
6569 text => $token->{tag_name}, token => $token);
6570
6571 $self->{insertion_mode} = IN_BODY_IM;
6572 ## reprocess
6573 next B;
6574 }
6575 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
6576 !!!cp ('t309.2');
6577 ## Stop parsing
6578 last B;
6579 } else {
6580 die "$0: $token->{type}: Unknown token type";
6581 }
6582 } elsif ($self->{insertion_mode} & FRAME_IMS) {
6583 if ($token->{type} == CHARACTER_TOKEN) {
6584 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
6585 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
6586
6587 unless (length $token->{data}) {
6588 !!!cp ('t310');
6589 !!!next-token;
6590 next B;
6591 }
6592 }
6593
6594 if ($token->{data} =~ s/^[^\x09\x0A\x0B\x0C\x20]+//) {
6595 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
6596 !!!cp ('t311');
6597 !!!parse-error (type => 'in frameset:#text', token => $token);
6598 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
6599 !!!cp ('t312');
6600 !!!parse-error (type => 'after frameset:#text', token => $token);
6601 } else { # "after after frameset"
6602 !!!cp ('t313');
6603 !!!parse-error (type => 'after html:#text', token => $token);
6604 }
6605
6606 ## Ignore the token.
6607 if (length $token->{data}) {
6608 !!!cp ('t314');
6609 ## reprocess the rest of characters
6610 } else {
6611 !!!cp ('t315');
6612 !!!next-token;
6613 }
6614 next B;
6615 }
6616
6617 die qq[$0: Character "$token->{data}"];
6618 } elsif ($token->{type} == START_TAG_TOKEN) {
6619 if ($token->{tag_name} eq 'frameset' and
6620 $self->{insertion_mode} == IN_FRAMESET_IM) {
6621 !!!cp ('t318');
6622 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
6623 !!!nack ('t318.1');
6624 !!!next-token;
6625 next B;
6626 } elsif ($token->{tag_name} eq 'frame' and
6627 $self->{insertion_mode} == IN_FRAMESET_IM) {
6628 !!!cp ('t319');
6629 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
6630 pop @{$self->{open_elements}};
6631 !!!ack ('t319.1');
6632 !!!next-token;
6633 next B;
6634 } elsif ($token->{tag_name} eq 'noframes') {
6635 !!!cp ('t320');
6636 ## NOTE: As if in head.
6637 $parse_rcdata->(CDATA_CONTENT_MODEL);
6638 next B;
6639
6640 ## NOTE: |<!DOCTYPE HTML><frameset></frameset></html><noframes></noframes>|
6641 ## has no parse error.
6642 } else {
6643 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
6644 !!!cp ('t321');
6645 !!!parse-error (type => 'in frameset',
6646 text => $token->{tag_name}, token => $token);
6647 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
6648 !!!cp ('t322');
6649 !!!parse-error (type => 'after frameset',
6650 text => $token->{tag_name}, token => $token);
6651 } else { # "after after frameset"
6652 !!!cp ('t322.2');
6653 !!!parse-error (type => 'after after frameset',
6654 text => $token->{tag_name}, token => $token);
6655 }
6656 ## Ignore the token
6657 !!!nack ('t322.1');
6658 !!!next-token;
6659 next B;
6660 }
6661 } elsif ($token->{type} == END_TAG_TOKEN) {
6662 if ($token->{tag_name} eq 'frameset' and
6663 $self->{insertion_mode} == IN_FRAMESET_IM) {
6664 if ($self->{open_elements}->[-1]->[1] & HTML_EL and
6665 @{$self->{open_elements}} == 1) {
6666 !!!cp ('t325');
6667 !!!parse-error (type => 'unmatched end tag',
6668 text => $token->{tag_name}, token => $token);
6669 ## Ignore the token
6670 !!!next-token;
6671 } else {
6672 !!!cp ('t326');
6673 pop @{$self->{open_elements}};
6674 !!!next-token;
6675 }
6676
6677 if (not defined $self->{inner_html_node} and
6678 not ($self->{open_elements}->[-1]->[1] & FRAMESET_EL)) {
6679 !!!cp ('t327');
6680 $self->{insertion_mode} = AFTER_FRAMESET_IM;
6681 } else {
6682 !!!cp ('t328');
6683 }
6684 next B;
6685 } elsif ($token->{tag_name} eq 'html' and
6686 $self->{insertion_mode} == AFTER_FRAMESET_IM) {
6687 !!!cp ('t329');
6688 $self->{insertion_mode} = AFTER_HTML_FRAMESET_IM;
6689 !!!next-token;
6690 next B;
6691 } else {
6692 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
6693 !!!cp ('t330');
6694 !!!parse-error (type => 'in frameset:/',
6695 text => $token->{tag_name}, token => $token);
6696 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
6697 !!!cp ('t330.1');
6698 !!!parse-error (type => 'after frameset:/',
6699 text => $token->{tag_name}, token => $token);
6700 } else { # "after after frameset"
6701 !!!cp ('t331');
6702 !!!parse-error (type => 'after after frameset:/',
6703 text => $token->{tag_name}, token => $token);
6704 }
6705 ## Ignore the token
6706 !!!next-token;
6707 next B;
6708 }
6709 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
6710 unless ($self->{open_elements}->[-1]->[1] & HTML_EL and
6711 @{$self->{open_elements}} == 1) { # redundant, maybe
6712 !!!cp ('t331.1');
6713 !!!parse-error (type => 'in body:#eof', token => $token);
6714 } else {
6715 !!!cp ('t331.2');
6716 }
6717
6718 ## Stop parsing
6719 last B;
6720 } else {
6721 die "$0: $token->{type}: Unknown token type";
6722 }
6723
6724 ## ISSUE: An issue in spec here
6725 } else {
6726 die "$0: $self->{insertion_mode}: Unknown insertion mode";
6727 }
6728
6729 ## "in body" insertion mode
6730 if ($token->{type} == START_TAG_TOKEN) {
6731 if ($token->{tag_name} eq 'script') {
6732 !!!cp ('t332');
6733 ## NOTE: This is an "as if in head" code clone
6734 $script_start_tag->();
6735 next B;
6736 } elsif ($token->{tag_name} eq 'style') {
6737 !!!cp ('t333');
6738 ## NOTE: This is an "as if in head" code clone
6739 $parse_rcdata->(CDATA_CONTENT_MODEL);
6740 next B;
6741 } elsif ({
6742 base => 1, link => 1,
6743 }->{$token->{tag_name}}) {
6744 !!!cp ('t334');
6745 ## NOTE: This is an "as if in head" code clone, only "-t" differs
6746 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6747 pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
6748 !!!ack ('t334.1');
6749 !!!next-token;
6750 next B;
6751 } elsif ($token->{tag_name} eq 'meta') {
6752 ## NOTE: This is an "as if in head" code clone, only "-t" differs
6753 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6754 my $meta_el = pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
6755
6756 unless ($self->{confident}) {
6757 if ($token->{attributes}->{charset}) {
6758 !!!cp ('t335');
6759 ## NOTE: Whether the encoding is supported or not is handled
6760 ## in the {change_encoding} callback.
6761 $self->{change_encoding}
6762 ->($self, $token->{attributes}->{charset}->{value}, $token);
6763
6764 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
6765 ->set_user_data (manakai_has_reference =>
6766 $token->{attributes}->{charset}
6767 ->{has_reference});
6768 } elsif ($token->{attributes}->{content}) {
6769 if ($token->{attributes}->{content}->{value}
6770 =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
6771 [\x09-\x0D\x20]*=
6772 [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
6773 ([^"'\x09-\x0D\x20][^\x09-\x0D\x20\x3B]*))/x) {
6774 !!!cp ('t336');
6775 ## NOTE: Whether the encoding is supported or not is handled
6776 ## in the {change_encoding} callback.
6777 $self->{change_encoding}
6778 ->($self, defined $1 ? $1 : defined $2 ? $2 : $3, $token);
6779 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
6780 ->set_user_data (manakai_has_reference =>
6781 $token->{attributes}->{content}
6782 ->{has_reference});
6783 }
6784 }
6785 } else {
6786 if ($token->{attributes}->{charset}) {
6787 !!!cp ('t337');
6788 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
6789 ->set_user_data (manakai_has_reference =>
6790 $token->{attributes}->{charset}
6791 ->{has_reference});
6792 }
6793 if ($token->{attributes}->{content}) {
6794 !!!cp ('t338');
6795 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
6796 ->set_user_data (manakai_has_reference =>
6797 $token->{attributes}->{content}
6798 ->{has_reference});
6799 }
6800 }
6801
6802 !!!ack ('t338.1');
6803 !!!next-token;
6804 next B;
6805 } elsif ($token->{tag_name} eq 'title') {
6806 !!!cp ('t341');
6807 ## NOTE: This is an "as if in head" code clone
6808 $parse_rcdata->(RCDATA_CONTENT_MODEL);
6809 next B;
6810 } elsif ($token->{tag_name} eq 'body') {
6811 !!!parse-error (type => 'in body', text => 'body', token => $token);
6812
6813 if (@{$self->{open_elements}} == 1 or
6814 not ($self->{open_elements}->[1]->[1] & BODY_EL)) {
6815 !!!cp ('t342');
6816 ## Ignore the token
6817 } else {
6818 my $body_el = $self->{open_elements}->[1]->[0];
6819 for my $attr_name (keys %{$token->{attributes}}) {
6820 unless ($body_el->has_attribute_ns (undef, $attr_name)) {
6821 !!!cp ('t343');
6822 $body_el->set_attribute_ns
6823 (undef, [undef, $attr_name],
6824 $token->{attributes}->{$attr_name}->{value});
6825 }
6826 }
6827 }
6828 !!!nack ('t343.1');
6829 !!!next-token;
6830 next B;
6831 } elsif ({
6832 address => 1, blockquote => 1, center => 1, dir => 1,
6833 div => 1, dl => 1, fieldset => 1,
6834 h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
6835 menu => 1, ol => 1, p => 1, ul => 1,
6836 pre => 1, listing => 1,
6837 form => 1,
6838 table => 1,
6839 hr => 1,
6840 }->{$token->{tag_name}}) {
6841 if ($token->{tag_name} eq 'form' and defined $self->{form_element}) {
6842 !!!cp ('t350');
6843 !!!parse-error (type => 'in form:form', token => $token);
6844 ## Ignore the token
6845 !!!nack ('t350.1');
6846 !!!next-token;
6847 next B;
6848 }
6849
6850 ## has a p element in scope
6851 INSCOPE: for (reverse @{$self->{open_elements}}) {
6852 if ($_->[1] & P_EL) {
6853 !!!cp ('t344');
6854 !!!back-token; # <form>
6855 $token = {type => END_TAG_TOKEN, tag_name => 'p',
6856 line => $token->{line}, column => $token->{column}};
6857 next B;
6858 } elsif ($_->[1] & SCOPING_EL) {
6859 !!!cp ('t345');
6860 last INSCOPE;
6861 }
6862 } # INSCOPE
6863
6864 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6865 if ($token->{tag_name} eq 'pre' or $token->{tag_name} eq 'listing') {
6866 !!!nack ('t346.1');
6867 !!!next-token;
6868 if ($token->{type} == CHARACTER_TOKEN) {
6869 $token->{data} =~ s/^\x0A//;
6870 unless (length $token->{data}) {
6871 !!!cp ('t346');
6872 !!!next-token;
6873 } else {
6874 !!!cp ('t349');
6875 }
6876 } else {
6877 !!!cp ('t348');
6878 }
6879 } elsif ($token->{tag_name} eq 'form') {
6880 !!!cp ('t347.1');
6881 $self->{form_element} = $self->{open_elements}->[-1]->[0];
6882
6883 !!!nack ('t347.2');
6884 !!!next-token;
6885 } elsif ($token->{tag_name} eq 'table') {
6886 !!!cp ('t382');
6887 push @{$open_tables}, [$self->{open_elements}->[-1]->[0]];
6888
6889 $self->{insertion_mode} = IN_TABLE_IM;
6890
6891 !!!nack ('t382.1');
6892 !!!next-token;
6893 } elsif ($token->{tag_name} eq 'hr') {
6894 !!!cp ('t386');
6895 pop @{$self->{open_elements}};
6896
6897 !!!nack ('t386.1');
6898 !!!next-token;
6899 } else {
6900 !!!nack ('t347.1');
6901 !!!next-token;
6902 }
6903 next B;
6904 } elsif ({li => 1, dt => 1, dd => 1}->{$token->{tag_name}}) {
6905 ## has a p element in scope
6906 INSCOPE: for (reverse @{$self->{open_elements}}) {
6907 if ($_->[1] & P_EL) {
6908 !!!cp ('t353');
6909 !!!back-token; # <x>
6910 $token = {type => END_TAG_TOKEN, tag_name => 'p',
6911 line => $token->{line}, column => $token->{column}};
6912 next B;
6913 } elsif ($_->[1] & SCOPING_EL) {
6914 !!!cp ('t354');
6915 last INSCOPE;
6916 }
6917 } # INSCOPE
6918
6919 ## Step 1
6920 my $i = -1;
6921 my $node = $self->{open_elements}->[$i];
6922 my $li_or_dtdd = {li => {li => 1},
6923 dt => {dt => 1, dd => 1},
6924 dd => {dt => 1, dd => 1}}->{$token->{tag_name}};
6925 LI: {
6926 ## Step 2
6927 if ($li_or_dtdd->{$node->[0]->manakai_local_name}) {
6928 if ($i != -1) {
6929 !!!cp ('t355');
6930 !!!parse-error (type => 'not closed',
6931 text => $self->{open_elements}->[-1]->[0]
6932 ->manakai_local_name,
6933 token => $token);
6934 } else {
6935 !!!cp ('t356');
6936 }
6937 splice @{$self->{open_elements}}, $i;
6938 last LI;
6939 } else {
6940 !!!cp ('t357');
6941 }
6942
6943 ## Step 3
6944 if (not ($node->[1] & FORMATTING_EL) and
6945 #not $phrasing_category->{$node->[1]} and
6946 ($node->[1] & SPECIAL_EL or
6947 $node->[1] & SCOPING_EL) and
6948 not ($node->[1] & ADDRESS_EL) and
6949 not ($node->[1] & DIV_EL)) {
6950 !!!cp ('t358');
6951 last LI;
6952 }
6953
6954 !!!cp ('t359');
6955 ## Step 4
6956 $i--;
6957 $node = $self->{open_elements}->[$i];
6958 redo LI;
6959 } # LI
6960
6961 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6962 !!!nack ('t359.1');
6963 !!!next-token;
6964 next B;
6965 } elsif ($token->{tag_name} eq 'plaintext') {
6966 ## has a p element in scope
6967 INSCOPE: for (reverse @{$self->{open_elements}}) {
6968 if ($_->[1] & P_EL) {
6969 !!!cp ('t367');
6970 !!!back-token; # <plaintext>
6971 $token = {type => END_TAG_TOKEN, tag_name => 'p',
6972 line => $token->{line}, column => $token->{column}};
6973 next B;
6974 } elsif ($_->[1] & SCOPING_EL) {
6975 !!!cp ('t368');
6976 last INSCOPE;
6977 }
6978 } # INSCOPE
6979
6980 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6981
6982 $self->{content_model} = PLAINTEXT_CONTENT_MODEL;
6983
6984 !!!nack ('t368.1');
6985 !!!next-token;
6986 next B;
6987 } elsif ($token->{tag_name} eq 'a') {
6988 AFE: for my $i (reverse 0..$#$active_formatting_elements) {
6989 my $node = $active_formatting_elements->[$i];
6990 if ($node->[1] & A_EL) {
6991 !!!cp ('t371');
6992 !!!parse-error (type => 'in a:a', token => $token);
6993
6994 !!!back-token; # <a>
6995 $token = {type => END_TAG_TOKEN, tag_name => 'a',
6996 line => $token->{line}, column => $token->{column}};
6997 $formatting_end_tag->($token);
6998
6999 AFE2: for (reverse 0..$#$active_formatting_elements) {
7000 if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
7001 !!!cp ('t372');
7002 splice @$active_formatting_elements, $_, 1;
7003 last AFE2;
7004 }
7005 } # AFE2
7006 OE: for (reverse 0..$#{$self->{open_elements}}) {
7007 if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
7008 !!!cp ('t373');
7009 splice @{$self->{open_elements}}, $_, 1;
7010 last OE;
7011 }
7012 } # OE
7013 last AFE;
7014 } elsif ($node->[0] eq '#marker') {
7015 !!!cp ('t374');
7016 last AFE;
7017 }
7018 } # AFE
7019
7020 $reconstruct_active_formatting_elements->($insert_to_current);
7021
7022 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
7023 push @$active_formatting_elements, $self->{open_elements}->[-1];
7024
7025 !!!nack ('t374.1');
7026 !!!next-token;
7027 next B;
7028 } elsif ($token->{tag_name} eq 'nobr') {
7029 $reconstruct_active_formatting_elements->($insert_to_current);
7030
7031 ## has a |nobr| element in scope
7032 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
7033 my $node = $self->{open_elements}->[$_];
7034 if ($node->[1] & NOBR_EL) {
7035 !!!cp ('t376');
7036 !!!parse-error (type => 'in nobr:nobr', token => $token);
7037 !!!back-token; # <nobr>
7038 $token = {type => END_TAG_TOKEN, tag_name => 'nobr',
7039 line => $token->{line}, column => $token->{column}};
7040 next B;
7041 } elsif ($node->[1] & SCOPING_EL) {
7042 !!!cp ('t377');
7043 last INSCOPE;
7044 }
7045 } # INSCOPE
7046
7047 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
7048 push @$active_formatting_elements, $self->{open_elements}->[-1];
7049
7050 !!!nack ('t377.1');
7051 !!!next-token;
7052 next B;
7053 } elsif ($token->{tag_name} eq 'button') {
7054 ## has a button element in scope
7055 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
7056 my $node = $self->{open_elements}->[$_];
7057 if ($node->[1] & BUTTON_EL) {
7058 !!!cp ('t378');
7059 !!!parse-error (type => 'in button:button', token => $token);
7060 !!!back-token; # <button>
7061 $token = {type => END_TAG_TOKEN, tag_name => 'button',
7062 line => $token->{line}, column => $token->{column}};
7063 next B;
7064 } elsif ($node->[1] & SCOPING_EL) {
7065 !!!cp ('t379');
7066 last INSCOPE;
7067 }
7068 } # INSCOPE
7069
7070 $reconstruct_active_formatting_elements->($insert_to_current);
7071
7072 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
7073
7074 ## TODO: associate with $self->{form_element} if defined
7075
7076 push @$active_formatting_elements, ['#marker', ''];
7077
7078 !!!nack ('t379.1');
7079 !!!next-token;
7080 next B;
7081 } elsif ({
7082 xmp => 1,
7083 iframe => 1,
7084 noembed => 1,
7085 noframes => 1, ## NOTE: This is an "as if in head" code clone.
7086 noscript => 0, ## TODO: 1 if scripting is enabled
7087 }->{$token->{tag_name}}) {
7088 if ($token->{tag_name} eq 'xmp') {
7089 !!!cp ('t381');
7090 $reconstruct_active_formatting_elements->($insert_to_current);
7091 } else {
7092 !!!cp ('t399');
7093 }
7094 ## NOTE: There is an "as if in body" code clone.
7095 $parse_rcdata->(CDATA_CONTENT_MODEL);
7096 next B;
7097 } elsif ($token->{tag_name} eq 'isindex') {
7098 !!!parse-error (type => 'isindex', token => $token);
7099
7100 if (defined $self->{form_element}) {
7101 !!!cp ('t389');
7102 ## Ignore the token
7103 !!!nack ('t389'); ## NOTE: Not acknowledged.
7104 !!!next-token;
7105 next B;
7106 } else {
7107 !!!ack ('t391.1');
7108
7109 my $at = $token->{attributes};
7110 my $form_attrs;
7111 $form_attrs->{action} = $at->{action} if $at->{action};
7112 my $prompt_attr = $at->{prompt};
7113 $at->{name} = {name => 'name', value => 'isindex'};
7114 delete $at->{action};
7115 delete $at->{prompt};
7116 my @tokens = (
7117 {type => START_TAG_TOKEN, tag_name => 'form',
7118 attributes => $form_attrs,
7119 line => $token->{line}, column => $token->{column}},
7120 {type => START_TAG_TOKEN, tag_name => 'hr',
7121 line => $token->{line}, column => $token->{column}},
7122 {type => START_TAG_TOKEN, tag_name => 'p',
7123 line => $token->{line}, column => $token->{column}},
7124 {type => START_TAG_TOKEN, tag_name => 'label',
7125 line => $token->{line}, column => $token->{column}},
7126 );
7127 if ($prompt_attr) {
7128 !!!cp ('t390');
7129 push @tokens, {type => CHARACTER_TOKEN, data => $prompt_attr->{value},
7130 #line => $token->{line}, column => $token->{column},
7131 };
7132 } else {
7133 !!!cp ('t391');
7134 push @tokens, {type => CHARACTER_TOKEN,
7135 data => 'This is a searchable index. Insert your search keywords here: ',
7136 #line => $token->{line}, column => $token->{column},
7137 }; # SHOULD
7138 ## TODO: make this configurable
7139 }
7140 push @tokens,
7141 {type => START_TAG_TOKEN, tag_name => 'input', attributes => $at,
7142 line => $token->{line}, column => $token->{column}},
7143 #{type => CHARACTER_TOKEN, data => ''}, # SHOULD
7144 {type => END_TAG_TOKEN, tag_name => 'label',
7145 line => $token->{line}, column => $token->{column}},
7146 {type => END_TAG_TOKEN, tag_name => 'p',
7147 line => $token->{line}, column => $token->{column}},
7148 {type => START_TAG_TOKEN, tag_name => 'hr',
7149 line => $token->{line}, column => $token->{column}},
7150 {type => END_TAG_TOKEN, tag_name => 'form',
7151 line => $token->{line}, column => $token->{column}};
7152 !!!back-token (@tokens);
7153 !!!next-token;
7154 next B;
7155 }
7156 } elsif ($token->{tag_name} eq 'textarea') {
7157 my $tag_name = $token->{tag_name};
7158 my $el;
7159 !!!create-element ($el, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
7160
7161 ## TODO: $self->{form_element} if defined
7162 $self->{content_model} = RCDATA_CONTENT_MODEL;
7163 delete $self->{escape}; # MUST
7164
7165 $insert->($el);
7166
7167 my $text = '';
7168 !!!nack ('t392.1');
7169 !!!next-token;
7170 if ($token->{type} == CHARACTER_TOKEN) {
7171 $token->{data} =~ s/^\x0A//;
7172 unless (length $token->{data}) {
7173 !!!cp ('t392');
7174 !!!next-token;
7175 } else {
7176 !!!cp ('t393');
7177 }
7178 } else {
7179 !!!cp ('t394');
7180 }
7181 while ($token->{type} == CHARACTER_TOKEN) {
7182 !!!cp ('t395');
7183 $text .= $token->{data};
7184 !!!next-token;
7185 }
7186 if (length $text) {
7187 !!!cp ('t396');
7188 $el->manakai_append_text ($text);
7189 }
7190
7191 $self->{content_model} = PCDATA_CONTENT_MODEL;
7192
7193 if ($token->{type} == END_TAG_TOKEN and
7194 $token->{tag_name} eq $tag_name) {
7195 !!!cp ('t397');
7196 ## Ignore the token
7197 } else {
7198 !!!cp ('t398');
7199 !!!parse-error (type => 'in RCDATA:#eof', token => $token);
7200 }
7201 !!!next-token;
7202 next B;
7203 } elsif ($token->{tag_name} eq 'rt' or
7204 $token->{tag_name} eq 'rp') {
7205 ## has a |ruby| element in scope
7206 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
7207 my $node = $self->{open_elements}->[$_];
7208 if ($node->[1] & RUBY_EL) {
7209 !!!cp ('t398.1');
7210 ## generate implied end tags
7211 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
7212 !!!cp ('t398.2');
7213 pop @{$self->{open_elements}};
7214 }
7215 unless ($self->{open_elements}->[-1]->[1] & RUBY_EL) {
7216 !!!cp ('t398.3');
7217 !!!parse-error (type => 'not closed',
7218 text => $self->{open_elements}->[-1]->[0]
7219 ->manakai_local_name,
7220 token => $token);
7221 pop @{$self->{open_elements}}
7222 while not $self->{open_elements}->[-1]->[1] & RUBY_EL;
7223 }
7224 last INSCOPE;
7225 } elsif ($node->[1] & SCOPING_EL) {
7226 !!!cp ('t398.4');
7227 last INSCOPE;
7228 }
7229 } # INSCOPE
7230
7231 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
7232
7233 !!!nack ('t398.5');
7234 !!!next-token;
7235 redo B;
7236 } elsif ($token->{tag_name} eq 'math' or
7237 $token->{tag_name} eq 'svg') {
7238 $reconstruct_active_formatting_elements->($insert_to_current);
7239
7240 ## "Adjust MathML attributes" ('math' only) - done in insert-element-f
7241
7242 ## "adjust SVG attributes" ('svg' only) - done in insert-element-f
7243
7244 ## "adjust foreign attributes" - done in insert-element-f
7245
7246 !!!insert-element-f ($token->{tag_name} eq 'math' ? $MML_NS : $SVG_NS, $token->{tag_name}, $token->{attributes}, $token);
7247
7248 if ($self->{self_closing}) {
7249 pop @{$self->{open_elements}};
7250 !!!ack ('t398.1');
7251 } else {
7252 !!!cp ('t398.2');
7253 $self->{insertion_mode} |= IN_FOREIGN_CONTENT_IM;
7254 ## NOTE: |<body><math><mi><svg>| -> "in foreign content" insertion
7255 ## mode, "in body" (not "in foreign content") secondary insertion
7256 ## mode, maybe.
7257 }
7258
7259 !!!next-token;
7260 next B;
7261 } elsif ({
7262 caption => 1, col => 1, colgroup => 1, frame => 1,
7263 frameset => 1, head => 1, option => 1, optgroup => 1,
7264 tbody => 1, td => 1, tfoot => 1, th => 1,
7265 thead => 1, tr => 1,
7266 }->{$token->{tag_name}}) {
7267 !!!cp ('t401');
7268 !!!parse-error (type => 'in body',
7269 text => $token->{tag_name}, token => $token);
7270 ## Ignore the token
7271 !!!nack ('t401.1'); ## NOTE: |<col/>| or |<frame/>| here is an error.
7272 !!!next-token;
7273 next B;
7274
7275 ## ISSUE: An issue on HTML5 new elements in the spec.
7276 } else {
7277 if ($token->{tag_name} eq 'image') {
7278 !!!cp ('t384');
7279 !!!parse-error (type => 'image', token => $token);
7280 $token->{tag_name} = 'img';
7281 } else {
7282 !!!cp ('t385');
7283 }
7284
7285 ## NOTE: There is an "as if <br>" code clone.
7286 $reconstruct_active_formatting_elements->($insert_to_current);
7287
7288 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
7289
7290 if ({
7291 applet => 1, marquee => 1, object => 1,
7292 }->{$token->{tag_name}}) {
7293 !!!cp ('t380');
7294 push @$active_formatting_elements, ['#marker', ''];
7295 !!!nack ('t380.1');
7296 } elsif ({
7297 b => 1, big => 1, em => 1, font => 1, i => 1,
7298 s => 1, small => 1, strile => 1,
7299 strong => 1, tt => 1, u => 1,
7300 }->{$token->{tag_name}}) {
7301 !!!cp ('t375');
7302 push @$active_formatting_elements, $self->{open_elements}->[-1];
7303 !!!nack ('t375.1');
7304 } elsif ($token->{tag_name} eq 'input') {
7305 !!!cp ('t388');
7306 ## TODO: associate with $self->{form_element} if defined
7307 pop @{$self->{open_elements}};
7308 !!!ack ('t388.2');
7309 } elsif ({
7310 area => 1, basefont => 1, bgsound => 1, br => 1,
7311 embed => 1, img => 1, param => 1, spacer => 1, wbr => 1,
7312 #image => 1,
7313 }->{$token->{tag_name}}) {
7314 !!!cp ('t388.1');
7315 pop @{$self->{open_elements}};
7316 !!!ack ('t388.3');
7317 } elsif ($token->{tag_name} eq 'select') {
7318 ## TODO: associate with $self->{form_element} if defined
7319
7320 if ($self->{insertion_mode} & TABLE_IMS or
7321 $self->{insertion_mode} & BODY_TABLE_IMS or
7322 $self->{insertion_mode} == IN_COLUMN_GROUP_IM) {
7323 !!!cp ('t400.1');
7324 $self->{insertion_mode} = IN_SELECT_IN_TABLE_IM;
7325 } else {
7326 !!!cp ('t400.2');
7327 $self->{insertion_mode} = IN_SELECT_IM;
7328 }
7329 !!!nack ('t400.3');
7330 } else {
7331 !!!nack ('t402');
7332 }
7333
7334 !!!next-token;
7335 next B;
7336 }
7337 } elsif ($token->{type} == END_TAG_TOKEN) {
7338 if ($token->{tag_name} eq 'body') {
7339 ## has a |body| element in scope
7340 my $i;
7341 INSCOPE: {
7342 for (reverse @{$self->{open_elements}}) {
7343 if ($_->[1] & BODY_EL) {
7344 !!!cp ('t405');
7345 $i = $_;
7346 last INSCOPE;
7347 } elsif ($_->[1] & SCOPING_EL) {
7348 !!!cp ('t405.1');
7349 last;
7350 }
7351 }
7352
7353 !!!parse-error (type => 'start tag not allowed',
7354 text => $token->{tag_name}, token => $token);
7355 ## NOTE: Ignore the token.
7356 !!!next-token;
7357 next B;
7358 } # INSCOPE
7359
7360 for (@{$self->{open_elements}}) {
7361 unless ($_->[1] & ALL_END_TAG_OPTIONAL_EL) {
7362 !!!cp ('t403');
7363 !!!parse-error (type => 'not closed',
7364 text => $_->[0]->manakai_local_name,
7365 token => $token);
7366 last;
7367 } else {
7368 !!!cp ('t404');
7369 }
7370 }
7371
7372 $self->{insertion_mode} = AFTER_BODY_IM;
7373 !!!next-token;
7374 next B;
7375 } elsif ($token->{tag_name} eq 'html') {
7376 ## TODO: Update this code. It seems that the code below is not
7377 ## up-to-date, though it has same effect as speced.
7378 if (@{$self->{open_elements}} > 1 and
7379 $self->{open_elements}->[1]->[1] & BODY_EL) {
7380 ## ISSUE: There is an issue in the spec.
7381 unless ($self->{open_elements}->[-1]->[1] & BODY_EL) {
7382 !!!cp ('t406');
7383 !!!parse-error (type => 'not closed',
7384 text => $self->{open_elements}->[1]->[0]
7385 ->manakai_local_name,
7386 token => $token);
7387 } else {
7388 !!!cp ('t407');
7389 }
7390 $self->{insertion_mode} = AFTER_BODY_IM;
7391 ## reprocess
7392 next B;
7393 } else {
7394 !!!cp ('t408');
7395 !!!parse-error (type => 'unmatched end tag',
7396 text => $token->{tag_name}, token => $token);
7397 ## Ignore the token
7398 !!!next-token;
7399 next B;
7400 }
7401 } elsif ({
7402 address => 1, blockquote => 1, center => 1, dir => 1,
7403 div => 1, dl => 1, fieldset => 1, listing => 1,
7404 menu => 1, ol => 1, pre => 1, ul => 1,
7405 dd => 1, dt => 1, li => 1,
7406 applet => 1, button => 1, marquee => 1, object => 1,
7407 }->{$token->{tag_name}}) {
7408 ## has an element in scope
7409 my $i;
7410 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
7411 my $node = $self->{open_elements}->[$_];
7412 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
7413 !!!cp ('t410');
7414 $i = $_;
7415 last INSCOPE;
7416 } elsif ($node->[1] & SCOPING_EL) {
7417 !!!cp ('t411');
7418 last INSCOPE;
7419 }
7420 } # INSCOPE
7421
7422 unless (defined $i) { # has an element in scope
7423 !!!cp ('t413');
7424 !!!parse-error (type => 'unmatched end tag',
7425 text => $token->{tag_name}, token => $token);
7426 ## NOTE: Ignore the token.
7427 } else {
7428 ## Step 1. generate implied end tags
7429 while ({
7430 ## END_TAG_OPTIONAL_EL
7431 dd => ($token->{tag_name} ne 'dd'),
7432 dt => ($token->{tag_name} ne 'dt'),
7433 li => ($token->{tag_name} ne 'li'),
7434 p => 1,
7435 rt => 1,
7436 rp => 1,
7437 }->{$self->{open_elements}->[-1]->[0]->manakai_local_name}) {
7438 !!!cp ('t409');
7439 pop @{$self->{open_elements}};
7440 }
7441
7442 ## Step 2.
7443 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
7444 ne $token->{tag_name}) {
7445 !!!cp ('t412');
7446 !!!parse-error (type => 'not closed',
7447 text => $self->{open_elements}->[-1]->[0]
7448 ->manakai_local_name,
7449 token => $token);
7450 } else {
7451 !!!cp ('t414');
7452 }
7453
7454 ## Step 3.
7455 splice @{$self->{open_elements}}, $i;
7456
7457 ## Step 4.
7458 $clear_up_to_marker->()
7459 if {
7460 applet => 1, button => 1, marquee => 1, object => 1,
7461 }->{$token->{tag_name}};
7462 }
7463 !!!next-token;
7464 next B;
7465 } elsif ($token->{tag_name} eq 'form') {
7466 undef $self->{form_element};
7467
7468 ## has an element in scope
7469 my $i;
7470 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
7471 my $node = $self->{open_elements}->[$_];
7472 if ($node->[1] & FORM_EL) {
7473 !!!cp ('t418');
7474 $i = $_;
7475 last INSCOPE;
7476 } elsif ($node->[1] & SCOPING_EL) {
7477 !!!cp ('t419');
7478 last INSCOPE;
7479 }
7480 } # INSCOPE
7481
7482 unless (defined $i) { # has an element in scope
7483 !!!cp ('t421');
7484 !!!parse-error (type => 'unmatched end tag',
7485 text => $token->{tag_name}, token => $token);
7486 ## NOTE: Ignore the token.
7487 } else {
7488 ## Step 1. generate implied end tags
7489 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
7490 !!!cp ('t417');
7491 pop @{$self->{open_elements}};
7492 }
7493
7494 ## Step 2.
7495 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
7496 ne $token->{tag_name}) {
7497 !!!cp ('t417.1');
7498 !!!parse-error (type => 'not closed',
7499 text => $self->{open_elements}->[-1]->[0]
7500 ->manakai_local_name,
7501 token => $token);
7502 } else {
7503 !!!cp ('t420');
7504 }
7505
7506 ## Step 3.
7507 splice @{$self->{open_elements}}, $i;
7508 }
7509
7510 !!!next-token;
7511 next B;
7512 } elsif ({
7513 h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
7514 }->{$token->{tag_name}}) {
7515 ## has an element in scope
7516 my $i;
7517 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
7518 my $node = $self->{open_elements}->[$_];
7519 if ($node->[1] & HEADING_EL) {
7520 !!!cp ('t423');
7521 $i = $_;
7522 last INSCOPE;
7523 } elsif ($node->[1] & SCOPING_EL) {
7524 !!!cp ('t424');
7525 last INSCOPE;
7526 }
7527 } # INSCOPE
7528
7529 unless (defined $i) { # has an element in scope
7530 !!!cp ('t425.1');
7531 !!!parse-error (type => 'unmatched end tag',
7532 text => $token->{tag_name}, token => $token);
7533 ## NOTE: Ignore the token.
7534 } else {
7535 ## Step 1. generate implied end tags
7536 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
7537 !!!cp ('t422');
7538 pop @{$self->{open_elements}};
7539 }
7540
7541 ## Step 2.
7542 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
7543 ne $token->{tag_name}) {
7544 !!!cp ('t425');
7545 !!!parse-error (type => 'unmatched end tag',
7546 text => $token->{tag_name}, token => $token);
7547 } else {
7548 !!!cp ('t426');
7549 }
7550
7551 ## Step 3.
7552 splice @{$self->{open_elements}}, $i;
7553 }
7554
7555 !!!next-token;
7556 next B;
7557 } elsif ($token->{tag_name} eq 'p') {
7558 ## has an element in scope
7559 my $i;
7560 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
7561 my $node = $self->{open_elements}->[$_];
7562 if ($node->[1] & P_EL) {
7563 !!!cp ('t410.1');
7564 $i = $_;
7565 last INSCOPE;
7566 } elsif ($node->[1] & SCOPING_EL) {
7567 !!!cp ('t411.1');
7568 last INSCOPE;
7569 }
7570 } # INSCOPE
7571
7572 if (defined $i) {
7573 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
7574 ne $token->{tag_name}) {
7575 !!!cp ('t412.1');
7576 !!!parse-error (type => 'not closed',
7577 text => $self->{open_elements}->[-1]->[0]
7578 ->manakai_local_name,
7579 token => $token);
7580 } else {
7581 !!!cp ('t414.1');
7582 }
7583
7584 splice @{$self->{open_elements}}, $i;
7585 } else {
7586 !!!cp ('t413.1');
7587 !!!parse-error (type => 'unmatched end tag',
7588 text => $token->{tag_name}, token => $token);
7589
7590 !!!cp ('t415.1');
7591 ## As if <p>, then reprocess the current token
7592 my $el;
7593 !!!create-element ($el, $HTML_NS, 'p',, $token);
7594 $insert->($el);
7595 ## NOTE: Not inserted into |$self->{open_elements}|.
7596 }
7597
7598 !!!next-token;
7599 next B;
7600 } elsif ({
7601 a => 1,
7602 b => 1, big => 1, em => 1, font => 1, i => 1,
7603 nobr => 1, s => 1, small => 1, strile => 1,
7604 strong => 1, tt => 1, u => 1,
7605 }->{$token->{tag_name}}) {
7606 !!!cp ('t427');
7607 $formatting_end_tag->($token);
7608 next B;
7609 } elsif ($token->{tag_name} eq 'br') {
7610 !!!cp ('t428');
7611 !!!parse-error (type => 'unmatched end tag',
7612 text => 'br', token => $token);
7613
7614 ## As if <br>
7615 $reconstruct_active_formatting_elements->($insert_to_current);
7616
7617 my $el;
7618 !!!create-element ($el, $HTML_NS, 'br',, $token);
7619 $insert->($el);
7620
7621 ## Ignore the token.
7622 !!!next-token;
7623 next B;
7624 } elsif ({
7625 caption => 1, col => 1, colgroup => 1, frame => 1,
7626 frameset => 1, head => 1, option => 1, optgroup => 1,
7627 tbody => 1, td => 1, tfoot => 1, th => 1,
7628 thead => 1, tr => 1,
7629 area => 1, basefont => 1, bgsound => 1,
7630 embed => 1, hr => 1, iframe => 1, image => 1,
7631 img => 1, input => 1, isindex => 1, noembed => 1,
7632 noframes => 1, param => 1, select => 1, spacer => 1,
7633 table => 1, textarea => 1, wbr => 1,
7634 noscript => 0, ## TODO: if scripting is enabled
7635 }->{$token->{tag_name}}) {
7636 !!!cp ('t429');
7637 !!!parse-error (type => 'unmatched end tag',
7638 text => $token->{tag_name}, token => $token);
7639 ## Ignore the token
7640 !!!next-token;
7641 next B;
7642
7643 ## ISSUE: Issue on HTML5 new elements in spec
7644
7645 } else {
7646 ## Step 1
7647 my $node_i = -1;
7648 my $node = $self->{open_elements}->[$node_i];
7649
7650 ## Step 2
7651 S2: {
7652 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
7653 ## Step 1
7654 ## generate implied end tags
7655 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
7656 !!!cp ('t430');
7657 ## NOTE: |<ruby><rt></ruby>|.
7658 ## ISSUE: <ruby><rt></rt> will also take this code path,
7659 ## which seems wrong.
7660 pop @{$self->{open_elements}};
7661 $node_i++;
7662 }
7663
7664 ## Step 2
7665 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
7666 ne $token->{tag_name}) {
7667 !!!cp ('t431');
7668 ## NOTE: <x><y></x>
7669 !!!parse-error (type => 'not closed',
7670 text => $self->{open_elements}->[-1]->[0]
7671 ->manakai_local_name,
7672 token => $token);
7673 } else {
7674 !!!cp ('t432');
7675 }
7676
7677 ## Step 3
7678 splice @{$self->{open_elements}}, $node_i if $node_i < 0;
7679
7680 !!!next-token;
7681 last S2;
7682 } else {
7683 ## Step 3
7684 if (not ($node->[1] & FORMATTING_EL) and
7685 #not $phrasing_category->{$node->[1]} and
7686 ($node->[1] & SPECIAL_EL or
7687 $node->[1] & SCOPING_EL)) {
7688 !!!cp ('t433');
7689 !!!parse-error (type => 'unmatched end tag',
7690 text => $token->{tag_name}, token => $token);
7691 ## Ignore the token
7692 !!!next-token;
7693 last S2;
7694 }
7695
7696 !!!cp ('t434');
7697 }
7698
7699 ## Step 4
7700 $node_i--;
7701 $node = $self->{open_elements}->[$node_i];
7702
7703 ## Step 5;
7704 redo S2;
7705 } # S2
7706 next B;
7707 }
7708 }
7709 next B;
7710 } continue { # B
7711 if ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) {
7712 ## NOTE: The code below is executed in cases where it does not have
7713 ## to be, but it is harmless even in those cases.
7714 ## has an element in scope
7715 INSCOPE: {
7716 for (reverse 0..$#{$self->{open_elements}}) {
7717 my $node = $self->{open_elements}->[$_];
7718 if ($node->[1] & FOREIGN_EL) {
7719 last INSCOPE;
7720 } elsif ($node->[1] & SCOPING_EL) {
7721 last;
7722 }
7723 }
7724
7725 ## NOTE: No foreign element in scope.
7726 $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
7727 } # INSCOPE
7728 }
7729 } # B
7730
7731 ## Stop parsing # MUST
7732
7733 ## TODO: script stuffs
7734 } # _tree_construct_main
7735
## Set the |innerHTML| of a node: parse a character string as HTML and
## replace the children of the node with the result.
##
## Invoked as a class method:
##
##   $class->set_inner_html ($node, $string, $onerror, $get_wrapper);
##
##   $node        - The node whose content is replaced.  A document
##                  node (node type 9) or an element node (node type 1).
##   $string      - The character string to parse.  It is accessed
##                  through a reference to the caller's argument.
##   $onerror     - Optional code reference invoked for parse errors
##                  with a key/value list.  In the element case, when it
##                  is omitted, a default handler reports the error type
##                  and position with |warn|.
##   $get_wrapper - Optional code reference.  It is only forwarded to
##                  |parse_char_string| in the document case; the
##                  element case does not support it yet.
##
## Dies if |$node| is neither a document nor an element.
##
## NOTE: The prototype accepts up to five arguments (including the
## class) so that it agrees with the arguments actually read below.
sub set_inner_html ($$$;$$) {
  my $class = shift;
  my $node = shift;
  my $s = \$_[0];
  my $onerror = $_[1];
  my $get_wrapper = $_[2] || sub ($) { return $_[0] };

  ## ISSUE: Should {confident} be true?

  my $nt = $node->node_type;
  if ($nt == 9) {
    # MUST

    ## Step 1 # MUST
    ## TODO: If the document has an active parser, ...
    ## ISSUE: There is an issue in the spec.

    ## Step 2 # MUST
    ## NOTE: Iterate over a copy of the child list, since children are
    ## removed while looping.
    my @cn = @{$node->child_nodes};
    for (@cn) {
      $node->remove_child ($_);
    }

    ## Step 3, 4, 5 # MUST
    $class->parse_char_string ($$s => $node, $onerror, $get_wrapper);
  } elsif ($nt == 1) {
    ## TODO: If non-html element

    ## NOTE: Most of this code is copied from |parse_string|

    ## TODO: Support for $get_wrapper

    ## Step 1 # MUST
    ## NOTE: The fragment is parsed into a fresh HTML document by a
    ## fresh parser object; the result is moved into |$node| later.
    my $this_doc = $node->owner_document;
    my $doc = $this_doc->implementation->create_document;
    $doc->manakai_is_html (1);
    my $p = $class->new;
    $p->{document} = $doc;

    ## Step 8 # MUST
    my $i = 0; # offset of the next character to read from |$$s|
    $p->{line_prev} = $p->{line} = 1;
    $p->{column_prev} = $p->{column} = 0;
    ## Input stream reader: store the next input character (as a code
    ## point, or -1 at the end of the string) into |$self->{next_char}|
    ## and keep the line/column position up to date.
    $p->{set_next_char} = sub {
      my $self = shift;

      ## Shift the history of previously read characters.
      pop @{$self->{prev_char}};
      unshift @{$self->{prev_char}}, $self->{next_char};

      if ($i >= length $$s) {
        ## End of the input string.
        $self->{next_char} = -1;
        return;
      }
      $self->{next_char} = ord substr $$s, $i++, 1;

      ($p->{line_prev}, $p->{column_prev}) = ($p->{line}, $p->{column});
      $p->{column}++;

      if ($self->{next_char} == 0x000A) { # LF
        $p->{line}++;
        $p->{column} = 0;
        !!!cp ('i1');
      } elsif ($self->{next_char} == 0x000D) { # CR
        ## CR and CRLF are both reported as a single LF.
        $i++ if substr ($$s, $i, 1) eq "\x0A";
        $self->{next_char} = 0x000A; # LF # MUST
        $p->{line}++;
        $p->{column} = 0;
        !!!cp ('i2');
      } elsif ($self->{next_char} > 0x10FFFF) {
        $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
        !!!cp ('i3');
      } elsif ($self->{next_char} == 0x0000) { # NULL
        !!!cp ('i4');
        !!!parse-error (type => 'NULL');
        $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
      } elsif ($self->{next_char} <= 0x0008 or
               (0x000E <= $self->{next_char} and
                $self->{next_char} <= 0x001F) or
               (0x007F <= $self->{next_char} and
                $self->{next_char} <= 0x009F) or
               (0xD800 <= $self->{next_char} and
                $self->{next_char} <= 0xDFFF) or
               (0xFDD0 <= $self->{next_char} and
                $self->{next_char} <= 0xFDDF) or
               {
                0xFFFE => 1, 0xFFFF => 1, 0x1FFFE => 1, 0x1FFFF => 1,
                0x2FFFE => 1, 0x2FFFF => 1, 0x3FFFE => 1, 0x3FFFF => 1,
                0x4FFFE => 1, 0x4FFFF => 1, 0x5FFFE => 1, 0x5FFFF => 1,
                0x6FFFE => 1, 0x6FFFF => 1, 0x7FFFE => 1, 0x7FFFF => 1,
                0x8FFFE => 1, 0x8FFFF => 1, 0x9FFFE => 1, 0x9FFFF => 1,
                0xAFFFE => 1, 0xAFFFF => 1, 0xBFFFE => 1, 0xBFFFF => 1,
                0xCFFFE => 1, 0xCFFFF => 1, 0xDFFFE => 1, 0xDFFFF => 1,
                0xEFFFE => 1, 0xEFFFF => 1, 0xFFFFE => 1, 0xFFFFF => 1,
                0x10FFFE => 1, 0x10FFFF => 1,
               }->{$self->{next_char}}) {
        ## NOTE: These characters are reported as errors but are
        ## passed through unchanged.
        !!!cp ('i4.1');
        if ($self->{next_char} < 0x10000) {
          !!!parse-error (type => 'control char',
                          text => (sprintf 'U+%04X', $self->{next_char}));
        } else {
          !!!parse-error (type => 'control char',
                          text => (sprintf 'U-%08X', $self->{next_char}));
        }
      }
    };
    $p->{prev_char} = [-1, -1, -1];
    $p->{next_char} = -1;

    $p->{read_until} = sub {
      ## TODO: ...
      return 0;
    }; # $p->{read_until};

    ## Default error handler: prefer the position of the token, if it
    ## has one, over the current position of the input stream.
    my $ponerror = $onerror || sub {
      my (%opt) = @_;
      my $line = $opt{line};
      my $column = $opt{column};
      if (defined $opt{token} and defined $opt{token}->{line}) {
        $line = $opt{token}->{line};
        $column = $opt{token}->{column};
      }
      warn "Parse error ($opt{type}) at line $line column $column\n";
    };
    $p->{parse_error} = sub {
      $ponerror->(line => $p->{line}, column => $p->{column}, @_);
    };

    $p->_initialize_tokenizer;
    $p->_initialize_tree_constructor;

    ## Step 2
    ## NOTE: The initial content model of the tokenizer depends on the
    ## local name of the context element; PCDATA is the default.
    my $node_ln = $node->manakai_local_name;
    $p->{content_model} = {
      title => RCDATA_CONTENT_MODEL,
      textarea => RCDATA_CONTENT_MODEL,
      style => CDATA_CONTENT_MODEL,
      script => CDATA_CONTENT_MODEL,
      xmp => CDATA_CONTENT_MODEL,
      iframe => CDATA_CONTENT_MODEL,
      noembed => CDATA_CONTENT_MODEL,
      noframes => CDATA_CONTENT_MODEL,
      noscript => CDATA_CONTENT_MODEL,
      plaintext => PLAINTEXT_CONTENT_MODEL,
    }->{$node_ln};
    $p->{content_model} = PCDATA_CONTENT_MODEL
        unless defined $p->{content_model};
        ## ISSUE: What is "the name of the element"? local name?

    $p->{inner_html_node} = [$node, $el_category->{$node_ln}];
      ## TODO: Foreign element OK?

    ## Step 3
    my $root = $doc->create_element_ns
      ('http://www.w3.org/1999/xhtml', [undef, 'html']);

    ## Step 4 # MUST
    $doc->append_child ($root);

    ## Step 5 # MUST
    push @{$p->{open_elements}}, [$root, $el_category->{html}];

    undef $p->{head_element};

    ## Step 6 # MUST
    $p->_reset_insertion_mode;

    ## Step 7 # MUST
    ## NOTE: Walk from the context node up through its ancestors and
    ## use the nearest HTML |form| element as the form element pointer.
    my $anode = $node;
    AN: while (defined $anode) {
      if ($anode->node_type == 1) {
        my $nsuri = $anode->namespace_uri;
        if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml') {
          if ($anode->manakai_local_name eq 'form') {
            !!!cp ('i5');
            $p->{form_element} = $anode;
            last AN;
          }
        }
      }
      $anode = $anode->parent_node;
    } # AN

    ## Step 9 # MUST
    {
      ## NOTE: |$self| is declared here, aliased to the parser, for the
      ## macro invocation below.
      my $self = $p;
      !!!next-token;
    }
    $p->_tree_construction_main;

    ## Step 10 # MUST
    my @cn = @{$node->child_nodes};
    for (@cn) {
      $node->remove_child ($_);
    }
    ## ISSUE: mutation events? read-only?

    ## Step 11 # MUST
    ## NOTE: Move the parsed nodes from the temporary document into the
    ## context node.
    @cn = @{$root->child_nodes};
    for (@cn) {
      $this_doc->adopt_node ($_);
      $node->append_child ($_);
    }
    ## ISSUE: mutation events?

    $p->_terminate_tree_constructor;

    ## NOTE: Both of these closures refer to |$p| while being stored in
    ## |$p|.  Remove them so that the reference cycles are broken and
    ## the parser object can be freed.
    delete $p->{parse_error}; # delete loop
    delete $p->{set_next_char}; # delete loop
  } else {
    die "$0: |set_inner_html| is not defined for node of type $nt";
  }
} # set_inner_html
7944
7945 } # tree construction stage
7946
## |Whatpm::HTML::RestartParser| - A class that inherits from |Error|.
## NOTE(review): Presumably thrown to request that parsing be restarted;
## confirm against the code that throws it.
package Whatpm::HTML::RestartParser;
our @ISA;
push @ISA, 'Error';

## True value, as required at the end of a module file.
1;
7951 # $Date: 2008/09/14 06:58:28 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24