/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Contents of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.224 - (show annotations) (download) (as text)
Sun Aug 16 05:15:09 2009 UTC (15 years, 2 months ago) by wakaba
Branch: MAIN
Changes since 1.223: +45 -51 lines
File MIME type: application/x-wais-source
++ whatpm/t/ChangeLog	16 Aug 2009 05:12:20 -0000
	* tree-test-1.dat: Changed test results for </br> in the after
	head parsing mode (HTML5 revision 2586).

2009-08-16  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	16 Aug 2009 05:13:22 -0000
	* HTML.pm.src: The </br> end tags before the <body> start tag
	should equally survive as the first br element child of the body
	element (HTML5 revision 2586).

2009-08-16  Wakaba  <wakaba@suika.fam.cx>

1 package Whatpm::HTML;
2 use strict;
3 our $VERSION=do{my @r=(q$Revision: 1.223 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 use Error qw(:try);
5
6 use Whatpm::HTML::Tokenizer;
7
8 ## NOTE: This module doesn't check all HTML5 parse errors; character
9 ## encoding related parse errors are expected to be handled by relevant
10 ## modules.
11 ## Parse errors for control characters that are not allowed in HTML5
12 ## documents, for surrogate code points, and for noncharacter code
13 ## points, as well as U+FFFD substitutions for characters whose code point
14 ## is higher than U+10FFFF, may be detected by combining the parser with
15 ## the checker implemented by Whatpm::Charset::UnicodeChecker (for its
16 ## usage example, see |t/HTML-tree.t| in the Whatpm package or the
17 ## WebHACC::Language::HTML module in the WebHACC package).
18
19 ## ISSUE:
20 ## var doc = implementation.createDocument (null, null, null);
21 ## doc.write ('');
22 ## alert (doc.compatMode);
23
24 require IO::Handle;
25
26 ## Namespace URLs
27
28 my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
29 my $MML_NS = q<http://www.w3.org/1998/Math/MathML>;
30 my $SVG_NS = q<http://www.w3.org/2000/svg>;
31 my $XLINK_NS = q<http://www.w3.org/1999/xlink>;
32 my $XML_NS = q<http://www.w3.org/XML/1998/namespace>;
33 my $XMLNS_NS = q<http://www.w3.org/2000/xmlns/>;
34
35 ## Element categories
36
## Element category flags.  Each constant in this group is a single bit;
## the composite category values defined after this group are built by
## or-ing these flags together with a small discriminator in bits 0-2.
37 ## Bits 12-15
38 sub SPECIAL_EL () { 0b1_000000000000000 } # bit 15
39 sub SCOPING_EL () { 0b1_00000000000000 } # bit 14
40 sub FORMATTING_EL () { 0b1_0000000000000 } # bit 13
41 sub PHRASING_EL () { 0b1_000000000000 } # bit 12
42
43 ## Bits 10-11
## NOTE: Bit 11 (FOREIGN_EL) is not defined in this file; only its
## commented-out definition is kept here for reference.
44 #sub FOREIGN_EL () { 0b1_00000000000 } # see Whatpm::HTML::Tokenizer
45 sub FOREIGN_FLOW_CONTENT_EL () { 0b1_0000000000 } # bit 10
46
47 ## Bits 6-9
48 sub TABLE_SCOPING_EL () { 0b1_000000000 } # bit 9
49 sub TABLE_ROWS_SCOPING_EL () { 0b1_00000000 } # bit 8
50 sub TABLE_ROW_SCOPING_EL () { 0b1_0000000 } # bit 7
51 sub TABLE_ROWS_EL () { 0b1_000000 } # bit 6
52
53 ## Bit 5
54 sub ADDRESS_DIV_P_EL () { 0b1_00000 }
55
56 ## NOTE: Used in </body> and EOF algorithms.
57 ## Bit 4
58 sub ALL_END_TAG_OPTIONAL_EL () { 0b1_0000 }
59
60 ## NOTE: Used in "generate implied end tags" algorithm.
61 ## NOTE: There is a code where a modified version of
62 ## END_TAG_OPTIONAL_EL is used in "generate implied end tags"
63 ## implementation (search for the algorithm name).
64 ## Bit 3
65 sub END_TAG_OPTIONAL_EL () { 0b1_000 }
66
## Composite element categories.  Each value is one primary category
## flag (SPECIAL_EL, SCOPING_EL, FORMATTING_EL or PHRASING_EL), or-ed
## with any applicable scoping / end-tag flags and a discriminator in the
## low three bits.
67 ## Bits 0-2
68
## Special elements that carry no additional flags.
69 sub MISC_SPECIAL_EL () { SPECIAL_EL | 0b000 }
70 sub FORM_EL () { SPECIAL_EL | 0b001 }
71 sub FRAMESET_EL () { SPECIAL_EL | 0b010 }
72 sub HEADING_EL () { SPECIAL_EL | 0b011 }
73 sub SELECT_EL () { SPECIAL_EL | 0b100 }
74 sub SCRIPT_EL () { SPECIAL_EL | 0b101 }
75
76 sub ADDRESS_DIV_EL () { SPECIAL_EL | ADDRESS_DIV_P_EL | 0b001 }
77 sub BODY_EL () { SPECIAL_EL | ALL_END_TAG_OPTIONAL_EL | 0b001 }
78
## Special elements whose end tags are optional.
79 sub DTDD_EL () {
80 SPECIAL_EL |
81 END_TAG_OPTIONAL_EL |
82 ALL_END_TAG_OPTIONAL_EL |
83 0b010
84 }
85 sub LI_EL () {
86 SPECIAL_EL |
87 END_TAG_OPTIONAL_EL |
88 ALL_END_TAG_OPTIONAL_EL |
89 0b100
90 }
91 sub P_EL () {
92 SPECIAL_EL |
93 ADDRESS_DIV_P_EL |
94 END_TAG_OPTIONAL_EL |
95 ALL_END_TAG_OPTIONAL_EL |
96 0b001
97 }
98
## Table row and row-group elements; the two values differ only in
## which table scoping flag they carry.
99 sub TABLE_ROW_EL () {
100 SPECIAL_EL |
101 TABLE_ROWS_EL |
102 TABLE_ROW_SCOPING_EL |
103 ALL_END_TAG_OPTIONAL_EL |
104 0b001
105 }
106 sub TABLE_ROW_GROUP_EL () {
107 SPECIAL_EL |
108 TABLE_ROWS_EL |
109 TABLE_ROWS_SCOPING_EL |
110 ALL_END_TAG_OPTIONAL_EL |
111 0b001
112 }
113
## Scoping elements.
114 sub MISC_SCOPING_EL () { SCOPING_EL | 0b000 }
115 sub BUTTON_EL () { SCOPING_EL | 0b001 }
116 sub CAPTION_EL () { SCOPING_EL | 0b010 }
117 sub HTML_EL () {
118 SCOPING_EL |
119 TABLE_SCOPING_EL |
120 TABLE_ROWS_SCOPING_EL |
121 TABLE_ROW_SCOPING_EL |
122 ALL_END_TAG_OPTIONAL_EL |
123 0b001
124 }
125 sub TABLE_EL () {
126 SCOPING_EL |
127 TABLE_ROWS_EL |
128 TABLE_SCOPING_EL |
129 0b001
130 }
131 sub TABLE_CELL_EL () {
132 SCOPING_EL |
133 TABLE_ROW_SCOPING_EL |
134 ALL_END_TAG_OPTIONAL_EL |
135 0b001
136 }
137
## Formatting elements.
138 sub MISC_FORMATTING_EL () { FORMATTING_EL | 0b000 }
139 sub A_EL () { FORMATTING_EL | 0b001 }
140 sub NOBR_EL () { FORMATTING_EL | 0b010 }
141
## Phrasing elements.
142 sub RUBY_EL () { PHRASING_EL | 0b001 }
143
144 ## ISSUE: ALL_END_TAG_OPTIONAL_EL?
145 sub OPTGROUP_EL () { PHRASING_EL | END_TAG_OPTIONAL_EL | 0b001 }
146 sub OPTION_EL () { PHRASING_EL | END_TAG_OPTIONAL_EL | 0b010 }
147 sub RUBY_COMPONENT_EL () { PHRASING_EL | END_TAG_OPTIONAL_EL | 0b100 }
148
## NOTE(review): FOREIGN_EL is not defined in this file; presumably it
## is provided by Whatpm::HTML::Tokenizer, which is loaded at the top of
## this file -- confirm.
149 sub MML_AXML_EL () { PHRASING_EL | FOREIGN_EL | 0b001 }
150
## Maps the (lowercase) name of an HTML element to its element category
## value, as built from the *_EL constants.
## NOTE: |FORMATTING_EL| and |MISC_FORMATTING_EL| have the same value,
## since the latter is defined as |FORMATTING_EL | 0b000|.
## NOTE(review): |SCRIPT_EL| is defined among the category constants,
## but |script| is mapped to |MISC_SPECIAL_EL| in this table -- confirm
## whether that is intended.
151 my $el_category = {
152 a => A_EL,
153 address => ADDRESS_DIV_EL,
154 applet => MISC_SCOPING_EL,
155 area => MISC_SPECIAL_EL,
156 article => MISC_SPECIAL_EL,
157 aside => MISC_SPECIAL_EL,
158 b => FORMATTING_EL,
159 base => MISC_SPECIAL_EL,
160 basefont => MISC_SPECIAL_EL,
161 bgsound => MISC_SPECIAL_EL,
162 big => FORMATTING_EL,
163 blockquote => MISC_SPECIAL_EL,
164 body => BODY_EL,
165 br => MISC_SPECIAL_EL,
166 button => BUTTON_EL,
167 caption => CAPTION_EL,
168 center => MISC_SPECIAL_EL,
169 col => MISC_SPECIAL_EL,
170 colgroup => MISC_SPECIAL_EL,
171 command => MISC_SPECIAL_EL,
172 datagrid => MISC_SPECIAL_EL,
173 dd => DTDD_EL,
174 details => MISC_SPECIAL_EL,
175 dialog => MISC_SPECIAL_EL,
176 dir => MISC_SPECIAL_EL,
177 div => ADDRESS_DIV_EL,
178 dl => MISC_SPECIAL_EL,
179 dt => DTDD_EL,
180 em => FORMATTING_EL,
181 embed => MISC_SPECIAL_EL,
182 eventsource => MISC_SPECIAL_EL,
183 fieldset => MISC_SPECIAL_EL,
184 figure => MISC_SPECIAL_EL,
185 font => FORMATTING_EL,
186 footer => MISC_SPECIAL_EL,
187 form => FORM_EL,
188 frame => MISC_SPECIAL_EL,
189 frameset => FRAMESET_EL,
190 h1 => HEADING_EL,
191 h2 => HEADING_EL,
192 h3 => HEADING_EL,
193 h4 => HEADING_EL,
194 h5 => HEADING_EL,
195 h6 => HEADING_EL,
196 head => MISC_SPECIAL_EL,
197 header => MISC_SPECIAL_EL,
198 hr => MISC_SPECIAL_EL,
199 html => HTML_EL,
200 i => FORMATTING_EL,
201 iframe => MISC_SPECIAL_EL,
202 img => MISC_SPECIAL_EL,
203 #image => MISC_SPECIAL_EL, ## NOTE: Commented out in the spec.
204 input => MISC_SPECIAL_EL,
205 isindex => MISC_SPECIAL_EL,
206 li => LI_EL,
207 link => MISC_SPECIAL_EL,
208 listing => MISC_SPECIAL_EL,
209 marquee => MISC_SCOPING_EL,
210 menu => MISC_SPECIAL_EL,
211 meta => MISC_SPECIAL_EL,
212 nav => MISC_SPECIAL_EL,
213 nobr => NOBR_EL,
214 noembed => MISC_SPECIAL_EL,
215 noframes => MISC_SPECIAL_EL,
216 noscript => MISC_SPECIAL_EL,
217 object => MISC_SCOPING_EL,
218 ol => MISC_SPECIAL_EL,
219 optgroup => OPTGROUP_EL,
220 option => OPTION_EL,
221 p => P_EL,
222 param => MISC_SPECIAL_EL,
223 plaintext => MISC_SPECIAL_EL,
224 pre => MISC_SPECIAL_EL,
225 rp => RUBY_COMPONENT_EL,
226 rt => RUBY_COMPONENT_EL,
227 ruby => RUBY_EL,
228 s => FORMATTING_EL,
229 script => MISC_SPECIAL_EL,
230 select => SELECT_EL,
231 section => MISC_SPECIAL_EL,
232 small => FORMATTING_EL,
233 spacer => MISC_SPECIAL_EL,
234 strike => FORMATTING_EL,
235 strong => FORMATTING_EL,
236 style => MISC_SPECIAL_EL,
237 table => TABLE_EL,
238 tbody => TABLE_ROW_GROUP_EL,
239 td => TABLE_CELL_EL,
240 textarea => MISC_SPECIAL_EL,
241 tfoot => TABLE_ROW_GROUP_EL,
242 th => TABLE_CELL_EL,
243 thead => TABLE_ROW_GROUP_EL,
244 title => MISC_SPECIAL_EL,
245 tr => TABLE_ROW_EL,
246 tt => FORMATTING_EL,
247 u => FORMATTING_EL,
248 ul => MISC_SPECIAL_EL,
249 wbr => MISC_SPECIAL_EL,
250 };
251
## Element categories for non-HTML ("foreign") elements, keyed first by
## namespace URL (MathML or SVG) and then by element local name.
## Every listed element except |annotation-xml| carries
## FOREIGN_FLOW_CONTENT_EL; SVG |foreignObject| is additionally a
## scoping element.
252 my $el_category_f = {
253 $MML_NS => {
254 'annotation-xml' => MML_AXML_EL,
255 mi => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
256 mo => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
257 mn => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
258 ms => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
259 mtext => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
260 },
261 $SVG_NS => {
262 foreignObject => SCOPING_EL | FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
263 desc => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
264 title => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
265 },
266 ## NOTE: In addition, FOREIGN_EL is set to non-HTML elements.
267 };
268
## Maps an all-lowercase SVG attribute name to its mixed-case form.
## NOTE(review): presumably used to restore the proper case of attribute
## names on SVG elements after they have been lowercased -- the use site
## is not in this part of the file; confirm there.
269 my $svg_attr_name = {
270 attributename => 'attributeName',
271 attributetype => 'attributeType',
272 basefrequency => 'baseFrequency',
273 baseprofile => 'baseProfile',
274 calcmode => 'calcMode',
275 clippathunits => 'clipPathUnits',
276 contentscripttype => 'contentScriptType',
277 contentstyletype => 'contentStyleType',
278 diffuseconstant => 'diffuseConstant',
279 edgemode => 'edgeMode',
280 externalresourcesrequired => 'externalResourcesRequired',
281 filterres => 'filterRes',
282 filterunits => 'filterUnits',
283 glyphref => 'glyphRef',
284 gradienttransform => 'gradientTransform',
285 gradientunits => 'gradientUnits',
286 kernelmatrix => 'kernelMatrix',
287 kernelunitlength => 'kernelUnitLength',
288 keypoints => 'keyPoints',
289 keysplines => 'keySplines',
290 keytimes => 'keyTimes',
291 lengthadjust => 'lengthAdjust',
292 limitingconeangle => 'limitingConeAngle',
293 markerheight => 'markerHeight',
294 markerunits => 'markerUnits',
295 markerwidth => 'markerWidth',
296 maskcontentunits => 'maskContentUnits',
297 maskunits => 'maskUnits',
298 numoctaves => 'numOctaves',
299 pathlength => 'pathLength',
300 patterncontentunits => 'patternContentUnits',
301 patterntransform => 'patternTransform',
302 patternunits => 'patternUnits',
303 pointsatx => 'pointsAtX',
304 pointsaty => 'pointsAtY',
305 pointsatz => 'pointsAtZ',
306 preservealpha => 'preserveAlpha',
307 preserveaspectratio => 'preserveAspectRatio',
308 primitiveunits => 'primitiveUnits',
309 refx => 'refX',
310 refy => 'refY',
311 repeatcount => 'repeatCount',
312 repeatdur => 'repeatDur',
313 requiredextensions => 'requiredExtensions',
314 requiredfeatures => 'requiredFeatures',
315 specularconstant => 'specularConstant',
316 specularexponent => 'specularExponent',
317 spreadmethod => 'spreadMethod',
318 startoffset => 'startOffset',
319 stddeviation => 'stdDeviation',
320 stitchtiles => 'stitchTiles',
321 surfacescale => 'surfaceScale',
322 systemlanguage => 'systemLanguage',
323 tablevalues => 'tableValues',
324 targetx => 'targetX',
325 targety => 'targetY',
326 textlength => 'textLength',
327 viewbox => 'viewBox',
328 viewtarget => 'viewTarget',
329 xchannelselector => 'xChannelSelector',
330 ychannelselector => 'yChannelSelector',
331 zoomandpan => 'zoomAndPan',
332 };
333
## Maps a qualified attribute name, as written in the source, to
## |[namespace URL, [prefix, local name]]|.  The prefix is |undef| for
## the unprefixed |xmlns| attribute.
334 my $foreign_attr_xname = {
335 'xlink:actuate' => [$XLINK_NS, ['xlink', 'actuate']],
336 'xlink:arcrole' => [$XLINK_NS, ['xlink', 'arcrole']],
337 'xlink:href' => [$XLINK_NS, ['xlink', 'href']],
338 'xlink:role' => [$XLINK_NS, ['xlink', 'role']],
339 'xlink:show' => [$XLINK_NS, ['xlink', 'show']],
340 'xlink:title' => [$XLINK_NS, ['xlink', 'title']],
341 'xlink:type' => [$XLINK_NS, ['xlink', 'type']],
342 'xml:base' => [$XML_NS, ['xml', 'base']],
343 'xml:lang' => [$XML_NS, ['xml', 'lang']],
344 'xml:space' => [$XML_NS, ['xml', 'space']],
345 'xmlns' => [$XMLNS_NS, [undef, 'xmlns']],
346 'xmlns:xlink' => [$XMLNS_NS, ['xmlns', 'xlink']],
347 };
348
349 ## ISSUE: xmlns:xlink="non-xlink-ns" is not an error.
350
351 ## TODO: Invoke the reset algorithm when a resettable element is
352 ## created (cf. HTML5 revision 2259).
353
## Parses an HTML document given as a string of bytes.
##
## Arguments:
##   $self         - parser object or class name (passed through to
##                   |parse_byte_stream|)
##   $charset_name - encoding label known to the caller, or |undef|
##   $_[0]         - the byte string, or a reference to it
##   remaining     - passed through unchanged to |parse_byte_stream|
##
## Returns whatever |parse_byte_stream| returns.  Dies if the string
## cannot be opened as an in-memory file (for example, when it contains
## characters above U+00FF and therefore is not a byte string).
sub parse_byte_string ($$$$;$) {
  my $self = shift;
  my $charset_name = shift;

  ## NOTE: The string is referenced in place through |@_| rather than
  ## copied, so that large inputs are not duplicated.
  my $bytes_ref = ref $_[0] ? $_[0] : \($_[0]);

  ## NOTE: Without this check a failed |open| would hand an undefined
  ## handle to |parse_byte_stream| and fail later with an unrelated
  ## error message.
  open my $input, '<', $bytes_ref
      or die "parse_byte_string: can't open the byte string as a stream: $!";

  return $self->parse_byte_stream ($charset_name, $input, @_[1..$#_]);
} # parse_byte_string
360
## Parses an HTML document read from a byte stream, determining the
## character encoding before handing the decoded characters to
## |parse_char_stream|.
##
## Arguments (see also the commented-out argument list below):
##   invocant      - parser object, or a class name, in which case a new
##                   parser is created by |new|
##   $charset_name - encoding label supplied by the caller, or |undef|
##   $byte_stream  - handle from which bytes are read by |getc|
##   $doc          - document, passed through to |parse_char_stream|
##   $onerror      - optional error callback; defaults to a sub that
##                   |warn|s the error type
##   $get_wrapper  - optional sub receiving the decoded character stream
##                   and returning the handle to be parsed; defaults to
##                   the identity function
##
## Returns the value returned by |parse_char_stream|.  If the
## |change_encoding| callback throws |Whatpm::HTML::RestartParser|, the
## document is parsed again with the newly selected decoder.
361 sub parse_byte_stream ($$$$;$$) {
362 # my ($self, $charset_name, $byte_stream, $doc, $onerror, $get_wrapper) = @_;
363 my $self = ref $_[0] ? shift : shift->new;
364 my $charset_name = shift;
365 my $byte_stream = $_[0];
366
367 my $onerror = $_[2] || sub {
368 my (%opt) = @_;
369 warn "Parse error ($opt{type})\n";
370 };
371 $self->{parse_error} = $onerror; # updated later by parse_char_string
372
373 my $get_wrapper = $_[3] || sub ($) {
374 return $_[0]; # $_[0] = byte stream handle, returned = arg to char handle
375 };
376
377 ## HTML5 encoding sniffing algorithm
378 require Message::Charset::Info;
379 my $charset;
380 my $buffer;
381 my ($char_stream, $e_status);
382
## Each step below that succeeds leaves the block by |last SNIFFING|
## with |$charset|, |$char_stream| and |$e_status| set; falling off the
## end of the block means that the default encoding was chosen.
383 SNIFFING: {
384 ## NOTE: By setting |allow_fallback| option true when the
385 ## |get_decode_handle| method is invoked, we ignore what the HTML5
386 ## spec requires, i.e. unsupported encoding should be ignored.
387 ## TODO: We should not do this unless the parser is invoked
388 ## in the conformance checking mode, in which this behavior
389 ## would be useful.
390
391 ## Step 1
392 if (defined $charset_name) {
393 $charset = Message::Charset::Info->get_by_html_name ($charset_name);
394 ## TODO: Is this ok? Transfer protocol's parameter should be
395 ## interpreted in its semantics?
396
397 ($char_stream, $e_status) = $charset->get_decode_handle
398 ($byte_stream, allow_error_reporting => 1,
399 allow_fallback => 1);
400 if ($char_stream) {
401 $self->{confident} = 1;
402 last SNIFFING;
403 } else {
404 !!!parse-error (type => 'charset:not supported',
405 layer => 'encode',
406 line => 1, column => 1,
407 value => $charset_name,
408 level => $self->{level}->{uncertain});
409 }
410 }
411
412 ## Step 2
## Read up to 1024 bytes to be examined by the following steps.
413 my $byte_buffer = '';
414 for (1..1024) {
415 my $char = $byte_stream->getc;
416 last unless defined $char;
417 $byte_buffer .= $char;
418 } ## TODO: timeout
419
420 ## Step 3
## A byte order mark at the beginning of the buffer determines the
## encoding with confidence.
421 if ($byte_buffer =~ /^\xFE\xFF/) {
422 $charset = Message::Charset::Info->get_by_html_name ('utf-16be');
423 ($char_stream, $e_status) = $charset->get_decode_handle
424 ($byte_stream, allow_error_reporting => 1,
425 allow_fallback => 1, byte_buffer => \$byte_buffer);
426 $self->{confident} = 1;
427 last SNIFFING;
428 } elsif ($byte_buffer =~ /^\xFF\xFE/) {
429 $charset = Message::Charset::Info->get_by_html_name ('utf-16le');
430 ($char_stream, $e_status) = $charset->get_decode_handle
431 ($byte_stream, allow_error_reporting => 1,
432 allow_fallback => 1, byte_buffer => \$byte_buffer);
433 $self->{confident} = 1;
434 last SNIFFING;
435 } elsif ($byte_buffer =~ /^\xEF\xBB\xBF/) {
436 $charset = Message::Charset::Info->get_by_html_name ('utf-8');
437 ($char_stream, $e_status) = $charset->get_decode_handle
438 ($byte_stream, allow_error_reporting => 1,
439 allow_fallback => 1, byte_buffer => \$byte_buffer);
440 $self->{confident} = 1;
441 last SNIFFING;
442 }
443
444 ## Step 4
445 ## TODO: <meta charset>
446
447 ## Step 5
448 ## TODO: from history
449
450 ## Step 6
## Guess the encoding from the buffered bytes; the result is tentative
## (|{confident}| is false).
451 require Whatpm::Charset::UniversalCharDet;
452 $charset_name = Whatpm::Charset::UniversalCharDet->detect_byte_string
453 ($byte_buffer);
454 if (defined $charset_name) {
455 $charset = Message::Charset::Info->get_by_html_name ($charset_name);
456
457 require Whatpm::Charset::DecodeHandle;
458 $buffer = Whatpm::Charset::DecodeHandle::ByteBuffer->new
459 ($byte_stream);
460 ($char_stream, $e_status) = $charset->get_decode_handle
461 ($buffer, allow_error_reporting => 1,
462 allow_fallback => 1, byte_buffer => \$byte_buffer);
463 if ($char_stream) {
464 $buffer->{buffer} = $byte_buffer;
465 !!!parse-error (type => 'sniffing:chardet',
466 text => $charset_name,
467 level => $self->{level}->{info},
468 layer => 'encode',
469 line => 1, column => 1);
470 $self->{confident} = 0;
471 last SNIFFING;
472 }
473 }
474
475 ## Step 7: default
476 ## TODO: Make this configurable.
477 $charset = Message::Charset::Info->get_by_html_name ('windows-1252');
478 ## NOTE: We choose |windows-1252| here, since |utf-8| should be
479 ## detectable in the step 6.
480 require Whatpm::Charset::DecodeHandle;
481 $buffer = Whatpm::Charset::DecodeHandle::ByteBuffer->new
482 ($byte_stream);
483 ($char_stream, $e_status)
484 = $charset->get_decode_handle ($buffer,
485 allow_error_reporting => 1,
486 allow_fallback => 1,
487 byte_buffer => \$byte_buffer);
488 $buffer->{buffer} = $byte_buffer;
489 !!!parse-error (type => 'sniffing:default',
490 text => 'windows-1252',
491 level => $self->{level}->{info},
492 line => 1, column => 1,
493 layer => 'encode');
494 $self->{confident} = 0;
495 } # SNIFFING
496
## Record the selected encoding and report the decoder status flags
## returned by |get_decode_handle| (a fallback implementation, or one
## that does not report errors).
497 if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) {
498 $self->{input_encoding} = $charset->get_iana_name; ## TODO: Should we set actual charset decoder's encoding name?
499 !!!parse-error (type => 'chardecode:fallback',
500 #text => $self->{input_encoding},
501 level => $self->{level}->{uncertain},
502 line => 1, column => 1,
503 layer => 'encode');
504 } elsif (not ($e_status &
505 Message::Charset::Info::ERROR_REPORTING_ENCODING_IMPL ())) {
506 $self->{input_encoding} = $charset->get_iana_name;
507 !!!parse-error (type => 'chardecode:no error',
508 text => $self->{input_encoding},
509 level => $self->{level}->{uncertain},
510 line => 1, column => 1,
511 layer => 'encode');
512 } else {
513 $self->{input_encoding} = $charset->get_iana_name;
514 }
515
## Callback taking (parser, encoding label, token).  NOTE: It assigns to
## the enclosing |$charset_name|, |$charset|, |$char_stream| and
## |$e_status|, which are read again in the restart handler below.
516 $self->{change_encoding} = sub {
517 my $self = shift;
518 $charset_name = shift;
519 my $token = shift;
520
521 $charset = Message::Charset::Info->get_by_html_name ($charset_name);
522 ($char_stream, $e_status) = $charset->get_decode_handle
523 ($byte_stream, allow_error_reporting => 1, allow_fallback => 1,
524 byte_buffer => \ $buffer->{buffer});
525
526 if ($char_stream) { # if supported
527 ## "Change the encoding" algorithm:
528
529 ## Step 1
530 if (defined $self->{input_encoding} and
531 $self->{input_encoding} eq $charset_name) {
532 !!!parse-error (type => 'charset label:matching',
533 text => $charset_name,
534 level => $self->{level}->{info});
535 $self->{confident} = 1;
536 return;
537 }
538
539 ## Step 2 (HTML5 revision 3205)
540 if (defined $self->{input_encoding} and
541 Message::Charset::Info->get_by_html_name ($self->{input_encoding})
542 ->{category} & Message::Charset::Info::CHARSET_CATEGORY_UTF16 ()) {
543 $self->{confident} = 1;
544 return;
545 }
546
547 ## Step 3
548 if ($charset->{category} &
549 Message::Charset::Info::CHARSET_CATEGORY_UTF16 ()) {
550 $charset = Message::Charset::Info->get_by_html_name ('utf-8');
551 ($char_stream, $e_status) = $charset->get_decode_handle
552 ($byte_stream,
553 byte_buffer => \ $buffer->{buffer});
554 }
555 $charset_name = $charset->get_iana_name;
556
557 !!!parse-error (type => 'charset label detected',
558 text => $self->{input_encoding},
559 value => $charset_name,
560 level => $self->{level}->{warn},
561 token => $token);
562
563 ## Step 4
564 # if (can) {
565 ## change the encoding on the fly.
566 #$self->{confident} = 1;
567 #return;
568 # }
569
570 ## Step 5
571 throw Whatpm::HTML::RestartParser ();
572 }
573 }; # $self->{change_encoding}
574
## Error handler installed on the character stream: reports the error
## at the current parser position and, when the decoder supplies an
## |octets| reference, replaces its referent with U+FFFD.
575 my $char_onerror = sub {
576 my (undef, $type, %opt) = @_;
577 !!!parse-error (layer => 'encode',
578 line => $self->{line}, column => $self->{column} + 1,
579 %opt, type => $type);
580 if ($opt{octets}) {
581 ${$opt{octets}} = "\x{FFFD}"; # replacement character
582 }
583 };
584
585 my $wrapped_char_stream = $get_wrapper->($char_stream);
586 $wrapped_char_stream->onerror ($char_onerror);
587
588 my @args = ($_[1], $_[2]); # $doc, $onerror - $get_wrapper = undef;
589 my $return;
590 try {
591 $return = $self->parse_char_stream ($wrapped_char_stream, @args);
592 } catch Whatpm::HTML::RestartParser with {
593 ## NOTE: Invoked after {change_encoding}.
594
595 if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) {
596 $self->{input_encoding} = $charset->get_iana_name; ## TODO: Should we set actual charset decoder's encoding name?
597 !!!parse-error (type => 'chardecode:fallback',
598 level => $self->{level}->{uncertain},
599 #text => $self->{input_encoding},
600 line => 1, column => 1,
601 layer => 'encode');
602 } elsif (not ($e_status &
603 Message::Charset::Info::ERROR_REPORTING_ENCODING_IMPL ())) {
604 $self->{input_encoding} = $charset->get_iana_name;
605 !!!parse-error (type => 'chardecode:no error',
606 text => $self->{input_encoding},
607 level => $self->{level}->{uncertain},
608 line => 1, column => 1,
609 layer => 'encode');
610 } else {
611 $self->{input_encoding} = $charset->get_iana_name;
612 }
613 $self->{confident} = 1;
614
## Parse again from the character stream created by |change_encoding|.
615 $wrapped_char_stream = $get_wrapper->($char_stream);
616 $wrapped_char_stream->onerror ($char_onerror);
617
618 $return = $self->parse_char_stream ($wrapped_char_stream, @args);
619 };
620 return $return;
621 } # parse_byte_stream
622
623 ## NOTE: The HTML5 spec says that the encoding layer MUST NOT strip the
624 ## BOM and the HTML layer MUST ignore it. However, we do strip the BOM in
625 ## the encoding layer and the HTML layer does not ignore any U+FEFF,
626 ## because the core part of our HTML parser expects a string of characters,
627 ## not a string of bytes or code units or anything which might contain a BOM.
628 ## Therefore, any parser interface that accepts a string of bytes,
629 ## such as |parse_byte_string| in this module, must ensure that it does
630 ## strip the BOM and never strips any ZWNBSP.
631
## Parses an HTML document given as a string of characters.
##
## Arguments: ($self, $s, $doc, $onerror, $get_wrapper), where |$s| is
## the character string or a reference to it.  The string is wrapped in
## a |Whatpm::Charset::DecodeHandle::CharString| handle, and everything
## after it is handed to |parse_char_stream| unchanged.
##
## Returns whatever |parse_char_stream| returns.
sub parse_char_string ($$$;$$) {
  my ($self, undef, @parse_args) = @_;

  require Whatpm::Charset::DecodeHandle;

  ## NOTE: A plain string is referenced in place through |@_|, so that
  ## it is not copied.
  my $string_ref = ref $_[1] ? $_[1] : \($_[1]);
  my $char_stream
      = Whatpm::Charset::DecodeHandle::CharString->new ($string_ref);

  return $self->parse_char_stream ($char_stream, @parse_args);
} # parse_char_string
640 *parse_string = \&parse_char_string; ## NOTE: Alias for backward compatibility.
641
## Parses an HTML document read from a character stream.
##
## Arguments:
##   invocant - parser object, or a class name, in which case a new
##              parser is created by |new|
##   $_[0]    - input character stream; |read|, |manakai_read_until| and
##              |onerror| are invoked on it
##   $_[1]    - document; its child nodes are removed before parsing
##   $_[2]    - optional error callback; defaults to a sub that |warn|s
##              the error type with its line and column
##   $_[3]    - optional sub that receives the input stream and returns
##              the handle to be actually read from
##
## Returns the document.
642 sub parse_char_stream ($$$;$$) {
643 my $self = ref $_[0] ? shift : shift->new;
644 my $input = $_[0];
645 $self->{document} = $_[1];
646 @{$self->{document}->child_nodes} = ();
647
648 ## NOTE: |set_inner_html| copies most of this method's code
649
650 $self->{confident} = 1 unless exists $self->{confident};
651 $self->{document}->input_encoding ($self->{input_encoding})
652 if defined $self->{input_encoding};
653 ## TODO: |{input_encoding}| is needless?
654
655 $self->{line_prev} = $self->{line} = 1;
656 $self->{column_prev} = -1;
657 $self->{column} = 0;
## |{set_nc}| stores the code point of the next input character in
## |{nc}| (-1 when no more characters can be read) and updates the line
## and column counters.  CR and CR LF are turned into a single LF, and
## NULL is reported as a parse error and replaced by U+FFFD.
658 $self->{set_nc} = sub {
659 my $self = shift;
660
661 my $char = '';
662 if (defined $self->{next_nc}) {
## A character was read ahead while handling a CR; use it first.
663 $char = $self->{next_nc};
664 delete $self->{next_nc};
665 $self->{nc} = ord $char;
666 } else {
667 $self->{char_buffer} = '';
668 $self->{char_buffer_pos} = 0;
669
## Fast path: try to buffer characters other than NULL, LF and CR, which
## need none of the special handling below.
670 my $count = $input->manakai_read_until
671 ($self->{char_buffer}, qr/[^\x00\x0A\x0D]/, $self->{char_buffer_pos});
672 if ($count) {
673 $self->{line_prev} = $self->{line};
674 $self->{column_prev} = $self->{column};
675 $self->{column}++;
676 $self->{nc}
677 = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
678 return;
679 }
680
681 if ($input->read ($char, 1)) {
682 $self->{nc} = ord $char;
683 } else {
684 $self->{nc} = -1;
685 return;
686 }
687 }
688
689 ($self->{line_prev}, $self->{column_prev})
690 = ($self->{line}, $self->{column});
691 $self->{column}++;
692
693 if ($self->{nc} == 0x000A) { # LF
694 !!!cp ('j1');
695 $self->{line}++;
696 $self->{column} = 0;
697 } elsif ($self->{nc} == 0x000D) { # CR
698 !!!cp ('j2');
699 ## TODO: support for abort/streaming
## Read one character ahead; it is kept in |{next_nc}| unless it is the
## LF of a CR LF pair.
700 my $next = '';
701 if ($input->read ($next, 1) and $next ne "\x0A") {
702 $self->{next_nc} = $next;
703 }
704 $self->{nc} = 0x000A; # LF # MUST
705 $self->{line}++;
706 $self->{column} = 0;
707 } elsif ($self->{nc} == 0x0000) { # NULL
708 !!!cp ('j4');
709 !!!parse-error (type => 'NULL');
710 $self->{nc} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
711 }
712 };
713
## |{read_until}| reads a run of characters in bulk; see the argument
## list in the comment on its first line.  It takes the characters from
## |{char_buffer}| while that still has unread content, and from the
## input stream otherwise.  It returns the number of characters read,
## which is 0 whenever a read-ahead character is pending in |{next_nc}|.
714 $self->{read_until} = sub {
715 #my ($scalar, $specials_range, $offset) = @_;
716 return 0 if defined $self->{next_nc};
717
718 my $pattern = qr/[^$_[1]\x00\x0A\x0D]/;
719 my $offset = $_[2] || 0;
720
721 if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
722 pos ($self->{char_buffer}) = $self->{char_buffer_pos};
723 if ($self->{char_buffer} =~ /\G(?>$pattern)+/) {
724 substr ($_[0], $offset)
725 = substr ($self->{char_buffer}, $-[0], $+[0] - $-[0]);
726 my $count = $+[0] - $-[0];
727 if ($count) {
728 $self->{column} += $count;
729 $self->{char_buffer_pos} += $count;
730 $self->{line_prev} = $self->{line};
731 $self->{column_prev} = $self->{column} - 1;
732 $self->{nc} = -1;
733 }
734 return $count;
735 } else {
736 return 0;
737 }
738 } else {
739 my $count = $input->manakai_read_until ($_[0], $pattern, $_[2]);
740 if ($count) {
741 $self->{column} += $count;
742 $self->{line_prev} = $self->{line};
743 $self->{column_prev} = $self->{column} - 1;
744 $self->{nc} = -1;
745 }
746 return $count;
747 }
748 }; # $self->{read_until}
749
750 my $onerror = $_[2] || sub {
751 my (%opt) = @_;
752 my $line = $opt{token} ? $opt{token}->{line} : $opt{line};
753 my $column = $opt{token} ? $opt{token}->{column} : $opt{column};
754 warn "Parse error ($opt{type}) at line $line column $column\n";
755 };
## The current line and column are given first, so that |line| and
## |column| values specified by the reporter take precedence.
756 $self->{parse_error} = sub {
757 $onerror->(line => $self->{line}, column => $self->{column}, @_);
758 };
759
760 my $char_onerror = sub {
761 my (undef, $type, %opt) = @_;
762 !!!parse-error (layer => 'encode',
763 line => $self->{line}, column => $self->{column} + 1,
764 %opt, type => $type);
765 }; # $char_onerror
766
## An error handler that is already set on the input stream is kept,
## unless the stream is replaced by the wrapper sub.
767 if ($_[3]) {
768 $input = $_[3]->($input);
769 $input->onerror ($char_onerror);
770 } else {
771 $input->onerror ($char_onerror) unless defined $input->onerror;
772 }
773
774 $self->_initialize_tokenizer;
775 $self->_initialize_tree_constructor;
776 $self->_construct_tree;
777 $self->_terminate_tree_constructor;
778
779 delete $self->{parse_error}; # remove loop
780
781 return $self->{document};
782 } # parse_char_stream
783
## Creates a new parser object.
##
## The object is a blessed hash holding the table of error level codes
## (|{level}|) and default callbacks:
##   {set_nc}                      - sets |{nc}| to -1 (no input)
##   {parse_error}                 - ignores the error
##   {change_encoding}             - does nothing
##   {application_cache_selection} - does nothing
##
## Returns the new object.
sub new ($) {
  my $class = shift;
  my $self = bless {
    level => {must => 'm',
              should => 's',
              warn => 'w',
              info => 'i',
              uncertain => 'u'},
  }, $class;

  ## NOTE: The callback is stored in the object itself, so it must not
  ## hold a strong reference to the object; otherwise the two would form
  ## a reference cycle and the parser would never be freed.
  require Scalar::Util;
  Scalar::Util::weaken (my $weak_self = $self);
  $self->{set_nc} = sub {
    $weak_self->{nc} = -1;
  };

  $self->{parse_error} = sub {
    #
  };
  $self->{change_encoding} = sub {
    # if ($_[0] is a supported encoding) {
    #   run "change the encoding" algorithm;
    #   throw Whatpm::HTML::RestartParser (charset => $new_encoding);
    # }
  };
  $self->{application_cache_selection} = sub {
    #
  };
  return $self;
} # new
810
811 ## Insertion modes
812
## Insertion mode group flags (the *_IMS constants).  Each is a single
## bit; the insertion mode values (*_IM) defined after them are built
## from the group flags and, where needed, a discriminator in bits 0-1.
813 sub AFTER_HTML_IMS () { 0b100 }
814 sub HEAD_IMS () { 0b1000 }
815 sub BODY_IMS () { 0b10000 }
816 sub BODY_TABLE_IMS () { 0b100000 }
817 sub TABLE_IMS () { 0b1000000 }
818 sub ROW_IMS () { 0b10000000 }
819 sub BODY_AFTER_IMS () { 0b100000000 }
820 sub FRAME_IMS () { 0b1000000000 }
821 sub SELECT_IMS () { 0b10000000000 }
822 #sub IN_FOREIGN_CONTENT_IM () { 0b100000000000 } # see Whatpm::HTML::Tokenizer
823 ## NOTE: "in foreign content" insertion mode is special; it is combined
824 ## with the secondary insertion mode. In this parser, they are stored
825 ## together in the bit-or'ed form.
826 sub IN_CDATA_RCDATA_IM () { 0b1000000000000 }
827 ## NOTE: "in CDATA/RCDATA" insertion mode is also special; it is
828 ## combined with the original insertion mode. In this parser,
829 ## they are stored together in the bit-or'ed form.
830
## NOTE: The mask covers bits 0-10, i.e. everything below the bits of
## the two special insertion modes described in the notes above.
831 sub IM_MASK () { 0b11111111111 }
832
833 ## NOTE: "initial" and "before html" insertion modes have no constants.
834
835 ## NOTE: "after after body" insertion mode.
836 sub AFTER_HTML_BODY_IM () { AFTER_HTML_IMS | BODY_AFTER_IMS }
837
838 ## NOTE: "after after frameset" insertion mode.
839 sub AFTER_HTML_FRAMESET_IM () { AFTER_HTML_IMS | FRAME_IMS }
840
841 sub IN_HEAD_IM () { HEAD_IMS | 0b00 }
842 sub IN_HEAD_NOSCRIPT_IM () { HEAD_IMS | 0b01 }
843 sub AFTER_HEAD_IM () { HEAD_IMS | 0b10 }
844 sub BEFORE_HEAD_IM () { HEAD_IMS | 0b11 }
845 sub IN_BODY_IM () { BODY_IMS }
846 sub IN_CELL_IM () { BODY_IMS | BODY_TABLE_IMS | 0b01 }
847 sub IN_CAPTION_IM () { BODY_IMS | BODY_TABLE_IMS | 0b10 }
848 sub IN_ROW_IM () { TABLE_IMS | ROW_IMS | 0b01 }
849 sub IN_TABLE_BODY_IM () { TABLE_IMS | ROW_IMS | 0b10 }
850 sub IN_TABLE_IM () { TABLE_IMS }
851 sub AFTER_BODY_IM () { BODY_AFTER_IMS }
852 sub IN_FRAMESET_IM () { FRAME_IMS | 0b01 }
853 sub AFTER_FRAMESET_IM () { FRAME_IMS | 0b10 }
854 sub IN_SELECT_IM () { SELECT_IMS | 0b01 }
855 sub IN_SELECT_IN_TABLE_IM () { SELECT_IMS | 0b10 }
## NOTE: This mode carries no group flag; its value is just 0b10.
856 sub IN_COLUMN_GROUP_IM () { 0b10 }
857
## Prepares |$self->{document}| for tree construction: turns strict
## error checking off, marks the document as an HTML document, and
## records line 1, column 1 as the source position of the document.
##
## NOTE: |$self->{document}| MUST be specified before this method is
## called.
sub _initialize_tree_constructor ($) {
  my ($self) = @_;
  my $doc = $self->{document};

  $doc->strict_error_checking (0);
  ## TODO: Turn mutation events off # MUST
  ## TODO: Turn loose Document option (manakai extension) on
  $doc->manakai_is_html (1); # MUST

  $doc->set_user_data (manakai_source_line => 1);
  return $doc->set_user_data (manakai_source_column => 1);
} # _initialize_tree_constructor
868
## Finishes tree construction by turning the document's strict error
## checking on again.
sub _terminate_tree_constructor ($) {
  my ($self) = @_;
  my $doc = $self->{document};
  ## TODO: Turn mutation events on
  return $doc->strict_error_checking (1);
} # _terminate_tree_constructor
874
875 ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
876
877 { # tree construction stage
878 my $token;
879
## Runs the tree construction stage: fetches the first token, resets the
## per-parse tree construction state, then runs the "initial" and
## "before html" insertion modes followed by the main loop, which is
## entered in the "before head" insertion mode.
sub _construct_tree ($) {
  my ($self) = @_;

  ## When an interactive UA renders the $self->{document} available
  ## to the user, or when it begins accepting user input, is
  ## not defined.

  !!!next-token;

  ## Reset the tree construction state.
  $self->{$_} = undef
      for qw(form_element head_element head_element_inserted);
  $self->{open_elements} = [];
  $self->{$_} = undef
      for qw(inner_html_node ignore_newline);

  ## NOTE: The "initial" insertion mode.
  $self->_tree_construction_initial; # MUST

  ## NOTE: The "before html" insertion mode.
  $self->_tree_construction_root_element;
  $self->{insertion_mode} = BEFORE_HEAD_IM;

  ## NOTE: The "before head" insertion mode and so on.
  $self->_tree_construction_main;
} # _construct_tree
906
## Tree construction stage: the "initial" insertion mode.
##
## Works on the current token (the file-scoped |$token|) and repeats
## until a token that ends this mode has been handled:
##
##   - DOCTYPE token: reports "not HTML5" / "XSLT-compat" errors where
##     applicable, appends a document type node to the document, picks
##     the compatibility mode ("quirks" / "limited quirks" / unchanged)
##     from the name, public identifier and system identifier, fetches
##     the next token and returns.
##   - Start tag, end tag or end-of-file token: reports "no DOCTYPE",
##     selects quirks mode and returns, leaving the token to be
##     reprocessed.
##   - Character token: leading white space is dropped; if nothing is
##     left the next token is fetched and the mode is kept, otherwise
##     the token is handled like the "no DOCTYPE" case above.
##   - Comment token: appended to the document; the mode is kept.
##
## Dies on any other token type.
sub _tree_construction_initial ($) {
  my $self = shift;

  ## NOTE: "initial" insertion mode

  INITIAL: {
    if ($token->{type} == DOCTYPE_TOKEN) {
      ## NOTE: Conformance checkers MAY, instead of reporting "not HTML5"
      ## error, switch to a conformance checking mode for another
      ## language.
      my $doctype_name = $token->{name};
      $doctype_name = '' unless defined $doctype_name;
      $doctype_name =~ tr/a-z/A-Z/; # ASCII case-insensitive
      if (not defined $token->{name} or # <!DOCTYPE>
          defined $token->{sysid}) {
        !!!cp ('t1');
        !!!parse-error (type => 'not HTML5', token => $token);
      } elsif ($doctype_name ne 'HTML') {
        !!!cp ('t2');
        !!!parse-error (type => 'not HTML5', token => $token);
      } elsif (defined $token->{pubid}) {
        if ($token->{pubid} eq 'XSLT-compat') {
          !!!cp ('t1.2');
          !!!parse-error (type => 'XSLT-compat', token => $token,
                          level => $self->{level}->{should});
        } else {
          !!!parse-error (type => 'not HTML5', token => $token);
        }
      } else {
        !!!cp ('t3');
        #
      }

      my $doctype = $self->{document}->create_document_type_definition
          ($token->{name}); ## ISSUE: If name is missing (e.g. <!DOCTYPE>)?
      ## NOTE: Default value for both |public_id| and |system_id| attributes
      ## are empty strings, so that we don't set any value in missing cases.
      $doctype->public_id ($token->{pubid}) if defined $token->{pubid};
      $doctype->system_id ($token->{sysid}) if defined $token->{sysid};
      ## NOTE: Other DocumentType attributes are null or empty lists.
      ## In Firefox3, |internalSubset| attribute is set to the empty
      ## string, while |null| is an allowed value for the attribute
      ## according to DOM3 Core.
      $self->{document}->append_child ($doctype);

      if ($token->{quirks} or $doctype_name ne 'HTML') {
        !!!cp ('t4');
        $self->{document}->manakai_compat_mode ('quirks');
      } elsif (defined $token->{pubid}) {
        ## The public identifier is compared ASCII case-insensitively, so
        ## work on an upper-cased copy; every literal below is upper case.
        my $pubid = $token->{pubid};
        $pubid =~ tr/a-z/A-Z/; # ASCII case-insensitive
        my $prefix = [
          "+//SILMARIL//DTD HTML PRO V0R11 19970101//",
          "-//ADVASOFT LTD//DTD HTML 3.0 ASWEDIT + EXTENSIONS//",
          "-//AS//DTD HTML 3.0 ASWEDIT + EXTENSIONS//",
          "-//IETF//DTD HTML 2.0 LEVEL 1//",
          "-//IETF//DTD HTML 2.0 LEVEL 2//",
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 1//",
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 2//",
          "-//IETF//DTD HTML 2.0 STRICT//",
          "-//IETF//DTD HTML 2.0//",
          "-//IETF//DTD HTML 2.1E//",
          "-//IETF//DTD HTML 3.0//",
          "-//IETF//DTD HTML 3.2 FINAL//",
          "-//IETF//DTD HTML 3.2//",
          "-//IETF//DTD HTML 3//",
          "-//IETF//DTD HTML LEVEL 0//",
          "-//IETF//DTD HTML LEVEL 1//",
          "-//IETF//DTD HTML LEVEL 2//",
          "-//IETF//DTD HTML LEVEL 3//",
          "-//IETF//DTD HTML STRICT LEVEL 0//",
          "-//IETF//DTD HTML STRICT LEVEL 1//",
          "-//IETF//DTD HTML STRICT LEVEL 2//",
          "-//IETF//DTD HTML STRICT LEVEL 3//",
          "-//IETF//DTD HTML STRICT//",
          "-//IETF//DTD HTML//",
          "-//METRIUS//DTD METRIUS PRESENTATIONAL//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML STRICT//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 TABLES//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML STRICT//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 TABLES//",
          "-//NETSCAPE COMM. CORP.//DTD HTML//",
          "-//NETSCAPE COMM. CORP.//DTD STRICT HTML//",
          "-//O'REILLY AND ASSOCIATES//DTD HTML 2.0//",
          "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED 1.0//",
          "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED RELAXED 1.0//",
          "-//SOFTQUAD SOFTWARE//DTD HOTMETAL PRO 6.0::19990601::EXTENSIONS TO HTML 4.0//",
          "-//SOFTQUAD//DTD HOTMETAL PRO 4.0::19971010::EXTENSIONS TO HTML 4.0//",
          "-//SPYGLASS//DTD HTML 2.0 EXTENDED//",
          "-//SQ//DTD HTML 2.0 HOTMETAL + EXTENSIONS//",
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA HTML//",
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA STRICT HTML//",
          "-//W3C//DTD HTML 3 1995-03-24//",
          "-//W3C//DTD HTML 3.2 DRAFT//",
          "-//W3C//DTD HTML 3.2 FINAL//",
          "-//W3C//DTD HTML 3.2//",
          "-//W3C//DTD HTML 3.2S DRAFT//",
          "-//W3C//DTD HTML 4.0 FRAMESET//",
          "-//W3C//DTD HTML 4.0 TRANSITIONAL//",
          ## NOTE(review): "EXPERIMETNAL" is kept exactly as found; confirm
          ## the spelling against the HTML5 quirks-mode public identifier
          ## list before changing it.
          "-//W3C//DTD HTML EXPERIMETNAL 19960712//",
          "-//W3C//DTD HTML EXPERIMENTAL 970421//",
          "-//W3C//DTD W3 HTML//",
          "-//W3O//DTD W3 HTML 3.0//",
          "-//WEBTECHS//DTD MOZILLA HTML 2.0//",
          "-//WEBTECHS//DTD MOZILLA HTML//",
        ]; # $prefix
        ## Does the public identifier start with one of the prefixes?
        ## The test has to look at |$pubid|, not at the list itself.
        my $match;
        for (@$prefix) {
          if (substr ($pubid, 0, length $_) eq $_) {
            $match = 1;
            last;
          }
        }
        if ($match or
            $pubid eq "-//W3O//DTD W3 HTML STRICT 3.0//EN//" or
            $pubid eq "-/W3C/DTD HTML 4.0 TRANSITIONAL/EN" or
            $pubid eq "HTML") {
          !!!cp ('t5');
          $self->{document}->manakai_compat_mode ('quirks');
        } elsif ($pubid =~ m[^-//W3C//DTD HTML 4.01 FRAMESET//] or
                 $pubid =~ m[^-//W3C//DTD HTML 4.01 TRANSITIONAL//]) {
          if (defined $token->{sysid}) {
            !!!cp ('t6');
            $self->{document}->manakai_compat_mode ('quirks');
          } else {
            !!!cp ('t7');
            $self->{document}->manakai_compat_mode ('limited quirks');
          }
        } elsif ($pubid =~ m[^-//W3C//DTD XHTML 1.0 FRAMESET//] or
                 $pubid =~ m[^-//W3C//DTD XHTML 1.0 TRANSITIONAL//]) {
          !!!cp ('t8');
          $self->{document}->manakai_compat_mode ('limited quirks');
        } else {
          !!!cp ('t9');
        }
      } else {
        !!!cp ('t10');
      }
      if (defined $token->{sysid}) {
        ## The system identifier is compared in lower case.
        my $sysid = $token->{sysid};
        $sysid =~ tr/A-Z/a-z/;
        if ($sysid eq "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
          ## NOTE: Ensure that |PUBLIC "(limited quirks)" "(quirks)"| is
          ## marked as quirks.
          $self->{document}->manakai_compat_mode ('quirks');
          !!!cp ('t11');
        } else {
          !!!cp ('t12');
        }
      } else {
        !!!cp ('t13');
      }

      ## Go to the "before html" insertion mode.
      !!!next-token;
      return;
    } elsif ({
              START_TAG_TOKEN, 1,
              END_TAG_TOKEN, 1,
              END_OF_FILE_TOKEN, 1,
             }->{$token->{type}}) {
      !!!cp ('t14');
      !!!parse-error (type => 'no DOCTYPE', token => $token);
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the "before html" insertion mode.
      ## reprocess
      !!!ack-later;
      return;
    } elsif ($token->{type} == CHARACTER_TOKEN) {
      if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
        ## Ignore the token

        unless (length $token->{data}) {
          !!!cp ('t15');
          ## Stay in the insertion mode.
          !!!next-token;
          redo INITIAL;
        } else {
          !!!cp ('t16');
        }
      } else {
        !!!cp ('t17');
      }

      !!!parse-error (type => 'no DOCTYPE', token => $token);
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the "before html" insertion mode.
      ## reprocess
      return;
    } elsif ($token->{type} == COMMENT_TOKEN) {
      !!!cp ('t18');
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);

      ## Stay in the insertion mode.
      !!!next-token;
      redo INITIAL;
    } else {
      die "$0: $token->{type}: Unknown token type";
    }
  } # INITIAL

  die "$0: _tree_construction_initial: This should be never reached";
} # _tree_construction_initial
1113
## Tree construction stage: the "before html" insertion mode.
##
## Works on the current token (the file-scoped |$token|) and repeats
## until the root |html| element has been created:
##
##   - DOCTYPE token: parse error, token ignored, mode kept.
##   - Comment token: appended to the document, mode kept.
##   - Character token: leading white space is dropped; if nothing is
##     left the next token is fetched and the mode is kept, otherwise
##     the "anything else" steps below are run.
##   - |html| start tag: the root element is created from the token,
##     the application cache selection callback is invoked with the
##     |manifest| attribute value (or |undef| if there is none), the
##     next token is fetched and the method returns.
##   - Anything else (other start tags, end tags, end of file, non-space
##     characters): an |html| element without attributes is created, the
##     application cache selection callback is invoked once with
##     |undef|, and the method returns with the token left for
##     reprocessing.
##
## In both returning cases the new element is appended to the document
## and pushed onto |$self->{open_elements}|.  Dies on an unknown token
## type.
sub _tree_construction_root_element ($) {
  my $self = shift;

  ## NOTE: "before html" insertion mode.

  B: {
    if ($token->{type} == DOCTYPE_TOKEN) {
      !!!cp ('t19');
      !!!parse-error (type => 'in html:#DOCTYPE', token => $token);
      ## Ignore the token
      ## Stay in the insertion mode.
      !!!next-token;
      redo B;
    } elsif ($token->{type} == COMMENT_TOKEN) {
      !!!cp ('t20');
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);
      ## Stay in the insertion mode.
      !!!next-token;
      redo B;
    } elsif ($token->{type} == CHARACTER_TOKEN) {
      if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
        ## Ignore the token.

        unless (length $token->{data}) {
          !!!cp ('t21');
          ## Stay in the insertion mode.
          !!!next-token;
          redo B;
        } else {
          !!!cp ('t22');
        }
      } else {
        !!!cp ('t23');
      }

      ## NOTE: The application cache selection callback is invoked by
      ## the common "anything else" steps at the end of this block, so
      ## it must not be invoked here as well.

      #
    } elsif ($token->{type} == START_TAG_TOKEN) {
      if ($token->{tag_name} eq 'html') {
        my $root_element;
        !!!create-element ($root_element, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
        $self->{document}->append_child ($root_element);
        push @{$self->{open_elements}},
            [$root_element, $el_category->{html}];

        if ($token->{attributes}->{manifest}) {
          !!!cp ('t24');
          $self->{application_cache_selection}
              ->($token->{attributes}->{manifest}->{value});
          ## ISSUE: Spec is unclear on relative references.
          ## According to Hixie (#whatwg 2008-03-19), it should be
          ## resolved against the base URI of the document in HTML
          ## or xml:base of the element in XHTML.
        } else {
          !!!cp ('t25');
          $self->{application_cache_selection}->(undef);
        }

        !!!nack ('t25c');

        !!!next-token;
        return; ## Go to the "before head" insertion mode.
      } else {
        !!!cp ('t25.1');
        #
      }
    } elsif ({
              END_TAG_TOKEN, 1,
              END_OF_FILE_TOKEN, 1,
             }->{$token->{type}}) {
      !!!cp ('t26');
      #
    } else {
      die "$0: $token->{type}: Unknown token type";
    }

    ## "Anything else": create the root element implicitly.
    my $root_element;
    !!!create-element ($root_element, $HTML_NS, 'html',, $token);
    $self->{document}->append_child ($root_element);
    push @{$self->{open_elements}}, [$root_element, $el_category->{html}];

    $self->{application_cache_selection}->(undef);

    ## NOTE: Reprocess the token.
    !!!ack-later;
    return; ## Go to the "before head" insertion mode.
  } # B

  die "$0: _tree_construction_root_element: This should never be reached";
} # _tree_construction_root_element
1206
## Reset the insertion mode appropriately.
##
## Walks |$self->{open_elements}| from the last entry towards the first
## one and sets |$self->{insertion_mode}| according to the first entry
## that determines a mode:
##
##   - a foreign element selects "in body" combined with "in foreign
##     content";
##   - a table cell selects "in cell", unless it is the last node to be
##     examined;
##   - |select|, |tr|, |tbody|, |thead|, |tfoot|, |caption|, |colgroup|,
##     |table|, |head|, |body| and |frameset| select the mode listed in
##     the table below;
##   - the |html| element selects "before head" if no |head| element has
##     been recorded in |$self->{head_element}|, "after head" otherwise;
##   - if the last node has been examined without a decision, "in body"
##     is selected.
##
## When the first entry of the stack is reached, |$self->{inner_html_node}|
## is examined in its place; the method dies if that is not defined.
sub _reset_insertion_mode ($) {
  my $self = shift;

  ## Step 1
  my $last;

  ## Step 2
  my $i = -1;
  my $node = $self->{open_elements}->[$i];

  ## Step 3
  S3: {
    if ($self->{open_elements}->[0]->[0] eq $node->[0]) {
      $last = 1;
      if (defined $self->{inner_html_node}) {
        !!!cp ('t28');
        $node = $self->{inner_html_node};
      } else {
        die "_reset_insertion_mode: t27";
      }
    }

    ## Step 4..14
    my $new_mode;
    if ($node->[1] & FOREIGN_EL) {
      !!!cp ('t28.1');
      ## NOTE: Strictly speaking, the line below only applies to MathML and
      ## SVG elements.  Currently the HTML syntax supports only MathML and
      ## SVG elements as foreigners.
      $new_mode = IN_BODY_IM | IN_FOREIGN_CONTENT_IM;
    } elsif ($node->[1] == TABLE_CELL_EL) {
      if ($last) {
        !!!cp ('t28.2');
        #
      } else {
        !!!cp ('t28.3');
        $new_mode = IN_CELL_IM;
      }
    } else {
      !!!cp ('t28.4');
      $new_mode = {
        select => IN_SELECT_IM,
        ## NOTE: |option| and |optgroup| do not set
        ## insertion mode to "in select" by themselves.
        tr => IN_ROW_IM,
        tbody => IN_TABLE_BODY_IM,
        thead => IN_TABLE_BODY_IM,
        tfoot => IN_TABLE_BODY_IM,
        caption => IN_CAPTION_IM,
        colgroup => IN_COLUMN_GROUP_IM,
        table => IN_TABLE_IM,
        head => IN_BODY_IM, # not in head!
        body => IN_BODY_IM,
        frameset => IN_FRAMESET_IM,
      }->{$node->[0]->manakai_local_name};
    }
    ## A mode has been chosen: record it and stop.  This is written as
    ## an explicit block so that returning does not depend on the
    ## numeric value of the mode constant being true.
    if (defined $new_mode) {
      $self->{insertion_mode} = $new_mode;
      return;
    }

    ## Step 15
    if ($node->[1] == HTML_EL) {
      unless (defined $self->{head_element}) {
        !!!cp ('t29');
        $self->{insertion_mode} = BEFORE_HEAD_IM;
      } else {
        ## ISSUE: Can this state be reached?
        !!!cp ('t30');
        $self->{insertion_mode} = AFTER_HEAD_IM;
      }
      return;
    } else {
      !!!cp ('t31');
    }

    ## Step 16
    if ($last) {
      $self->{insertion_mode} = IN_BODY_IM;
      return;
    }

    ## Step 17
    $i--;
    $node = $self->{open_elements}->[$i];

    ## Step 18
    redo S3;
  } # S3

  die "$0: _reset_insertion_mode: This line should never be reached";
} # _reset_insertion_mode
1293
1294 sub _tree_construction_main ($) {
1295 my $self = shift;
1296
1297 my $active_formatting_elements = [];
1298
## Reconstruct the active formatting elements.
##
## The only argument is a code reference that is called with each newly
## cloned element node in order to insert it into the tree.
##
## Nothing happens when the list of active formatting elements is empty,
## or when its last entry is a marker or is still in the stack of open
## elements.  Otherwise the list is rewound to just after the nearest
## marker or still-open entry (or to its first entry), and from there
## to the end of the list each entry is shallowly cloned, inserted via
## the callback, pushed onto the stack of open elements and replaced in
## the list by the new stack entry.
my $reconstruct_active_formatting_elements = sub { # MUST
  my $insert = shift;

  ## Step 1
  return unless @$active_formatting_elements;

  ## Step 3
  my $pos = -1;
  my $entry = $active_formatting_elements->[$pos];

  ## Step 2
  return if $entry->[0] eq '#marker';
  for my $open (@{$self->{open_elements}}) {
    if ($entry->[0] eq $open->[0]) {
      !!!cp ('t32');
      return;
    }
  }

  S4: {
    ## Step 4
    last S4 if $active_formatting_elements->[0]->[0] eq $entry->[0];

    ## Step 5
    $pos--;
    $entry = $active_formatting_elements->[$pos];

    ## Step 6
    if ($entry->[0] eq '#marker') {
      !!!cp ('t33_1');
      #
    } else {
      my $still_open;
      OE: for my $open (@{$self->{open_elements}}) {
        if ($entry->[0] eq $open->[0]) {
          !!!cp ('t33');
          $still_open = 1;
          last OE;
        }
      }
      if ($still_open) {
        !!!cp ('t34');
        #
      } else {
        ## NOTE: <!DOCTYPE HTML><p><b><i><u></p> <p>X
        !!!cp ('t35');
        redo S4;
      }
    }

    ## Step 7
    $pos++;
    $entry = $active_formatting_elements->[$pos];
  } # S4

  S7: {
    ## Step 8
    my $clone = [$entry->[0]->clone_node (0), $entry->[1]];

    ## Step 9
    $insert->($clone->[0]);
    push @{$self->{open_elements}}, $clone;

    ## Step 10
    $active_formatting_elements->[$pos] = $self->{open_elements}->[-1];

    ## Step 11
    unless ($clone->[0] eq $active_formatting_elements->[-1]->[0]) {
      !!!cp ('t36');
      ## Step 7'
      $pos++;
      $entry = $active_formatting_elements->[$pos];

      redo S7;
    }

    !!!cp ('t37');
  } # S7
}; # $reconstruct_active_formatting_elements
1378
## Clear the list of active formatting elements up to the last marker:
## the last |#marker| entry and everything after it are removed.  The
## list is left untouched when it contains no marker.
my $clear_up_to_marker = sub {
  my $pos = $#$active_formatting_elements;
  while ($pos >= 0) {
    if ($active_formatting_elements->[$pos]->[0] eq '#marker') {
      !!!cp ('t38');
      splice @$active_formatting_elements, $pos;
      return;
    }
    $pos--;
  }

  !!!cp ('t39');
}; # $clear_up_to_marker
1390
1391 my $insert;
1392
## Generic CDATA/RCDATA element parsing, run for the current start tag
## token (the file-scoped |$token|).
##
## The only argument is the content model flag to switch to.  An
## element is inserted for the token, the content model and the
## insertion mode are updated, and the next token is fetched.
my $parse_rcdata = sub ($) {
  my $content_model_flag = shift; # CDATA or RCDATA

  ## Step 1
  my $start_tag_name = $token->{tag_name};
  !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);

  ## Step 2
  $self->{content_model} = $content_model_flag;
  delete $self->{escape}; # MUST

  ## Step 3, 4
  $self->{insertion_mode} |= IN_CDATA_RCDATA_IM;

  !!!nack ('t40.1');
  !!!next-token;
}; # $parse_rcdata
1410
## Handle a |script| start tag held in the file-scoped |$token|.
##
## An HTML |script| element is created from the token, inserted through
## the current |$insert| callback and pushed onto the stack of open
## elements; the content model is then switched to CDATA, the
## CDATA/RCDATA bit is set in the insertion mode, and the next token is
## fetched.
my $script_start_tag = sub () {
  ## Step 1
  my $script_el;
  !!!create-element ($script_el, $HTML_NS, 'script', $token->{attributes}, $token);

  ## Step 2
  ## TODO: mark as "parser-inserted"

  ## Step 3
  ## TODO: Mark as "already executed", if ...

  ## Step 4 (HTML5 revision 2702)
  $insert->($script_el);
  my $stack_entry = [$script_el, $el_category->{script}];
  push @{$self->{open_elements}}, $stack_entry;

  ## Step 5
  $self->{content_model} = CDATA_CONTENT_MODEL;
  delete $self->{escape}; # MUST

  ## Step 6-7
  $self->{insertion_mode} |= IN_CDATA_RCDATA_IM;

  !!!nack ('t40.2');
  !!!next-token;
}; # $script_start_tag
1436
1437 ## NOTE: $open_tables->[-1]->[0] is the "current table" element node.
1438 ## NOTE: $open_tables->[-1]->[1] is the "tainted" flag.
1439 ## NOTE: $open_tables->[-1]->[2] is set false when non-Text node inserted.
1440 my $open_tables = [[$self->{open_elements}->[0]->[0]]];
1441
## Handle an end tag for a formatting element.
##
## The only argument is the end tag token.  The closure runs the
## adoption agency algorithm for the token's tag name and repeats it
## (Step 14) until one of the following ends it, each of which fetches
## the next token before returning:
##
##   - no entry with that tag name is found in the list of active
##     formatting elements after its last marker (parse error);
##   - the entry is in the stack of open elements but not in scope
##     (parse error);
##   - the entry is not in the stack of open elements at all (parse
##     error; the entry is removed from the list);
##   - there is no furthest block: the stack is truncated at the
##     formatting element and its entry is removed from the list.
##
## Otherwise the nodes between the formatting element and the furthest
## block are re-parented as described in the numbered steps below.
my $formatting_end_tag = sub {
  my $end_tag_token = shift;
  my $tag_name = $end_tag_token->{tag_name};

  ## NOTE: The adoption agency algorithm (AAA).

  FET: {
    ## Step 1
    my $formatting_element;
    my $formatting_element_i_in_active;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq '#marker') {
        !!!cp ('t52');
        last AFE;
      } elsif ($active_formatting_elements->[$_]->[0]->manakai_local_name
                   eq $tag_name) {
        !!!cp ('t51');
        $formatting_element = $active_formatting_elements->[$_];
        $formatting_element_i_in_active = $_;
        last AFE;
      }
    } # AFE
    unless (defined $formatting_element) {
      !!!cp ('t53');
      !!!parse-error (type => 'unmatched end tag', text => $tag_name, token => $end_tag_token);
      ## Ignore the token
      !!!next-token;
      return;
    }
    ## has an element in scope
    my $in_scope = 1;
    my $formatting_element_i_in_open;
    INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if ($node->[0] eq $formatting_element->[0]) {
        if ($in_scope) {
          !!!cp ('t54');
          $formatting_element_i_in_open = $_;
          last INSCOPE;
        } else { # in open elements but not in scope
          !!!cp ('t55');
          !!!parse-error (type => 'unmatched end tag',
                          text => $token->{tag_name},
                          token => $end_tag_token);
          ## Ignore the token
          !!!next-token;
          return;
        }
      } elsif ($node->[1] & SCOPING_EL) {
        !!!cp ('t56');
        $in_scope = 0;
      }
    } # INSCOPE
    unless (defined $formatting_element_i_in_open) {
      !!!cp ('t57');
      !!!parse-error (type => 'unmatched end tag',
                      text => $token->{tag_name},
                      token => $end_tag_token);
      ## Remove exactly the matched entry from the list; it is not
      ## necessarily the last entry of the list.
      splice @$active_formatting_elements,
          $formatting_element_i_in_active, 1; # $formatting_element
      !!!next-token; ## TODO: ok?
      return;
    }
    if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
      !!!cp ('t58');
      !!!parse-error (type => 'not closed',
                      text => $self->{open_elements}->[-1]->[0]
                          ->manakai_local_name,
                      token => $end_tag_token);
    }

    ## Step 2
    my $furthest_block;
    my $furthest_block_i_in_open;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if (not ($node->[1] & FORMATTING_EL) and
          #not $phrasing_category->{$node->[1]} and
          ($node->[1] & SPECIAL_EL or
           $node->[1] & SCOPING_EL)) { ## Scoping is redundant, maybe
        !!!cp ('t59');
        $furthest_block = $node;
        $furthest_block_i_in_open = $_;
        ## NOTE: The topmost (eldest) node.
      } elsif ($node->[0] eq $formatting_element->[0]) {
        !!!cp ('t60');
        last OE;
      }
    } # OE

    ## Step 3
    unless (defined $furthest_block) { # MUST
      !!!cp ('t61');
      splice @{$self->{open_elements}}, $formatting_element_i_in_open;
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      !!!next-token;
      return;
    }

    ## Step 4
    my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1];

    ## Step 5
    my $furthest_block_parent = $furthest_block->[0]->parent_node;
    if (defined $furthest_block_parent) {
      !!!cp ('t62');
      $furthest_block_parent->remove_child ($furthest_block->[0]);
    }

    ## Step 6
    my $bookmark_prev_el
        = $active_formatting_elements->[$formatting_element_i_in_active - 1]
            ->[0];

    ## Step 7
    my $node = $furthest_block;
    my $node_i_in_open = $furthest_block_i_in_open;
    my $last_node = $furthest_block;
    S7: {
      ## Step 1
      $node_i_in_open--;
      $node = $self->{open_elements}->[$node_i_in_open];

      ## Step 2
      ## A node that is not in the list of active formatting elements is
      ## dropped from the stack of open elements and the loop restarts.
      my $node_i_in_active;
      S7S2: {
        for (reverse 0..$#$active_formatting_elements) {
          if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
            !!!cp ('t63');
            $node_i_in_active = $_;
            last S7S2;
          }
        }
        splice @{$self->{open_elements}}, $node_i_in_open, 1;
        redo S7;
      } # S7S2

      ## Step 3
      last S7 if $node->[0] eq $formatting_element->[0];

      ## Step 4
      if ($last_node->[0] eq $furthest_block->[0]) {
        !!!cp ('t64');
        $bookmark_prev_el = $node->[0];
      }

      ## Step 5
      if ($node->[0]->has_child_nodes ()) {
        !!!cp ('t65');
        my $clone = [$node->[0]->clone_node (0), $node->[1]];
        $active_formatting_elements->[$node_i_in_active] = $clone;
        $self->{open_elements}->[$node_i_in_open] = $clone;
        $node = $clone;
      }

      ## Step 6
      $node->[0]->append_child ($last_node->[0]);

      ## Step 7
      $last_node = $node;

      ## Step 8
      redo S7;
    } # S7

    ## Step 8
    if ($common_ancestor_node->[1] & TABLE_ROWS_EL) {
      ## Foster parenting: insert before the nearest open |table| if its
      ## parent is an element, otherwise append to the stack entry just
      ## below that |table|.
      my $foster_parent_element;
      my $next_sibling;
      OE: for (reverse 0..$#{$self->{open_elements}}) {
        if ($self->{open_elements}->[$_]->[1] == TABLE_EL) {
          my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
          if (defined $parent and $parent->node_type == 1) {
            !!!cp ('t65.1');
            $foster_parent_element = $parent;
            $next_sibling = $self->{open_elements}->[$_]->[0];
          } else {
            !!!cp ('t65.2');
            $foster_parent_element
                = $self->{open_elements}->[$_ - 1]->[0];
          }
          last OE;
        }
      } # OE
      $foster_parent_element = $self->{open_elements}->[0]->[0]
          unless defined $foster_parent_element;
      $foster_parent_element->insert_before ($last_node->[0], $next_sibling);
      $open_tables->[-1]->[1] = 1; # tainted
    } else {
      !!!cp ('t65.3');
      $common_ancestor_node->[0]->append_child ($last_node->[0]);
    }

    ## Step 9
    my $clone = [$formatting_element->[0]->clone_node (0),
                 $formatting_element->[1]];

    ## Step 10
    my @cn = @{$furthest_block->[0]->child_nodes};
    $clone->[0]->append_child ($_) for @cn;

    ## Step 11
    $furthest_block->[0]->append_child ($clone->[0]);

    ## Step 12
    my $i;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
        !!!cp ('t66');
        splice @$active_formatting_elements, $_, 1;
        $i-- and last AFE if defined $i;
      } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
        !!!cp ('t67');
        $i = $_;
      }
    } # AFE
    splice @$active_formatting_elements, $i + 1, 0, $clone;

    ## Step 13
    undef $i;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
        !!!cp ('t68');
        splice @{$self->{open_elements}}, $_, 1;
        $i-- and last OE if defined $i;
      } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
        !!!cp ('t69');
        $i = $_;
      }
    } # OE
    splice @{$self->{open_elements}}, $i + 1, 0, $clone;

    ## Step 14
    redo FET;
  } # FET
}; # $formatting_end_tag
1677
## Default insertion callback: append the given node to the current
## node, i.e. the last entry of the stack of open elements.
$insert = my $insert_to_current = sub {
  my $current_node = $self->{open_elements}->[-1]->[0];
  $current_node->append_child ($_[0]);
}; # $insert_to_current
1681
## Insertion callback with foster parenting.
##
## If the current node is not a table-row-like element (|TABLE_ROWS_EL|),
## the child is simply appended to it.  Otherwise the child is inserted
## before the nearest open |table| element when that element's parent is
## an element node, or appended to the stack entry just below that
## |table| when it is not; with no |table| in the stack the child is
## appended to the first entry of the stack.  In the foster-parenting
## case the current table is flagged as tainted.
my $insert_to_foster = sub {
  my $child = shift;

  unless ($self->{open_elements}->[-1]->[1] & TABLE_ROWS_EL) {
    !!!cp ('t72');
    return $self->{open_elements}->[-1]->[0]->append_child ($child);
  }

  # MUST
  my ($foster_parent_element, $next_sibling);
  for my $pos (reverse 0..$#{$self->{open_elements}}) {
    my $open = $self->{open_elements}->[$pos];
    next unless $open->[1] == TABLE_EL;

    my $parent = $open->[0]->parent_node;
    if (defined $parent and $parent->node_type == 1) {
      !!!cp ('t70');
      $foster_parent_element = $parent;
      $next_sibling = $open->[0];
    } else {
      !!!cp ('t71');
      $foster_parent_element = $self->{open_elements}->[$pos - 1]->[0];
    }
    last;
  }
  $foster_parent_element = $self->{open_elements}->[0]->[0]
      unless defined $foster_parent_element;
  $foster_parent_element->insert_before ($child, $next_sibling);
  $open_tables->[-1]->[1] = 1; # tainted
}; # $insert_to_foster
1713
1714 ## NOTE: Insert a character (MUST): When a character is inserted, if
1715 ## the last node that was inserted by the parser is a Text node and
1716 ## the character has to be inserted after that node, then the
1717 ## character is appended to the Text node. However, if any other
1718 ## node is inserted by the parser, then a new Text node is created
1719 ## and the character is appended as that Text node. If I'm not
1720 ## wrong, for a parser with scripting disabled, there are only two
1721 ## cases where this occurs. One is the case where an element node
1722 ## is inserted to the |head| element. This is covered by using the
1723 ## |$self->{head_element_inserted}| flag. Another is the case where
1724 ## an element or comment is inserted into the |table| subtree while
1725 ## foster parenting happens. This is covered by using the [2] flag
1726 ## of the |$open_tables| structure. All other cases are handled
1727 ## simply by calling |manakai_append_text| method.
1728
1729 ## TODO: |<body><script>document.write("a<br>");
1730 ## document.body.removeChild (document.body.lastChild);
1731 ## document.write ("b")</script>|
1732
1733 B: while (1) {
1734 if ($token->{type} == DOCTYPE_TOKEN) {
1735 !!!cp ('t73');
1736 !!!parse-error (type => 'in html:#DOCTYPE', token => $token);
1737 ## Ignore the token
1738 ## Stay in the phase
1739 !!!next-token;
1740 next B;
1741 } elsif ($token->{type} == START_TAG_TOKEN and
1742 $token->{tag_name} eq 'html') {
1743 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
1744 !!!cp ('t79');
1745 !!!parse-error (type => 'after html', text => 'html', token => $token);
1746 $self->{insertion_mode} = AFTER_BODY_IM;
1747 } elsif ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
1748 !!!cp ('t80');
1749 !!!parse-error (type => 'after html', text => 'html', token => $token);
1750 $self->{insertion_mode} = AFTER_FRAMESET_IM;
1751 } else {
1752 !!!cp ('t81');
1753 }
1754
1755 !!!cp ('t82');
1756 !!!parse-error (type => 'not first start tag', token => $token);
1757 my $top_el = $self->{open_elements}->[0]->[0];
1758 for my $attr_name (keys %{$token->{attributes}}) {
1759 unless ($top_el->has_attribute_ns (undef, $attr_name)) {
1760 !!!cp ('t84');
1761 $top_el->set_attribute_ns
1762 (undef, [undef, $attr_name],
1763 $token->{attributes}->{$attr_name}->{value});
1764 }
1765 }
1766 !!!nack ('t84.1');
1767 !!!next-token;
1768 next B;
1769 } elsif ($token->{type} == COMMENT_TOKEN) {
1770 my $comment = $self->{document}->create_comment ($token->{data});
1771 if ($self->{insertion_mode} & AFTER_HTML_IMS) {
1772 !!!cp ('t85');
1773 $self->{document}->append_child ($comment);
1774 } elsif ($self->{insertion_mode} == AFTER_BODY_IM) {
1775 !!!cp ('t86');
1776 $self->{open_elements}->[0]->[0]->append_child ($comment);
1777 } else {
1778 !!!cp ('t87');
1779 $self->{open_elements}->[-1]->[0]->append_child ($comment);
1780 $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
1781 }
1782 !!!next-token;
1783 next B;
1784 } elsif ($self->{insertion_mode} & IN_CDATA_RCDATA_IM) {
1785 if ($token->{type} == CHARACTER_TOKEN) {
1786 $token->{data} =~ s/^\x0A// if $self->{ignore_newline};
1787 delete $self->{ignore_newline};
1788
1789 if (length $token->{data}) {
1790 !!!cp ('t43');
1791 $self->{open_elements}->[-1]->[0]->manakai_append_text
1792 ($token->{data});
1793 } else {
1794 !!!cp ('t43.1');
1795 }
1796 !!!next-token;
1797 next B;
1798 } elsif ($token->{type} == END_TAG_TOKEN) {
1799 delete $self->{ignore_newline};
1800
1801 if ($token->{tag_name} eq 'script') {
1802 !!!cp ('t50');
1803
1804 ## Para 1-2
1805 my $script = pop @{$self->{open_elements}};
1806
1807 ## Para 3
1808 $self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM;
1809
1810 ## Para 4
1811 ## TODO: $old_insertion_point = $current_insertion_point;
1812 ## TODO: $current_insertion_point = just before $self->{nc};
1813
1814 ## Para 5
1815 ## TODO: Run the $script->[0].
1816
1817 ## Para 6
1818 ## TODO: $current_insertion_point = $old_insertion_point;
1819
1820 ## Para 7
1821 ## TODO: if ($pending_external_script) {
1822 ## TODO: ...
1823 ## TODO: }
1824
1825 !!!next-token;
1826 next B;
1827 } else {
1828 !!!cp ('t42');
1829
1830 pop @{$self->{open_elements}};
1831
1832 $self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM;
1833 !!!next-token;
1834 next B;
1835 }
1836 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
1837 delete $self->{ignore_newline};
1838
1839 !!!cp ('t44');
1840 !!!parse-error (type => 'not closed',
1841 text => $self->{open_elements}->[-1]->[0]
1842 ->manakai_local_name,
1843 token => $token);
1844
1845 #if ($self->{open_elements}->[-1]->[1] == SCRIPT_EL) {
1846 # ## TODO: Mark as "already executed"
1847 #}
1848
1849 pop @{$self->{open_elements}};
1850
1851 $self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM;
1852 ## Reprocess.
1853 next B;
1854 } else {
1855 die "$0: $token->{type}: In CDATA/RCDATA: Unknown token type";
1856 }
1857 } elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) {
1858 if ($token->{type} == CHARACTER_TOKEN) {
1859 !!!cp ('t87.1');
1860 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
1861 !!!next-token;
1862 next B;
1863 } elsif ($token->{type} == START_TAG_TOKEN) {
1864 if ((not {mglyph => 1, malignmark => 1}->{$token->{tag_name}} and
1865 $self->{open_elements}->[-1]->[1] & FOREIGN_FLOW_CONTENT_EL) or
1866 not ($self->{open_elements}->[-1]->[1] & FOREIGN_EL) or
1867 ($token->{tag_name} eq 'svg' and
1868 $self->{open_elements}->[-1]->[1] == MML_AXML_EL)) {
1869 ## NOTE: "using the rules for secondary insertion mode"then"continue"
1870 !!!cp ('t87.2');
1871 #
1872 } elsif ({
1873 b => 1, big => 1, blockquote => 1, body => 1, br => 1,
1874 center => 1, code => 1, dd => 1, div => 1, dl => 1, dt => 1,
1875 em => 1, embed => 1, h1 => 1, h2 => 1, h3 => 1,
1876 h4 => 1, h5 => 1, h6 => 1, head => 1, hr => 1, i => 1,
1877 img => 1, li => 1, listing => 1, menu => 1, meta => 1,
1878 nobr => 1, ol => 1, p => 1, pre => 1, ruby => 1, s => 1,
1879 small => 1, span => 1, strong => 1, strike => 1, sub => 1,
1880 sup => 1, table => 1, tt => 1, u => 1, ul => 1, var => 1,
1881 }->{$token->{tag_name}} or
1882 ($token->{tag_name} eq 'font' and
1883 ($token->{attributes}->{color} or
1884 $token->{attributes}->{face} or
1885 $token->{attributes}->{size}))) {
1886 !!!cp ('t87.2');
1887 !!!parse-error (type => 'not closed',
1888 text => $self->{open_elements}->[-1]->[0]
1889 ->manakai_local_name,
1890 token => $token);
1891
1892 pop @{$self->{open_elements}}
1893 while $self->{open_elements}->[-1]->[1] & FOREIGN_EL;
1894
1895 $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
1896 ## Reprocess.
1897 next B;
1898 } else {
1899 my $nsuri = $self->{open_elements}->[-1]->[0]->namespace_uri;
1900 my $tag_name = $token->{tag_name};
1901 if ($nsuri eq $SVG_NS) {
1902 $tag_name = {
1903 altglyph => 'altGlyph',
1904 altglyphdef => 'altGlyphDef',
1905 altglyphitem => 'altGlyphItem',
1906 animatecolor => 'animateColor',
1907 animatemotion => 'animateMotion',
1908 animatetransform => 'animateTransform',
1909 clippath => 'clipPath',
1910 feblend => 'feBlend',
1911 fecolormatrix => 'feColorMatrix',
1912 fecomponenttransfer => 'feComponentTransfer',
1913 fecomposite => 'feComposite',
1914 feconvolvematrix => 'feConvolveMatrix',
1915 fediffuselighting => 'feDiffuseLighting',
1916 fedisplacementmap => 'feDisplacementMap',
1917 fedistantlight => 'feDistantLight',
1918 feflood => 'feFlood',
1919 fefunca => 'feFuncA',
1920 fefuncb => 'feFuncB',
1921 fefuncg => 'feFuncG',
1922 fefuncr => 'feFuncR',
1923 fegaussianblur => 'feGaussianBlur',
1924 feimage => 'feImage',
1925 femerge => 'feMerge',
1926 femergenode => 'feMergeNode',
1927 femorphology => 'feMorphology',
1928 feoffset => 'feOffset',
1929 fepointlight => 'fePointLight',
1930 fespecularlighting => 'feSpecularLighting',
1931 fespotlight => 'feSpotLight',
1932 fetile => 'feTile',
1933 feturbulence => 'feTurbulence',
1934 foreignobject => 'foreignObject',
1935 glyphref => 'glyphRef',
1936 lineargradient => 'linearGradient',
1937 radialgradient => 'radialGradient',
1938 #solidcolor => 'solidColor', ## NOTE: Commented in spec (SVG1.2)
1939 textpath => 'textPath',
1940 }->{$tag_name} || $tag_name;
1941 }
1942
1943 ## "adjust SVG attributes" (SVG only) - done in insert-element-f
1944
1945 ## "adjust foreign attributes" - done in insert-element-f
1946
1947 !!!insert-element-f ($nsuri, $tag_name, $token->{attributes}, $token);
1948
1949 if ($self->{self_closing}) {
1950 pop @{$self->{open_elements}};
1951 !!!ack ('t87.3');
1952 } else {
1953 !!!cp ('t87.4');
1954 }
1955
1956 !!!next-token;
1957 next B;
1958 }
1959 } elsif ($token->{type} == END_TAG_TOKEN) {
1960 ## NOTE: "using the rules for secondary insertion mode" then "continue"
1961 if ($token->{tag_name} eq 'script') {
1962 !!!cp ('t87.41');
1963 #
1964 ## XXXscript: Execute script here.
1965 } else {
1966 !!!cp ('t87.5');
1967 #
1968 }
1969 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
1970 !!!cp ('t87.6');
1971 !!!parse-error (type => 'not closed',
1972 text => $self->{open_elements}->[-1]->[0]
1973 ->manakai_local_name,
1974 token => $token);
1975
1976 pop @{$self->{open_elements}}
1977 while $self->{open_elements}->[-1]->[1] & FOREIGN_EL;
1978
1979 ## NOTE: |<span><svg>| ... two parse errors, |<svg>| ... a parse error.
1980
1981 $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
1982 ## Reprocess.
1983 next B;
1984 } else {
1985 die "$0: $token->{type}: Unknown token type";
1986 }
1987 }
1988
1989 if ($self->{insertion_mode} & HEAD_IMS) {
1990 if ($token->{type} == CHARACTER_TOKEN) {
1991 if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
1992 unless ($self->{insertion_mode} == BEFORE_HEAD_IM) {
1993 if ($self->{head_element_inserted}) {
1994 !!!cp ('t88.3');
1995 $self->{open_elements}->[-1]->[0]->append_child
1996 ($self->{document}->create_text_node ($1));
1997 delete $self->{head_element_inserted};
1998 ## NOTE: |</head> <link> |
1999 #
2000 } else {
2001 !!!cp ('t88.2');
2002 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
2003 ## NOTE: |</head> &#x20;|
2004 #
2005 }
2006 } else {
2007 !!!cp ('t88.1');
2008 ## Ignore the token.
2009 #
2010 }
2011 unless (length $token->{data}) {
2012 !!!cp ('t88');
2013 !!!next-token;
2014 next B;
2015 }
2016 ## TODO: set $token->{column} appropriately
2017 }
2018
2019 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2020 !!!cp ('t89');
2021 ## As if <head>
2022 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2023 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2024 push @{$self->{open_elements}},
2025 [$self->{head_element}, $el_category->{head}];
2026
2027 ## Reprocess in the "in head" insertion mode...
2028 pop @{$self->{open_elements}};
2029
2030 ## Reprocess in the "after head" insertion mode...
2031 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2032 !!!cp ('t90');
2033 ## As if </noscript>
2034 pop @{$self->{open_elements}};
2035 !!!parse-error (type => 'in noscript:#text', token => $token);
2036
2037 ## Reprocess in the "in head" insertion mode...
2038 ## As if </head>
2039 pop @{$self->{open_elements}};
2040
2041 ## Reprocess in the "after head" insertion mode...
2042 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2043 !!!cp ('t91');
2044 pop @{$self->{open_elements}};
2045
2046 ## Reprocess in the "after head" insertion mode...
2047 } else {
2048 !!!cp ('t92');
2049 }
2050
2051 ## "after head" insertion mode
2052 ## As if <body>
2053 !!!insert-element ('body',, $token);
2054 $self->{insertion_mode} = IN_BODY_IM;
2055 ## reprocess
2056 next B;
2057 } elsif ($token->{type} == START_TAG_TOKEN) {
2058 if ($token->{tag_name} eq 'head') {
2059 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2060 !!!cp ('t93');
2061 !!!create-element ($self->{head_element}, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
2062 $self->{open_elements}->[-1]->[0]->append_child
2063 ($self->{head_element});
2064 push @{$self->{open_elements}},
2065 [$self->{head_element}, $el_category->{head}];
2066 $self->{insertion_mode} = IN_HEAD_IM;
2067 !!!nack ('t93.1');
2068 !!!next-token;
2069 next B;
2070 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2071 !!!cp ('t93.2');
2072 !!!parse-error (type => 'after head', text => 'head',
2073 token => $token);
2074 ## Ignore the token
2075 !!!nack ('t93.3');
2076 !!!next-token;
2077 next B;
2078 } else {
2079 !!!cp ('t95');
2080 !!!parse-error (type => 'in head:head',
2081 token => $token); # or in head noscript
2082 ## Ignore the token
2083 !!!nack ('t95.1');
2084 !!!next-token;
2085 next B;
2086 }
2087 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2088 !!!cp ('t96');
2089 ## As if <head>
2090 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2091 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2092 push @{$self->{open_elements}},
2093 [$self->{head_element}, $el_category->{head}];
2094
2095 $self->{insertion_mode} = IN_HEAD_IM;
2096 ## Reprocess in the "in head" insertion mode...
2097 } else {
2098 !!!cp ('t97');
2099 }
2100
2101 if ($token->{tag_name} eq 'base') {
2102 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2103 !!!cp ('t98');
2104 ## As if </noscript>
2105 pop @{$self->{open_elements}};
2106 !!!parse-error (type => 'in noscript', text => 'base',
2107 token => $token);
2108
2109 $self->{insertion_mode} = IN_HEAD_IM;
2110 ## Reprocess in the "in head" insertion mode...
2111 } else {
2112 !!!cp ('t99');
2113 }
2114
2115 ## NOTE: There is a "as if in head" code clone.
2116 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
2117 !!!cp ('t100');
2118 !!!parse-error (type => 'after head',
2119 text => $token->{tag_name}, token => $token);
2120 push @{$self->{open_elements}},
2121 [$self->{head_element}, $el_category->{head}];
2122 $self->{head_element_inserted} = 1;
2123 } else {
2124 !!!cp ('t101');
2125 }
2126 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2127 pop @{$self->{open_elements}};
2128 pop @{$self->{open_elements}} # <head>
2129 if $self->{insertion_mode} == AFTER_HEAD_IM;
2130 !!!nack ('t101.1');
2131 !!!next-token;
2132 next B;
2133 } elsif ($token->{tag_name} eq 'link') {
2134 ## NOTE: There is a "as if in head" code clone.
2135 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
2136 !!!cp ('t102');
2137 !!!parse-error (type => 'after head',
2138 text => $token->{tag_name}, token => $token);
2139 push @{$self->{open_elements}},
2140 [$self->{head_element}, $el_category->{head}];
2141 $self->{head_element_inserted} = 1;
2142 } else {
2143 !!!cp ('t103');
2144 }
2145 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2146 pop @{$self->{open_elements}};
2147 pop @{$self->{open_elements}} # <head>
2148 if $self->{insertion_mode} == AFTER_HEAD_IM;
2149 !!!ack ('t103.1');
2150 !!!next-token;
2151 next B;
2152 } elsif ($token->{tag_name} eq 'command' or
2153 $token->{tag_name} eq 'eventsource') {
2154 if ($self->{insertion_mode} == IN_HEAD_IM) {
2155 ## NOTE: If the insertion mode at the time of the emission
2156 ## of the token was "before head", $self->{insertion_mode}
2157 ## is already changed to |IN_HEAD_IM|.
2158
2159 ## NOTE: There is a "as if in head" code clone.
2160 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2161 pop @{$self->{open_elements}};
2162 pop @{$self->{open_elements}} # <head>
2163 if $self->{insertion_mode} == AFTER_HEAD_IM;
2164 !!!ack ('t103.2');
2165 !!!next-token;
2166 next B;
2167 } else {
2168 ## NOTE: "in head noscript" or "after head" insertion mode
2169 ## - in these cases, these tags are treated the same as
2170 ## normal in-body tags.
2171 !!!cp ('t103.3');
2172 #
2173 }
2174 } elsif ($token->{tag_name} eq 'meta') {
2175 ## NOTE: There is a "as if in head" code clone.
2176 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
2177 !!!cp ('t104');
2178 !!!parse-error (type => 'after head',
2179 text => $token->{tag_name}, token => $token);
2180 push @{$self->{open_elements}},
2181 [$self->{head_element}, $el_category->{head}];
2182 $self->{head_element_inserted} = 1;
2183 } else {
2184 !!!cp ('t105');
2185 }
2186 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2187 my $meta_el = pop @{$self->{open_elements}};
2188
2189 unless ($self->{confident}) {
2190 if ($token->{attributes}->{charset}) {
2191 !!!cp ('t106');
2192 ## NOTE: Whether the encoding is supported or not is handled
2193 ## in the {change_encoding} callback.
2194 $self->{change_encoding}
2195 ->($self, $token->{attributes}->{charset}->{value},
2196 $token);
2197
2198 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
2199 ->set_user_data (manakai_has_reference =>
2200 $token->{attributes}->{charset}
2201 ->{has_reference});
2202 } elsif ($token->{attributes}->{content}) {
2203 if ($token->{attributes}->{content}->{value}
2204 =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
2205 [\x09\x0A\x0C\x0D\x20]*=
2206 [\x09\x0A\x0C\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
2207 ([^"'\x09\x0A\x0C\x0D\x20]
2208 [^\x09\x0A\x0C\x0D\x20\x3B]*))/x) {
2209 !!!cp ('t107');
2210 ## NOTE: Whether the encoding is supported or not is handled
2211 ## in the {change_encoding} callback.
2212 $self->{change_encoding}
2213 ->($self, defined $1 ? $1 : defined $2 ? $2 : $3,
2214 $token);
2215 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
2216 ->set_user_data (manakai_has_reference =>
2217 $token->{attributes}->{content}
2218 ->{has_reference});
2219 } else {
2220 !!!cp ('t108');
2221 }
2222 }
2223 } else {
2224 if ($token->{attributes}->{charset}) {
2225 !!!cp ('t109');
2226 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
2227 ->set_user_data (manakai_has_reference =>
2228 $token->{attributes}->{charset}
2229 ->{has_reference});
2230 }
2231 if ($token->{attributes}->{content}) {
2232 !!!cp ('t110');
2233 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
2234 ->set_user_data (manakai_has_reference =>
2235 $token->{attributes}->{content}
2236 ->{has_reference});
2237 }
2238 }
2239
2240 pop @{$self->{open_elements}} # <head>
2241 if $self->{insertion_mode} == AFTER_HEAD_IM;
2242 !!!ack ('t110.1');
2243 !!!next-token;
2244 next B;
2245 } elsif ($token->{tag_name} eq 'title') {
2246 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2247 !!!cp ('t111');
2248 ## As if </noscript>
2249 pop @{$self->{open_elements}};
2250 !!!parse-error (type => 'in noscript', text => 'title',
2251 token => $token);
2252
2253 $self->{insertion_mode} = IN_HEAD_IM;
2254 ## Reprocess in the "in head" insertion mode...
2255 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2256 !!!cp ('t112');
2257 !!!parse-error (type => 'after head',
2258 text => $token->{tag_name}, token => $token);
2259 push @{$self->{open_elements}},
2260 [$self->{head_element}, $el_category->{head}];
2261 $self->{head_element_inserted} = 1;
2262 } else {
2263 !!!cp ('t113');
2264 }
2265
2266 ## NOTE: There is a "as if in head" code clone.
2267 $parse_rcdata->(RCDATA_CONTENT_MODEL);
2268 ## ISSUE: A spec bug [Bug 6038]
2269 splice @{$self->{open_elements}}, -2, 1, () # <head>
2270 if ($self->{insertion_mode} & IM_MASK) == AFTER_HEAD_IM;
2271 next B;
2272 } elsif ($token->{tag_name} eq 'style' or
2273 $token->{tag_name} eq 'noframes') {
2274 ## NOTE: Or (scripting is enabled and tag_name eq 'noscript' and
2275 ## insertion mode IN_HEAD_IM)
2276 ## NOTE: There is a "as if in head" code clone.
2277 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
2278 !!!cp ('t114');
2279 !!!parse-error (type => 'after head',
2280 text => $token->{tag_name}, token => $token);
2281 push @{$self->{open_elements}},
2282 [$self->{head_element}, $el_category->{head}];
2283 $self->{head_element_inserted} = 1;
2284 } else {
2285 !!!cp ('t115');
2286 }
2287 $parse_rcdata->(CDATA_CONTENT_MODEL);
2288 ## ISSUE: A spec bug [Bug 6038]
2289 splice @{$self->{open_elements}}, -2, 1, () # <head>
2290 if ($self->{insertion_mode} & IM_MASK) == AFTER_HEAD_IM;
2291 next B;
2292 } elsif ($token->{tag_name} eq 'noscript') {
2293 if ($self->{insertion_mode} == IN_HEAD_IM) {
2294 !!!cp ('t116');
2295 ## NOTE: and scripting is disabled
2296 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2297 $self->{insertion_mode} = IN_HEAD_NOSCRIPT_IM;
2298 !!!nack ('t116.1');
2299 !!!next-token;
2300 next B;
2301 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2302 !!!cp ('t117');
2303 !!!parse-error (type => 'in noscript', text => 'noscript',
2304 token => $token);
2305 ## Ignore the token
2306 !!!nack ('t117.1');
2307 !!!next-token;
2308 next B;
2309 } else {
2310 !!!cp ('t118');
2311 #
2312 }
2313 } elsif ($token->{tag_name} eq 'script') {
2314 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2315 !!!cp ('t119');
2316 ## As if </noscript>
2317 pop @{$self->{open_elements}};
2318 !!!parse-error (type => 'in noscript', text => 'script',
2319 token => $token);
2320
2321 $self->{insertion_mode} = IN_HEAD_IM;
2322 ## Reprocess in the "in head" insertion mode...
2323 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2324 !!!cp ('t120');
2325 !!!parse-error (type => 'after head',
2326 text => $token->{tag_name}, token => $token);
2327 push @{$self->{open_elements}},
2328 [$self->{head_element}, $el_category->{head}];
2329 $self->{head_element_inserted} = 1;
2330 } else {
2331 !!!cp ('t121');
2332 }
2333
2334 ## NOTE: There is a "as if in head" code clone.
2335 $script_start_tag->();
2336 ## ISSUE: A spec bug [Bug 6038]
2337 splice @{$self->{open_elements}}, -2, 1 # <head>
2338 if ($self->{insertion_mode} & IM_MASK) == AFTER_HEAD_IM;
2339 next B;
2340 } elsif ($token->{tag_name} eq 'body' or
2341 $token->{tag_name} eq 'frameset') {
2342 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2343 !!!cp ('t122');
2344 ## As if </noscript>
2345 pop @{$self->{open_elements}};
2346 !!!parse-error (type => 'in noscript',
2347 text => $token->{tag_name}, token => $token);
2348
2349 ## Reprocess in the "in head" insertion mode...
2350 ## As if </head>
2351 pop @{$self->{open_elements}};
2352
2353 ## Reprocess in the "after head" insertion mode...
2354 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2355 !!!cp ('t124');
2356 pop @{$self->{open_elements}};
2357
2358 ## Reprocess in the "after head" insertion mode...
2359 } else {
2360 !!!cp ('t125');
2361 }
2362
2363 ## "after head" insertion mode
2364 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2365 if ($token->{tag_name} eq 'body') {
2366 !!!cp ('t126');
2367 $self->{insertion_mode} = IN_BODY_IM;
2368 } elsif ($token->{tag_name} eq 'frameset') {
2369 !!!cp ('t127');
2370 $self->{insertion_mode} = IN_FRAMESET_IM;
2371 } else {
2372 die "$0: tag name: $self->{tag_name}";
2373 }
2374 !!!nack ('t127.1');
2375 !!!next-token;
2376 next B;
2377 } else {
2378 !!!cp ('t128');
2379 #
2380 }
2381
2382 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2383 !!!cp ('t129');
2384 ## As if </noscript>
2385 pop @{$self->{open_elements}};
2386 !!!parse-error (type => 'in noscript:/',
2387 text => $token->{tag_name}, token => $token);
2388
2389 ## Reprocess in the "in head" insertion mode...
2390 ## As if </head>
2391 pop @{$self->{open_elements}};
2392
2393 ## Reprocess in the "after head" insertion mode...
2394 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2395 !!!cp ('t130');
2396 ## As if </head>
2397 pop @{$self->{open_elements}};
2398
2399 ## Reprocess in the "after head" insertion mode...
2400 } else {
2401 !!!cp ('t131');
2402 }
2403
2404 ## "after head" insertion mode
2405 ## As if <body>
2406 !!!insert-element ('body',, $token);
2407 $self->{insertion_mode} = IN_BODY_IM;
2408 ## reprocess
2409 !!!ack-later;
2410 next B;
2411 } elsif ($token->{type} == END_TAG_TOKEN) {
2412 if ($token->{tag_name} eq 'head') {
2413 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2414 !!!cp ('t132');
2415 ## As if <head>
2416 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2417 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2418 push @{$self->{open_elements}},
2419 [$self->{head_element}, $el_category->{head}];
2420
2421 ## Reprocess in the "in head" insertion mode...
2422 pop @{$self->{open_elements}};
2423 $self->{insertion_mode} = AFTER_HEAD_IM;
2424 !!!next-token;
2425 next B;
2426 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2427 !!!cp ('t133');
2428 ## As if </noscript>
2429 pop @{$self->{open_elements}};
2430 !!!parse-error (type => 'in noscript:/',
2431 text => 'head', token => $token);
2432
2433 ## Reprocess in the "in head" insertion mode...
2434 pop @{$self->{open_elements}};
2435 $self->{insertion_mode} = AFTER_HEAD_IM;
2436 !!!next-token;
2437 next B;
2438 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2439 !!!cp ('t134');
2440 pop @{$self->{open_elements}};
2441 $self->{insertion_mode} = AFTER_HEAD_IM;
2442 !!!next-token;
2443 next B;
2444 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2445 !!!cp ('t134.1');
2446 !!!parse-error (type => 'unmatched end tag', text => 'head',
2447 token => $token);
2448 ## Ignore the token
2449 !!!next-token;
2450 next B;
2451 } else {
2452 die "$0: $self->{insertion_mode}: Unknown insertion mode";
2453 }
2454 } elsif ($token->{tag_name} eq 'noscript') {
2455 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2456 !!!cp ('t136');
2457 pop @{$self->{open_elements}};
2458 $self->{insertion_mode} = IN_HEAD_IM;
2459 !!!next-token;
2460 next B;
2461 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM or
2462 $self->{insertion_mode} == AFTER_HEAD_IM) {
2463 !!!cp ('t137');
2464 !!!parse-error (type => 'unmatched end tag',
2465 text => 'noscript', token => $token);
2466 ## Ignore the token ## ISSUE: An issue in the spec.
2467 !!!next-token;
2468 next B;
2469 } else {
2470 !!!cp ('t138');
2471 #
2472 }
2473 } elsif ({
2474 body => 1, html => 1,
2475 }->{$token->{tag_name}}) {
2476 ## TODO: This branch is entirely redundant.
2477 if ($self->{insertion_mode} == BEFORE_HEAD_IM or
2478 $self->{insertion_mode} == IN_HEAD_IM or
2479 $self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2480 !!!cp ('t140');
2481 !!!parse-error (type => 'unmatched end tag',
2482 text => $token->{tag_name}, token => $token);
2483 ## Ignore the token
2484 !!!next-token;
2485 next B;
2486 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2487 !!!cp ('t140.1');
2488 !!!parse-error (type => 'unmatched end tag',
2489 text => $token->{tag_name}, token => $token);
2490 ## Ignore the token
2491 !!!next-token;
2492 next B;
2493 } else {
2494 die "$0: $self->{insertion_mode}: Unknown insertion mode";
2495 }
2496 } elsif ($token->{tag_name} eq 'p') {
2497 !!!cp ('t142');
2498 !!!parse-error (type => 'unmatched end tag',
2499 text => $token->{tag_name}, token => $token);
2500 ## Ignore the token
2501 !!!next-token;
2502 next B;
2503 } elsif ($token->{tag_name} eq 'br') {
2504 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2505 !!!cp ('t142.2');
2506 ## (before head) as if <head>, (in head) as if </head>
2507 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2508 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2509 $self->{insertion_mode} = AFTER_HEAD_IM;
2510
2511 ## Reprocess in the "after head" insertion mode...
2512 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2513 !!!cp ('t143.2');
2514 ## As if </head>
2515 pop @{$self->{open_elements}};
2516 $self->{insertion_mode} = AFTER_HEAD_IM;
2517
2518 ## Reprocess in the "after head" insertion mode...
2519 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2520 !!!cp ('t143.3');
2521 ## NOTE: Two parse errors for <head><noscript></br>
2522 !!!parse-error (type => 'unmatched end tag',
2523 text => 'br', token => $token);
2524 ## As if </noscript>
2525 pop @{$self->{open_elements}};
2526 $self->{insertion_mode} = IN_HEAD_IM;
2527
2528 ## Reprocess in the "in head" insertion mode...
2529 ## As if </head>
2530 pop @{$self->{open_elements}};
2531 $self->{insertion_mode} = AFTER_HEAD_IM;
2532
2533 ## Reprocess in the "after head" insertion mode...
2534 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2535 !!!cp ('t143.4');
2536 #
2537 } else {
2538 die "$0: $self->{insertion_mode}: Unknown insertion mode";
2539 }
2540
2541 #
2542 } else { ## Other end tags
2543 !!!cp ('t145');
2544 !!!parse-error (type => 'unmatched end tag',
2545 text => $token->{tag_name}, token => $token);
2546 ## Ignore the token
2547 !!!next-token;
2548 next B;
2549 }
2550
2551 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2552 !!!cp ('t146');
2553 ## As if </noscript>
2554 pop @{$self->{open_elements}};
2555 !!!parse-error (type => 'in noscript:/',
2556 text => $token->{tag_name}, token => $token);
2557
2558 ## Reprocess in the "in head" insertion mode...
2559 ## As if </head>
2560 pop @{$self->{open_elements}};
2561
2562 ## Reprocess in the "after head" insertion mode...
2563 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2564 !!!cp ('t147');
2565 ## As if </head>
2566 pop @{$self->{open_elements}};
2567
2568 ## Reprocess in the "after head" insertion mode...
2569 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2570 ## ISSUE: This case cannot be reached?
2571 !!!cp ('t148');
2572 !!!parse-error (type => 'unmatched end tag',
2573 text => $token->{tag_name}, token => $token);
2574 ## Ignore the token ## ISSUE: An issue in the spec.
2575 !!!next-token;
2576 next B;
2577 } else {
2578 !!!cp ('t149');
2579 }
2580
2581 ## "after head" insertion mode
2582 ## As if <body>
2583 !!!insert-element ('body',, $token);
2584 $self->{insertion_mode} = IN_BODY_IM;
2585 ## reprocess
2586 next B;
2587 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
2588 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2589 !!!cp ('t149.1');
2590
2591 ## NOTE: As if <head>
2592 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2593 $self->{open_elements}->[-1]->[0]->append_child
2594 ($self->{head_element});
2595 #push @{$self->{open_elements}},
2596 # [$self->{head_element}, $el_category->{head}];
2597 #$self->{insertion_mode} = IN_HEAD_IM;
2598 ## NOTE: Reprocess.
2599
2600 ## NOTE: As if </head>
2601 #pop @{$self->{open_elements}};
2602 #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
2603 ## NOTE: Reprocess.
2604
2605 #
2606 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2607 !!!cp ('t149.2');
2608
2609 ## NOTE: As if </head>
2610 pop @{$self->{open_elements}};
2611 #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
2612 ## NOTE: Reprocess.
2613
2614 #
2615 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2616 !!!cp ('t149.3');
2617
2618 !!!parse-error (type => 'in noscript:#eof', token => $token);
2619
2620 ## As if </noscript>
2621 pop @{$self->{open_elements}};
2622 #$self->{insertion_mode} = IN_HEAD_IM;
2623 ## NOTE: Reprocess.
2624
2625 ## NOTE: As if </head>
2626 pop @{$self->{open_elements}};
2627 #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
2628 ## NOTE: Reprocess.
2629
2630 #
2631 } else {
2632 !!!cp ('t149.4');
2633 #
2634 }
2635
2636 ## NOTE: As if <body>
2637 !!!insert-element ('body',, $token);
2638 $self->{insertion_mode} = IN_BODY_IM;
2639 ## NOTE: Reprocess.
2640 next B;
2641 } else {
2642 die "$0: $token->{type}: Unknown token type";
2643 }
2644 } elsif ($self->{insertion_mode} & BODY_IMS) {
2645 if ($token->{type} == CHARACTER_TOKEN) {
2646 !!!cp ('t150');
2647 ## NOTE: There is a code clone of "character in body".
2648 $reconstruct_active_formatting_elements->($insert_to_current);
2649
2650 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
2651
2652 !!!next-token;
2653 next B;
2654 } elsif ($token->{type} == START_TAG_TOKEN) {
2655 if ({
2656 caption => 1, col => 1, colgroup => 1, tbody => 1,
2657 td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
2658 }->{$token->{tag_name}}) {
2659 if (($self->{insertion_mode} & IM_MASK) == IN_CELL_IM) {
2660 ## have an element in table scope
2661 for (reverse 0..$#{$self->{open_elements}}) {
2662 my $node = $self->{open_elements}->[$_];
2663 if ($node->[1] == TABLE_CELL_EL) {
2664 !!!cp ('t151');
2665
2666 ## Close the cell
2667 !!!back-token; # <x>
2668 $token = {type => END_TAG_TOKEN,
2669 tag_name => $node->[0]->manakai_local_name,
2670 line => $token->{line},
2671 column => $token->{column}};
2672 next B;
2673 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2674 !!!cp ('t152');
2675 ## ISSUE: This case can never be reached, maybe.
2676 last;
2677 }
2678 }
2679
2680 !!!cp ('t153');
2681 !!!parse-error (type => 'start tag not allowed',
2682 text => $token->{tag_name}, token => $token);
2683 ## Ignore the token
2684 !!!nack ('t153.1');
2685 !!!next-token;
2686 next B;
2687 } elsif (($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
2688 !!!parse-error (type => 'not closed', text => 'caption',
2689 token => $token);
2690
2691 ## NOTE: As if </caption>.
2692 ## have a table element in table scope
2693 my $i;
2694 INSCOPE: {
2695 for (reverse 0..$#{$self->{open_elements}}) {
2696 my $node = $self->{open_elements}->[$_];
2697 if ($node->[1] == CAPTION_EL) {
2698 !!!cp ('t155');
2699 $i = $_;
2700 last INSCOPE;
2701 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2702 !!!cp ('t156');
2703 last;
2704 }
2705 }
2706
2707 !!!cp ('t157');
2708 !!!parse-error (type => 'start tag not allowed',
2709 text => $token->{tag_name}, token => $token);
2710 ## Ignore the token
2711 !!!nack ('t157.1');
2712 !!!next-token;
2713 next B;
2714 } # INSCOPE
2715
2716 ## generate implied end tags
2717 while ($self->{open_elements}->[-1]->[1]
2718 & END_TAG_OPTIONAL_EL) {
2719 !!!cp ('t158');
2720 pop @{$self->{open_elements}};
2721 }
2722
2723 unless ($self->{open_elements}->[-1]->[1] == CAPTION_EL) {
2724 !!!cp ('t159');
2725 !!!parse-error (type => 'not closed',
2726 text => $self->{open_elements}->[-1]->[0]
2727 ->manakai_local_name,
2728 token => $token);
2729 } else {
2730 !!!cp ('t160');
2731 }
2732
2733 splice @{$self->{open_elements}}, $i;
2734
2735 $clear_up_to_marker->();
2736
2737 $self->{insertion_mode} = IN_TABLE_IM;
2738
2739 ## reprocess
2740 !!!ack-later;
2741 next B;
2742 } else {
2743 !!!cp ('t161');
2744 #
2745 }
2746 } else {
2747 !!!cp ('t162');
2748 #
2749 }
2750 } elsif ($token->{type} == END_TAG_TOKEN) {
2751 if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
2752 if (($self->{insertion_mode} & IM_MASK) == IN_CELL_IM) {
2753 ## have an element in table scope
2754 my $i;
2755 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2756 my $node = $self->{open_elements}->[$_];
2757 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
2758 !!!cp ('t163');
2759 $i = $_;
2760 last INSCOPE;
2761 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2762 !!!cp ('t164');
2763 last INSCOPE;
2764 }
2765 } # INSCOPE
2766 unless (defined $i) {
2767 !!!cp ('t165');
2768 !!!parse-error (type => 'unmatched end tag',
2769 text => $token->{tag_name},
2770 token => $token);
2771 ## Ignore the token
2772 !!!next-token;
2773 next B;
2774 }
2775
2776 ## generate implied end tags
2777 while ($self->{open_elements}->[-1]->[1]
2778 & END_TAG_OPTIONAL_EL) {
2779 !!!cp ('t166');
2780 pop @{$self->{open_elements}};
2781 }
2782
2783 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
2784 ne $token->{tag_name}) {
2785 !!!cp ('t167');
2786 !!!parse-error (type => 'not closed',
2787 text => $self->{open_elements}->[-1]->[0]
2788 ->manakai_local_name,
2789 token => $token);
2790 } else {
2791 !!!cp ('t168');
2792 }
2793
2794 splice @{$self->{open_elements}}, $i;
2795
2796 $clear_up_to_marker->();
2797
2798 $self->{insertion_mode} = IN_ROW_IM;
2799
2800 !!!next-token;
2801 next B;
2802 } elsif (($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
2803 !!!cp ('t169');
2804 !!!parse-error (type => 'unmatched end tag',
2805 text => $token->{tag_name}, token => $token);
2806 ## Ignore the token
2807 !!!next-token;
2808 next B;
2809 } else {
2810 !!!cp ('t170');
2811 #
2812 }
2813 } elsif ($token->{tag_name} eq 'caption') {
2814 if (($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
2815 ## have a table element in table scope
2816 my $i;
2817 INSCOPE: {
2818 for (reverse 0..$#{$self->{open_elements}}) {
2819 my $node = $self->{open_elements}->[$_];
2820 if ($node->[1] == CAPTION_EL) {
2821 !!!cp ('t171');
2822 $i = $_;
2823 last INSCOPE;
2824 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2825 !!!cp ('t172');
2826 last;
2827 }
2828 }
2829
2830 !!!cp ('t173');
2831 !!!parse-error (type => 'unmatched end tag',
2832 text => $token->{tag_name}, token => $token);
2833 ## Ignore the token
2834 !!!next-token;
2835 next B;
2836 } # INSCOPE
2837
2838 ## generate implied end tags
2839 while ($self->{open_elements}->[-1]->[1]
2840 & END_TAG_OPTIONAL_EL) {
2841 !!!cp ('t174');
2842 pop @{$self->{open_elements}};
2843 }
2844
2845 unless ($self->{open_elements}->[-1]->[1] == CAPTION_EL) {
2846 !!!cp ('t175');
2847 !!!parse-error (type => 'not closed',
2848 text => $self->{open_elements}->[-1]->[0]
2849 ->manakai_local_name,
2850 token => $token);
2851 } else {
2852 !!!cp ('t176');
2853 }
2854
2855 splice @{$self->{open_elements}}, $i;
2856
2857 $clear_up_to_marker->();
2858
2859 $self->{insertion_mode} = IN_TABLE_IM;
2860
2861 !!!next-token;
2862 next B;
2863 } elsif (($self->{insertion_mode} & IM_MASK) == IN_CELL_IM) {
2864 !!!cp ('t177');
2865 !!!parse-error (type => 'unmatched end tag',
2866 text => $token->{tag_name}, token => $token);
2867 ## Ignore the token
2868 !!!next-token;
2869 next B;
2870 } else {
2871 !!!cp ('t178');
2872 #
2873 }
2874 } elsif ({
2875 table => 1, tbody => 1, tfoot => 1,
2876 thead => 1, tr => 1,
2877 }->{$token->{tag_name}} and
2878 ($self->{insertion_mode} & IM_MASK) == IN_CELL_IM) {
2879 ## have an element in table scope
2880 my $i;
2881 my $tn;
2882 INSCOPE: {
2883 for (reverse 0..$#{$self->{open_elements}}) {
2884 my $node = $self->{open_elements}->[$_];
2885 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
2886 !!!cp ('t179');
2887 $i = $_;
2888
2889 ## Close the cell
2890 !!!back-token; # </x>
2891 $token = {type => END_TAG_TOKEN, tag_name => $tn,
2892 line => $token->{line},
2893 column => $token->{column}};
2894 next B;
2895 } elsif ($node->[1] == TABLE_CELL_EL) {
2896 !!!cp ('t180');
2897 $tn = $node->[0]->manakai_local_name;
2898 ## NOTE: There is exactly one |td| or |th| element
2899 ## in scope in the stack of open elements by definition.
2900 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2901 ## ISSUE: Can this be reached?
2902 !!!cp ('t181');
2903 last;
2904 }
2905 }
2906
2907 !!!cp ('t182');
2908 !!!parse-error (type => 'unmatched end tag',
2909 text => $token->{tag_name}, token => $token);
2910 ## Ignore the token
2911 !!!next-token;
2912 next B;
2913 } # INSCOPE
2914 } elsif ($token->{tag_name} eq 'table' and
2915 ($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
2916 !!!parse-error (type => 'not closed', text => 'caption',
2917 token => $token);
2918
2919 ## As if </caption>
2920 ## have a table element in table scope
2921 my $i;
2922 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2923 my $node = $self->{open_elements}->[$_];
2924 if ($node->[1] == CAPTION_EL) {
2925 !!!cp ('t184');
2926 $i = $_;
2927 last INSCOPE;
2928 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2929 !!!cp ('t185');
2930 last INSCOPE;
2931 }
2932 } # INSCOPE
2933 unless (defined $i) {
2934 !!!cp ('t186');
2935 ## TODO: Wrong error type?
2936 !!!parse-error (type => 'unmatched end tag',
2937 text => 'caption', token => $token);
2938 ## Ignore the token
2939 !!!next-token;
2940 next B;
2941 }
2942
2943 ## generate implied end tags
2944 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
2945 !!!cp ('t187');
2946 pop @{$self->{open_elements}};
2947 }
2948
2949 unless ($self->{open_elements}->[-1]->[1] == CAPTION_EL) {
2950 !!!cp ('t188');
2951 !!!parse-error (type => 'not closed',
2952 text => $self->{open_elements}->[-1]->[0]
2953 ->manakai_local_name,
2954 token => $token);
2955 } else {
2956 !!!cp ('t189');
2957 }
2958
2959 splice @{$self->{open_elements}}, $i;
2960
2961 $clear_up_to_marker->();
2962
2963 $self->{insertion_mode} = IN_TABLE_IM;
2964
2965 ## reprocess
2966 next B;
2967 } elsif ({
2968 body => 1, col => 1, colgroup => 1, html => 1,
2969 }->{$token->{tag_name}}) {
2970 if ($self->{insertion_mode} & BODY_TABLE_IMS) {
2971 !!!cp ('t190');
2972 !!!parse-error (type => 'unmatched end tag',
2973 text => $token->{tag_name}, token => $token);
2974 ## Ignore the token
2975 !!!next-token;
2976 next B;
2977 } else {
2978 !!!cp ('t191');
2979 #
2980 }
2981 } elsif ({
2982 tbody => 1, tfoot => 1,
2983 thead => 1, tr => 1,
2984 }->{$token->{tag_name}} and
2985 ($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
2986 !!!cp ('t192');
2987 !!!parse-error (type => 'unmatched end tag',
2988 text => $token->{tag_name}, token => $token);
2989 ## Ignore the token
2990 !!!next-token;
2991 next B;
2992 } else {
2993 !!!cp ('t193');
2994 #
2995 }
2996 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
2997 for my $entry (@{$self->{open_elements}}) {
2998 unless ($entry->[1] & ALL_END_TAG_OPTIONAL_EL) {
2999 !!!cp ('t75');
3000 !!!parse-error (type => 'in body:#eof', token => $token);
3001 last;
3002 }
3003 }
3004
3005 ## Stop parsing.
3006 last B;
3007 } else {
3008 die "$0: $token->{type}: Unknown token type";
3009 }
3010
3011 $insert = $insert_to_current;
3012 #
3013 } elsif ($self->{insertion_mode} & TABLE_IMS) {
3014 if ($token->{type} == CHARACTER_TOKEN) {
3015 if (not $open_tables->[-1]->[1] and # tainted
3016 $token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
3017 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3018
3019 unless (length $token->{data}) {
3020 !!!cp ('t194');
3021 !!!next-token;
3022 next B;
3023 } else {
3024 !!!cp ('t195');
3025 }
3026 }
3027
3028 !!!parse-error (type => 'in table:#text', token => $token);
3029
3030 ## NOTE: As if in body, but insert into the foster parent element.
3031 $reconstruct_active_formatting_elements->($insert_to_foster);
3032
3033 if ($self->{open_elements}->[-1]->[1] & TABLE_ROWS_EL) {
3034 # MUST
3035 my $foster_parent_element;
3036 my $next_sibling;
3037 my $prev_sibling;
3038 OE: for (reverse 0..$#{$self->{open_elements}}) {
3039 if ($self->{open_elements}->[$_]->[1] == TABLE_EL) {
3040 my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
3041 if (defined $parent and $parent->node_type == 1) {
3042 $foster_parent_element = $parent;
3043 !!!cp ('t196');
3044 $next_sibling = $self->{open_elements}->[$_]->[0];
3045 $prev_sibling = $next_sibling->previous_sibling;
3046 #
3047 } else {
3048 !!!cp ('t197');
3049 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
3050 $prev_sibling = $foster_parent_element->last_child;
3051 #
3052 }
3053 last OE;
3054 }
3055 } # OE
3056 $foster_parent_element = $self->{open_elements}->[0]->[0] and
3057 $prev_sibling = $foster_parent_element->last_child
3058 unless defined $foster_parent_element;
3059 undef $prev_sibling unless $open_tables->[-1]->[2]; # ~node inserted
3060 if (defined $prev_sibling and
3061 $prev_sibling->node_type == 3) {
3062 !!!cp ('t198');
3063 $prev_sibling->manakai_append_text ($token->{data});
3064 } else {
3065 !!!cp ('t199');
3066 $foster_parent_element->insert_before
3067 ($self->{document}->create_text_node ($token->{data}),
3068 $next_sibling);
3069 }
3070 $open_tables->[-1]->[1] = 1; # tainted
3071 $open_tables->[-1]->[2] = 1; # ~node inserted
3072 } else {
3073 ## NOTE: Fragment case or in a foster parent'ed element
3074 ## (e.g. |<table><span>a|). In fragment case, whether the
3075 ## character is appended to existing node or a new node is
3076 ## created is irrelevant, since the foster parent'ed nodes
3077 ## are discarded and fragment parsing does not invoke any
3078 ## script.
3079 !!!cp ('t200');
3080 $self->{open_elements}->[-1]->[0]->manakai_append_text
3081 ($token->{data});
3082 }
3083
3084 !!!next-token;
3085 next B;
3086 } elsif ($token->{type} == START_TAG_TOKEN) {
3087 if ({
3088 tr => (($self->{insertion_mode} & IM_MASK) != IN_ROW_IM),
3089 th => 1, td => 1,
3090 }->{$token->{tag_name}}) {
3091 if (($self->{insertion_mode} & IM_MASK) == IN_TABLE_IM) {
3092 ## Clear back to table context
3093 while (not ($self->{open_elements}->[-1]->[1]
3094 & TABLE_SCOPING_EL)) {
3095 !!!cp ('t201');
3096 pop @{$self->{open_elements}};
3097 }
3098
3099 !!!insert-element ('tbody',, $token);
3100 $self->{insertion_mode} = IN_TABLE_BODY_IM;
3101 ## reprocess in the "in table body" insertion mode...
3102 }
3103
3104 if (($self->{insertion_mode} & IM_MASK) == IN_TABLE_BODY_IM) {
3105 unless ($token->{tag_name} eq 'tr') {
3106 !!!cp ('t202');
3107 !!!parse-error (type => 'missing start tag:tr', token => $token);
3108 }
3109
3110 ## Clear back to table body context
3111 while (not ($self->{open_elements}->[-1]->[1]
3112 & TABLE_ROWS_SCOPING_EL)) {
3113 !!!cp ('t203');
3114 ## ISSUE: Can this case be reached?
3115 pop @{$self->{open_elements}};
3116 }
3117
3118 $self->{insertion_mode} = IN_ROW_IM;
3119 if ($token->{tag_name} eq 'tr') {
3120 !!!cp ('t204');
3121 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3122 $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3123 !!!nack ('t204');
3124 !!!next-token;
3125 next B;
3126 } else {
3127 !!!cp ('t205');
3128 !!!insert-element ('tr',, $token);
3129 ## reprocess in the "in row" insertion mode
3130 }
3131 } else {
3132 !!!cp ('t206');
3133 }
3134
3135 ## Clear back to table row context
3136 while (not ($self->{open_elements}->[-1]->[1]
3137 & TABLE_ROW_SCOPING_EL)) {
3138 !!!cp ('t207');
3139 pop @{$self->{open_elements}};
3140 }
3141
3142 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3143 $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3144 $self->{insertion_mode} = IN_CELL_IM;
3145
3146 push @$active_formatting_elements, ['#marker', ''];
3147
3148 !!!nack ('t207.1');
3149 !!!next-token;
3150 next B;
3151 } elsif ({
3152 caption => 1, col => 1, colgroup => 1,
3153 tbody => 1, tfoot => 1, thead => 1,
3154 tr => 1, # $self->{insertion_mode} == IN_ROW_IM
3155 }->{$token->{tag_name}}) {
3156 if (($self->{insertion_mode} & IM_MASK) == IN_ROW_IM) {
3157 ## As if </tr>
3158 ## have an element in table scope
3159 my $i;
3160 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3161 my $node = $self->{open_elements}->[$_];
3162 if ($node->[1] == TABLE_ROW_EL) {
3163 !!!cp ('t208');
3164 $i = $_;
3165 last INSCOPE;
3166 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3167 !!!cp ('t209');
3168 last INSCOPE;
3169 }
3170 } # INSCOPE
3171 unless (defined $i) {
3172 !!!cp ('t210');
3173 ## TODO: This type is wrong (the token here is a start tag).
3174 !!!parse-error (type => 'unmatched end tag',
3175 text => $token->{tag_name}, token => $token);
3176 ## Ignore the token
3177 !!!nack ('t210.1');
3178 !!!next-token;
3179 next B;
3180 }
3181
3182 ## Clear back to table row context
3183 while (not ($self->{open_elements}->[-1]->[1]
3184 & TABLE_ROW_SCOPING_EL)) {
3185 !!!cp ('t211');
3186 ## ISSUE: Can this case be reached?
3187 pop @{$self->{open_elements}};
3188 }
3189
3190 pop @{$self->{open_elements}}; # tr
3191 $self->{insertion_mode} = IN_TABLE_BODY_IM;
3192 if ($token->{tag_name} eq 'tr') {
3193 !!!cp ('t212');
3194 ## reprocess
3195 !!!ack-later;
3196 next B;
3197 } else {
3198 !!!cp ('t213');
3199 ## reprocess in the "in table body" insertion mode...
3200 }
3201 }
3202
3203 if (($self->{insertion_mode} & IM_MASK) == IN_TABLE_BODY_IM) {
3204 ## have an element in table scope
3205 my $i;
3206 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3207 my $node = $self->{open_elements}->[$_];
3208 if ($node->[1] == TABLE_ROW_GROUP_EL) {
3209 !!!cp ('t214');
3210 $i = $_;
3211 last INSCOPE;
3212 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3213 !!!cp ('t215');
3214 last INSCOPE;
3215 }
3216 } # INSCOPE
3217 unless (defined $i) {
3218 !!!cp ('t216');
3219 ## TODO: This error type is wrong.
3220 !!!parse-error (type => 'unmatched end tag',
3221 text => $token->{tag_name}, token => $token);
3222 ## Ignore the token
3223 !!!nack ('t216.1');
3224 !!!next-token;
3225 next B;
3226 }
3227
3228 ## Clear back to table body context
3229 while (not ($self->{open_elements}->[-1]->[1]
3230 & TABLE_ROWS_SCOPING_EL)) {
3231 !!!cp ('t217');
3232 ## ISSUE: Can this state be reached?
3233 pop @{$self->{open_elements}};
3234 }
3235
3236 ## As if <{current node}>
3237 ## have an element in table scope
3238 ## true by definition
3239
3240 ## Clear back to table body context
3241 ## nop by definition
3242
3243 pop @{$self->{open_elements}};
3244 $self->{insertion_mode} = IN_TABLE_IM;
3245 ## reprocess in "in table" insertion mode...
3246 } else {
3247 !!!cp ('t218');
3248 }
3249
3250 if ($token->{tag_name} eq 'col') {
3251 ## Clear back to table context
3252 while (not ($self->{open_elements}->[-1]->[1]
3253 & TABLE_SCOPING_EL)) {
3254 !!!cp ('t219');
3255 ## ISSUE: Can this state be reached?
3256 pop @{$self->{open_elements}};
3257 }
3258
3259 !!!insert-element ('colgroup',, $token);
3260 $self->{insertion_mode} = IN_COLUMN_GROUP_IM;
3261 ## reprocess
3262 $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3263 !!!ack-later;
3264 next B;
3265 } elsif ({
3266 caption => 1,
3267 colgroup => 1,
3268 tbody => 1, tfoot => 1, thead => 1,
3269 }->{$token->{tag_name}}) {
3270 ## Clear back to table context
3271 while (not ($self->{open_elements}->[-1]->[1]
3272 & TABLE_SCOPING_EL)) {
3273 !!!cp ('t220');
3274 ## ISSUE: Can this state be reached?
3275 pop @{$self->{open_elements}};
3276 }
3277
3278 push @$active_formatting_elements, ['#marker', '']
3279 if $token->{tag_name} eq 'caption';
3280
3281 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3282 $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3283 $self->{insertion_mode} = {
3284 caption => IN_CAPTION_IM,
3285 colgroup => IN_COLUMN_GROUP_IM,
3286 tbody => IN_TABLE_BODY_IM,
3287 tfoot => IN_TABLE_BODY_IM,
3288 thead => IN_TABLE_BODY_IM,
3289 }->{$token->{tag_name}};
3290 !!!next-token;
3291 !!!nack ('t220.1');
3292 next B;
3293 } else {
3294 die "$0: in table: <>: $token->{tag_name}";
3295 }
3296 } elsif ($token->{tag_name} eq 'table') {
3297 !!!parse-error (type => 'not closed',
3298 text => $self->{open_elements}->[-1]->[0]
3299 ->manakai_local_name,
3300 token => $token);
3301
3302 ## As if </table>
3303 ## have a table element in table scope
3304 my $i;
3305 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3306 my $node = $self->{open_elements}->[$_];
3307 if ($node->[1] == TABLE_EL) {
3308 !!!cp ('t221');
3309 $i = $_;
3310 last INSCOPE;
3311 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3312 !!!cp ('t222');
3313 last INSCOPE;
3314 }
3315 } # INSCOPE
3316 unless (defined $i) {
3317 !!!cp ('t223');
3318 ## TODO: The following is wrong, maybe.
3319 !!!parse-error (type => 'unmatched end tag', text => 'table',
3320 token => $token);
3321 ## Ignore tokens </table><table>
3322 !!!nack ('t223.1');
3323 !!!next-token;
3324 next B;
3325 }
3326
3327 ## TODO: The following steps have been removed from the latest spec.
3328 ## generate implied end tags
3329 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
3330 !!!cp ('t224');
3331 pop @{$self->{open_elements}};
3332 }
3333
3334 unless ($self->{open_elements}->[-1]->[1] == TABLE_EL) {
3335 !!!cp ('t225');
3336 ## NOTE: |<table><tr><table>|
3337 !!!parse-error (type => 'not closed',
3338 text => $self->{open_elements}->[-1]->[0]
3339 ->manakai_local_name,
3340 token => $token);
3341 } else {
3342 !!!cp ('t226');
3343 }
3344
3345 splice @{$self->{open_elements}}, $i;
3346 pop @{$open_tables};
3347
3348 $self->_reset_insertion_mode;
3349
3350 ## reprocess
3351 !!!ack-later;
3352 next B;
3353 } elsif ($token->{tag_name} eq 'style') {
3354 if (not $open_tables->[-1]->[1]) { # tainted
3355 !!!cp ('t227.8');
3356 ## NOTE: This is a "as if in head" code clone.
3357 $parse_rcdata->(CDATA_CONTENT_MODEL);
3358 $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3359 next B;
3360 } else {
3361 !!!cp ('t227.7');
3362 #
3363 }
3364 } elsif ($token->{tag_name} eq 'script') {
3365 if (not $open_tables->[-1]->[1]) { # tainted
3366 !!!cp ('t227.6');
3367 ## NOTE: This is a "as if in head" code clone.
3368 $script_start_tag->();
3369 $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3370 next B;
3371 } else {
3372 !!!cp ('t227.5');
3373 #
3374 }
3375 } elsif ($token->{tag_name} eq 'input') {
3376 if (not $open_tables->[-1]->[1]) { # tainted
3377 if ($token->{attributes}->{type}) { ## TODO: case
3378 my $type = lc $token->{attributes}->{type}->{value};
3379 if ($type eq 'hidden') {
3380 !!!cp ('t227.3');
3381 !!!parse-error (type => 'in table',
3382 text => $token->{tag_name}, token => $token);
3383
3384 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3385 $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3386
3387 ## TODO: form element pointer
3388
3389 pop @{$self->{open_elements}};
3390 ## NOTE: Acknowledge before advancing, while $token is still this <input>.
3391 !!!ack ('t227.2.1');
3392 !!!next-token;
3393 next B;
3394 } else {
3395 !!!cp ('t227.2');
3396 #
3397 }
3398 } else {
3399 !!!cp ('t227.1');
3400 #
3401 }
3402 } else {
3403 !!!cp ('t227.4');
3404 #
3405 }
3406 } else {
3407 !!!cp ('t227');
3408 #
3409 }
3410
3411 !!!parse-error (type => 'in table', text => $token->{tag_name},
3412 token => $token);
3413
3414 $insert = $insert_to_foster;
3415 #
3416 } elsif ($token->{type} == END_TAG_TOKEN) {
3417 if ($token->{tag_name} eq 'tr' and
3418 ($self->{insertion_mode} & IM_MASK) == IN_ROW_IM) {
3419 ## have an element in table scope
3420 my $i;
3421 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3422 my $node = $self->{open_elements}->[$_];
3423 if ($node->[1] == TABLE_ROW_EL) {
3424 !!!cp ('t228');
3425 $i = $_;
3426 last INSCOPE;
3427 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3428 !!!cp ('t229');
3429 last INSCOPE;
3430 }
3431 } # INSCOPE
3432 unless (defined $i) {
3433 !!!cp ('t230');
3434 !!!parse-error (type => 'unmatched end tag',
3435 text => $token->{tag_name}, token => $token);
3436 ## Ignore the token
3437 !!!nack ('t230.1');
3438 !!!next-token;
3439 next B;
3440 } else {
3441 !!!cp ('t232');
3442 }
3443
3444 ## Clear back to table row context
3445 while (not ($self->{open_elements}->[-1]->[1]
3446 & TABLE_ROW_SCOPING_EL)) {
3447 !!!cp ('t231');
3448 ## ISSUE: Can this state be reached?
3449 pop @{$self->{open_elements}};
3450 }
3451
3452 pop @{$self->{open_elements}}; # tr
3453 $self->{insertion_mode} = IN_TABLE_BODY_IM;
3454 !!!next-token;
3455 !!!nack ('t231.1');
3456 next B;
3457 } elsif ($token->{tag_name} eq 'table') {
3458 if (($self->{insertion_mode} & IM_MASK) == IN_ROW_IM) {
3459 ## As if </tr>
3460 ## have an element in table scope
3461 my $i;
3462 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3463 my $node = $self->{open_elements}->[$_];
3464 if ($node->[1] == TABLE_ROW_EL) {
3465 !!!cp ('t233');
3466 $i = $_;
3467 last INSCOPE;
3468 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3469 !!!cp ('t234');
3470 last INSCOPE;
3471 }
3472 } # INSCOPE
3473 unless (defined $i) {
3474 !!!cp ('t235');
3475 ## TODO: The following is wrong.
3476 !!!parse-error (type => 'unmatched end tag',
3477 text => $token->{tag_name}, token => $token);
3478 ## Ignore the token
3479 !!!nack ('t236.1');
3480 !!!next-token;
3481 next B;
3482 }
3483
3484 ## Clear back to table row context
3485 while (not ($self->{open_elements}->[-1]->[1]
3486 & TABLE_ROW_SCOPING_EL)) {
3487 !!!cp ('t236');
3488 ## ISSUE: Can this state be reached?
3489 pop @{$self->{open_elements}};
3490 }
3491
3492 pop @{$self->{open_elements}}; # tr
3493 $self->{insertion_mode} = IN_TABLE_BODY_IM;
3494 ## reprocess in the "in table body" insertion mode...
3495 }
3496
3497 if (($self->{insertion_mode} & IM_MASK) == IN_TABLE_BODY_IM) {
3498 ## have an element in table scope
3499 my $i;
3500 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3501 my $node = $self->{open_elements}->[$_];
3502 if ($node->[1] == TABLE_ROW_GROUP_EL) {
3503 !!!cp ('t237');
3504 $i = $_;
3505 last INSCOPE;
3506 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3507 !!!cp ('t238');
3508 last INSCOPE;
3509 }
3510 } # INSCOPE
3511 unless (defined $i) {
3512 !!!cp ('t239');
3513 !!!parse-error (type => 'unmatched end tag',
3514 text => $token->{tag_name}, token => $token);
3515 ## Ignore the token
3516 !!!nack ('t239.1');
3517 !!!next-token;
3518 next B;
3519 }
3520
3521 ## Clear back to table body context
3522 while (not ($self->{open_elements}->[-1]->[1]
3523 & TABLE_ROWS_SCOPING_EL)) {
3524 !!!cp ('t240');
3525 pop @{$self->{open_elements}};
3526 }
3527
3528 ## As if <{current node}>
3529 ## have an element in table scope
3530 ## true by definition
3531
3532 ## Clear back to table body context
3533 ## nop by definition
3534
3535 pop @{$self->{open_elements}};
3536 $self->{insertion_mode} = IN_TABLE_IM;
3537 ## reprocess in the "in table" insertion mode...
3538 }
3539
3540 ## NOTE: </table> in the "in table" insertion mode.
3541 ## When you edit the code fragment below, please ensure that
3542 ## the code for <table> in the "in table" insertion mode
3543 ## is synced with it.
3544
3545 ## have a table element in table scope
3546 my $i;
3547 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3548 my $node = $self->{open_elements}->[$_];
3549 if ($node->[1] == TABLE_EL) {
3550 !!!cp ('t241');
3551 $i = $_;
3552 last INSCOPE;
3553 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3554 !!!cp ('t242');
3555 last INSCOPE;
3556 }
3557 } # INSCOPE
3558 unless (defined $i) {
3559 !!!cp ('t243');
3560 !!!parse-error (type => 'unmatched end tag',
3561 text => $token->{tag_name}, token => $token);
3562 ## Ignore the token
3563 !!!nack ('t243.1');
3564 !!!next-token;
3565 next B;
3566 }
3567
3568 splice @{$self->{open_elements}}, $i;
3569 pop @{$open_tables};
3570
3571 $self->_reset_insertion_mode;
3572
3573 !!!next-token;
3574 next B;
3575 } elsif ({
3576 tbody => 1, tfoot => 1, thead => 1,
3577 }->{$token->{tag_name}} and
3578 $self->{insertion_mode} & ROW_IMS) {
3579 if (($self->{insertion_mode} & IM_MASK) == IN_ROW_IM) {
3580 ## have an element in table scope (the row group named by the end tag)
3581 my $i;
3582 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3583 my $node = $self->{open_elements}->[$_];
3584 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
3585 !!!cp ('t247');
3586 $i = $_;
3587 last INSCOPE;
3588 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3589 !!!cp ('t248');
3590 last INSCOPE;
3591 }
3592 } # INSCOPE
3593 unless (defined $i) {
3594 !!!cp ('t249');
3595 !!!parse-error (type => 'unmatched end tag',
3596 text => $token->{tag_name}, token => $token);
3597 ## Ignore the token
3598 !!!nack ('t249.1');
3599 !!!next-token;
3600 next B;
3601 }
3602
3603 ## As if </tr>
3604 ## have an element in table scope
3605 undef $i; # reuse; a second |my $i| would mask the first in this scope
3606 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3607 my $node = $self->{open_elements}->[$_];
3608 if ($node->[1] == TABLE_ROW_EL) {
3609 !!!cp ('t250');
3610 $i = $_;
3611 last INSCOPE;
3612 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3613 !!!cp ('t251');
3614 last INSCOPE;
3615 }
3616 } # INSCOPE
3617 unless (defined $i) {
3618 !!!cp ('t252');
3619 !!!parse-error (type => 'unmatched end tag',
3620 text => 'tr', token => $token);
3621 ## Ignore the token
3622 !!!nack ('t252.1');
3623 !!!next-token;
3624 next B;
3625 }
3626
3627 ## Clear back to table row context
3628 while (not ($self->{open_elements}->[-1]->[1]
3629 & TABLE_ROW_SCOPING_EL)) {
3630 !!!cp ('t253');
3631 ## ISSUE: Can this case be reached?
3632 pop @{$self->{open_elements}};
3633 }
3634
3635 pop @{$self->{open_elements}}; # tr
3636 $self->{insertion_mode} = IN_TABLE_BODY_IM;
3637 ## reprocess in the "in table body" insertion mode...
3638 }
3639
3640 ## have an element in table scope
3641 my $i;
3642 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3643 my $node = $self->{open_elements}->[$_];
3644 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
3645 !!!cp ('t254');
3646 $i = $_;
3647 last INSCOPE;
3648 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3649 !!!cp ('t255');
3650 last INSCOPE;
3651 }
3652 } # INSCOPE
3653 unless (defined $i) {
3654 !!!cp ('t256');
3655 !!!parse-error (type => 'unmatched end tag',
3656 text => $token->{tag_name}, token => $token);
3657 ## Ignore the token
3658 !!!nack ('t256.1');
3659 !!!next-token;
3660 next B;
3661 }
3662
3663 ## Clear back to table body context
3664 while (not ($self->{open_elements}->[-1]->[1]
3665 & TABLE_ROWS_SCOPING_EL)) {
3666 !!!cp ('t257');
3667 ## ISSUE: Can this case be reached?
3668 pop @{$self->{open_elements}};
3669 }
3670
3671 pop @{$self->{open_elements}};
3672 $self->{insertion_mode} = IN_TABLE_IM;
3673 !!!nack ('t257.1');
3674 !!!next-token;
3675 next B;
3676 } elsif ({
3677 body => 1, caption => 1, col => 1, colgroup => 1,
3678 html => 1, td => 1, th => 1,
3679 tr => 1, # $self->{insertion_mode} == IN_ROW_IM
3680 tbody => 1, tfoot => 1, thead => 1, # $self->{insertion_mode} == IN_TABLE_IM
3681 }->{$token->{tag_name}}) {
3682 !!!cp ('t258');
3683 !!!parse-error (type => 'unmatched end tag',
3684 text => $token->{tag_name}, token => $token);
3685 ## Ignore the token
3686 !!!nack ('t258.1');
3687 !!!next-token;
3688 next B;
3689 } else {
3690 !!!cp ('t259');
3691 !!!parse-error (type => 'in table:/',
3692 text => $token->{tag_name}, token => $token);
3693
3694 $insert = $insert_to_foster;
3695 #
3696 }
3697 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
3698 unless ($self->{open_elements}->[-1]->[1] == HTML_EL and
3699 @{$self->{open_elements}} == 1) { # redundant, maybe
3700 !!!parse-error (type => 'in body:#eof', token => $token);
3701 !!!cp ('t259.1');
3702 #
3703 } else {
3704 !!!cp ('t259.2');
3705 #
3706 }
3707
3708 ## Stop parsing
3709 last B;
3710 } else {
3711 die "$0: $token->{type}: Unknown token type";
3712 }
3713 } elsif (($self->{insertion_mode} & IM_MASK) == IN_COLUMN_GROUP_IM) {
3714 if ($token->{type} == CHARACTER_TOKEN) {
3715 if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
3716 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3717 unless (length $token->{data}) {
3718 !!!cp ('t260');
3719 !!!next-token;
3720 next B;
3721 }
3722 }
3723
3724 !!!cp ('t261');
3725 #
3726 } elsif ($token->{type} == START_TAG_TOKEN) {
3727 if ($token->{tag_name} eq 'col') {
3728 !!!cp ('t262');
3729 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3730 pop @{$self->{open_elements}};
3731 !!!ack ('t262.1');
3732 !!!next-token;
3733 next B;
3734 } else {
3735 !!!cp ('t263');
3736 #
3737 }
3738 } elsif ($token->{type} == END_TAG_TOKEN) {
3739 if ($token->{tag_name} eq 'colgroup') {
3740 if ($self->{open_elements}->[-1]->[1] == HTML_EL) {
3741 !!!cp ('t264');
3742 !!!parse-error (type => 'unmatched end tag',
3743 text => 'colgroup', token => $token);
3744 ## Ignore the token
3745 !!!next-token;
3746 next B;
3747 } else {
3748 !!!cp ('t265');
3749 pop @{$self->{open_elements}}; # colgroup
3750 $self->{insertion_mode} = IN_TABLE_IM;
3751 !!!next-token;
3752 next B;
3753 }
3754 } elsif ($token->{tag_name} eq 'col') {
3755 !!!cp ('t266');
3756 !!!parse-error (type => 'unmatched end tag',
3757 text => 'col', token => $token);
3758 ## Ignore the token
3759 !!!next-token;
3760 next B;
3761 } else {
3762 !!!cp ('t267');
3763 #
3764 }
3765 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
3766 if ($self->{open_elements}->[-1]->[1] == HTML_EL and
3767 @{$self->{open_elements}} == 1) { # redundant, maybe
3768 !!!cp ('t270.2');
3769 ## Stop parsing.
3770 last B;
3771 } else {
3772 ## NOTE: As if </colgroup>.
3773 !!!cp ('t270.1');
3774 pop @{$self->{open_elements}}; # colgroup
3775 $self->{insertion_mode} = IN_TABLE_IM;
3776 ## Reprocess.
3777 next B;
3778 }
3779 } else {
3780 die "$0: $token->{type}: Unknown token type";
3781 }
3782
3783 ## As if </colgroup>
3784 if ($self->{open_elements}->[-1]->[1] == HTML_EL) {
3785 !!!cp ('t269');
3786 ## TODO: Wrong error type?
3787 !!!parse-error (type => 'unmatched end tag',
3788 text => 'colgroup', token => $token);
3789 ## Ignore the token
3790 !!!nack ('t269.1');
3791 !!!next-token;
3792 next B;
3793 } else {
3794 !!!cp ('t270');
3795 pop @{$self->{open_elements}}; # colgroup
3796 $self->{insertion_mode} = IN_TABLE_IM;
3797 !!!ack-later;
3798 ## reprocess
3799 next B;
3800 }
3801 } elsif ($self->{insertion_mode} & SELECT_IMS) {
3802 if ($token->{type} == CHARACTER_TOKEN) {
3803 !!!cp ('t271');
3804 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3805 !!!next-token;
3806 next B;
3807 } elsif ($token->{type} == START_TAG_TOKEN) {
3808 if ($token->{tag_name} eq 'option') {
3809 if ($self->{open_elements}->[-1]->[1] == OPTION_EL) {
3810 !!!cp ('t272');
3811 ## As if </option>
3812 pop @{$self->{open_elements}};
3813 } else {
3814 !!!cp ('t273');
3815 }
3816
3817 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3818 !!!nack ('t273.1');
3819 !!!next-token;
3820 next B;
3821 } elsif ($token->{tag_name} eq 'optgroup') {
3822 if ($self->{open_elements}->[-1]->[1] == OPTION_EL) {
3823 !!!cp ('t274');
3824 ## As if </option>
3825 pop @{$self->{open_elements}};
3826 } else {
3827 !!!cp ('t275');
3828 }
3829
3830 if ($self->{open_elements}->[-1]->[1] == OPTGROUP_EL) {
3831 !!!cp ('t276');
3832 ## As if </optgroup>
3833 pop @{$self->{open_elements}};
3834 } else {
3835 !!!cp ('t277');
3836 }
3837
3838 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3839 !!!nack ('t277.1');
3840 !!!next-token;
3841 next B;
3842 } elsif ({
3843 select => 1, input => 1, textarea => 1, keygen => 1,
3844 }->{$token->{tag_name}} or
3845 (($self->{insertion_mode} & IM_MASK)
3846 == IN_SELECT_IN_TABLE_IM and
3847 {
3848 caption => 1, table => 1,
3849 tbody => 1, tfoot => 1, thead => 1,
3850 tr => 1, td => 1, th => 1,
3851 }->{$token->{tag_name}})) {
3852
3853 ## 1. Parse error.
3854 if ($token->{tag_name} eq 'select') {
3855 !!!parse-error (type => 'select in select', ## XXX: documentation
3856 token => $token);
3857 } else {
3858 !!!parse-error (type => 'not closed', text => 'select',
3859 token => $token);
3860 }
3861
3862 ## 2./<select>-1. Unless "have an element in table scope" (select):
3863 my $i;
3864 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3865 my $node = $self->{open_elements}->[$_];
3866 if ($node->[1] == SELECT_EL) {
3867 !!!cp ('t278');
3868 $i = $_;
3869 last INSCOPE;
3870 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3871 !!!cp ('t279');
3872 last INSCOPE;
3873 }
3874 } # INSCOPE
3875 unless (defined $i) {
3876 !!!cp ('t280');
3877 if ($token->{tag_name} eq 'select') {
3878 ## NOTE: This error would be raised when
3879 ## |select.innerHTML = '<select>'| is executed; in this
3880 ## case two errors, "select in select" and "unmatched
3881 ## end tags" are reported to the user, the latter might
3882 ## be confusing but this is what the spec requires.
3883 !!!parse-error (type => 'unmatched end tag',
3884 text => 'select',
3885 token => $token);
3886 }
3887 ## Ignore the token.
3888 !!!nack ('t280.1');
3889 !!!next-token;
3890 next B;
3891 }
3892
3893 ## 3. Otherwise, as if there were </select>:
3894
3895 !!!cp ('t281');
3896 splice @{$self->{open_elements}}, $i;
3897
3898 $self->_reset_insertion_mode;
3899
3900 if ($token->{tag_name} eq 'select') {
3901 !!!nack ('t281.2');
3902 !!!next-token;
3903 next B;
3904 } else {
3905 !!!cp ('t281.1');
3906 !!!ack-later;
3907 ## Reprocess the token.
3908 next B;
3909 }
3910 } else {
3911 !!!cp ('t282');
3912 !!!parse-error (type => 'in select',
3913 text => $token->{tag_name}, token => $token);
3914 ## Ignore the token
3915 !!!nack ('t282.1');
3916 !!!next-token;
3917 next B;
3918 }
3919 } elsif ($token->{type} == END_TAG_TOKEN) {
3920 if ($token->{tag_name} eq 'optgroup') {
3921 if ($self->{open_elements}->[-1]->[1] == OPTION_EL and
3922 $self->{open_elements}->[-2]->[1] == OPTGROUP_EL) {
3923 !!!cp ('t283');
3924 ## As if </option>
3925 splice @{$self->{open_elements}}, -2;
3926 } elsif ($self->{open_elements}->[-1]->[1] == OPTGROUP_EL) {
3927 !!!cp ('t284');
3928 pop @{$self->{open_elements}};
3929 } else {
3930 !!!cp ('t285');
3931 !!!parse-error (type => 'unmatched end tag',
3932 text => $token->{tag_name}, token => $token);
3933 ## Ignore the token
3934 }
3935 !!!nack ('t285.1');
3936 !!!next-token;
3937 next B;
3938 } elsif ($token->{tag_name} eq 'option') {
3939 if ($self->{open_elements}->[-1]->[1] == OPTION_EL) {
3940 !!!cp ('t286');
3941 pop @{$self->{open_elements}};
3942 } else {
3943 !!!cp ('t287');
3944 !!!parse-error (type => 'unmatched end tag',
3945 text => $token->{tag_name}, token => $token);
3946 ## Ignore the token
3947 }
3948 !!!nack ('t287.1');
3949 !!!next-token;
3950 next B;
3951 } elsif ($token->{tag_name} eq 'select') {
3952 ## have an element in table scope
3953 my $i;
3954 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3955 my $node = $self->{open_elements}->[$_];
3956 if ($node->[1] == SELECT_EL) {
3957 !!!cp ('t288');
3958 $i = $_;
3959 last INSCOPE;
3960 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3961 !!!cp ('t289');
3962 last INSCOPE;
3963 }
3964 } # INSCOPE
3965 unless (defined $i) {
3966 !!!cp ('t290');
3967 !!!parse-error (type => 'unmatched end tag',
3968 text => $token->{tag_name}, token => $token);
3969 ## Ignore the token
3970 !!!nack ('t290.1');
3971 !!!next-token;
3972 next B;
3973 }
3974
3975 !!!cp ('t291');
3976 splice @{$self->{open_elements}}, $i;
3977
3978 $self->_reset_insertion_mode;
3979
3980 !!!nack ('t291.1');
3981 !!!next-token;
3982 next B;
3983 } elsif (($self->{insertion_mode} & IM_MASK)
3984 == IN_SELECT_IN_TABLE_IM and
3985 {
3986 caption => 1, table => 1, tbody => 1,
3987 tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
3988 }->{$token->{tag_name}}) {
3989 ## TODO: The following is wrong?
3990 !!!parse-error (type => 'unmatched end tag',
3991 text => $token->{tag_name}, token => $token);
3992
3993 ## have an element in table scope
3994 my $i;
3995 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3996 my $node = $self->{open_elements}->[$_];
3997 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
3998 !!!cp ('t292');
3999 $i = $_;
4000 last INSCOPE;
4001 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4002 !!!cp ('t293');
4003 last INSCOPE;
4004 }
4005 } # INSCOPE
4006 unless (defined $i) {
4007 !!!cp ('t294');
4008 ## Ignore the token
4009 !!!nack ('t294.1');
4010 !!!next-token;
4011 next B;
4012 }
4013
4014 ## As if </select>
4015 ## have an element in table scope
4016 undef $i;
4017 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4018 my $node = $self->{open_elements}->[$_];
4019 if ($node->[1] == SELECT_EL) {
4020 !!!cp ('t295');
4021 $i = $_;
4022 last INSCOPE;
4023 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4024 ## ISSUE: Can this state be reached?
4025 !!!cp ('t296');
4026 last INSCOPE;
4027 }
4028 } # INSCOPE
4029 unless (defined $i) {
4030 !!!cp ('t297');
4031 ## TODO: Is the following error type correct?
4032 !!!parse-error (type => 'unmatched end tag',
4033 text => 'select', token => $token);
4034 ## Ignore the </select> token
4035 !!!nack ('t297.1');
4036 !!!next-token; ## TODO: ok?
4037 next B;
4038 }
4039
4040 !!!cp ('t298');
4041 splice @{$self->{open_elements}}, $i;
4042
4043 $self->_reset_insertion_mode;
4044
4045 !!!ack-later;
4046 ## reprocess
4047 next B;
4048 } else {
4049 !!!cp ('t299');
4050 !!!parse-error (type => 'in select:/',
4051 text => $token->{tag_name}, token => $token);
4052 ## Ignore the token
4053 !!!nack ('t299.3');
4054 !!!next-token;
4055 next B;
4056 }
4057 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4058 unless ($self->{open_elements}->[-1]->[1] == HTML_EL and
4059 @{$self->{open_elements}} == 1) { # redundant, maybe
4060 !!!cp ('t299.1');
4061 !!!parse-error (type => 'in body:#eof', token => $token);
4062 } else {
4063 !!!cp ('t299.2');
4064 }
4065
4066 ## Stop parsing.
4067 last B;
4068 } else {
4069 die "$0: $token->{type}: Unknown token type";
4070 }
4071 } elsif ($self->{insertion_mode} & BODY_AFTER_IMS) {
4072 if ($token->{type} == CHARACTER_TOKEN) {
4073 if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
4074 my $data = $1;
4075 ## As if in body
4076 $reconstruct_active_formatting_elements->($insert_to_current);
4077 ## Append the saved copy; do not rely on |$1| after calling other code.
4078 $self->{open_elements}->[-1]->[0]->manakai_append_text ($data);
4079
4080 unless (length $token->{data}) {
4081 !!!cp ('t300');
4082 !!!next-token;
4083 next B;
4084 }
4085 }
4086
4087 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
4088 !!!cp ('t301');
4089 !!!parse-error (type => 'after html:#text', token => $token);
4090 #
4091 } else {
4092 !!!cp ('t302');
4093 ## "after body" insertion mode
4094 !!!parse-error (type => 'after body:#text', token => $token);
4095 #
4096 }
4097
4098 $self->{insertion_mode} = IN_BODY_IM;
4099 ## reprocess
4100 next B;
4101 } elsif ($token->{type} == START_TAG_TOKEN) {
4102 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
4103 !!!cp ('t303');
4104 !!!parse-error (type => 'after html',
4105 text => $token->{tag_name}, token => $token);
4106 #
4107 } else {
4108 !!!cp ('t304');
4109 ## "after body" insertion mode
4110 !!!parse-error (type => 'after body',
4111 text => $token->{tag_name}, token => $token);
4112 #
4113 }
4114
4115 $self->{insertion_mode} = IN_BODY_IM;
4116 !!!ack-later;
4117 ## reprocess
4118 next B;
4119 } elsif ($token->{type} == END_TAG_TOKEN) {
4120 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
4121 !!!cp ('t305');
4122 !!!parse-error (type => 'after html:/',
4123 text => $token->{tag_name}, token => $token);
4124
4125 $self->{insertion_mode} = IN_BODY_IM;
4126 ## Reprocess.
4127 next B;
4128 } else {
4129 !!!cp ('t306');
4130 }
4131
4132 ## "after body" insertion mode
4133 if ($token->{tag_name} eq 'html') {
4134 if (defined $self->{inner_html_node}) {
4135 !!!cp ('t307');
4136 !!!parse-error (type => 'unmatched end tag',
4137 text => 'html', token => $token);
4138 ## Ignore the token
4139 !!!next-token;
4140 next B;
4141 } else {
4142 !!!cp ('t308');
4143 $self->{insertion_mode} = AFTER_HTML_BODY_IM;
4144 !!!next-token;
4145 next B;
4146 }
4147 } else {
4148 !!!cp ('t309');
4149 !!!parse-error (type => 'after body:/',
4150 text => $token->{tag_name}, token => $token);
4151
4152 $self->{insertion_mode} = IN_BODY_IM;
4153 ## reprocess
4154 next B;
4155 }
4156 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4157 !!!cp ('t309.2');
4158 ## Stop parsing
4159 last B;
4160 } else {
4161 die "$0: $token->{type}: Unknown token type";
4162 }
4163 } elsif ($self->{insertion_mode} & FRAME_IMS) {
4164 if ($token->{type} == CHARACTER_TOKEN) {
4165 if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
4166 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4167
4168 unless (length $token->{data}) {
4169 !!!cp ('t310');
4170 !!!next-token;
4171 next B;
4172 }
4173 }
4174
4175 if ($token->{data} =~ s/^[^\x09\x0A\x0C\x20]+//) {
4176 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
4177 !!!cp ('t311');
4178 !!!parse-error (type => 'in frameset:#text', token => $token);
4179 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
4180 !!!cp ('t312');
4181 !!!parse-error (type => 'after frameset:#text', token => $token);
4182 } else { # "after after frameset"
4183 !!!cp ('t313');
4184 !!!parse-error (type => 'after html:#text', token => $token);
4185 }
4186
4187 ## Ignore the token.
4188 if (length $token->{data}) {
4189 !!!cp ('t314');
4190 ## reprocess the rest of characters
4191 } else {
4192 !!!cp ('t315');
4193 !!!next-token;
4194 }
4195 next B;
4196 }
4197
4198 die qq[$0: Character "$token->{data}"];
4199 } elsif ($token->{type} == START_TAG_TOKEN) {
4200 if ($token->{tag_name} eq 'frameset' and
4201 $self->{insertion_mode} == IN_FRAMESET_IM) {
4202 !!!cp ('t318');
4203 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4204 !!!nack ('t318.1');
4205 !!!next-token;
4206 next B;
4207 } elsif ($token->{tag_name} eq 'frame' and
4208 $self->{insertion_mode} == IN_FRAMESET_IM) {
4209 !!!cp ('t319');
4210 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4211 pop @{$self->{open_elements}};
4212 !!!ack ('t319.1');
4213 !!!next-token;
4214 next B;
4215 } elsif ($token->{tag_name} eq 'noframes') {
4216 !!!cp ('t320');
4217 ## NOTE: As if in head.
4218 $parse_rcdata->(CDATA_CONTENT_MODEL);
4219 next B;
4220
4221 ## NOTE: |<!DOCTYPE HTML><frameset></frameset></html><noframes></noframes>|
4222 ## has no parse error.
4223 } else {
4224 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
4225 !!!cp ('t321');
4226 !!!parse-error (type => 'in frameset',
4227 text => $token->{tag_name}, token => $token);
4228 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
4229 !!!cp ('t322');
4230 !!!parse-error (type => 'after frameset',
4231 text => $token->{tag_name}, token => $token);
4232 } else { # "after after frameset"
4233 !!!cp ('t322.2');
4234 !!!parse-error (type => 'after after frameset',
4235 text => $token->{tag_name}, token => $token);
4236 }
4237 ## Ignore the token
4238 !!!nack ('t322.1');
4239 !!!next-token;
4240 next B;
4241 }
4242 } elsif ($token->{type} == END_TAG_TOKEN) {
4243 if ($token->{tag_name} eq 'frameset' and
4244 $self->{insertion_mode} == IN_FRAMESET_IM) {
4245 if ($self->{open_elements}->[-1]->[1] == HTML_EL and
4246 @{$self->{open_elements}} == 1) {
4247 !!!cp ('t325');
4248 !!!parse-error (type => 'unmatched end tag',
4249 text => $token->{tag_name}, token => $token);
4250 ## Ignore the token
4251 !!!next-token;
4252 } else {
4253 !!!cp ('t326');
4254 pop @{$self->{open_elements}};
4255 !!!next-token;
4256 }
4257
4258 if (not defined $self->{inner_html_node} and
4259 not ($self->{open_elements}->[-1]->[1] == FRAMESET_EL)) {
4260 !!!cp ('t327');
4261 $self->{insertion_mode} = AFTER_FRAMESET_IM;
4262 } else {
4263 !!!cp ('t328');
4264 }
4265 next B;
4266 } elsif ($token->{tag_name} eq 'html' and
4267 $self->{insertion_mode} == AFTER_FRAMESET_IM) {
4268 !!!cp ('t329');
4269 $self->{insertion_mode} = AFTER_HTML_FRAMESET_IM;
4270 !!!next-token;
4271 next B;
4272 } else {
4273 if ($self->{insertion_mode} == IN_FRAMESET_IM) { # pick the error type by insertion mode
4274 !!!cp ('t330');
4275 !!!parse-error (type => 'in frameset:/',
4276 text => $token->{tag_name}, token => $token);
4277 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
4278 !!!cp ('t330.1');
4279 !!!parse-error (type => 'after frameset:/',
4280 text => $token->{tag_name}, token => $token);
4281 } else { # "after after frameset"
4282 !!!cp ('t331');
4283 !!!parse-error (type => 'after after frameset:/',
4284 text => $token->{tag_name}, token => $token);
4285 }
4286 ## Ignore the token
4287 !!!next-token;
4288 next B;
4289 }
4290 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4291 unless ($self->{open_elements}->[-1]->[1] == HTML_EL and
4292 @{$self->{open_elements}} == 1) { # redundant, maybe
4293 !!!cp ('t331.1');
4294 !!!parse-error (type => 'in body:#eof', token => $token);
4295 } else {
4296 !!!cp ('t331.2');
4297 }
4298
4299 ## Stop parsing
4300 last B;
4301 } else {
4302 die "$0: $token->{type}: Unknown token type";
4303 }
4304 } else {
4305 die "$0: $self->{insertion_mode}: Unknown insertion mode";
4306 }
4307
4308 ## "in body" insertion mode
4309 if ($token->{type} == START_TAG_TOKEN) {
4310 if ($token->{tag_name} eq 'script') {
4311 !!!cp ('t332');
4312 ## NOTE: This is an "as if in head" code clone
4313 $script_start_tag->();
4314 next B;
4315 } elsif ($token->{tag_name} eq 'style') {
4316 !!!cp ('t333');
4317 ## NOTE: This is an "as if in head" code clone
4318 $parse_rcdata->(CDATA_CONTENT_MODEL);
4319 next B;
4320 } elsif ({
4321 base => 1, command => 1, eventsource => 1, link => 1,
4322 }->{$token->{tag_name}}) {
4323 !!!cp ('t334');
4324 ## NOTE: This is an "as if in head" code clone, only "-t" differs
4325 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4326 pop @{$self->{open_elements}};
4327 !!!ack ('t334.1');
4328 !!!next-token;
4329 next B;
4330 } elsif ($token->{tag_name} eq 'meta') {
4331 ## NOTE: This is an "as if in head" code clone, only "-t" differs
4332 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4333 my $meta_el = pop @{$self->{open_elements}};
4334
4335 unless ($self->{confident}) {
4336 if ($token->{attributes}->{charset}) {
4337 !!!cp ('t335');
4338 ## NOTE: Whether the encoding is supported or not is handled
4339 ## in the {change_encoding} callback.
4340 $self->{change_encoding}
4341 ->($self, $token->{attributes}->{charset}->{value}, $token);
4342
4343 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
4344 ->set_user_data (manakai_has_reference =>
4345 $token->{attributes}->{charset}
4346 ->{has_reference});
4347 } elsif ($token->{attributes}->{content}) {
4348 if ($token->{attributes}->{content}->{value}
4349 =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
4350 [\x09\x0A\x0C\x0D\x20]*=
4351 [\x09\x0A\x0C\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
4352 ([^"'\x09\x0A\x0C\x0D\x20][^\x09\x0A\x0C\x0D\x20\x3B]*))
4353 /x) {
4354 !!!cp ('t336');
4355 ## NOTE: Whether the encoding is supported or not is handled
4356 ## in the {change_encoding} callback.
4357 $self->{change_encoding}
4358 ->($self, defined $1 ? $1 : defined $2 ? $2 : $3, $token);
4359 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
4360 ->set_user_data (manakai_has_reference =>
4361 $token->{attributes}->{content}
4362 ->{has_reference});
4363 }
4364 }
4365 } else {
4366 if ($token->{attributes}->{charset}) {
4367 !!!cp ('t337');
4368 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
4369 ->set_user_data (manakai_has_reference =>
4370 $token->{attributes}->{charset}
4371 ->{has_reference});
4372 }
4373 if ($token->{attributes}->{content}) {
4374 !!!cp ('t338');
4375 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
4376 ->set_user_data (manakai_has_reference =>
4377 $token->{attributes}->{content}
4378 ->{has_reference});
4379 }
4380 }
4381
4382 !!!ack ('t338.1');
4383 !!!next-token;
4384 next B;
4385 } elsif ($token->{tag_name} eq 'title') {
4386 !!!cp ('t341');
4387 ## NOTE: This is an "as if in head" code clone
4388 $parse_rcdata->(RCDATA_CONTENT_MODEL);
4389 next B;
4390 } elsif ($token->{tag_name} eq 'body') {
4391 !!!parse-error (type => 'in body', text => 'body', token => $token);
4392
4393 if (@{$self->{open_elements}} == 1 or
4394 not ($self->{open_elements}->[1]->[1] == BODY_EL)) {
4395 !!!cp ('t342');
4396 ## Ignore the token
4397 } else {
4398 my $body_el = $self->{open_elements}->[1]->[0];
4399 for my $attr_name (keys %{$token->{attributes}}) {
4400 unless ($body_el->has_attribute_ns (undef, $attr_name)) {
4401 !!!cp ('t343');
4402 $body_el->set_attribute_ns
4403 (undef, [undef, $attr_name],
4404 $token->{attributes}->{$attr_name}->{value});
4405 }
4406 }
4407 }
4408 !!!nack ('t343.1');
4409 !!!next-token;
4410 next B;
4411 } elsif ({
4412 ## NOTE: Start tags for non-phrasing flow content elements
4413
4414 ## NOTE: The normal one
4415 address => 1, article => 1, aside => 1, blockquote => 1,
4416 center => 1, datagrid => 1, details => 1, dialog => 1,
4417 dir => 1, div => 1, dl => 1, fieldset => 1, figure => 1,
4418 footer => 1, h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1,
4419 h6 => 1, header => 1, menu => 1, nav => 1, ol => 1, p => 1,
4420 section => 1, ul => 1,
4421 ## NOTE: As normal, but drops leading newline
4422 pre => 1, listing => 1,
4423 ## NOTE: As normal, but interacts with the form element pointer
4424 form => 1,
4425
4426 table => 1,
4427 hr => 1,
4428 }->{$token->{tag_name}}) {
4429 if ($token->{tag_name} eq 'form' and defined $self->{form_element}) {
4430 !!!cp ('t350');
4431 !!!parse-error (type => 'in form:form', token => $token);
4432 ## Ignore the token
4433 !!!nack ('t350.1');
4434 !!!next-token;
4435 next B;
4436 }
4437
4438 if ($token->{tag_name} ne 'table' or # The Hixie Quirk
4439 $self->{document}->manakai_compat_mode ne 'quirks') {
4440 ## has a p element in scope
4441 INSCOPE: for (reverse @{$self->{open_elements}}) {
4442 if ($_->[1] == P_EL) {
4443 !!!cp ('t344');
4444 !!!back-token; # <form>
4445 $token = {type => END_TAG_TOKEN, tag_name => 'p',
4446 line => $token->{line}, column => $token->{column}};
4447 next B;
4448 } elsif ($_->[1] & SCOPING_EL) {
4449 !!!cp ('t345');
4450 last INSCOPE;
4451 }
4452 } # INSCOPE
4453 }
4454
4455 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4456 if ($token->{tag_name} eq 'pre' or $token->{tag_name} eq 'listing') {
4457 !!!nack ('t346.1');
4458 !!!next-token;
4459 if ($token->{type} == CHARACTER_TOKEN) {
4460 $token->{data} =~ s/^\x0A//;
4461 unless (length $token->{data}) {
4462 !!!cp ('t346');
4463 !!!next-token;
4464 } else {
4465 !!!cp ('t349');
4466 }
4467 } else {
4468 !!!cp ('t348');
4469 }
4470 } elsif ($token->{tag_name} eq 'form') {
4471 !!!cp ('t347.1');
4472 $self->{form_element} = $self->{open_elements}->[-1]->[0];
4473
4474 !!!nack ('t347.2');
4475 !!!next-token;
4476 } elsif ($token->{tag_name} eq 'table') {
4477 !!!cp ('t382');
4478 push @{$open_tables}, [$self->{open_elements}->[-1]->[0]];
4479
4480 $self->{insertion_mode} = IN_TABLE_IM;
4481
4482 !!!nack ('t382.1');
4483 !!!next-token;
4484 } elsif ($token->{tag_name} eq 'hr') {
4485 !!!cp ('t386');
4486 pop @{$self->{open_elements}};
4487
4488 !!!nack ('t386.1');
4489 !!!next-token;
4490 } else {
4491 !!!nack ('t347.1');
4492 !!!next-token;
4493 }
4494 next B;
4495 } elsif ($token->{tag_name} eq 'li') {
4496 ## NOTE: As normal, but imply </li> when there's another <li> ...
4497
4498 ## NOTE: Special, Scope (<li><foo><li> == <li><foo><li/></foo></li>)
4499 ## Interpreted as <li><foo/></li><li/> (non-conforming)
4500 ## blockquote (O9.27), center (O), dd (Fx3, O, S3.1.2, IE7),
4501 ## dt (Fx, O, S, IE), dl (O), fieldset (O, S, IE), form (Fx, O, S),
4502 ## hn (O), pre (O), applet (O, S), button (O, S), marquee (Fx, O, S),
4503 ## object (Fx)
4504 ## Generate non-tree (non-conforming)
4505 ## basefont (IE7 (where basefont is non-void)), center (IE),
4506 ## form (IE), hn (IE)
4507 ## address, div, p (<li><foo><li> == <li><foo/></li><li/>)
4508 ## Interpreted as <li><foo><li/></foo></li> (non-conforming)
4509 ## div (Fx, S)
4510
4511 my $non_optional;
4512 my $i = -1;
4513
4514 ## 1.
4515 for my $node (reverse @{$self->{open_elements}}) {
4516 if ($node->[1] == LI_EL) {
4517 ## 2. (a) As if </li>
4518 {
4519 ## If no </li> - not applied
4520 #
4521
4522 ## Otherwise
4523
4524 ## 1. generate implied end tags, except for </li>
4525 #
4526
4527 ## 2. If current node != "li", parse error
4528 if ($non_optional) {
4529 !!!parse-error (type => 'not closed',
4530 text => $non_optional->[0]->manakai_local_name,
4531 token => $token);
4532 !!!cp ('t355');
4533 } else {
4534 !!!cp ('t356');
4535 }
4536
4537 ## 3. Pop
4538 splice @{$self->{open_elements}}, $i;
4539 }
4540
4541 last; ## 2. (b) goto 5.
4542 } elsif (
4543 ## NOTE: not "formatting" and not "phrasing"
4544 ($node->[1] & SPECIAL_EL or
4545 $node->[1] & SCOPING_EL) and
4546 ## NOTE: "li", "dt", and "dd" are in |SPECIAL_EL|.
4547 (not $node->[1] & ADDRESS_DIV_P_EL)
4548 ) {
4549 ## 3.
4550 !!!cp ('t357');
4551 last; ## goto 5.
4552 } elsif ($node->[1] & END_TAG_OPTIONAL_EL) {
4553 !!!cp ('t358');
4554 #
4555 } else {
4556 !!!cp ('t359');
4557 $non_optional ||= $node;
4558 #
4559 }
4560 ## 4.
4561 ## goto 2.
4562 $i--;
4563 }
4564
4565 ## 5. (a) has a |p| element in scope
4566 INSCOPE: for (reverse @{$self->{open_elements}}) {
4567 if ($_->[1] == P_EL) {
4568 !!!cp ('t353');
4569
4570 ## NOTE: |<p><li>|, for example.
4571
4572 !!!back-token; # <x>
4573 $token = {type => END_TAG_TOKEN, tag_name => 'p',
4574 line => $token->{line}, column => $token->{column}};
4575 next B;
4576 } elsif ($_->[1] & SCOPING_EL) {
4577 !!!cp ('t354');
4578 last INSCOPE;
4579 }
4580 } # INSCOPE
4581
4582 ## 5. (b) insert
4583 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4584 !!!nack ('t359.1');
4585 !!!next-token;
4586 next B;
4587 } elsif ($token->{tag_name} eq 'dt' or
4588 $token->{tag_name} eq 'dd') {
4589 ## NOTE: As normal, but imply </dt> or </dd> when ...
4590
4591 my $non_optional;
4592 my $i = -1;
4593
4594 ## 1. Walk the open elements from the last entry backwards.
4595 for my $node (reverse @{$self->{open_elements}}) {
4596 if ($node->[1] == DTDD_EL) {
4597 ## 2. (a) As if </dt> or </dd>
4598 {
4599 ## If no </dt> or </dd> - not applied
4600 #
4601
4602 ## Otherwise
4603
4604 ## 1. generate implied end tags, except for </dt> or </dd>
4605 #
4606
4607 ## 2. If current node != "dt"|"dd", parse error
4608 if ($non_optional) {
4609 !!!parse-error (type => 'not closed',
4610 text => $non_optional->[0]->manakai_local_name,
4611 token => $token);
4612 !!!cp ('t355.1');
4613 } else {
4614 !!!cp ('t356.1');
4615 }
4616
4617 ## 3. Pop (removes the matched entry and every entry after it)
4618 splice @{$self->{open_elements}}, $i;
4619 }
4620
4621 last; ## 2. (b) goto 5.
4622 } elsif (
4623 ## NOTE: not "formatting" and not "phrasing"
4624 ($node->[1] & SPECIAL_EL or
4625 $node->[1] & SCOPING_EL) and
4626 ## NOTE: "li", "dt", and "dd" are in |SPECIAL_EL|.
4627
4628 (not $node->[1] & ADDRESS_DIV_P_EL)
4629 ) {
4630 ## 3.
4631 !!!cp ('t357.1');
4632 last; ## goto 5.
4633 } elsif ($node->[1] & END_TAG_OPTIONAL_EL) {
4634 !!!cp ('t358.1');
4635 #
4636 } else {
4637 !!!cp ('t359.1');
4638 $non_optional ||= $node; # keep only the first such node for the error report
4639 #
4640 }
4641 ## 4.
4642 ## goto 2.
4643 $i--; # negative index of the node examined on the next iteration
4644 }
4645
4646 ## 5. (a) has a |p| element in scope
4647 INSCOPE: for (reverse @{$self->{open_elements}}) {
4648 if ($_->[1] == P_EL) {
4649 !!!cp ('t353.1');
4650 !!!back-token; # <x>
4651 $token = {type => END_TAG_TOKEN, tag_name => 'p',
4652 line => $token->{line}, column => $token->{column}};
4653 next B;
4654 } elsif ($_->[1] & SCOPING_EL) {
4655 !!!cp ('t354.1');
4656 last INSCOPE;
4657 }
4658 } # INSCOPE
4659
4660 ## 5. (b) insert
4661 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4662 !!!nack ('t359.2');
4663 !!!next-token;
4664 next B;
4665 } elsif ($token->{tag_name} eq 'plaintext') {
4666 ## NOTE: As normal, but effectively ends parsing
4667
4668 ## has a p element in scope
4669 INSCOPE: for (reverse @{$self->{open_elements}}) {
4670 if ($_->[1] == P_EL) {
4671 !!!cp ('t367');
4672 !!!back-token; # <plaintext>
4673 $token = {type => END_TAG_TOKEN, tag_name => 'p',
4674 line => $token->{line}, column => $token->{column}};
4675 next B;
4676 } elsif ($_->[1] & SCOPING_EL) {
4677 !!!cp ('t368');
4678 last INSCOPE;
4679 }
4680 } # INSCOPE
4681
4682 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4683
4684 $self->{content_model} = PLAINTEXT_CONTENT_MODEL;
4685
4686 !!!nack ('t368.1');
4687 !!!next-token;
4688 next B;
4689 } elsif ($token->{tag_name} eq 'a') {
4690 AFE: for my $i (reverse 0..$#$active_formatting_elements) {
4691 my $node = $active_formatting_elements->[$i];
4692 if ($node->[1] == A_EL) {
4693 !!!cp ('t371');
4694 !!!parse-error (type => 'in a:a', token => $token);
4695
4696 !!!back-token; # <a>
4697 $token = {type => END_TAG_TOKEN, tag_name => 'a',
4698 line => $token->{line}, column => $token->{column}};
4699 $formatting_end_tag->($token);
4700
4701 AFE2: for (reverse 0..$#$active_formatting_elements) {
4702 if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
4703 !!!cp ('t372');
4704 splice @$active_formatting_elements, $_, 1;
4705 last AFE2;
4706 }
4707 } # AFE2
4708 OE: for (reverse 0..$#{$self->{open_elements}}) {
4709 if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
4710 !!!cp ('t373');
4711 splice @{$self->{open_elements}}, $_, 1;
4712 last OE;
4713 }
4714 } # OE
4715 last AFE;
4716 } elsif ($node->[0] eq '#marker') {
4717 !!!cp ('t374');
4718 last AFE;
4719 }
4720 } # AFE
4721
4722 $reconstruct_active_formatting_elements->($insert_to_current);
4723
4724 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4725 push @$active_formatting_elements, $self->{open_elements}->[-1];
4726
4727 !!!nack ('t374.1');
4728 !!!next-token;
4729 next B;
4730 } elsif ($token->{tag_name} eq 'nobr') {
4731 $reconstruct_active_formatting_elements->($insert_to_current);
4732
4733 ## has a |nobr| element in scope
4734 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4735 my $node = $self->{open_elements}->[$_];
4736 if ($node->[1] == NOBR_EL) {
4737 !!!cp ('t376');
4738 !!!parse-error (type => 'in nobr:nobr', token => $token);
4739 !!!back-token; # <nobr>
4740 $token = {type => END_TAG_TOKEN, tag_name => 'nobr',
4741 line => $token->{line}, column => $token->{column}};
4742 next B;
4743 } elsif ($node->[1] & SCOPING_EL) {
4744 !!!cp ('t377');
4745 last INSCOPE;
4746 }
4747 } # INSCOPE
4748
4749 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4750 push @$active_formatting_elements, $self->{open_elements}->[-1];
4751
4752 !!!nack ('t377.1');
4753 !!!next-token;
4754 next B;
4755 } elsif ($token->{tag_name} eq 'button') {
4756 ## has a button element in scope
4757 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4758 my $node = $self->{open_elements}->[$_];
4759 if ($node->[1] == BUTTON_EL) {
4760 !!!cp ('t378');
4761 !!!parse-error (type => 'in button:button', token => $token);
4762 !!!back-token; # <button>
4763 $token = {type => END_TAG_TOKEN, tag_name => 'button',
4764 line => $token->{line}, column => $token->{column}};
4765 next B;
4766 } elsif ($node->[1] & SCOPING_EL) {
4767 !!!cp ('t379');
4768 last INSCOPE;
4769 }
4770 } # INSCOPE
4771
4772 $reconstruct_active_formatting_elements->($insert_to_current);
4773
4774 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4775
4776 ## TODO: associate with $self->{form_element} if defined
4777
4778 push @$active_formatting_elements, ['#marker', ''];
4779
4780 !!!nack ('t379.1');
4781 !!!next-token;
4782 next B;
4783 } elsif ({
4784 xmp => 1,
4785 iframe => 1,
4786 noembed => 1,
4787 noframes => 1, ## NOTE: This is an "as if in head" code clone.
4788 noscript => 0, ## TODO: 1 if scripting is enabled
4789 }->{$token->{tag_name}}) {
4790 if ($token->{tag_name} eq 'xmp') {
4791 !!!cp ('t381');
4792 $reconstruct_active_formatting_elements->($insert_to_current);
4793 } else {
4794 !!!cp ('t399');
4795 }
4796 ## NOTE: There is an "as if in body" code clone.
4797 $parse_rcdata->(CDATA_CONTENT_MODEL);
4798 next B;
4799 } elsif ($token->{tag_name} eq 'isindex') {
4800 !!!parse-error (type => 'isindex', token => $token);
4801
4802 if (defined $self->{form_element}) {
4803 !!!cp ('t389');
4804 ## Ignore the token
4805 !!!nack ('t389'); ## NOTE: Not acknowledged.
4806 !!!next-token;
4807 next B;
4808 } else {
4809 !!!ack ('t391.1');
4810
4811 my $at = $token->{attributes};
4812 my $form_attrs;
4813 $form_attrs->{action} = $at->{action} if $at->{action};
4814 my $prompt_attr = $at->{prompt};
4815 $at->{name} = {name => 'name', value => 'isindex'};
4816 delete $at->{action};
4817 delete $at->{prompt};
4818 my @tokens = (
4819 {type => START_TAG_TOKEN, tag_name => 'form',
4820 attributes => $form_attrs,
4821 line => $token->{line}, column => $token->{column}},
4822 {type => START_TAG_TOKEN, tag_name => 'hr',
4823 line => $token->{line}, column => $token->{column}},
4824 {type => START_TAG_TOKEN, tag_name => 'label',
4825 line => $token->{line}, column => $token->{column}},
4826 );
4827 if ($prompt_attr) {
4828 !!!cp ('t390');
4829 push @tokens, {type => CHARACTER_TOKEN, data => $prompt_attr->{value},
4830 #line => $token->{line}, column => $token->{column},
4831 };
4832 } else {
4833 !!!cp ('t391');
4834 push @tokens, {type => CHARACTER_TOKEN,
4835 data => 'This is a searchable index. Insert your search keywords here: ',
4836 #line => $token->{line}, column => $token->{column},
4837 }; # SHOULD
4838 ## TODO: make this configurable
4839 }
4840 push @tokens,
4841 {type => START_TAG_TOKEN, tag_name => 'input', attributes => $at,
4842 line => $token->{line}, column => $token->{column}},
4843 #{type => CHARACTER_TOKEN, data => ''}, # SHOULD
4844 {type => END_TAG_TOKEN, tag_name => 'label',
4845 line => $token->{line}, column => $token->{column}},
4846 {type => START_TAG_TOKEN, tag_name => 'hr',
4847 line => $token->{line}, column => $token->{column}},
4848 {type => END_TAG_TOKEN, tag_name => 'form',
4849 line => $token->{line}, column => $token->{column}};
4850 !!!back-token (@tokens);
4851 !!!next-token;
4852 next B;
4853 }
4854 } elsif ($token->{tag_name} eq 'textarea') {
4855 ## 1. Insert
4856 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4857
4858 ## Step 2 # XXX
4859 ## TODO: $self->{form_element} if defined
4860
4861 ## 2. Drop U+000A LINE FEED
4862 $self->{ignore_newline} = 1;
4863
4864 ## 3. RCDATA
4865 $self->{content_model} = RCDATA_CONTENT_MODEL;
4866 delete $self->{escape}; # MUST
4867
4868 ## 4., 6. Insertion mode
4869 $self->{insertion_mode} |= IN_CDATA_RCDATA_IM;
4870
4871 ## XXX: 5. frameset-ok flag
4872
4873 !!!nack ('t392.1');
4874 !!!next-token;
4875 next B;
4876 } elsif ($token->{tag_name} eq 'optgroup' or
4877 $token->{tag_name} eq 'option') {
4878 ## has an |option| element in scope
4879 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4880 my $node = $self->{open_elements}->[$_];
4881 if ($node->[1] == OPTION_EL) {
4882 !!!cp ('t397.1');
4883 ## NOTE: As if </option>
4884 !!!back-token; # <option> or <optgroup>
4885 $token = {type => END_TAG_TOKEN, tag_name => 'option',
4886 line => $token->{line}, column => $token->{column}};
4887 next B;
4888 } elsif ($node->[1] & SCOPING_EL) {
4889 !!!cp ('t397.2');
4890 last INSCOPE;
4891 }
4892 } # INSCOPE
4893
4894 $reconstruct_active_formatting_elements->($insert_to_current);
4895
4896 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4897
4898 !!!nack ('t397.3');
4899 !!!next-token;
4900 redo B;
4901 } elsif ($token->{tag_name} eq 'rt' or
4902 $token->{tag_name} eq 'rp') {
4903 ## has a |ruby| element in scope
4904 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4905 my $node = $self->{open_elements}->[$_];
4906 if ($node->[1] == RUBY_EL) {
4907 !!!cp ('t398.1');
4908 ## generate implied end tags
4909 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
4910 !!!cp ('t398.2');
4911 pop @{$self->{open_elements}};
4912 }
4913 unless ($self->{open_elements}->[-1]->[1] == RUBY_EL) {
4914 !!!cp ('t398.3');
4915 !!!parse-error (type => 'not closed',
4916 text => $self->{open_elements}->[-1]->[0]
4917 ->manakai_local_name,
4918 token => $token);
4919 pop @{$self->{open_elements}}
4920 while not $self->{open_elements}->[-1]->[1] == RUBY_EL;
4921 }
4922 last INSCOPE;
4923 } elsif ($node->[1] & SCOPING_EL) {
4924 !!!cp ('t398.4');
4925 last INSCOPE;
4926 }
4927 } # INSCOPE
4928
4929 ## TODO: <non-ruby><rt> is not allowed.
4930
4931 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4932
4933 !!!nack ('t398.5');
4934 !!!next-token;
4935 redo B;
4936 } elsif ($token->{tag_name} eq 'math' or
4937 $token->{tag_name} eq 'svg') {
4938 $reconstruct_active_formatting_elements->($insert_to_current);
4939
4940 ## "Adjust MathML attributes" ('math' only) - done in insert-element-f
4941
4942 ## "adjust SVG attributes" ('svg' only) - done in insert-element-f
4943
4944 ## "adjust foreign attributes" - done in insert-element-f
4945
4946 !!!insert-element-f ($token->{tag_name} eq 'math' ? $MML_NS : $SVG_NS, $token->{tag_name}, $token->{attributes}, $token);
4947
4948 if ($self->{self_closing}) {
4949 pop @{$self->{open_elements}};
4950 !!!ack ('t398.6');
4951 } else {
4952 !!!cp ('t398.7');
4953 $self->{insertion_mode} |= IN_FOREIGN_CONTENT_IM;
4954 ## NOTE: |<body><math><mi><svg>| -> "in foreign content" insertion
4955 ## mode, "in body" (not "in foreign content") secondary insertion
4956 ## mode, maybe.
4957 }
4958
4959 !!!next-token;
4960 next B;
4961 } elsif ({
4962 caption => 1, col => 1, colgroup => 1, frame => 1,
4963 frameset => 1, head => 1,
4964 tbody => 1, td => 1, tfoot => 1, th => 1,
4965 thead => 1, tr => 1,
4966 }->{$token->{tag_name}}) {
4967 !!!cp ('t401');
4968 !!!parse-error (type => 'in body',
4969 text => $token->{tag_name}, token => $token);
4970 ## Ignore the token
4971 !!!nack ('t401.1'); ## NOTE: |<col/>| or |<frame/>| here is an error.
4972 !!!next-token;
4973 next B;
4974 } elsif ($token->{tag_name} eq 'param' or
4975 $token->{tag_name} eq 'source') {
4976 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4977 pop @{$self->{open_elements}};
4978
4979 !!!ack ('t398.5');
4980 !!!next-token;
4981 redo B;
4982 } else {
4983 if ($token->{tag_name} eq 'image') {
4984 !!!cp ('t384');
4985 !!!parse-error (type => 'image', token => $token);
4986 $token->{tag_name} = 'img';
4987 } else {
4988 !!!cp ('t385');
4989 }
4990
4991 ## NOTE: There is an "as if <br>" code clone.
4992 $reconstruct_active_formatting_elements->($insert_to_current);
4993
4994 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4995
4996 if ({
4997 applet => 1, marquee => 1, object => 1,
4998 }->{$token->{tag_name}}) {
4999 !!!cp ('t380');
5000 push @$active_formatting_elements, ['#marker', ''];
5001 !!!nack ('t380.1');
5002 } elsif ({
5003 b => 1, big => 1, em => 1, font => 1, i => 1,
5004 s => 1, small => 1, strike => 1,
5005 strong => 1, tt => 1, u => 1,
5006 }->{$token->{tag_name}}) {
5007 !!!cp ('t375');
5008 push @$active_formatting_elements, $self->{open_elements}->[-1];
5009 !!!nack ('t375.1');
5010 } elsif ($token->{tag_name} eq 'input') {
5011 !!!cp ('t388');
5012 ## TODO: associate with $self->{form_element} if defined
5013 pop @{$self->{open_elements}};
5014 !!!ack ('t388.2');
5015 } elsif ({
5016 area => 1, basefont => 1, bgsound => 1, br => 1,
5017 embed => 1, img => 1, spacer => 1, wbr => 1,
5018 }->{$token->{tag_name}}) {
5019 !!!cp ('t388.1');
5020 pop @{$self->{open_elements}};
5021 !!!ack ('t388.3');
5022 } elsif ($token->{tag_name} eq 'select') {
5023 ## TODO: associate with $self->{form_element} if defined
5024
5025 if ($self->{insertion_mode} & TABLE_IMS or
5026 $self->{insertion_mode} & BODY_TABLE_IMS or
5027 ($self->{insertion_mode} & IM_MASK) == IN_COLUMN_GROUP_IM) {
5028 !!!cp ('t400.1');
5029 $self->{insertion_mode} = IN_SELECT_IN_TABLE_IM;
5030 } else {
5031 !!!cp ('t400.2');
5032 $self->{insertion_mode} = IN_SELECT_IM;
5033 }
5034 !!!nack ('t400.3');
5035 } else {
5036 !!!nack ('t402');
5037 }
5038
5039 !!!next-token;
5040 next B;
5041 }
5042 } elsif ($token->{type} == END_TAG_TOKEN) {
5043 if ($token->{tag_name} eq 'body') {
5044 ## has a |body| element in scope
5045 my $i;
5046 INSCOPE: {
5047 for (reverse @{$self->{open_elements}}) {
5048 if ($_->[1] == BODY_EL) {
5049 !!!cp ('t405');
5050 $i = $_;
5051 last INSCOPE;
5052 } elsif ($_->[1] & SCOPING_EL) {
5053 !!!cp ('t405.1');
5054 last;
5055 }
5056 }
5057
5058 ## NOTE: |<marquee></body>|, |<svg><foreignobject></body>|
5059
5060 !!!parse-error (type => 'unmatched end tag',
5061 text => $token->{tag_name}, token => $token);
5062 ## NOTE: Ignore the token.
5063 !!!next-token;
5064 next B;
5065 } # INSCOPE
5066
5067 for (@{$self->{open_elements}}) {
5068 unless ($_->[1] & ALL_END_TAG_OPTIONAL_EL ||
5069 $_->[1] == OPTGROUP_EL ||
5070 $_->[1] == OPTION_EL ||
5071 $_->[1] == RUBY_COMPONENT_EL) {
5072 !!!cp ('t403');
5073 !!!parse-error (type => 'not closed',
5074 text => $_->[0]->manakai_local_name,
5075 token => $token);
5076 last;
5077 } else {
5078 !!!cp ('t404');
5079 }
5080 }
5081
5082 $self->{insertion_mode} = AFTER_BODY_IM;
5083 !!!next-token;
5084 next B;
5085 } elsif ($token->{tag_name} eq 'html') {
5086 ## TODO: Update this code. It seems that the code below is not
5087 ## up-to-date, though it has the same effect as specified. NOTE(review): the 'not closed' error below names open_elements->[1] (the element tested against BODY_EL) rather than the current node open_elements->[-1]; confirm that this is intended.
5088 if (@{$self->{open_elements}} > 1 and
5089 $self->{open_elements}->[1]->[1] == BODY_EL) {
5090 unless ($self->{open_elements}->[-1]->[1] == BODY_EL) {
5091 !!!cp ('t406');
5092 !!!parse-error (type => 'not closed',
5093 text => $self->{open_elements}->[1]->[0]
5094 ->manakai_local_name,
5095 token => $token);
5096 } else {
5097 !!!cp ('t407');
5098 }
5099 $self->{insertion_mode} = AFTER_BODY_IM;
5100 ## reprocess (no next-token here: the same token is handled again)
5101 next B;
5102 } else {
5103 !!!cp ('t408');
5104 !!!parse-error (type => 'unmatched end tag',
5105 text => $token->{tag_name}, token => $token);
5106 ## Ignore the token
5107 !!!next-token;
5108 next B;
5109 }
5110 } elsif ({
5111 ## NOTE: End tags for non-phrasing flow content elements
5112
5113 ## NOTE: The normal ones
5114 address => 1, article => 1, aside => 1, blockquote => 1,
5115 center => 1, datagrid => 1, details => 1, dialog => 1,
5116 dir => 1, div => 1, dl => 1, fieldset => 1, figure => 1,
5117 footer => 1, header => 1, listing => 1, menu => 1, nav => 1,
5118 ol => 1, pre => 1, section => 1, ul => 1,
5119
5120 ## NOTE: As normal, but ... optional tags
5121 dd => 1, dt => 1, li => 1,
5122
5123 applet => 1, button => 1, marquee => 1, object => 1,
5124 }->{$token->{tag_name}}) {
5125 ## NOTE: Code for <li> start tags includes "as if </li>" code.
5126 ## Code for <dt> or <dd> start tags includes "as if </dt> or
5127 ## </dd>" code.
5128
5129 ## has an element in scope
5130 my $i;
5131 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5132 my $node = $self->{open_elements}->[$_];
5133 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
5134 !!!cp ('t410');
5135 $i = $_;
5136 last INSCOPE;
5137 } elsif ($node->[1] & SCOPING_EL) {
5138 !!!cp ('t411');
5139 last INSCOPE;
5140 }
5141 } # INSCOPE
5142
5143 unless (defined $i) { # has an element in scope
5144 !!!cp ('t413');
5145 !!!parse-error (type => 'unmatched end tag',
5146 text => $token->{tag_name}, token => $token);
5147 ## NOTE: Ignore the token.
5148 } else {
5149 ## Step 1. generate implied end tags
5150 while ({
5151 ## END_TAG_OPTIONAL_EL
5152 dd => ($token->{tag_name} ne 'dd'),
5153 dt => ($token->{tag_name} ne 'dt'),
5154 li => ($token->{tag_name} ne 'li'),
5155 option => 1,
5156 optgroup => 1,
5157 p => 1,
5158 rt => 1,
5159 rp => 1,
5160 }->{$self->{open_elements}->[-1]->[0]->manakai_local_name}) {
5161 !!!cp ('t409');
5162 pop @{$self->{open_elements}};
5163 }
5164
5165 ## Step 2.
5166 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
5167 ne $token->{tag_name}) {
5168 !!!cp ('t412');
5169 !!!parse-error (type => 'not closed',
5170 text => $self->{open_elements}->[-1]->[0]
5171 ->manakai_local_name,
5172 token => $token);
5173 } else {
5174 !!!cp ('t414');
5175 }
5176
5177 ## Step 3.
5178 splice @{$self->{open_elements}}, $i;
5179
5180 ## Step 4.
5181 $clear_up_to_marker->()
5182 if {
5183 applet => 1, button => 1, marquee => 1, object => 1,
5184 }->{$token->{tag_name}};
5185 }
5186 !!!next-token;
5187 next B;
5188 } elsif ($token->{tag_name} eq 'form') {
5189 ## NOTE: As normal, but interacts with the form element pointer
5190
5191 undef $self->{form_element};
5192
5193 ## has an element in scope
5194 my $i;
5195 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5196 my $node = $self->{open_elements}->[$_];
5197 if ($node->[1] == FORM_EL) {
5198 !!!cp ('t418');
5199 $i = $_;
5200 last INSCOPE;
5201 } elsif ($node->[1] & SCOPING_EL) {
5202 !!!cp ('t419');
5203 last INSCOPE;
5204 }
5205 } # INSCOPE
5206
5207 unless (defined $i) { # has an element in scope
5208 !!!cp ('t421');
5209 !!!parse-error (type => 'unmatched end tag',
5210 text => $token->{tag_name}, token => $token);
5211 ## NOTE: Ignore the token.
5212 } else {
5213 ## Step 1. generate implied end tags
5214 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
5215 !!!cp ('t417');
5216 pop @{$self->{open_elements}};
5217 }
5218
5219 ## Step 2.
5220 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
5221 ne $token->{tag_name}) {
5222 !!!cp ('t417.1');
5223 !!!parse-error (type => 'not closed',
5224 text => $self->{open_elements}->[-1]->[0]
5225 ->manakai_local_name,
5226 token => $token);
5227 } else {
5228 !!!cp ('t420');
5229 }
5230
5231 ## Step 3.
5232 splice @{$self->{open_elements}}, $i;
5233 }
5234
5235 !!!next-token;
5236 next B;
5237 } elsif ({
5238 ## NOTE: As normal, except acts as a closer for any ...
5239 h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
5240 }->{$token->{tag_name}}) {
5241 ## has an element in scope
5242 my $i;
5243 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5244 my $node = $self->{open_elements}->[$_];
5245 if ($node->[1] == HEADING_EL) {
5246 !!!cp ('t423');
5247 $i = $_;
5248 last INSCOPE;
5249 } elsif ($node->[1] & SCOPING_EL) {
5250 !!!cp ('t424');
5251 last INSCOPE;
5252 }
5253 } # INSCOPE
5254
5255 unless (defined $i) { # has an element in scope
5256 !!!cp ('t425.1');
5257 !!!parse-error (type => 'unmatched end tag',
5258 text => $token->{tag_name}, token => $token);
5259 ## NOTE: Ignore the token.
5260 } else {
5261 ## Step 1. generate implied end tags
5262 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
5263 !!!cp ('t422');
5264 pop @{$self->{open_elements}};
5265 }
5266
5267 ## Step 2.
5268 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
5269 ne $token->{tag_name}) {
5270 !!!cp ('t425');
5271 !!!parse-error (type => 'unmatched end tag',
5272 text => $token->{tag_name}, token => $token);
5273 } else {
5274 !!!cp ('t426');
5275 }
5276
5277 ## Step 3.
5278 splice @{$self->{open_elements}}, $i;
5279 }
5280
5281 !!!next-token;
5282 next B;
5283 } elsif ($token->{tag_name} eq 'p') {
5284 ## NOTE: As normal, except </p> implies <p> and ...
5285
5286 ## has an element in scope
5287 my $non_optional;
5288 my $i;
5289 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5290 my $node = $self->{open_elements}->[$_];
5291 if ($node->[1] == P_EL) {
5292 !!!cp ('t410.1');
5293 $i = $_;
5294 last INSCOPE;
5295 } elsif ($node->[1] & SCOPING_EL) {
5296 !!!cp ('t411.1');
5297 last INSCOPE;
5298 } elsif ($node->[1] & END_TAG_OPTIONAL_EL) {
5299 ## NOTE: |END_TAG_OPTIONAL_EL| includes "p"
5300 !!!cp ('t411.2');
5301 #
5302 } else {
5303 !!!cp ('t411.3');
5304 $non_optional ||= $node;
5305 #
5306 }
5307 } # INSCOPE
5308
5309 if (defined $i) {
5310 ## 1. Generate implied end tags
5311 #
5312
5313 ## 2. If current node != "p", parse error
5314 if ($non_optional) {
5315 !!!cp ('t412.1');
5316 !!!parse-error (type => 'not closed',
5317 text => $non_optional->[0]->manakai_local_name,
5318 token => $token);
5319 } else {
5320 !!!cp ('t414.1');
5321 }
5322
5323 ## 3. Pop
5324 splice @{$self->{open_elements}}, $i;
5325 } else {
5326 !!!cp ('t413.1');
5327 !!!parse-error (type => 'unmatched end tag',
5328 text => $token->{tag_name}, token => $token);
5329
5330 !!!cp ('t415.1');
5331 ## As if <p>, then reprocess the current token
5332 my $el;
5333 !!!create-element ($el, $HTML_NS, 'p',, $token);
5334 $insert->($el);
5335 ## NOTE: Not inserted into |$self->{open_elements}|.
5336 }
5337
5338 !!!next-token;
5339 next B;
5340 } elsif ({
5341 a => 1,
5342 b => 1, big => 1, em => 1, font => 1, i => 1,
5343 nobr => 1, s => 1, small => 1, strike => 1,
5344 strong => 1, tt => 1, u => 1,
5345 }->{$token->{tag_name}}) {
5346 !!!cp ('t427');
5347 $formatting_end_tag->($token);
5348 next B;
5349 } elsif ($token->{tag_name} eq 'br') {
5350 !!!cp ('t428');
5351 !!!parse-error (type => 'unmatched end tag',
5352 text => 'br', token => $token);
5353
5354 ## As if <br>
5355 $reconstruct_active_formatting_elements->($insert_to_current);
5356
5357 my $el;
5358 !!!create-element ($el, $HTML_NS, 'br',, $token);
5359 $insert->($el);
5360
5361 ## Ignore the token.
5362 !!!next-token;
5363 next B;
5364 } else {
5365 if ($token->{tag_name} eq 'sarcasm') {
5366 sleep 0.001; # take a deep breath
5367 }
5368
5369 ## Step 1
5370 my $node_i = -1;
5371 my $node = $self->{open_elements}->[$node_i];
5372
5373 ## Step 2
5374 S2: {
5375 my $node_tag_name = $node->[0]->manakai_local_name;
5376 $node_tag_name =~ tr/A-Z/a-z/; # for SVG camelCase tag names
5377 if ($node_tag_name eq $token->{tag_name}) {
5378 ## Step 1
5379 ## generate implied end tags
5380 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
5381 !!!cp ('t430');
5382 ## NOTE: |<ruby><rt></ruby>|.
5383 ## ISSUE: <ruby><rt></rt> will also take this code path,
5384 ## which seems wrong.
5385 pop @{$self->{open_elements}};
5386 $node_i++;
5387 }
5388
5389 ## Step 2
5390 my $current_tag_name
5391 = $self->{open_elements}->[-1]->[0]->manakai_local_name;
5392 $current_tag_name =~ tr/A-Z/a-z/;
5393 if ($current_tag_name ne $token->{tag_name}) {
5394 !!!cp ('t431');
5395 ## NOTE: <x><y></x>
5396 !!!parse-error (type => 'not closed',
5397 text => $self->{open_elements}->[-1]->[0]
5398 ->manakai_local_name,
5399 token => $token);
5400 } else {
5401 !!!cp ('t432');
5402 }
5403
5404 ## Step 3
5405 splice @{$self->{open_elements}}, $node_i if $node_i < 0;
5406
5407 !!!next-token;
5408 last S2;
5409 } else {
5410 ## Step 3
5411 if (not ($node->[1] & FORMATTING_EL) and
5412 #not $phrasing_category->{$node->[1]} and
5413 ($node->[1] & SPECIAL_EL or
5414 $node->[1] & SCOPING_EL)) {
5415 !!!cp ('t433');
5416 !!!parse-error (type => 'unmatched end tag',
5417 text => $token->{tag_name}, token => $token);
5418 ## Ignore the token
5419 !!!next-token;
5420 last S2;
5421
5422 ## NOTE: |<span><dd></span>a|: In Safari 3.1.2 and Opera
5423 ## 9.27, "a" is a child of <dd> (conforming). In
5424 ## Firefox 3.0.2, "a" is a child of <body>. In WinIE 7,
5425 ## "a" is a child of both <body> and <dd>.
5426 }
5427
5428 !!!cp ('t434');
5429 }
5430
5431 ## Step 4
5432 $node_i--;
5433 $node = $self->{open_elements}->[$node_i];
5434
5435 ## Step 5;
5436 redo S2;
5437 } # S2
5438 next B;
5439 }
5440 }
5441 next B;
5442 } continue { # B
5443 if ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) {
5444 ## NOTE: The code below is executed in cases where it does not have
5445 ## to be, but it is harmless even in those cases.
5446 ## has an element in scope
5447 INSCOPE: {
5448 for (reverse 0..$#{$self->{open_elements}}) {
5449 my $node = $self->{open_elements}->[$_];
5450 if ($node->[1] & FOREIGN_EL) {
5451 last INSCOPE;
5452 } elsif ($node->[1] & SCOPING_EL) {
5453 last;
5454 }
5455 }
5456
5457 ## NOTE: No foreign element in scope.
5458 $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
5459 } # INSCOPE
5460 }
5461 } # B
5462
5463 ## Stop parsing # MUST
5464
5465 ## TODO: script stuffs
5466 } # _tree_construct_main
5467
5468 ## XXX: How this method is organized is somewhat out of date, although
5469 ## it still does what the current spec documents.
## Replaces the children of a node with the result of parsing a string
## of HTML markup (the |innerHTML| setter algorithm).
##
## Invoked as a class method:
##
##   Whatpm::HTML->set_inner_html ($node, $string, $onerror, $get_wrapper);
##
##   $node        - The /context/ node: a Document (node type 9) or an
##                  Element (node type 1).
##   $string      - The markup to parse.  It is never copied; it is
##                  accessed through |@_| only.
##   $onerror     - Optional error handler code reference.  For an
##                  Element context, a handler that |warn|s is used when
##                  it is omitted; for a Document context the value is
##                  handed to |parse_char_string| as is.
##   $get_wrapper - Optional code reference that receives the input
##                  handle and returns the handle to read from.  The
##                  identity function is used when it is omitted.
##
## Dies if |$node| is neither a Document nor an Element.
sub set_inner_html ($$$$;$) {
  my $class = shift;
  my $node = shift; # /context/
  #my $s = \$_[0];
  ## NOTE: After the two |shift|s, |$_[0]| is the markup string,
  ## |$_[1]| is the error handler and |$_[2]| is the wrapper factory.
  my $onerror = $_[1];
  my $get_wrapper = $_[2] || sub ($) { return $_[0] };

  ## ISSUE: Should {confident} be true?

  my $nt = $node->node_type;
  if ($nt == 9) { # Document (invoke the algorithm with no /context/ element)
    # MUST

    ## Step 1 # MUST
    ## TODO: If the document has an active parser, ...
    ## ISSUE: There is an issue in the spec.

    ## Step 2 # MUST
    ## NOTE: The child list is copied first so that removing nodes does
    ## not disturb the iteration.
    my @cn = @{$node->child_nodes};
    for (@cn) {
      $node->remove_child ($_);
    }

    ## Step 3, 4, 5 # MUST
    $class->parse_char_string ($_[0] => $node, $onerror, $get_wrapper);
  } elsif ($nt == 1) { # Element (invoke the algorithm with /context/ element)
    ## TODO: If non-html element

    ## NOTE: Most of this code is copied from |parse_string|

    ## TODO: Support for $get_wrapper
    ## NOTE(review): |$get_wrapper| is already applied to the input
    ## handle below; confirm what this TODO still refers to.

    ## F1. Create an HTML document.
    ## NOTE: The markup is parsed into this temporary document; the
    ## resulting nodes are moved into |$node| at the end.
    my $this_doc = $node->owner_document;
    my $doc = $this_doc->implementation->create_document;
    $doc->manakai_is_html (1);

    ## F2. Propagate quirkness flag
    $doc->manakai_compat_mode ($this_doc->manakai_compat_mode);

    ## F3. Create an HTML parser
    my $p = $class->new;
    $p->{document} = $doc;

    ## Step 8 # MUST
    $p->{line_prev} = $p->{line} = 1;
    $p->{column_prev} = $p->{column} = 0;
    require Whatpm::Charset::DecodeHandle;
    my $input = Whatpm::Charset::DecodeHandle::CharString->new (\($_[0]));
    $input = $get_wrapper->($input);

    ## Reads the next input character into |$self->{nc}| (-1 at the end
    ## of the input), maintaining the line and column counters.
    ## NOTE(review): |$self| is presumably always |$p| here; the code
    ## below uses both names for the same state.  Confirm against the
    ## tokenizer, which is the caller.
    $p->{set_nc} = sub {
      my $self = shift;

      my $char = '';
      if (defined $self->{next_nc}) {
        ## A character was read ahead by the CR handling below.
        $char = $self->{next_nc};
        delete $self->{next_nc};
        $self->{nc} = ord $char;
      } else {
        $self->{char_buffer} = '';
        $self->{char_buffer_pos} = 0;

        ## Fast path: buffer a run of characters that are none of NULL,
        ## LF, and CR, and hand out the first one.
        my $count = $input->manakai_read_until
            ($self->{char_buffer}, qr/[^\x00\x0A\x0D]/,
             $self->{char_buffer_pos});
        if ($count) {
          $self->{line_prev} = $self->{line};
          $self->{column_prev} = $self->{column};
          $self->{column}++;
          $self->{nc}
              = ord substr ($self->{char_buffer},
                            $self->{char_buffer_pos}++, 1);
          return;
        }

        if ($input->read ($char, 1)) {
          $self->{nc} = ord $char;
        } else {
          $self->{nc} = -1;
          return;
        }
      }

      ($p->{line_prev}, $p->{column_prev}) = ($p->{line}, $p->{column});
      $p->{column}++;

      if ($self->{nc} == 0x000A) { # LF
        $p->{line}++;
        $p->{column} = 0;
        !!!cp ('i1');
      } elsif ($self->{nc} == 0x000D) { # CR
        ## TODO: support for abort/streaming
        ## NOTE: CR and CR LF are both reported as a single LF.  A
        ## character following the CR that is not LF is kept in
        ## |next_nc| for the next invocation.
        my $next = '';
        if ($input->read ($next, 1) and $next ne "\x0A") {
          $self->{next_nc} = $next;
        }
        $self->{nc} = 0x000A; # LF # MUST
        $p->{line}++;
        $p->{column} = 0;
        !!!cp ('i2');
      } elsif ($self->{nc} == 0x0000) { # NULL
        !!!cp ('i4');
        !!!parse-error (type => 'NULL');
        $self->{nc} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
      }
    };

    ## Reads a run of characters that are none of NULL, LF, CR, and the
    ## characters in |$specials_range| into |$scalar| at |$offset|, and
    ## returns the number of characters read (0 if none).
    $p->{read_until} = sub {
      #my ($scalar, $specials_range, $offset) = @_;
      return 0 if defined $p->{next_nc};

      my $pattern = qr/[^$_[1]\x00\x0A\x0D]/;
      my $offset = $_[2] || 0;

      if ($p->{char_buffer_pos} < length $p->{char_buffer}) {
        ## Characters buffered by |set_nc| have to be consumed first.
        pos ($p->{char_buffer}) = $p->{char_buffer_pos};
        if ($p->{char_buffer} =~ /\G(?>$pattern)+/) {
          substr ($_[0], $offset)
              = substr ($p->{char_buffer}, $-[0], $+[0] - $-[0]);
          my $count = $+[0] - $-[0];
          if ($count) {
            $p->{column} += $count;
            $p->{char_buffer_pos} += $count;
            $p->{line_prev} = $p->{line};
            $p->{column_prev} = $p->{column} - 1;
            $p->{nc} = -1;
          }
          return $count;
        } else {
          return 0;
        }
      } else {
        my $count = $input->manakai_read_until ($_[0], $pattern, $_[2]);
        if ($count) {
          $p->{column} += $count;
          $p->{column_prev} += $count;
          $p->{nc} = -1;
        }
        return $count;
      }
    }; # $p->{read_until}

    ## The default error handler |warn|s with the position of the token
    ## if it is known, or with the reported position otherwise.
    my $ponerror = $onerror || sub {
      my (%opt) = @_;
      my $line = $opt{line};
      my $column = $opt{column};
      if (defined $opt{token} and defined $opt{token}->{line}) {
        $line = $opt{token}->{line};
        $column = $opt{token}->{column};
      }
      warn "Parse error ($opt{type}) at line $line column $column\n";
    };
    $p->{parse_error} = sub {
      $ponerror->(line => $p->{line}, column => $p->{column}, @_);
    };

    ## Errors reported by the input handle go to the same handler, in
    ## the |encode| layer.
    my $char_onerror = sub {
      my (undef, $type, %opt) = @_;
      $ponerror->(layer => 'encode',
                  line => $p->{line}, column => $p->{column} + 1,
                  %opt, type => $type);
    }; # $char_onerror
    $input->onerror ($char_onerror);

    $p->_initialize_tokenizer;
    $p->_initialize_tree_constructor;

    ## F4. If /context/ is not undef...

    ## F4.1. content model flag
    my $node_ln = $node->manakai_local_name;
    $p->{content_model} = {
      title => RCDATA_CONTENT_MODEL,
      textarea => RCDATA_CONTENT_MODEL,
      style => CDATA_CONTENT_MODEL,
      script => CDATA_CONTENT_MODEL,
      xmp => CDATA_CONTENT_MODEL,
      iframe => CDATA_CONTENT_MODEL,
      noembed => CDATA_CONTENT_MODEL,
      noframes => CDATA_CONTENT_MODEL,
      noscript => CDATA_CONTENT_MODEL,
      plaintext => PLAINTEXT_CONTENT_MODEL,
    }->{$node_ln};
    $p->{content_model} = PCDATA_CONTENT_MODEL
        unless defined $p->{content_model};

    $p->{inner_html_node} = [$node, $el_category->{$node_ln}];
      ## TODO: Foreign element OK?

    ## F4.2. Root |html| element
    my $root = $doc->create_element_ns
      ('http://www.w3.org/1999/xhtml', [undef, 'html']);

    ## F4.3.
    $doc->append_child ($root);

    ## F4.4.
    push @{$p->{open_elements}}, [$root, $el_category->{html}];

    undef $p->{head_element};
    undef $p->{head_element_inserted};

    ## F4.5.
    $p->_reset_insertion_mode;

    ## F4.6.
    ## NOTE: The form element pointer is set to the nearest HTML |form|
    ## element among /context/ and its ancestors, if any.
    my $anode = $node;
    AN: while (defined $anode) {
      if ($anode->node_type == 1) {
        my $nsuri = $anode->namespace_uri;
        if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml') {
          if ($anode->manakai_local_name eq 'form') {
            !!!cp ('i5');
            $p->{form_element} = $anode;
            last AN;
          }
        }
      }
      $anode = $anode->parent_node;
    } # AN

    ## F.6. Start the parser.
    {
      ## NOTE(review): The |!!!next-token| macro presumably expands to
      ## code that refers to |$self|, hence this alias; confirm against
      ## the macro definition.
      my $self = $p;
      !!!next-token;
    }
    $p->_tree_construction_main;

    ## F.7.
    my @cn = @{$node->child_nodes};
    for (@cn) {
      $node->remove_child ($_);
    }
    ## ISSUE: mutation events? read-only?

    ## Step 11 # MUST
    ## NOTE: The parsed nodes are moved from the temporary root element
    ## into /context/, after being adopted by its document.
    @cn = @{$root->child_nodes};
    for (@cn) {
      $this_doc->adopt_node ($_);
      $node->append_child ($_);
    }
    ## ISSUE: mutation events?

    $p->_terminate_tree_constructor;

    ## Break the reference loops.  The |set_nc|, |read_until|, and
    ## |parse_error| closures are all stored in |$p| and all refer to
    ## |$p|, so that the parser, the temporary document, and the input
    ## handle would never be freed unless every one of them is removed.
    ## NOTE: |parse_error| is deleted last such that the value of this
    ## branch is the same as before.
    delete $p->{set_nc};
    delete $p->{read_until};
    delete $p->{parse_error}; # delete loop
  } else {
    die "$0: |set_inner_html| is not defined for node of type $nt";
  }
} # set_inner_html
5722
5723 } # tree construction stage
5724
package Whatpm::HTML::RestartParser;
## NOTE: This package only declares |Error| as its superclass; it
## defines no methods of its own.
## NOTE(review): Presumably objects of this class are thrown to make
## the parser restart; the code that throws them is not in this part of
## the file.
our @ISA;
push @ISA, 'Error';

1;
5729 # $Date: 2009/08/16 04:59:53 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24