/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Contents of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.217 - (show annotations) (download) (as text)
Sun Jul 5 05:13:13 2009 UTC (16 years, 6 months ago) by wakaba
Branch: MAIN
Changes since 1.216: +18 -15 lines
File MIME type: application/x-wais-source
++ whatpm/t/ChangeLog	5 Jul 2009 05:12:28 -0000
	* tree-test-1.dat: Added tests for <table> in <p> (c.f. HTML5
	revision 3132).

2009-07-05  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	5 Jul 2009 05:13:02 -0000
2009-07-05  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm.src (_tree_construction_main): Put |table| in |p| in
	quirks mode (The Hixie Quirk, HTML5 revision 3132).

1 package Whatpm::HTML;
2 use strict;
3 our $VERSION=do{my @r=(q$Revision: 1.216 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 use Error qw(:try);
5
6 use Whatpm::HTML::Tokenizer;
7
8 ## NOTE: This module doesn't check all HTML5 parse errors; character
9 ## encoding related parse errors are expected to be handled by relevant
10 ## modules.
11 ## Parse errors for control characters that are not allowed in HTML5
12 ## documents, for surrogate code points, and for noncharacter code
13 ## points, as well as U+FFFD substitutions for characters whose code point
14 ## is higher than U+10FFFF may be detected by combining the parser with
15 ## the checker implemented by Whatpm::Charset::UnicodeChecker (for its
16 ## usage example, see |t/HTML-tree.t| in the Whatpm package or the
17 ## WebHACC::Language::HTML module in the WebHACC package).
18
19 ## ISSUE:
20 ## var doc = implementation.createDocument (null, null, null);
21 ## doc.write ('');
22 ## alert (doc.compatMode);
23
24 require IO::Handle;
25
26 ## Namespace URLs
27
28 my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
29 my $MML_NS = q<http://www.w3.org/1998/Math/MathML>;
30 my $SVG_NS = q<http://www.w3.org/2000/svg>;
31 my $XLINK_NS = q<http://www.w3.org/1999/xlink>;
32 my $XML_NS = q<http://www.w3.org/XML/1998/namespace>;
33 my $XMLNS_NS = q<http://www.w3.org/2000/xmlns/>;
34
35 ## Element categories
36
## Element category flags.  Every flag below occupies exactly one bit,
## written as a shift so that the bit position is visible at a glance.

## Bits 12-15
sub SPECIAL_EL    () { 1 << 15 }
sub SCOPING_EL    () { 1 << 14 }
sub FORMATTING_EL () { 1 << 13 }
sub PHRASING_EL   () { 1 << 12 }

## Bits 10-11
#sub FOREIGN_EL () { 1 << 11 } # see Whatpm::HTML::Tokenizer
sub FOREIGN_FLOW_CONTENT_EL () { 1 << 10 }

## Bits 6-9
sub TABLE_SCOPING_EL      () { 1 << 9 }
sub TABLE_ROWS_SCOPING_EL () { 1 << 8 }
sub TABLE_ROW_SCOPING_EL  () { 1 << 7 }
sub TABLE_ROWS_EL         () { 1 << 6 }

## Bit 5
sub ADDRESS_DIV_P_EL () { 1 << 5 }

## Bit 4
## NOTE: Used in the </body> and EOF algorithms.
sub ALL_END_TAG_OPTIONAL_EL () { 1 << 4 }

## Bit 3
## NOTE: Used in the "generate implied end tags" algorithm.
## NOTE: One implementation of "generate implied end tags" uses a
## modified version of END_TAG_OPTIONAL_EL (search for the algorithm
## name).
sub END_TAG_OPTIONAL_EL () { 1 << 3 }
66
## Per-element categories.  Each category is the bitwise-or of one or
## more category flags and a small discriminator held in bits 0-2.

## Bits 0-2

## Special elements
sub MISC_SPECIAL_EL () { 0b000 | SPECIAL_EL }
sub FORM_EL         () { 0b001 | SPECIAL_EL }
sub FRAMESET_EL     () { 0b010 | SPECIAL_EL }
sub HEADING_EL      () { 0b011 | SPECIAL_EL }
sub SELECT_EL       () { 0b100 | SPECIAL_EL }
sub SCRIPT_EL       () { 0b101 | SPECIAL_EL }

sub ADDRESS_DIV_EL () { 0b001 | SPECIAL_EL | ADDRESS_DIV_P_EL }
sub BODY_EL        () { 0b001 | SPECIAL_EL | ALL_END_TAG_OPTIONAL_EL }

sub DTDD_EL () {
  0b010
    | SPECIAL_EL
    | END_TAG_OPTIONAL_EL
    | ALL_END_TAG_OPTIONAL_EL
}
sub LI_EL () {
  0b100
    | SPECIAL_EL
    | END_TAG_OPTIONAL_EL
    | ALL_END_TAG_OPTIONAL_EL
}
sub P_EL () {
  0b001
    | SPECIAL_EL
    | ADDRESS_DIV_P_EL
    | END_TAG_OPTIONAL_EL
    | ALL_END_TAG_OPTIONAL_EL
}

sub TABLE_ROW_EL () {
  0b001
    | SPECIAL_EL
    | TABLE_ROWS_EL
    | TABLE_ROW_SCOPING_EL
    | ALL_END_TAG_OPTIONAL_EL
}
sub TABLE_ROW_GROUP_EL () {
  0b001
    | SPECIAL_EL
    | TABLE_ROWS_EL
    | TABLE_ROWS_SCOPING_EL
    | ALL_END_TAG_OPTIONAL_EL
}

## Scoping elements
sub MISC_SCOPING_EL () { 0b000 | SCOPING_EL }
sub BUTTON_EL       () { 0b001 | SCOPING_EL }
sub CAPTION_EL      () { 0b010 | SCOPING_EL }
sub HTML_EL () {
  0b001
    | SCOPING_EL
    | TABLE_SCOPING_EL
    | TABLE_ROWS_SCOPING_EL
    | TABLE_ROW_SCOPING_EL
    | ALL_END_TAG_OPTIONAL_EL
}
sub TABLE_EL () {
  0b001
    | SCOPING_EL
    | TABLE_ROWS_EL
    | TABLE_SCOPING_EL
}
sub TABLE_CELL_EL () {
  0b001
    | SCOPING_EL
    | TABLE_ROW_SCOPING_EL
    | ALL_END_TAG_OPTIONAL_EL
}

## Formatting elements
sub MISC_FORMATTING_EL () { 0b000 | FORMATTING_EL }
sub A_EL               () { 0b001 | FORMATTING_EL }
sub NOBR_EL            () { 0b010 | FORMATTING_EL }

## Phrasing elements
sub RUBY_EL () { 0b001 | PHRASING_EL }

## ISSUE: ALL_END_TAG_OPTIONAL_EL?
sub OPTGROUP_EL       () { 0b001 | PHRASING_EL | END_TAG_OPTIONAL_EL }
sub OPTION_EL         () { 0b010 | PHRASING_EL | END_TAG_OPTIONAL_EL }
sub RUBY_COMPONENT_EL () { 0b100 | PHRASING_EL | END_TAG_OPTIONAL_EL }

## NOTE: FOREIGN_EL is not defined in this file (see
## Whatpm::HTML::Tokenizer).
sub MML_AXML_EL () { 0b001 | PHRASING_EL | FOREIGN_EL }
150
## Map from HTML element local name to its element category.  Entries
## are grouped by category; the resulting hash has one key per name.
my $el_category = {
  a        => A_EL,
  body     => BODY_EL,
  button   => BUTTON_EL,
  caption  => CAPTION_EL,
  form     => FORM_EL,
  frameset => FRAMESET_EL,
  html     => HTML_EL,
  li       => LI_EL,
  nobr     => NOBR_EL,
  optgroup => OPTGROUP_EL,
  option   => OPTION_EL,
  p        => P_EL,
  ruby     => RUBY_EL,
  select   => SELECT_EL,
  table    => TABLE_EL,
  tr       => TABLE_ROW_EL,

  (map { ($_ => ADDRESS_DIV_EL) } qw(address div)),
  (map { ($_ => DTDD_EL) } qw(dd dt)),
  (map { ($_ => HEADING_EL) } qw(h1 h2 h3 h4 h5 h6)),
  (map { ($_ => RUBY_COMPONENT_EL) } qw(rp rt)),
  (map { ($_ => TABLE_ROW_GROUP_EL) } qw(tbody tfoot thead)),
  (map { ($_ => TABLE_CELL_EL) } qw(td th)),

  (map { ($_ => MISC_SCOPING_EL) } qw(applet marquee object)),

  (map { ($_ => FORMATTING_EL) } qw(
    b big em font i s small strike strong tt u
  )),

  ## NOTE: |image| is deliberately absent (commented out in the spec).
  (map { ($_ => MISC_SPECIAL_EL) } qw(
    area article aside base basefont bgsound blockquote br center col
    colgroup command datagrid details dialog dir dl embed eventsource
    fieldset figure footer frame head header hr iframe img input
    isindex link listing menu meta nav noembed noframes noscript ol
    param plaintext pre script section spacer style textarea title ul
    wbr
  )),
};
251
## Element categories for foreign (MathML and SVG) elements, keyed by
## namespace URL and then by local name.
my $el_category_f = {
  $MML_NS => {
    'annotation-xml' => MML_AXML_EL,
    (map { ($_ => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL) }
         qw(mi mo mn ms mtext)),
  },
  $SVG_NS => {
    foreignObject => SCOPING_EL | FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
    (map { ($_ => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL) }
         qw(desc title)),
  },
  ## NOTE: In addition, FOREIGN_EL is set to non-HTML elements.
};
268
## Map from all-lowercase SVG attribute name to its mixed-case form.
## Every key is simply the lowercased value, so the table is derived
## from the list of mixed-case names.
my $svg_attr_name = {
  map { (lc $_ => $_) } qw(
    attributeName attributeType
    baseFrequency baseProfile
    calcMode clipPathUnits contentScriptType contentStyleType
    diffuseConstant
    edgeMode externalResourcesRequired
    filterRes filterUnits
    glyphRef gradientTransform gradientUnits
    kernelMatrix kernelUnitLength keyPoints keySplines keyTimes
    lengthAdjust limitingConeAngle
    markerHeight markerUnits markerWidth maskContentUnits maskUnits
    numOctaves
    pathLength patternContentUnits patternTransform patternUnits
    pointsAtX pointsAtY pointsAtZ
    preserveAlpha preserveAspectRatio primitiveUnits
    refX refY repeatCount repeatDur
    requiredExtensions requiredFeatures
    specularConstant specularExponent spreadMethod startOffset
    stdDeviation stitchTiles surfaceScale systemLanguage
    tableValues targetX targetY textLength
    viewBox viewTarget
    xChannelSelector
    yChannelSelector
    zoomAndPan
  )
};
333
## Map from qualified attribute name, as written on a foreign element,
## to [namespace URL, [prefix, local name]].
my $foreign_attr_xname = {
  (map { ("xlink:$_" => [$XLINK_NS, ['xlink', $_]]) }
       qw(actuate arcrole href role show title type)),
  (map { ("xml:$_" => [$XML_NS, ['xml', $_]]) }
       qw(base lang space)),
  'xmlns'       => [$XMLNS_NS, [undef, 'xmlns']],
  'xmlns:xlink' => [$XMLNS_NS, ['xmlns', 'xlink']],
};
348
349 ## ISSUE: xmlns:xlink="non-xlink-ns" is not an error.
350
351 ## TODO: Invoke the reset algorithm when a resettable element is
352 ## created (cf. HTML5 revision 2259).
353
## Parse an HTML document given as a string of bytes.
##
## Called as
##   $parser->parse_byte_string ($charset_name, $bytes, ...);
## where |$bytes| is either the byte string itself or a reference to
## it.  The string is opened as an in-memory read handle and handed to
## |parse_byte_stream| together with |$charset_name| and all remaining
## arguments, unchanged.
##
## Returns whatever |parse_byte_stream| returns.  Dies if the in-memory
## handle cannot be opened; previously that failure went unnoticed and
## the parser read from an unopened handle.
sub parse_byte_string ($$$$;$) {
  my $self = shift;
  my $charset_name = shift;

  ## Take a reference to the caller's scalar rather than copying it.
  my $bytes_ref = ref $_[0] ? $_[0] : \($_[0]);

  open my $input, '<', $bytes_ref
      or die "parse_byte_string: can't open the byte string as a stream: $!";

  return $self->parse_byte_stream ($charset_name, $input, @_[1..$#_]);
} # parse_byte_string
360
## Parse an HTML document read from a byte stream.  The character
## encoding is determined first (the "SNIFFING" block below), then the
## decoded character stream is handed to |parse_char_stream|.
##
## Arguments (see the commented-out signature on the first line of the
## body):
##   $self         - parser object; if a non-reference (class name) is
##                   given, a new parser is created with |new|
##   $charset_name - charset name tried first when defined (step 1)
##   $byte_stream  - handle the bytes are read from (|getc| is called
##                   on it during sniffing)
##   $doc          - passed through to |parse_char_stream|
##   $onerror      - optional error callback; the default |warn|s the
##                   error type
##   $get_wrapper  - optional code reference applied to the decoded
##                   character stream; the default returns its argument
##                   unchanged
##
## Returns whatever |parse_char_stream| returns.
##
## The {change_encoding} callback installed here may throw
## |Whatpm::HTML::RestartParser|; in that case the document is parsed
## a second time with the newly selected decoder.
361 sub parse_byte_stream ($$$$;$$) {
362 # my ($self, $charset_name, $byte_stream, $doc, $onerror, $get_wrapper) = @_;
363 my $self = ref $_[0] ? shift : shift->new;
364 my $charset_name = shift;
365 my $byte_stream = $_[0];
366
367 my $onerror = $_[2] || sub {
368 my (%opt) = @_;
369 warn "Parse error ($opt{type})\n";
370 };
371 $self->{parse_error} = $onerror; # updated later by parse_char_string
372
373 my $get_wrapper = $_[3] || sub ($) {
374 return $_[0]; # $_[0] = byte stream handle, returned = arg to char handle
375 };
376
377 ## HTML5 encoding sniffing algorithm
378 require Message::Charset::Info;
379 my $charset;
380 my $buffer;
381 my ($char_stream, $e_status);
382
## Each successful step sets $charset, $char_stream, $e_status and
## $self->{confident}, then leaves the block with |last SNIFFING|.
383 SNIFFING: {
384 ## NOTE: By setting |allow_fallback| option true when the
385 ## |get_decode_handle| method is invoked, we ignore what the HTML5
386 ## spec requires, i.e. unsupported encoding should be ignored.
387 ## TODO: We should not do this unless the parser is invoked
388 ## in the conformance checking mode, in which this behavior
389 ## would be useful.
390
391 ## Step 1
392 if (defined $charset_name) {
393 $charset = Message::Charset::Info->get_by_html_name ($charset_name);
394 ## TODO: Is this ok? Transfer protocol's parameter should be
395 ## interpreted in its semantics?
396
397 ($char_stream, $e_status) = $charset->get_decode_handle
398 ($byte_stream, allow_error_reporting => 1,
399 allow_fallback => 1);
400 if ($char_stream) {
401 $self->{confident} = 1;
402 last SNIFFING;
403 } else {
404 !!!parse-error (type => 'charset:not supported',
405 layer => 'encode',
406 line => 1, column => 1,
407 value => $charset_name,
408 level => $self->{level}->{uncertain});
409 }
410 }
411
412 ## Step 2
## Read up to 1024 bytes for the byte-order-mark check and the
## charset detector below.
413 my $byte_buffer = '';
414 for (1..1024) {
415 my $char = $byte_stream->getc;
416 last unless defined $char;
417 $byte_buffer .= $char;
418 } ## TODO: timeout
419
420 ## Step 3
## A leading byte order mark selects UTF-16BE, UTF-16LE or UTF-8.
421 if ($byte_buffer =~ /^\xFE\xFF/) {
422 $charset = Message::Charset::Info->get_by_html_name ('utf-16be');
423 ($char_stream, $e_status) = $charset->get_decode_handle
424 ($byte_stream, allow_error_reporting => 1,
425 allow_fallback => 1, byte_buffer => \$byte_buffer);
426 $self->{confident} = 1;
427 last SNIFFING;
428 } elsif ($byte_buffer =~ /^\xFF\xFE/) {
429 $charset = Message::Charset::Info->get_by_html_name ('utf-16le');
430 ($char_stream, $e_status) = $charset->get_decode_handle
431 ($byte_stream, allow_error_reporting => 1,
432 allow_fallback => 1, byte_buffer => \$byte_buffer);
433 $self->{confident} = 1;
434 last SNIFFING;
435 } elsif ($byte_buffer =~ /^\xEF\xBB\xBF/) {
436 $charset = Message::Charset::Info->get_by_html_name ('utf-8');
437 ($char_stream, $e_status) = $charset->get_decode_handle
438 ($byte_stream, allow_error_reporting => 1,
439 allow_fallback => 1, byte_buffer => \$byte_buffer);
440 $self->{confident} = 1;
441 last SNIFFING;
442 }
443
444 ## Step 4
445 ## TODO: <meta charset>
446
447 ## Step 5
448 ## TODO: from history
449
450 ## Step 6
451 require Whatpm::Charset::UniversalCharDet;
452 $charset_name = Whatpm::Charset::UniversalCharDet->detect_byte_string
453 ($byte_buffer);
454 if (defined $charset_name) {
455 $charset = Message::Charset::Info->get_by_html_name ($charset_name);
456
457 require Whatpm::Charset::DecodeHandle;
458 $buffer = Whatpm::Charset::DecodeHandle::ByteBuffer->new
459 ($byte_stream);
460 ($char_stream, $e_status) = $charset->get_decode_handle
461 ($buffer, allow_error_reporting => 1,
462 allow_fallback => 1, byte_buffer => \$byte_buffer);
463 if ($char_stream) {
464 $buffer->{buffer} = $byte_buffer;
465 !!!parse-error (type => 'sniffing:chardet',
466 text => $charset_name,
467 level => $self->{level}->{info},
468 layer => 'encode',
469 line => 1, column => 1);
470 $self->{confident} = 0;
471 last SNIFFING;
472 }
473 }
474
475 ## Step 7: default
476 ## TODO: Make this configurable.
477 $charset = Message::Charset::Info->get_by_html_name ('windows-1252');
478 ## NOTE: We choose |windows-1252| here, since |utf-8| should be
479 ## detectable in the step 6.
480 require Whatpm::Charset::DecodeHandle;
481 $buffer = Whatpm::Charset::DecodeHandle::ByteBuffer->new
482 ($byte_stream);
483 ($char_stream, $e_status)
484 = $charset->get_decode_handle ($buffer,
485 allow_error_reporting => 1,
486 allow_fallback => 1,
487 byte_buffer => \$byte_buffer);
488 $buffer->{buffer} = $byte_buffer;
489 !!!parse-error (type => 'sniffing:default',
490 text => 'windows-1252',
491 level => $self->{level}->{info},
492 line => 1, column => 1,
493 layer => 'encode');
494 $self->{confident} = 0;
495 } # SNIFFING
496
## Record the selected encoding and report when the decoder is a
## fallback implementation or cannot report errors.  The same logic is
## repeated in the RestartParser handler near the end of this method.
497 if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) {
498 $self->{input_encoding} = $charset->get_iana_name; ## TODO: Should we set actual charset decoder's encoding name?
499 !!!parse-error (type => 'chardecode:fallback',
500 #text => $self->{input_encoding},
501 level => $self->{level}->{uncertain},
502 line => 1, column => 1,
503 layer => 'encode');
504 } elsif (not ($e_status &
505 Message::Charset::Info::ERROR_REPORTING_ENCODING_IMPL ())) {
506 $self->{input_encoding} = $charset->get_iana_name;
507 !!!parse-error (type => 'chardecode:no error',
508 text => $self->{input_encoding},
509 level => $self->{level}->{uncertain},
510 line => 1, column => 1,
511 layer => 'encode');
512 } else {
513 $self->{input_encoding} = $charset->get_iana_name;
514 }
515
## Callback invoked as ($self, $charset_name, $token).  It overwrites
## the enclosing $charset_name, $charset, $char_stream and $e_status,
## which the RestartParser handler below then reads.
## NOTE(review): $buffer is only assigned in steps 6 and 7 above; when
## sniffing ends in step 1 or 3 the |$buffer->{buffer}| expressions
## below operate on an undefined $buffer - confirm this is intended.
516 $self->{change_encoding} = sub {
517 my $self = shift;
518 $charset_name = shift;
519 my $token = shift;
520
521 $charset = Message::Charset::Info->get_by_html_name ($charset_name);
522 ($char_stream, $e_status) = $charset->get_decode_handle
523 ($byte_stream, allow_error_reporting => 1, allow_fallback => 1,
524 byte_buffer => \ $buffer->{buffer});
525
526 if ($char_stream) { # if supported
527 ## "Change the encoding" algorithm:
528
529 ## Step 1
530 if (defined $self->{input_encoding} and
531 $self->{input_encoding} eq $charset_name) {
532 !!!parse-error (type => 'charset label:matching',
533 text => $charset_name,
534 level => $self->{level}->{info});
535 $self->{confident} = 1;
536 return;
537 }
538
539 ## Step 2 (HTML5 revision 3205)
540 if (defined $self->{input_encoding} and
541 Message::Charset::Info->get_by_html_name ($self->{input_encoding})
542 ->{category} & Message::Charset::Info::CHARSET_CATEGORY_UTF16 ()) {
543 $self->{confident} = 1;
544 return;
545 }
546
547 ## Step 3
548 if ($charset->{category} &
549 Message::Charset::Info::CHARSET_CATEGORY_UTF16 ()) {
550 $charset = Message::Charset::Info->get_by_html_name ('utf-8');
551 ($char_stream, $e_status) = $charset->get_decode_handle
552 ($byte_stream,
553 byte_buffer => \ $buffer->{buffer});
554 }
555 $charset_name = $charset->get_iana_name;
556
557 !!!parse-error (type => 'charset label detected',
558 text => $self->{input_encoding},
559 value => $charset_name,
560 level => $self->{level}->{warn},
561 token => $token);
562
563 ## Step 4
564 # if (can) {
565 ## change the encoding on the fly.
566 #$self->{confident} = 1;
567 #return;
568 # }
569
570 ## Step 5
571 throw Whatpm::HTML::RestartParser ();
572 }
573 }; # $self->{change_encoding}
574
## Error callback for the character stream: reports the error on the
## 'encode' layer and, when an |octets| reference is supplied, replaces
## the referenced value with U+FFFD.
575 my $char_onerror = sub {
576 my (undef, $type, %opt) = @_;
577 !!!parse-error (layer => 'encode',
578 line => $self->{line}, column => $self->{column} + 1,
579 %opt, type => $type);
580 if ($opt{octets}) {
581 ${$opt{octets}} = "\x{FFFD}"; # replacement character
582 }
583 };
584
585 my $wrapped_char_stream = $get_wrapper->($char_stream);
586 $wrapped_char_stream->onerror ($char_onerror);
587
588 my @args = ($_[1], $_[2]); # $doc, $onerror - $get_wrapper = undef;
589 my $return;
590 try {
591 $return = $self->parse_char_stream ($wrapped_char_stream, @args);
592 } catch Whatpm::HTML::RestartParser with {
593 ## NOTE: Invoked after {change_encoding}.
594
595 if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) {
596 $self->{input_encoding} = $charset->get_iana_name; ## TODO: Should we set actual charset decoder's encoding name?
597 !!!parse-error (type => 'chardecode:fallback',
598 level => $self->{level}->{uncertain},
599 #text => $self->{input_encoding},
600 line => 1, column => 1,
601 layer => 'encode');
602 } elsif (not ($e_status &
603 Message::Charset::Info::ERROR_REPORTING_ENCODING_IMPL ())) {
604 $self->{input_encoding} = $charset->get_iana_name;
605 !!!parse-error (type => 'chardecode:no error',
606 text => $self->{input_encoding},
607 level => $self->{level}->{uncertain},
608 line => 1, column => 1,
609 layer => 'encode');
610 } else {
611 $self->{input_encoding} = $charset->get_iana_name;
612 }
613 $self->{confident} = 1;
614
## Wrap the decoder chosen by {change_encoding} and parse again.
615 $wrapped_char_stream = $get_wrapper->($char_stream);
616 $wrapped_char_stream->onerror ($char_onerror);
617
618 $return = $self->parse_char_stream ($wrapped_char_stream, @args);
619 };
620 return $return;
621 } # parse_byte_stream
622
623 ## NOTE: HTML5 spec says that the encoding layer MUST NOT strip BOM
624 ## and the HTML layer MUST ignore it. However, we do strip the BOM in
625 ## the encoding layer and the HTML layer does not ignore any U+FEFF,
626 ## because the core part of our HTML parser expects a string of characters,
627 ## not a string of bytes or code units or anything which might contain a BOM.
628 ## Therefore, any parser interface that accepts a string of bytes,
629 ## such as |parse_byte_string| in this module, must ensure that it does
630 ## strip the BOM and never strip any ZWNBSP.
631
## Parse an HTML document given as a string of characters.
##
## Called as
##   $parser->parse_char_string ($string, ...);
## where |$string| is the character string or a reference to it.  The
## string is wrapped in a |Whatpm::Charset::DecodeHandle::CharString|
## object, which is passed to |parse_char_stream| followed by all
## remaining arguments.  Returns whatever |parse_char_stream| returns.
sub parse_char_string ($$$;$$) {
  #my ($self, $s, $doc, $onerror, $get_wrapper) = @_;
  my $self = shift;

  require Whatpm::Charset::DecodeHandle;

  ## Refer to the caller's scalar directly instead of copying it.
  my $string_ref;
  if (ref $_[0]) {
    $string_ref = $_[0];
  } else {
    $string_ref = \($_[0]);
  }
  my $char_stream
      = Whatpm::Charset::DecodeHandle::CharString->new ($string_ref);

  shift; # the string; what is left is passed through as is
  return $self->parse_char_stream ($char_stream, @_);
} # parse_char_string

## NOTE: Alias for backward compatibility.
*parse_string = \&parse_char_string;
641
## Parse an HTML document read from a character stream.
##
## Arguments, after the invocant (a parser object, or a class name, in
## which case a new parser is created with |new|):
##   $_[0] - input character stream; |read|, |manakai_read_until| and
##           |onerror| are called on it
##   $_[1] - document object; its child node list is emptied first
##   $_[2] - optional error callback; the default |warn|s the error
##           type with line and column
##   $_[3] - optional code reference; when given, it is applied to the
##           input stream and its result is used as the input instead
##
## Installs the {set_nc}, {read_until} and {parse_error} callbacks,
## runs the tokenizer and tree constructor, and returns
## $self->{document}.
642 sub parse_char_stream ($$$;$$) {
643 my $self = ref $_[0] ? shift : shift->new;
644 my $input = $_[0];
645 $self->{document} = $_[1];
646 @{$self->{document}->child_nodes} = ();
647
648 ## NOTE: |set_inner_html| copies most of this method's code
649
650 $self->{confident} = 1 unless exists $self->{confident};
651 $self->{document}->input_encoding ($self->{input_encoding})
652 if defined $self->{input_encoding};
653 ## TODO: |{input_encoding}| is needless?
654
655 $self->{line_prev} = $self->{line} = 1;
656 $self->{column_prev} = -1;
657 $self->{column} = 0;
## {set_nc}: called with the parser as its argument; stores the code
## point of the next input character in {nc} (-1 when no character can
## be read) and updates the {line}/{column} bookkeeping.
658 $self->{set_nc} = sub {
659 my $self = shift;
660
661 my $char = '';
662 if (defined $self->{next_nc}) {
## A character read ahead while handling CR (below) is consumed first.
663 $char = $self->{next_nc};
664 delete $self->{next_nc};
665 $self->{nc} = ord $char;
666 } else {
667 $self->{char_buffer} = '';
668 $self->{char_buffer_pos} = 0;
669
670 my $count = $input->manakai_read_until
671 ($self->{char_buffer}, qr/[^\x00\x0A\x0D]/, $self->{char_buffer_pos});
672 if ($count) {
673 $self->{line_prev} = $self->{line};
674 $self->{column_prev} = $self->{column};
675 $self->{column}++;
676 $self->{nc}
677 = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
678 return;
679 }
680
681 if ($input->read ($char, 1)) {
682 $self->{nc} = ord $char;
683 } else {
684 $self->{nc} = -1;
685 return;
686 }
687 }
688
689 ($self->{line_prev}, $self->{column_prev})
690 = ($self->{line}, $self->{column});
691 $self->{column}++;
692
693 if ($self->{nc} == 0x000A) { # LF
694 !!!cp ('j1');
695 $self->{line}++;
696 $self->{column} = 0;
697 } elsif ($self->{nc} == 0x000D) { # CR
698 !!!cp ('j2');
699 ## TODO: support for abort/streaming
## CR and CR LF are both reported as a single LF: the character after
## CR is read here and kept in {next_nc} unless it is LF.
700 my $next = '';
701 if ($input->read ($next, 1) and $next ne "\x0A") {
702 $self->{next_nc} = $next;
703 }
704 $self->{nc} = 0x000A; # LF # MUST
705 $self->{line}++;
706 $self->{column} = 0;
707 } elsif ($self->{nc} == 0x0000) { # NULL
708 !!!cp ('j4');
709 !!!parse-error (type => 'NULL');
710 $self->{nc} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
711 }
712 };
713
## {read_until}: called as ($scalar, $specials_range, $offset).  Reads
## a run of characters that are neither in $specials_range nor NUL, LF
## or CR, either from {char_buffer} or from the input stream, and
## returns the number of characters read (0 if a read-ahead character
## is pending in {next_nc}).
## NOTE(review): this closure refers to the outer $self and is stored
## in $self; unlike {parse_error} it is not deleted at the end of this
## method, so the reference loop appears to remain - confirm.
714 $self->{read_until} = sub {
715 #my ($scalar, $specials_range, $offset) = @_;
716 return 0 if defined $self->{next_nc};
717
718 my $pattern = qr/[^$_[1]\x00\x0A\x0D]/;
719 my $offset = $_[2] || 0;
720
721 if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
722 pos ($self->{char_buffer}) = $self->{char_buffer_pos};
723 if ($self->{char_buffer} =~ /\G(?>$pattern)+/) {
724 substr ($_[0], $offset)
725 = substr ($self->{char_buffer}, $-[0], $+[0] - $-[0]);
726 my $count = $+[0] - $-[0];
727 if ($count) {
728 $self->{column} += $count;
729 $self->{char_buffer_pos} += $count;
730 $self->{line_prev} = $self->{line};
731 $self->{column_prev} = $self->{column} - 1;
732 $self->{nc} = -1;
733 }
734 return $count;
735 } else {
736 return 0;
737 }
738 } else {
739 my $count = $input->manakai_read_until ($_[0], $pattern, $_[2]);
740 if ($count) {
741 $self->{column} += $count;
742 $self->{line_prev} = $self->{line};
743 $self->{column_prev} = $self->{column} - 1;
744 $self->{nc} = -1;
745 }
746 return $count;
747 }
748 }; # $self->{read_until}
749
750 my $onerror = $_[2] || sub {
751 my (%opt) = @_;
752 my $line = $opt{token} ? $opt{token}->{line} : $opt{line};
753 my $column = $opt{token} ? $opt{token}->{column} : $opt{column};
754 warn "Parse error ($opt{type}) at line $line column $column\n";
755 };
## The current line and column are passed first, so that |line| and
## |column| pairs supplied by the caller in @_ take precedence.
756 $self->{parse_error} = sub {
757 $onerror->(line => $self->{line}, column => $self->{column}, @_);
758 };
759
760 my $char_onerror = sub {
761 my (undef, $type, %opt) = @_;
762 !!!parse-error (layer => 'encode',
763 line => $self->{line}, column => $self->{column} + 1,
764 %opt, type => $type);
765 }; # $char_onerror
766
## $input is reassigned here; the {set_nc} and {read_until} closures
## above share this variable and therefore read from the wrapped
## stream.
767 if ($_[3]) {
768 $input = $_[3]->($input);
769 $input->onerror ($char_onerror);
770 } else {
771 $input->onerror ($char_onerror) unless defined $input->onerror;
772 }
773
774 $self->_initialize_tokenizer;
775 $self->_initialize_tree_constructor;
776 $self->_construct_tree;
777 $self->_terminate_tree_constructor;
778
779 delete $self->{parse_error}; # remove loop
780
781 return $self->{document};
782 } # parse_char_stream
783
## Create a new parser object.
##
## The object is a blessed hash holding
##   {level}  - map from error level name (must, should, warn, info,
##              uncertain) to its one-letter code
##   {set_nc} - default "set next character" handler; sets {nc} to -1
##   {parse_error}, {change_encoding}, {application_cache_selection}
##            - default handlers that do nothing
##
## Returns the new object.
sub new ($) {
  my $class = shift;
  my $self = bless {
    level => {
      must      => 'm',
      should    => 's',
      warn      => 'w',
      info      => 'i',
      uncertain => 'u',
    },
  }, $class;

  ## The handler is stored in the object itself, so it must not hold a
  ## strong reference to the object: that would form a reference loop
  ## and the object would never be freed.  A weakened copy is used
  ## instead.
  require Scalar::Util;
  my $weak_self = $self;
  Scalar::Util::weaken ($weak_self);
  $self->{set_nc} = sub {
    $weak_self->{nc} = -1;
  };

  $self->{parse_error} = sub {
    #
  };
  $self->{change_encoding} = sub {
    # if ($_[0] is a supported encoding) {
    #   run "change the encoding" algorithm;
    #   throw Whatpm::HTML::RestartParser (charset => $new_encoding);
    # }
  };
  $self->{application_cache_selection} = sub {
    #
  };

  return $self;
} # new
810
811 ## Insertion modes
812
## Groups of insertion modes; one bit each.
sub AFTER_HTML_IMS () { 1 << 2 }
sub HEAD_IMS       () { 1 << 3 }
sub BODY_IMS       () { 1 << 4 }
sub BODY_TABLE_IMS () { 1 << 5 }
sub TABLE_IMS      () { 1 << 6 }
sub ROW_IMS        () { 1 << 7 }
sub BODY_AFTER_IMS () { 1 << 8 }
sub FRAME_IMS      () { 1 << 9 }
sub SELECT_IMS     () { 1 << 10 }
#sub IN_FOREIGN_CONTENT_IM () { 1 << 11 } # see Whatpm::HTML::Tokenizer
## NOTE: "in foreign content" insertion mode is special; it is combined
## with the secondary insertion mode.  In this parser, they are stored
## together in the bit-or'ed form.
sub IN_CDATA_RCDATA_IM () { 1 << 12 }
## NOTE: "in CDATA/RCDATA" insertion mode is also special; it is
## combined with the original insertion mode.  In this parser,
## they are stored together in the bit-or'ed form.

## Bits 0-10.
sub IM_MASK () { (1 << 11) - 1 }

## NOTE: "initial" and "before html" insertion modes have no constants.

## NOTE: "after after body" insertion mode.
sub AFTER_HTML_BODY_IM () { BODY_AFTER_IMS | AFTER_HTML_IMS }

## NOTE: "after after frameset" insertion mode.
sub AFTER_HTML_FRAMESET_IM () { FRAME_IMS | AFTER_HTML_IMS }

sub IN_HEAD_IM            () { 0b00 | HEAD_IMS }
sub IN_HEAD_NOSCRIPT_IM   () { 0b01 | HEAD_IMS }
sub AFTER_HEAD_IM         () { 0b10 | HEAD_IMS }
sub BEFORE_HEAD_IM        () { 0b11 | HEAD_IMS }
sub IN_BODY_IM            () { BODY_IMS }
sub IN_CELL_IM            () { 0b01 | BODY_IMS | BODY_TABLE_IMS }
sub IN_CAPTION_IM         () { 0b10 | BODY_IMS | BODY_TABLE_IMS }
sub IN_ROW_IM             () { 0b01 | TABLE_IMS | ROW_IMS }
sub IN_TABLE_BODY_IM      () { 0b10 | TABLE_IMS | ROW_IMS }
sub IN_TABLE_IM           () { TABLE_IMS }
sub AFTER_BODY_IM         () { BODY_AFTER_IMS }
sub IN_FRAMESET_IM        () { 0b01 | FRAME_IMS }
sub AFTER_FRAMESET_IM     () { 0b10 | FRAME_IMS }
sub IN_SELECT_IM          () { 0b01 | SELECT_IMS }
sub IN_SELECT_IN_TABLE_IM () { 0b10 | SELECT_IMS }
sub IN_COLUMN_GROUP_IM    () { 0b10 }
857
## Prepare $self->{document} for tree construction: strict error
## checking is turned off, the document is marked as an HTML document
## and its source position user data is set to line 1, column 1.
##
## NOTE: $self->{document} MUST be specified before this method is
## called.
sub _initialize_tree_constructor ($) {
  my ($self) = @_;
  my $doc = $self->{document};

  $doc->strict_error_checking (0);
  ## TODO: Turn mutation events off # MUST
  ## TODO: Turn loose Document option (manakai extension) on
  $doc->manakai_is_html (1); # MUST

  $doc->set_user_data (manakai_source_line => 1);
  $doc->set_user_data (manakai_source_column => 1);
} # _initialize_tree_constructor
868
## Undo |_initialize_tree_constructor|'s change to the document: strict
## error checking is turned back on.
sub _terminate_tree_constructor ($) {
  my ($self) = @_;
  my $doc = $self->{document};
  $doc->strict_error_checking (1);
  ## TODO: Turn mutation events on
} # _terminate_tree_constructor
874
875 ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
876
877 { # tree construction stage
878 my $token;
879
## Run the tree construction stage: fetch the first token, reset the
## per-parse state and run the "initial", "before html" and subsequent
## insertion modes in turn.
sub _construct_tree ($) {
  my $self = shift;

  ## NOTE: It is not defined when an interactive UA makes
  ## $self->{document} available to the user, or when it begins
  ## accepting user input.

  !!!next-token;

  ## Reset the per-parse state.
  $self->{$_} = undef
      for qw(form_element head_element head_element_inserted);
  $self->{open_elements} = [];
  $self->{$_} = undef
      for qw(inner_html_node ignore_newline);

  ## NOTE: The "initial" insertion mode.
  $self->_tree_construction_initial; # MUST

  ## NOTE: The "before html" insertion mode.
  $self->_tree_construction_root_element;
  $self->{insertion_mode} = BEFORE_HEAD_IM;

  ## NOTE: The "before head" insertion mode and so on.
  $self->_tree_construction_main;
} # _construct_tree
906
## Process tokens while the parser is in the "initial" insertion mode.
##
## The current token is read from |$token|, which is declared outside
## this sub and is advanced by the |!!!next-token| macro.
##
##   - DOCTYPE token: report "not HTML5" / "XSLT-compat" errors where
##     applicable, append a document type node to the document, choose
##     the document's compat mode from the DOCTYPE name, public
##     identifier and system identifier, fetch the next token and
##     return.
##   - Start tag, end tag or end-of-file token: report "no DOCTYPE",
##     set quirks mode and return, leaving the token to be reprocessed.
##   - Character token: strip leading white space.  If nothing is left,
##     fetch the next token and stay in this mode; otherwise report
##     "no DOCTYPE", set quirks mode and return.
##   - Comment token: append a comment node to the document and stay in
##     this mode.
##
## Dies on any other token type.  Returning from this sub means "go to
## the 'before html' insertion mode".
sub _tree_construction_initial ($) {
  my $self = shift;

  ## NOTE: "initial" insertion mode

  INITIAL: {
    if ($token->{type} == DOCTYPE_TOKEN) {
      ## NOTE: Conformance checkers MAY, instead of reporting "not HTML5"
      ## error, switch to a conformance checking mode for another
      ## language.
      my $doctype_name = $token->{name};
      $doctype_name = '' unless defined $doctype_name;
      $doctype_name =~ tr/a-z/A-Z/; # ASCII case-insensitive
      if (not defined $token->{name} or # <!DOCTYPE>
          defined $token->{sysid}) {
        !!!cp ('t1');
        !!!parse-error (type => 'not HTML5', token => $token);
      } elsif ($doctype_name ne 'HTML') {
        !!!cp ('t2');
        !!!parse-error (type => 'not HTML5', token => $token);
      } elsif (defined $token->{pubid}) {
        if ($token->{pubid} eq 'XSLT-compat') {
          !!!cp ('t1.2');
          !!!parse-error (type => 'XSLT-compat', token => $token,
                          level => $self->{level}->{should});
        } else {
          !!!parse-error (type => 'not HTML5', token => $token);
        }
      } else {
        !!!cp ('t3');
        #
      }

      my $doctype = $self->{document}->create_document_type_definition
          ($token->{name}); ## ISSUE: If name is missing (e.g. <!DOCTYPE>)?
      ## NOTE: Default value for both |public_id| and |system_id| attributes
      ## are empty strings, so that we don't set any value in missing cases.
      $doctype->public_id ($token->{pubid}) if defined $token->{pubid};
      $doctype->system_id ($token->{sysid}) if defined $token->{sysid};
      ## NOTE: Other DocumentType attributes are null or empty lists.
      ## In Firefox3, |internalSubset| attribute is set to the empty
      ## string, while |null| is an allowed value for the attribute
      ## according to DOM3 Core.
      $self->{document}->append_child ($doctype);

      if ($token->{quirks} or $doctype_name ne 'HTML') {
        !!!cp ('t4');
        $self->{document}->manakai_compat_mode ('quirks');
      } elsif (defined $token->{pubid}) {
        ## The public identifier is compared ASCII case-insensitively,
        ## so it is uppercased and matched against uppercase literals.
        my $pubid = $token->{pubid};
        $pubid =~ tr/a-z/A-Z/;
        ## Public identifier prefixes that select quirks mode.
        my $quirks_prefixes = [
          "+//SILMARIL//DTD HTML PRO V0R11 19970101//",
          "-//ADVASOFT LTD//DTD HTML 3.0 ASWEDIT + EXTENSIONS//",
          "-//AS//DTD HTML 3.0 ASWEDIT + EXTENSIONS//",
          "-//IETF//DTD HTML 2.0 LEVEL 1//",
          "-//IETF//DTD HTML 2.0 LEVEL 2//",
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 1//",
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 2//",
          "-//IETF//DTD HTML 2.0 STRICT//",
          "-//IETF//DTD HTML 2.0//",
          "-//IETF//DTD HTML 2.1E//",
          "-//IETF//DTD HTML 3.0//",
          "-//IETF//DTD HTML 3.2 FINAL//",
          "-//IETF//DTD HTML 3.2//",
          "-//IETF//DTD HTML 3//",
          "-//IETF//DTD HTML LEVEL 0//",
          "-//IETF//DTD HTML LEVEL 1//",
          "-//IETF//DTD HTML LEVEL 2//",
          "-//IETF//DTD HTML LEVEL 3//",
          "-//IETF//DTD HTML STRICT LEVEL 0//",
          "-//IETF//DTD HTML STRICT LEVEL 1//",
          "-//IETF//DTD HTML STRICT LEVEL 2//",
          "-//IETF//DTD HTML STRICT LEVEL 3//",
          "-//IETF//DTD HTML STRICT//",
          "-//IETF//DTD HTML//",
          "-//METRIUS//DTD METRIUS PRESENTATIONAL//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML STRICT//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 TABLES//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML STRICT//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 TABLES//",
          "-//NETSCAPE COMM. CORP.//DTD HTML//",
          "-//NETSCAPE COMM. CORP.//DTD STRICT HTML//",
          "-//O'REILLY AND ASSOCIATES//DTD HTML 2.0//",
          "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED 1.0//",
          "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED RELAXED 1.0//",
          "-//SOFTQUAD SOFTWARE//DTD HOTMETAL PRO 6.0::19990601::EXTENSIONS TO HTML 4.0//",
          "-//SOFTQUAD//DTD HOTMETAL PRO 4.0::19971010::EXTENSIONS TO HTML 4.0//",
          "-//SPYGLASS//DTD HTML 2.0 EXTENDED//",
          "-//SQ//DTD HTML 2.0 HOTMETAL + EXTENSIONS//",
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA HTML//",
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA STRICT HTML//",
          "-//W3C//DTD HTML 3 1995-03-24//",
          "-//W3C//DTD HTML 3.2 DRAFT//",
          "-//W3C//DTD HTML 3.2 FINAL//",
          "-//W3C//DTD HTML 3.2//",
          "-//W3C//DTD HTML 3.2S DRAFT//",
          "-//W3C//DTD HTML 4.0 FRAMESET//",
          "-//W3C//DTD HTML 4.0 TRANSITIONAL//",
          "-//W3C//DTD HTML EXPERIMENTAL 19960712//",
          "-//W3C//DTD HTML EXPERIMENTAL 970421//",
          "-//W3C//DTD W3 HTML//",
          "-//W3O//DTD W3 HTML 3.0//",
          "-//WEBTECHS//DTD MOZILLA HTML 2.0//",
          "-//WEBTECHS//DTD MOZILLA HTML//",
        ]; # $quirks_prefixes
        my $match;
        for (@$quirks_prefixes) {
          ## Compare against the public identifier itself (not against
          ## the list reference).
          if (substr ($pubid, 0, length $_) eq $_) {
            $match = 1;
            last;
          }
        }
        if ($match or
            $pubid eq "-//W3O//DTD W3 HTML STRICT 3.0//EN//" or
            $pubid eq "-/W3C/DTD HTML 4.0 TRANSITIONAL/EN" or
            $pubid eq "HTML") {
          !!!cp ('t5');
          $self->{document}->manakai_compat_mode ('quirks');
        } elsif ($pubid =~ m[^-//W3C//DTD HTML 4.01 FRAMESET//] or
                 $pubid =~ m[^-//W3C//DTD HTML 4.01 TRANSITIONAL//]) {
          ## HTML 4.01 Frameset/Transitional: quirks mode when the system
          ## identifier is missing, limited quirks mode when it is given.
          if (defined $token->{sysid}) {
            !!!cp ('t7');
            $self->{document}->manakai_compat_mode ('limited quirks');
          } else {
            !!!cp ('t6');
            $self->{document}->manakai_compat_mode ('quirks');
          }
        } elsif ($pubid =~ m[^-//W3C//DTD XHTML 1.0 FRAMESET//] or
                 $pubid =~ m[^-//W3C//DTD XHTML 1.0 TRANSITIONAL//]) {
          !!!cp ('t8');
          $self->{document}->manakai_compat_mode ('limited quirks');
        } else {
          !!!cp ('t9');
        }
      } else {
        !!!cp ('t10');
      }
      if (defined $token->{sysid}) {
        my $sysid = $token->{sysid};
        $sysid =~ tr/A-Z/a-z/;
        if ($sysid eq "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
          ## NOTE: Ensure that |PUBLIC "(limited quirks)" "(quirks)"| is
          ## marked as quirks.
          $self->{document}->manakai_compat_mode ('quirks');
          !!!cp ('t11');
        } else {
          !!!cp ('t12');
        }
      } else {
        !!!cp ('t13');
      }

      ## Go to the "before html" insertion mode.
      !!!next-token;
      return;
    } elsif ({
              START_TAG_TOKEN, 1,
              END_TAG_TOKEN, 1,
              END_OF_FILE_TOKEN, 1,
             }->{$token->{type}}) {
      !!!cp ('t14');
      !!!parse-error (type => 'no DOCTYPE', token => $token);
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the "before html" insertion mode.
      ## reprocess
      !!!ack-later;
      return;
    } elsif ($token->{type} == CHARACTER_TOKEN) {
      if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
        ## Ignore the token

        unless (length $token->{data}) {
          !!!cp ('t15');
          ## Stay in the insertion mode.
          !!!next-token;
          redo INITIAL;
        } else {
          !!!cp ('t16');
        }
      } else {
        !!!cp ('t17');
      }

      !!!parse-error (type => 'no DOCTYPE', token => $token);
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the "before html" insertion mode.
      ## reprocess
      return;
    } elsif ($token->{type} == COMMENT_TOKEN) {
      !!!cp ('t18');
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);

      ## Stay in the insertion mode.
      !!!next-token;
      redo INITIAL;
    } else {
      die "$0: $token->{type}: Unknown token type";
    }
  } # INITIAL

  die "$0: _tree_construction_initial: This should be never reached";
} # _tree_construction_initial
1113
## Process tokens while the parser is in the "before html" insertion
## mode, until the root |html| element has been created.
##
## The current token is read from |$token|, which is declared outside
## this sub and is advanced by the |!!!next-token| macro.
##
##   - DOCTYPE token: report a parse error and ignore the token.
##   - Comment token: append a comment node to the document.
##   - Character token: strip leading white space; if nothing is left,
##     fetch the next token and stay in this mode.
##   - |html| start tag: create the root element from the token, push
##     it onto the stack of open elements, invoke the
##     |application_cache_selection| callback with the |manifest|
##     attribute value (or |undef| when the attribute is absent or its
##     token entry is false), fetch the next token and return.
##   - Anything else: create an attribute-less |html| root element,
##     push it onto the stack, invoke |application_cache_selection|
##     with |undef| exactly once, and return with the token left for
##     reprocessing.
##
## Dies on an unknown token type.  Returning from this sub means "go to
## the 'before head' insertion mode".
sub _tree_construction_root_element ($) {
  my $self = shift;

  ## NOTE: "before html" insertion mode.

  B: {
    if ($token->{type} == DOCTYPE_TOKEN) {
      !!!cp ('t19');
      !!!parse-error (type => 'in html:#DOCTYPE', token => $token);
      ## Ignore the token
      ## Stay in the insertion mode.
      !!!next-token;
      redo B;
    } elsif ($token->{type} == COMMENT_TOKEN) {
      !!!cp ('t20');
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);
      ## Stay in the insertion mode.
      !!!next-token;
      redo B;
    } elsif ($token->{type} == CHARACTER_TOKEN) {
      if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
        ## Ignore the token.

        unless (length $token->{data}) {
          !!!cp ('t21');
          ## Stay in the insertion mode.
          !!!next-token;
          redo B;
        } else {
          !!!cp ('t22');
        }
      } else {
        !!!cp ('t23');
      }

      ## NOTE: The application cache selection callback is invoked by
      ## the common "anything else" code below, as for the other
      ## fall-through branches, so it is not invoked here.

      #
    } elsif ($token->{type} == START_TAG_TOKEN) {
      if ($token->{tag_name} eq 'html') {
        my $root_element;
        !!!create-element ($root_element, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
        $self->{document}->append_child ($root_element);
        push @{$self->{open_elements}},
            [$root_element, $el_category->{html}];

        if ($token->{attributes}->{manifest}) {
          !!!cp ('t24');
          $self->{application_cache_selection}
              ->($token->{attributes}->{manifest}->{value});
          ## ISSUE: Spec is unclear on relative references.
          ## According to Hixie (#whatwg 2008-03-19), it should be
          ## resolved against the base URI of the document in HTML
          ## or xml:base of the element in XHTML.
        } else {
          !!!cp ('t25');
          $self->{application_cache_selection}->(undef);
        }

        !!!nack ('t25c');

        !!!next-token;
        return; ## Go to the "before head" insertion mode.
      } else {
        !!!cp ('t25.1');
        #
      }
    } elsif ({
              END_TAG_TOKEN, 1,
              END_OF_FILE_TOKEN, 1,
             }->{$token->{type}}) {
      !!!cp ('t26');
      #
    } else {
      die "$0: $token->{type}: Unknown token type";
    }

    ## "Anything else": imply the root element.
    my $root_element;
    !!!create-element ($root_element, $HTML_NS, 'html',, $token);
    $self->{document}->append_child ($root_element);
    push @{$self->{open_elements}}, [$root_element, $el_category->{html}];

    $self->{application_cache_selection}->(undef);

    ## NOTE: Reprocess the token.
    !!!ack-later;
    return; ## Go to the "before head" insertion mode.
  } # B

  die "$0: _tree_construction_root_element: This should never be reached";
} # _tree_construction_root_element
1206
## Reset the insertion mode appropriately.
##
## Walks the stack of open elements (|$self->{open_elements}|) from the
## current node towards the root and sets |$self->{insertion_mode}|
## from the first node that determines a mode.  When the walk reaches
## the bottom of the stack, |$self->{inner_html_node}| is used in place
## of that node; the sub dies if it is not defined.
##
## Each stack entry is a two-item array reference: the element node and
## its category bit field.
sub _reset_insertion_mode ($) {
  my $self = shift;

  ## Step 1
  my $last;

  ## Step 2
  my $i = -1;
  my $node = $self->{open_elements}->[$i];

  ## Step 3
  S3: {
    if ($self->{open_elements}->[0]->[0] eq $node->[0]) {
      $last = 1;
      if (defined $self->{inner_html_node}) {
        !!!cp ('t28');
        $node = $self->{inner_html_node};
      } else {
        die "_reset_insertion_mode: t27";
      }
    }

    ## Step 4..14
    my $new_mode;
    if ($node->[1] & FOREIGN_EL) {
      !!!cp ('t28.1');
      ## NOTE: Strictly speaking, the line below only applies to MathML
      ## and SVG elements.  Currently the HTML syntax supports only
      ## MathML and SVG elements as foreigners.
      $new_mode = IN_BODY_IM | IN_FOREIGN_CONTENT_IM;
    } elsif ($node->[1] == TABLE_CELL_EL) {
      if ($last) {
        !!!cp ('t28.2');
        #
      } else {
        !!!cp ('t28.3');
        $new_mode = IN_CELL_IM;
      }
    } else {
      !!!cp ('t28.4');
      $new_mode = {
        select => IN_SELECT_IM,
        ## NOTE: |option| and |optgroup| do not set
        ## insertion mode to "in select" by themselves.
        tr => IN_ROW_IM,
        tbody => IN_TABLE_BODY_IM,
        thead => IN_TABLE_BODY_IM,
        tfoot => IN_TABLE_BODY_IM,
        caption => IN_CAPTION_IM,
        colgroup => IN_COLUMN_GROUP_IM,
        table => IN_TABLE_IM,
        head => IN_BODY_IM, # not in head!
        body => IN_BODY_IM,
        frameset => IN_FRAMESET_IM,
      }->{$node->[0]->manakai_local_name};
    }
    ## Assign and return explicitly, so that returning does not depend
    ## on the mode value being true.
    if (defined $new_mode) {
      $self->{insertion_mode} = $new_mode;
      return;
    }

    ## Step 15
    if ($node->[1] == HTML_EL) {
      unless (defined $self->{head_element}) {
        !!!cp ('t29');
        $self->{insertion_mode} = BEFORE_HEAD_IM;
      } else {
        ## ISSUE: Can this state be reached?
        !!!cp ('t30');
        $self->{insertion_mode} = AFTER_HEAD_IM;
      }
      return;
    } else {
      !!!cp ('t31');
    }

    ## Step 16
    if ($last) {
      $self->{insertion_mode} = IN_BODY_IM;
      return;
    }

    ## Step 17
    $i--;
    $node = $self->{open_elements}->[$i];

    ## Step 18
    redo S3;
  } # S3

  die "$0: _reset_insertion_mode: This line should never be reached";
} # _reset_insertion_mode
1293
1294 sub _tree_construction_main ($) {
1295 my $self = shift;
1296
1297 my $active_formatting_elements = [];
1298
## Reconstruct the active formatting elements.
##
## Takes one argument: a code reference that is called with each newly
## cloned element node and is expected to insert it into the tree.
## Entries of |$active_formatting_elements| that are no longer in the
## stack of open elements (back to the last marker or still-open entry)
## are cloned, inserted, pushed onto the stack of open elements and
## replaced in the list by their clones.
my $reconstruct_active_formatting_elements = sub { # MUST
  my ($insert_node) = @_;

  ## Step 1: An empty list needs no work.
  return unless @$active_formatting_elements;

  ## Step 3: Begin with the most recently added entry.
  my $pos = -1;
  my $entry = $active_formatting_elements->[$pos];

  ## Step 2: Nothing to do if that entry is a marker or is still open.
  return if $entry->[0] eq '#marker';
  for my $open (@{$self->{open_elements}}) {
    next unless $entry->[0] eq $open->[0];
    !!!cp ('t32');
    return;
  }

  S4: {
    ## Step 4: Stop rewinding at the first entry of the list.
    last S4 if $active_formatting_elements->[0]->[0] eq $entry->[0];

    ## Step 5: Move to the previous entry.
    $pos--;
    $entry = $active_formatting_elements->[$pos];

    ## Step 6: Keep rewinding while the entry is neither a marker nor
    ## in the stack of open elements.
    if ($entry->[0] eq '#marker') {
      !!!cp ('t33_1');
      #
    } else {
      my $still_open;
      OE: for my $open (@{$self->{open_elements}}) {
        next OE unless $entry->[0] eq $open->[0];
        !!!cp ('t33');
        $still_open = 1;
        last OE;
      }
      unless ($still_open) {
        ## NOTE: <!DOCTYPE HTML><p><b><i><u></p> <p>X
        !!!cp ('t35');
        redo S4;
      }
      !!!cp ('t34');
      #
    }

    ## Step 7: Advance to the first entry to be re-created.
    $pos++;
    $entry = $active_formatting_elements->[$pos];
  } # S4

  S7: {
    ## Step 8: Shallow clone of the element; the category is reused.
    my $clone = [$entry->[0]->clone_node (0), $entry->[1]];

    ## Step 9: Insert the clone and make it the current node.
    $insert_node->($clone->[0]);
    push @{$self->{open_elements}}, $clone;

    ## Step 10: The list entry now refers to the clone.
    $active_formatting_elements->[$pos] = $self->{open_elements}->[-1];

    ## Step 11: Finished once the last entry has been re-created.
    if ($clone->[0] eq $active_formatting_elements->[-1]->[0]) {
      !!!cp ('t37');
    } else {
      !!!cp ('t36');
      ## Step 7'
      $pos++;
      $entry = $active_formatting_elements->[$pos];

      redo S7;
    }
  } # S7
}; # $reconstruct_active_formatting_elements
1378
## Clear the list of active formatting elements up to the last marker:
## the last |#marker| entry and every entry after it are removed.  The
## list is left untouched when it contains no marker.
my $clear_up_to_marker = sub {
  my $idx = $#$active_formatting_elements;
  while ($idx >= 0) {
    unless ($active_formatting_elements->[$idx]->[0] eq '#marker') {
      $idx--;
      next;
    }

    !!!cp ('t38');
    ## Two-argument |splice| drops everything from the marker onwards.
    splice @$active_formatting_elements, $idx;
    return;
  }

  !!!cp ('t39');
}; # $clear_up_to_marker
1390
1391 my $insert;
1392
## Generic CDATA/RCDATA element parsing for the current start tag
## token (|$token|).
##
## Takes the content model flag to switch the tokenizer to (CDATA or
## RCDATA).  Inserts an element for the token, sets
## |$self->{content_model}|, clears the |escape| flag, adds the
## |IN_CDATA_RCDATA_IM| bit to the insertion mode and fetches the next
## token.
my $parse_rcdata = sub ($) {
  my ($content_model_flag) = @_;

  ## Step 1
  !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);

  ## Step 2
  $self->{content_model} = $content_model_flag; # CDATA or RCDATA
  delete $self->{escape}; # MUST

  ## Step 3, 4
  $self->{insertion_mode} |= IN_CDATA_RCDATA_IM;

  !!!nack ('t40.1');
  !!!next-token;
}; # $parse_rcdata
1410
## Handle a |script| start tag token (|$token|): create the element,
## insert it through the current |$insert| callback, push it onto the
## stack of open elements, switch the tokenizer to the CDATA content
## model, add the |IN_CDATA_RCDATA_IM| bit to the insertion mode and
## fetch the next token.
my $script_start_tag = sub () {
  ## Step 1: Create the element for the token.
  my $script_el;
  !!!create-element ($script_el, $HTML_NS, 'script', $token->{attributes}, $token);

  ## Step 2
  ## TODO: mark as "parser-inserted"

  ## Step 3
  ## TODO: Mark as "already executed", if ...

  ## Step 4: Insert the element at the appropriate place.
  $insert->($script_el);

  ## ISSUE: $script_el is not put into the stack
  my $stack_entry = [$script_el, $el_category->{script}];
  push @{$self->{open_elements}}, $stack_entry;

  ## Step 5: Tokenize the element content as CDATA.
  delete $self->{escape}; # MUST
  $self->{content_model} = CDATA_CONTENT_MODEL;

  ## Step 6-7: Flag the insertion mode as being inside CDATA/RCDATA.
  $self->{insertion_mode} = $self->{insertion_mode} | IN_CDATA_RCDATA_IM;

  !!!nack ('t40.2');
  !!!next-token;
}; # $script_start_tag
1438
1439 ## NOTE: $open_tables->[-1]->[0] is the "current table" element node.
1440 ## NOTE: $open_tables->[-1]->[1] is the "tainted" flag.
1441 ## NOTE: $open_tables->[-1]->[2] is set false when non-Text node inserted.
1442 my $open_tables = [[$self->{open_elements}->[0]->[0]]];
1443
## Handle an end tag of a formatting element by running the adoption
## agency algorithm (AAA).
##
## Takes the end tag token.  Its |tag_name| selects the formatting
## element, which is searched for in |$active_formatting_elements|
## back to the last marker.  The algorithm is repeated (|redo FET|)
## until one of the terminating branches fetches the next token and
## returns.
##
## Entries of both |$active_formatting_elements| and
## |$self->{open_elements}| are two-item array references: the element
## node and its category bit field.
my $formatting_end_tag = sub {
  my $end_tag_token = shift;
  my $tag_name = $end_tag_token->{tag_name};

  ## NOTE: The adoption agency algorithm (AAA).

  FET: {
    ## Step 1
    my $formatting_element;
    my $formatting_element_i_in_active;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq '#marker') {
        !!!cp ('t52');
        last AFE;
      } elsif ($active_formatting_elements->[$_]->[0]->manakai_local_name
                   eq $tag_name) {
        !!!cp ('t51');
        $formatting_element = $active_formatting_elements->[$_];
        $formatting_element_i_in_active = $_;
        last AFE;
      }
    } # AFE
    unless (defined $formatting_element) {
      !!!cp ('t53');
      !!!parse-error (type => 'unmatched end tag', text => $tag_name, token => $end_tag_token);
      ## Ignore the token
      !!!next-token;
      return;
    }
    ## has an element in scope
    my $in_scope = 1;
    my $formatting_element_i_in_open;
    INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if ($node->[0] eq $formatting_element->[0]) {
        if ($in_scope) {
          !!!cp ('t54');
          $formatting_element_i_in_open = $_;
          last INSCOPE;
        } else { # in open elements but not in scope
          !!!cp ('t55');
          !!!parse-error (type => 'unmatched end tag',
                          text => $tag_name,
                          token => $end_tag_token);
          ## Ignore the token
          !!!next-token;
          return;
        }
      } elsif ($node->[1] & SCOPING_EL) {
        !!!cp ('t56');
        $in_scope = 0;
      }
    } # INSCOPE
    unless (defined $formatting_element_i_in_open) {
      !!!cp ('t57');
      !!!parse-error (type => 'unmatched end tag',
                      text => $tag_name,
                      token => $end_tag_token);
      ## Remove the formatting element itself from the list; it is not
      ## necessarily the last entry of the list.
      splice @$active_formatting_elements,
          $formatting_element_i_in_active, 1;
      !!!next-token; ## TODO: ok?
      return;
    }
    if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
      !!!cp ('t58');
      !!!parse-error (type => 'not closed',
                      text => $self->{open_elements}->[-1]->[0]
                          ->manakai_local_name,
                      token => $end_tag_token);
    }

    ## Step 2
    ## The stack is scanned from the current node down to the
    ## formatting element, so the last candidate recorded is the one
    ## closest to the formatting element.
    my $furthest_block;
    my $furthest_block_i_in_open;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if (not ($node->[1] & FORMATTING_EL) and
          #not $phrasing_category->{$node->[1]} and
          ($node->[1] & SPECIAL_EL or
           $node->[1] & SCOPING_EL)) { ## Scoping is redundant, maybe
        !!!cp ('t59');
        $furthest_block = $node;
        $furthest_block_i_in_open = $_;
        ## NOTE: The topmost (eldest) node.
      } elsif ($node->[0] eq $formatting_element->[0]) {
        !!!cp ('t60');
        last OE;
      }
    } # OE

    ## Step 3
    unless (defined $furthest_block) { # MUST
      !!!cp ('t61');
      ## Pop the formatting element and everything above it.
      splice @{$self->{open_elements}}, $formatting_element_i_in_open;
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      !!!next-token;
      return;
    }

    ## Step 4
    my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1];

    ## Step 5
    my $furthest_block_parent = $furthest_block->[0]->parent_node;
    if (defined $furthest_block_parent) {
      !!!cp ('t62');
      $furthest_block_parent->remove_child ($furthest_block->[0]);
    }

    ## Step 6
    ## The bookmark is remembered as the element of the entry that
    ## precedes the insertion position.
    my $bookmark_prev_el
        = $active_formatting_elements->[$formatting_element_i_in_active - 1]
            ->[0];

    ## Step 7
    my $node = $furthest_block;
    my $node_i_in_open = $furthest_block_i_in_open;
    my $last_node = $furthest_block;
    S7: {
      ## Step 1
      $node_i_in_open--;
      $node = $self->{open_elements}->[$node_i_in_open];

      ## Step 2
      my $node_i_in_active;
      S7S2: {
        for (reverse 0..$#$active_formatting_elements) {
          if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
            !!!cp ('t63');
            $node_i_in_active = $_;
            last S7S2;
          }
        }
        ## Not an active formatting element: drop it from the stack of
        ## open elements and continue with the previous node.
        splice @{$self->{open_elements}}, $node_i_in_open, 1;
        redo S7;
      } # S7S2

      ## Step 3
      last S7 if $node->[0] eq $formatting_element->[0];

      ## Step 4
      if ($last_node->[0] eq $furthest_block->[0]) {
        !!!cp ('t64');
        $bookmark_prev_el = $node->[0];
      }

      ## Step 5
      if ($node->[0]->has_child_nodes ()) {
        !!!cp ('t65');
        my $clone = [$node->[0]->clone_node (0), $node->[1]];
        $active_formatting_elements->[$node_i_in_active] = $clone;
        $self->{open_elements}->[$node_i_in_open] = $clone;
        $node = $clone;
      }

      ## Step 6
      $node->[0]->append_child ($last_node->[0]);

      ## Step 7
      $last_node = $node;

      ## Step 8
      redo S7;
    } # S7

    ## Step 8
    if ($common_ancestor_node->[1] & TABLE_ROWS_EL) {
      ## Foster parenting: insert before the innermost open |table|
      ## when it has an element parent, otherwise append to the element
      ## below it in the stack of open elements.
      my $foster_parent_element;
      my $next_sibling;
      OE: for (reverse 0..$#{$self->{open_elements}}) {
        if ($self->{open_elements}->[$_]->[1] == TABLE_EL) {
          my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
          if (defined $parent and $parent->node_type == 1) {
            !!!cp ('t65.1');
            $foster_parent_element = $parent;
            $next_sibling = $self->{open_elements}->[$_]->[0];
          } else {
            !!!cp ('t65.2');
            $foster_parent_element
                = $self->{open_elements}->[$_ - 1]->[0];
          }
          last OE;
        }
      } # OE
      $foster_parent_element = $self->{open_elements}->[0]->[0]
          unless defined $foster_parent_element;
      $foster_parent_element->insert_before ($last_node->[0], $next_sibling);
      $open_tables->[-1]->[1] = 1; # tainted
    } else {
      !!!cp ('t65.3');
      $common_ancestor_node->[0]->append_child ($last_node->[0]);
    }

    ## Step 9
    my $clone = [$formatting_element->[0]->clone_node (0),
                 $formatting_element->[1]];

    ## Step 10
    ## Take a copy of the child list first, since appending the
    ## children to the clone removes them from the furthest block.
    my @cn = @{$furthest_block->[0]->child_nodes};
    $clone->[0]->append_child ($_) for @cn;

    ## Step 11
    $furthest_block->[0]->append_child ($clone->[0]);

    ## Step 12
    my $i;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
        !!!cp ('t66');
        splice @$active_formatting_elements, $_, 1;
        ## The bookmark, if already found, shifts down by one.
        $i-- and last AFE if defined $i;
      } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
        !!!cp ('t67');
        $i = $_;
      }
    } # AFE
    splice @$active_formatting_elements, $i + 1, 0, $clone;

    ## Step 13
    undef $i;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
        !!!cp ('t68');
        splice @{$self->{open_elements}}, $_, 1;
        ## The furthest block, if already found, shifts down by one.
        $i-- and last OE if defined $i;
      } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
        !!!cp ('t69');
        $i = $_;
      }
    } # OE
    splice @{$self->{open_elements}}, $i + 1, 0, $clone;

    ## Step 14
    redo FET;
  } # FET
}; # $formatting_end_tag
1679
## Default insertion callback: append the given node to the current
## node, i.e. the element of the last entry of the stack of open
## elements.  |$insert| initially refers to this callback.
$insert = my $insert_to_current = sub {
  my ($child) = @_;
  my $current_node = $self->{open_elements}->[-1]->[0];
  $current_node->append_child ($child);
}; # $insert_to_current
1683
## Insertion callback that foster-parents the given node when the
## current node has the |TABLE_ROWS_EL| category bit; otherwise the
## node is simply appended to the current node.
##
## When foster parenting, the node is inserted before the innermost
## open |table| element if that element has an element parent;
## otherwise it is appended to the element below the |table| in the
## stack of open elements, or to the first element of the stack when
## no |table| is open.  The current table is then marked as tainted.
my $insert_to_foster = sub {
  my ($new_child) = @_;
  my $open = $self->{open_elements};

  unless ($open->[-1]->[1] & TABLE_ROWS_EL) {
    !!!cp ('t72');
    return $open->[-1]->[0]->append_child ($new_child);
  }

  # MUST
  my $foster_parent;
  my $before;
  OE: for my $idx (reverse 0..$#$open) {
    next OE unless $open->[$idx]->[1] == TABLE_EL;

    my $table_parent = $open->[$idx]->[0]->parent_node;
    if (defined $table_parent and $table_parent->node_type == 1) {
      !!!cp ('t70');
      $foster_parent = $table_parent;
      $before = $open->[$idx]->[0];
    } else {
      !!!cp ('t71');
      $foster_parent = $open->[$idx - 1]->[0];
    }
    last OE;
  } # OE
  $foster_parent = $open->[0]->[0] unless defined $foster_parent;
  $foster_parent->insert_before ($new_child, $before);
  $open_tables->[-1]->[1] = 1; # tainted
}; # $insert_to_foster
1715
1716 ## NOTE: Insert a character (MUST): When a character is inserted, if
1717 ## the last node that was inserted by the parser is a Text node and
1718 ## the character has to be inserted after that node, then the
1719 ## character is appended to the Text node. However, if any other
1720 ## node is inserted by the parser, then a new Text node is created
1721 ## and the character is appended as that Text node. If I'm not
1722 ## wrong, for a parser with scripting disabled, there are only two
1723 ## cases where this occurs. One is the case where an element node
1724 ## is inserted to the |head| element. This is covered by using the
1725 ## |$self->{head_element_inserted}| flag. Another is the case where
1726 ## an element or comment is inserted into the |table| subtree while
1727 ## foster parenting happens. This is covered by using the [2] flag
1728 ## of the |$open_tables| structure. All other cases are handled
1729 ## simply by calling |manakai_append_text| method.
1730
1731 ## TODO: |<body><script>document.write("a<br>");
1732 ## document.body.removeChild (document.body.lastChild);
1733 ## document.write ("b")</script>|
1734
1735 B: while (1) {
1736 if ($token->{type} == DOCTYPE_TOKEN) {
1737 !!!cp ('t73');
1738 !!!parse-error (type => 'in html:#DOCTYPE', token => $token);
1739 ## Ignore the token
1740 ## Stay in the phase
1741 !!!next-token;
1742 next B;
1743 } elsif ($token->{type} == START_TAG_TOKEN and
1744 $token->{tag_name} eq 'html') {
1745 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
1746 !!!cp ('t79');
1747 !!!parse-error (type => 'after html', text => 'html', token => $token);
1748 $self->{insertion_mode} = AFTER_BODY_IM;
1749 } elsif ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
1750 !!!cp ('t80');
1751 !!!parse-error (type => 'after html', text => 'html', token => $token);
1752 $self->{insertion_mode} = AFTER_FRAMESET_IM;
1753 } else {
1754 !!!cp ('t81');
1755 }
1756
1757 !!!cp ('t82');
1758 !!!parse-error (type => 'not first start tag', token => $token);
1759 my $top_el = $self->{open_elements}->[0]->[0];
1760 for my $attr_name (keys %{$token->{attributes}}) {
1761 unless ($top_el->has_attribute_ns (undef, $attr_name)) {
1762 !!!cp ('t84');
1763 $top_el->set_attribute_ns
1764 (undef, [undef, $attr_name],
1765 $token->{attributes}->{$attr_name}->{value});
1766 }
1767 }
1768 !!!nack ('t84.1');
1769 !!!next-token;
1770 next B;
1771 } elsif ($token->{type} == COMMENT_TOKEN) {
1772 my $comment = $self->{document}->create_comment ($token->{data});
1773 if ($self->{insertion_mode} & AFTER_HTML_IMS) {
1774 !!!cp ('t85');
1775 $self->{document}->append_child ($comment);
1776 } elsif ($self->{insertion_mode} == AFTER_BODY_IM) {
1777 !!!cp ('t86');
1778 $self->{open_elements}->[0]->[0]->append_child ($comment);
1779 } else {
1780 !!!cp ('t87');
1781 $self->{open_elements}->[-1]->[0]->append_child ($comment);
1782 $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
1783 }
1784 !!!next-token;
1785 next B;
1786 } elsif ($self->{insertion_mode} & IN_CDATA_RCDATA_IM) {
1787 if ($token->{type} == CHARACTER_TOKEN) {
1788 $token->{data} =~ s/^\x0A// if $self->{ignore_newline};
1789 delete $self->{ignore_newline};
1790
1791 if (length $token->{data}) {
1792 !!!cp ('t43');
1793 $self->{open_elements}->[-1]->[0]->manakai_append_text
1794 ($token->{data});
1795 } else {
1796 !!!cp ('t43.1');
1797 }
1798 !!!next-token;
1799 next B;
1800 } elsif ($token->{type} == END_TAG_TOKEN) {
1801 delete $self->{ignore_newline};
1802
1803 if ($token->{tag_name} eq 'script') {
1804 !!!cp ('t50');
1805
1806 ## Para 1-2
1807 my $script = pop @{$self->{open_elements}};
1808
1809 ## Para 3
1810 $self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM;
1811
1812 ## Para 4
1813 ## TODO: $old_insertion_point = $current_insertion_point;
1814 ## TODO: $current_insertion_point = just before $self->{nc};
1815
1816 ## Para 5
1817 ## TODO: Run the $script->[0].
1818
1819 ## Para 6
1820 ## TODO: $current_insertion_point = $old_insertion_point;
1821
1822 ## Para 7
1823 ## TODO: if ($pending_external_script) {
1824 ## TODO: ...
1825 ## TODO: }
1826
1827 !!!next-token;
1828 next B;
1829 } else {
1830 !!!cp ('t42');
1831
1832 pop @{$self->{open_elements}};
1833
1834 $self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM;
1835 !!!next-token;
1836 next B;
1837 }
1838 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
1839 delete $self->{ignore_newline};
1840
1841 !!!cp ('t44');
1842 !!!parse-error (type => 'not closed',
1843 text => $self->{open_elements}->[-1]->[0]
1844 ->manakai_local_name,
1845 token => $token);
1846
1847 #if ($self->{open_elements}->[-1]->[1] == SCRIPT_EL) {
1848 # ## TODO: Mark as "already executed"
1849 #}
1850
1851 pop @{$self->{open_elements}};
1852
1853 $self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM;
1854 ## Reprocess.
1855 next B;
1856 } else {
1857 die "$0: $token->{type}: In CDATA/RCDATA: Unknown token type";
1858 }
1859 } elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) {
1860 if ($token->{type} == CHARACTER_TOKEN) {
1861 !!!cp ('t87.1');
1862 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
1863 !!!next-token;
1864 next B;
1865 } elsif ($token->{type} == START_TAG_TOKEN) {
1866 if ((not {mglyph => 1, malignmark => 1}->{$token->{tag_name}} and
1867 $self->{open_elements}->[-1]->[1] & FOREIGN_FLOW_CONTENT_EL) or
1868 not ($self->{open_elements}->[-1]->[1] & FOREIGN_EL) or
1869 ($token->{tag_name} eq 'svg' and
1870 $self->{open_elements}->[-1]->[1] == MML_AXML_EL)) {
1871 ## NOTE: "using the rules for secondary insertion mode"then"continue"
1872 !!!cp ('t87.2');
1873 #
1874 } elsif ({
1875 b => 1, big => 1, blockquote => 1, body => 1, br => 1,
1876 center => 1, code => 1, dd => 1, div => 1, dl => 1, dt => 1,
1877 em => 1, embed => 1, font => 1, h1 => 1, h2 => 1, h3 => 1,
1878 h4 => 1, h5 => 1, h6 => 1, head => 1, hr => 1, i => 1,
1879 img => 1, li => 1, listing => 1, menu => 1, meta => 1,
1880 nobr => 1, ol => 1, p => 1, pre => 1, ruby => 1, s => 1,
1881 small => 1, span => 1, strong => 1, strike => 1, sub => 1,
1882 sup => 1, table => 1, tt => 1, u => 1, ul => 1, var => 1,
1883 }->{$token->{tag_name}}) {
1884 !!!cp ('t87.2');
1885 !!!parse-error (type => 'not closed',
1886 text => $self->{open_elements}->[-1]->[0]
1887 ->manakai_local_name,
1888 token => $token);
1889
1890 pop @{$self->{open_elements}}
1891 while $self->{open_elements}->[-1]->[1] & FOREIGN_EL;
1892
1893 $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
1894 ## Reprocess.
1895 next B;
1896 } else {
1897 my $nsuri = $self->{open_elements}->[-1]->[0]->namespace_uri;
1898 my $tag_name = $token->{tag_name};
1899 if ($nsuri eq $SVG_NS) {
1900 $tag_name = {
1901 altglyph => 'altGlyph',
1902 altglyphdef => 'altGlyphDef',
1903 altglyphitem => 'altGlyphItem',
1904 animatecolor => 'animateColor',
1905 animatemotion => 'animateMotion',
1906 animatetransform => 'animateTransform',
1907 clippath => 'clipPath',
1908 feblend => 'feBlend',
1909 fecolormatrix => 'feColorMatrix',
1910 fecomponenttransfer => 'feComponentTransfer',
1911 fecomposite => 'feComposite',
1912 feconvolvematrix => 'feConvolveMatrix',
1913 fediffuselighting => 'feDiffuseLighting',
1914 fedisplacementmap => 'feDisplacementMap',
1915 fedistantlight => 'feDistantLight',
1916 feflood => 'feFlood',
1917 fefunca => 'feFuncA',
1918 fefuncb => 'feFuncB',
1919 fefuncg => 'feFuncG',
1920 fefuncr => 'feFuncR',
1921 fegaussianblur => 'feGaussianBlur',
1922 feimage => 'feImage',
1923 femerge => 'feMerge',
1924 femergenode => 'feMergeNode',
1925 femorphology => 'feMorphology',
1926 feoffset => 'feOffset',
1927 fepointlight => 'fePointLight',
1928 fespecularlighting => 'feSpecularLighting',
1929 fespotlight => 'feSpotLight',
1930 fetile => 'feTile',
1931 feturbulence => 'feTurbulence',
1932 foreignobject => 'foreignObject',
1933 glyphref => 'glyphRef',
1934 lineargradient => 'linearGradient',
1935 radialgradient => 'radialGradient',
1936 #solidcolor => 'solidColor', ## NOTE: Commented in spec (SVG1.2)
1937 textpath => 'textPath',
1938 }->{$tag_name} || $tag_name;
1939 }
1940
1941 ## "adjust SVG attributes" (SVG only) - done in insert-element-f
1942
1943 ## "adjust foreign attributes" - done in insert-element-f
1944
1945 !!!insert-element-f ($nsuri, $tag_name, $token->{attributes}, $token);
1946
1947 if ($self->{self_closing}) {
1948 pop @{$self->{open_elements}};
1949 !!!ack ('t87.3');
1950 } else {
1951 !!!cp ('t87.4');
1952 }
1953
1954 !!!next-token;
1955 next B;
1956 }
1957 } elsif ($token->{type} == END_TAG_TOKEN) {
1958 ## NOTE: "using the rules for secondary insertion mode" then "continue"
1959 !!!cp ('t87.5');
1960 #
1961 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
1962 !!!cp ('t87.6');
1963 !!!parse-error (type => 'not closed',
1964 text => $self->{open_elements}->[-1]->[0]
1965 ->manakai_local_name,
1966 token => $token);
1967
1968 pop @{$self->{open_elements}}
1969 while $self->{open_elements}->[-1]->[1] & FOREIGN_EL;
1970
1971 ## NOTE: |<span><svg>| ... two parse errors, |<svg>| ... a parse error.
1972
1973 $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
1974 ## Reprocess.
1975 next B;
1976 } else {
1977 die "$0: $token->{type}: Unknown token type";
1978 }
1979 }
1980
1981 if ($self->{insertion_mode} & HEAD_IMS) {
1982 if ($token->{type} == CHARACTER_TOKEN) {
1983 if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
1984 unless ($self->{insertion_mode} == BEFORE_HEAD_IM) {
1985 if ($self->{head_element_inserted}) {
1986 !!!cp ('t88.3');
1987 $self->{open_elements}->[-1]->[0]->append_child
1988 ($self->{document}->create_text_node ($1));
1989 delete $self->{head_element_inserted};
1990 ## NOTE: |</head> <link> |
1991 #
1992 } else {
1993 !!!cp ('t88.2');
1994 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
1995 ## NOTE: |</head> &#x20;|
1996 #
1997 }
1998 } else {
1999 !!!cp ('t88.1');
2000 ## Ignore the token.
2001 #
2002 }
2003 unless (length $token->{data}) {
2004 !!!cp ('t88');
2005 !!!next-token;
2006 next B;
2007 }
2008 ## TODO: set $token->{column} appropriately
2009 }
2010
2011 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2012 !!!cp ('t89');
2013 ## As if <head>
2014 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2015 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2016 push @{$self->{open_elements}},
2017 [$self->{head_element}, $el_category->{head}];
2018
2019 ## Reprocess in the "in head" insertion mode...
2020 pop @{$self->{open_elements}};
2021
2022 ## Reprocess in the "after head" insertion mode...
2023 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2024 !!!cp ('t90');
2025 ## As if </noscript>
2026 pop @{$self->{open_elements}};
2027 !!!parse-error (type => 'in noscript:#text', token => $token);
2028
2029 ## Reprocess in the "in head" insertion mode...
2030 ## As if </head>
2031 pop @{$self->{open_elements}};
2032
2033 ## Reprocess in the "after head" insertion mode...
2034 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2035 !!!cp ('t91');
2036 pop @{$self->{open_elements}};
2037
2038 ## Reprocess in the "after head" insertion mode...
2039 } else {
2040 !!!cp ('t92');
2041 }
2042
2043 ## "after head" insertion mode
2044 ## As if <body>
2045 !!!insert-element ('body',, $token);
2046 $self->{insertion_mode} = IN_BODY_IM;
2047 ## reprocess
2048 next B;
2049 } elsif ($token->{type} == START_TAG_TOKEN) {
2050 if ($token->{tag_name} eq 'head') {
2051 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2052 !!!cp ('t93');
2053 !!!create-element ($self->{head_element}, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
2054 $self->{open_elements}->[-1]->[0]->append_child
2055 ($self->{head_element});
2056 push @{$self->{open_elements}},
2057 [$self->{head_element}, $el_category->{head}];
2058 $self->{insertion_mode} = IN_HEAD_IM;
2059 !!!nack ('t93.1');
2060 !!!next-token;
2061 next B;
2062 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2063 !!!cp ('t93.2');
2064 !!!parse-error (type => 'after head', text => 'head',
2065 token => $token);
2066 ## Ignore the token
2067 !!!nack ('t93.3');
2068 !!!next-token;
2069 next B;
2070 } else {
2071 !!!cp ('t95');
2072 !!!parse-error (type => 'in head:head',
2073 token => $token); # or in head noscript
2074 ## Ignore the token
2075 !!!nack ('t95.1');
2076 !!!next-token;
2077 next B;
2078 }
2079 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2080 !!!cp ('t96');
2081 ## As if <head>
2082 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2083 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2084 push @{$self->{open_elements}},
2085 [$self->{head_element}, $el_category->{head}];
2086
2087 $self->{insertion_mode} = IN_HEAD_IM;
2088 ## Reprocess in the "in head" insertion mode...
2089 } else {
2090 !!!cp ('t97');
2091 }
2092
2093 if ($token->{tag_name} eq 'base') {
2094 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2095 !!!cp ('t98');
2096 ## As if </noscript>
2097 pop @{$self->{open_elements}};
2098 !!!parse-error (type => 'in noscript', text => 'base',
2099 token => $token);
2100
2101 $self->{insertion_mode} = IN_HEAD_IM;
2102 ## Reprocess in the "in head" insertion mode...
2103 } else {
2104 !!!cp ('t99');
2105 }
2106
2107 ## NOTE: There is a "as if in head" code clone.
2108 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
2109 !!!cp ('t100');
2110 !!!parse-error (type => 'after head',
2111 text => $token->{tag_name}, token => $token);
2112 push @{$self->{open_elements}},
2113 [$self->{head_element}, $el_category->{head}];
2114 $self->{head_element_inserted} = 1;
2115 } else {
2116 !!!cp ('t101');
2117 }
2118 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2119 pop @{$self->{open_elements}};
2120 pop @{$self->{open_elements}} # <head>
2121 if $self->{insertion_mode} == AFTER_HEAD_IM;
2122 !!!nack ('t101.1');
2123 !!!next-token;
2124 next B;
2125 } elsif ($token->{tag_name} eq 'link') {
2126 ## NOTE: There is a "as if in head" code clone.
2127 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
2128 !!!cp ('t102');
2129 !!!parse-error (type => 'after head',
2130 text => $token->{tag_name}, token => $token);
2131 push @{$self->{open_elements}},
2132 [$self->{head_element}, $el_category->{head}];
2133 $self->{head_element_inserted} = 1;
2134 } else {
2135 !!!cp ('t103');
2136 }
2137 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2138 pop @{$self->{open_elements}};
2139 pop @{$self->{open_elements}} # <head>
2140 if $self->{insertion_mode} == AFTER_HEAD_IM;
2141 !!!ack ('t103.1');
2142 !!!next-token;
2143 next B;
2144 } elsif ($token->{tag_name} eq 'command' or
2145 $token->{tag_name} eq 'eventsource') {
2146 if ($self->{insertion_mode} == IN_HEAD_IM) {
2147 ## NOTE: If the insertion mode at the time of the emission
2148 ## of the token was "before head", $self->{insertion_mode}
2149 ## is already changed to |IN_HEAD_IM|.
2150
2151 ## NOTE: There is a "as if in head" code clone.
2152 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2153 pop @{$self->{open_elements}};
2154 pop @{$self->{open_elements}} # <head>
2155 if $self->{insertion_mode} == AFTER_HEAD_IM;
2156 !!!ack ('t103.2');
2157 !!!next-token;
2158 next B;
2159 } else {
2160 ## NOTE: "in head noscript" or "after head" insertion mode
2161 ## - in these cases, these tags are treated as same as
2162 ## normal in-body tags.
2163 !!!cp ('t103.3');
2164 #
2165 }
2166 } elsif ($token->{tag_name} eq 'meta') {
2167 ## NOTE: There is a "as if in head" code clone.
2168 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
2169 !!!cp ('t104');
2170 !!!parse-error (type => 'after head',
2171 text => $token->{tag_name}, token => $token);
2172 push @{$self->{open_elements}},
2173 [$self->{head_element}, $el_category->{head}];
2174 $self->{head_element_inserted} = 1;
2175 } else {
2176 !!!cp ('t105');
2177 }
2178 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2179 my $meta_el = pop @{$self->{open_elements}};
2180
2181 unless ($self->{confident}) {
2182 if ($token->{attributes}->{charset}) {
2183 !!!cp ('t106');
2184 ## NOTE: Whether the encoding is supported or not is handled
2185 ## in the {change_encoding} callback.
2186 $self->{change_encoding}
2187 ->($self, $token->{attributes}->{charset}->{value},
2188 $token);
2189
2190 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
2191 ->set_user_data (manakai_has_reference =>
2192 $token->{attributes}->{charset}
2193 ->{has_reference});
2194 } elsif ($token->{attributes}->{content}) {
2195 if ($token->{attributes}->{content}->{value}
2196 =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
2197 [\x09\x0A\x0C\x0D\x20]*=
2198 [\x09\x0A\x0C\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
2199 ([^"'\x09\x0A\x0C\x0D\x20]
2200 [^\x09\x0A\x0C\x0D\x20\x3B]*))/x) {
2201 !!!cp ('t107');
2202 ## NOTE: Whether the encoding is supported or not is handled
2203 ## in the {change_encoding} callback.
2204 $self->{change_encoding}
2205 ->($self, defined $1 ? $1 : defined $2 ? $2 : $3,
2206 $token);
2207 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
2208 ->set_user_data (manakai_has_reference =>
2209 $token->{attributes}->{content}
2210 ->{has_reference});
2211 } else {
2212 !!!cp ('t108');
2213 }
2214 }
2215 } else {
2216 if ($token->{attributes}->{charset}) {
2217 !!!cp ('t109');
2218 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
2219 ->set_user_data (manakai_has_reference =>
2220 $token->{attributes}->{charset}
2221 ->{has_reference});
2222 }
2223 if ($token->{attributes}->{content}) {
2224 !!!cp ('t110');
2225 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
2226 ->set_user_data (manakai_has_reference =>
2227 $token->{attributes}->{content}
2228 ->{has_reference});
2229 }
2230 }
2231
2232 pop @{$self->{open_elements}} # <head>
2233 if $self->{insertion_mode} == AFTER_HEAD_IM;
2234 !!!ack ('t110.1');
2235 !!!next-token;
2236 next B;
2237 } elsif ($token->{tag_name} eq 'title') {
2238 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2239 !!!cp ('t111');
2240 ## As if </noscript>
2241 pop @{$self->{open_elements}};
2242 !!!parse-error (type => 'in noscript', text => 'title',
2243 token => $token);
2244
2245 $self->{insertion_mode} = IN_HEAD_IM;
2246 ## Reprocess in the "in head" insertion mode...
2247 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2248 !!!cp ('t112');
2249 !!!parse-error (type => 'after head',
2250 text => $token->{tag_name}, token => $token);
2251 push @{$self->{open_elements}},
2252 [$self->{head_element}, $el_category->{head}];
2253 $self->{head_element_inserted} = 1;
2254 } else {
2255 !!!cp ('t113');
2256 }
2257
2258 ## NOTE: There is a "as if in head" code clone.
2259 $parse_rcdata->(RCDATA_CONTENT_MODEL);
2260 ## ISSUE: A spec bug [Bug 6038]
2261 splice @{$self->{open_elements}}, -2, 1, () # <head>
2262 if ($self->{insertion_mode} & IM_MASK) == AFTER_HEAD_IM;
2263 next B;
2264 } elsif ($token->{tag_name} eq 'style' or
2265 $token->{tag_name} eq 'noframes') {
2266 ## NOTE: Or (scripting is enabled and tag_name eq 'noscript' and
2267 ## insertion mode IN_HEAD_IM)
2268 ## NOTE: There is a "as if in head" code clone.
2269 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
2270 !!!cp ('t114');
2271 !!!parse-error (type => 'after head',
2272 text => $token->{tag_name}, token => $token);
2273 push @{$self->{open_elements}},
2274 [$self->{head_element}, $el_category->{head}];
2275 $self->{head_element_inserted} = 1;
2276 } else {
2277 !!!cp ('t115');
2278 }
2279 $parse_rcdata->(CDATA_CONTENT_MODEL);
2280 ## ISSUE: A spec bug [Bug 6038]
2281 splice @{$self->{open_elements}}, -2, 1, () # <head>
2282 if ($self->{insertion_mode} & IM_MASK) == AFTER_HEAD_IM;
2283 next B;
2284 } elsif ($token->{tag_name} eq 'noscript') {
2285 if ($self->{insertion_mode} == IN_HEAD_IM) {
2286 !!!cp ('t116');
2287 ## NOTE: and scripting is disabled
2288 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2289 $self->{insertion_mode} = IN_HEAD_NOSCRIPT_IM;
2290 !!!nack ('t116.1');
2291 !!!next-token;
2292 next B;
2293 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2294 !!!cp ('t117');
2295 !!!parse-error (type => 'in noscript', text => 'noscript',
2296 token => $token);
2297 ## Ignore the token
2298 !!!nack ('t117.1');
2299 !!!next-token;
2300 next B;
2301 } else {
2302 !!!cp ('t118');
2303 #
2304 }
2305 } elsif ($token->{tag_name} eq 'script') {
2306 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2307 !!!cp ('t119');
2308 ## As if </noscript>
2309 pop @{$self->{open_elements}};
2310 !!!parse-error (type => 'in noscript', text => 'script',
2311 token => $token);
2312
2313 $self->{insertion_mode} = IN_HEAD_IM;
2314 ## Reprocess in the "in head" insertion mode...
2315 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2316 !!!cp ('t120');
2317 !!!parse-error (type => 'after head',
2318 text => $token->{tag_name}, token => $token);
2319 push @{$self->{open_elements}},
2320 [$self->{head_element}, $el_category->{head}];
2321 $self->{head_element_inserted} = 1;
2322 } else {
2323 !!!cp ('t121');
2324 }
2325
2326 ## NOTE: There is a "as if in head" code clone.
2327 $script_start_tag->();
2328 ## ISSUE: A spec bug [Bug 6038]
2329 splice @{$self->{open_elements}}, -2, 1 # <head>
2330 if ($self->{insertion_mode} & IM_MASK) == AFTER_HEAD_IM;
2331 next B;
2332 } elsif ($token->{tag_name} eq 'body' or
2333 $token->{tag_name} eq 'frameset') {
2334 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2335 !!!cp ('t122');
2336 ## As if </noscript>
2337 pop @{$self->{open_elements}};
2338 !!!parse-error (type => 'in noscript',
2339 text => $token->{tag_name}, token => $token);
2340
2341 ## Reprocess in the "in head" insertion mode...
2342 ## As if </head>
2343 pop @{$self->{open_elements}};
2344
2345 ## Reprocess in the "after head" insertion mode...
2346 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2347 !!!cp ('t124');
2348 pop @{$self->{open_elements}};
2349
2350 ## Reprocess in the "after head" insertion mode...
2351 } else {
2352 !!!cp ('t125');
2353 }
2354
2355 ## "after head" insertion mode
2356 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2357 if ($token->{tag_name} eq 'body') {
2358 !!!cp ('t126');
2359 $self->{insertion_mode} = IN_BODY_IM;
2360 } elsif ($token->{tag_name} eq 'frameset') {
2361 !!!cp ('t127');
2362 $self->{insertion_mode} = IN_FRAMESET_IM;
2363 } else {
2364 die "$0: tag name: $self->{tag_name}";
2365 }
2366 !!!nack ('t127.1');
2367 !!!next-token;
2368 next B;
2369 } else {
2370 !!!cp ('t128');
2371 #
2372 }
2373
2374 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2375 !!!cp ('t129');
2376 ## As if </noscript>
2377 pop @{$self->{open_elements}};
2378 !!!parse-error (type => 'in noscript:/',
2379 text => $token->{tag_name}, token => $token);
2380
2381 ## Reprocess in the "in head" insertion mode...
2382 ## As if </head>
2383 pop @{$self->{open_elements}};
2384
2385 ## Reprocess in the "after head" insertion mode...
2386 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2387 !!!cp ('t130');
2388 ## As if </head>
2389 pop @{$self->{open_elements}};
2390
2391 ## Reprocess in the "after head" insertion mode...
2392 } else {
2393 !!!cp ('t131');
2394 }
2395
2396 ## "after head" insertion mode
2397 ## As if <body>
2398 !!!insert-element ('body',, $token);
2399 $self->{insertion_mode} = IN_BODY_IM;
2400 ## reprocess
2401 !!!ack-later;
2402 next B;
2403 } elsif ($token->{type} == END_TAG_TOKEN) {
2404 if ($token->{tag_name} eq 'head') {
2405 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2406 !!!cp ('t132');
2407 ## As if <head>
2408 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2409 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2410 push @{$self->{open_elements}},
2411 [$self->{head_element}, $el_category->{head}];
2412
2413 ## Reprocess in the "in head" insertion mode...
2414 pop @{$self->{open_elements}};
2415 $self->{insertion_mode} = AFTER_HEAD_IM;
2416 !!!next-token;
2417 next B;
2418 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2419 !!!cp ('t133');
2420 ## As if </noscript>
2421 pop @{$self->{open_elements}};
2422 !!!parse-error (type => 'in noscript:/',
2423 text => 'head', token => $token);
2424
2425 ## Reprocess in the "in head" insertion mode...
2426 pop @{$self->{open_elements}};
2427 $self->{insertion_mode} = AFTER_HEAD_IM;
2428 !!!next-token;
2429 next B;
2430 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2431 !!!cp ('t134');
2432 pop @{$self->{open_elements}};
2433 $self->{insertion_mode} = AFTER_HEAD_IM;
2434 !!!next-token;
2435 next B;
2436 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2437 !!!cp ('t134.1');
2438 !!!parse-error (type => 'unmatched end tag', text => 'head',
2439 token => $token);
2440 ## Ignore the token
2441 !!!next-token;
2442 next B;
2443 } else {
2444 die "$0: $self->{insertion_mode}: Unknown insertion mode";
2445 }
2446 } elsif ($token->{tag_name} eq 'noscript') {
2447 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2448 !!!cp ('t136');
2449 pop @{$self->{open_elements}};
2450 $self->{insertion_mode} = IN_HEAD_IM;
2451 !!!next-token;
2452 next B;
2453 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM or
2454 $self->{insertion_mode} == AFTER_HEAD_IM) {
2455 !!!cp ('t137');
2456 !!!parse-error (type => 'unmatched end tag',
2457 text => 'noscript', token => $token);
2458 ## Ignore the token ## ISSUE: An issue in the spec.
2459 !!!next-token;
2460 next B;
2461 } else {
2462 !!!cp ('t138');
2463 #
2464 }
2465 } elsif ({
2466 body => 1, html => 1,
2467 }->{$token->{tag_name}}) {
2468 ## TODO: This branch is entirely redundant.
2469 if ($self->{insertion_mode} == BEFORE_HEAD_IM or
2470 $self->{insertion_mode} == IN_HEAD_IM or
2471 $self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2472 !!!cp ('t140');
2473 !!!parse-error (type => 'unmatched end tag',
2474 text => $token->{tag_name}, token => $token);
2475 ## Ignore the token
2476 !!!next-token;
2477 next B;
2478 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2479 !!!cp ('t140.1');
2480 !!!parse-error (type => 'unmatched end tag',
2481 text => $token->{tag_name}, token => $token);
2482 ## Ignore the token
2483 !!!next-token;
2484 next B;
2485 } else {
2486 die "$0: $self->{insertion_mode}: Unknown insertion mode";
2487 }
2488 } elsif ($token->{tag_name} eq 'p') {
2489 !!!cp ('t142');
2490 !!!parse-error (type => 'unmatched end tag',
2491 text => $token->{tag_name}, token => $token);
2492 ## Ignore the token
2493 !!!next-token;
2494 next B;
2495 } elsif ($token->{tag_name} eq 'br') {
2496 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2497 !!!cp ('t142.2');
2498 ## (before head) as if <head>, (in head) as if </head>
2499 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2500 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2501 $self->{insertion_mode} = AFTER_HEAD_IM;
2502
2503 ## Reprocess in the "after head" insertion mode...
2504 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2505 !!!cp ('t143.2');
2506 ## As if </head>
2507 pop @{$self->{open_elements}};
2508 $self->{insertion_mode} = AFTER_HEAD_IM;
2509
2510 ## Reprocess in the "after head" insertion mode...
2511 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2512 !!!cp ('t143.3');
2513 ## NOTE: Two parse errors for <head><noscript></br>
2514 !!!parse-error (type => 'unmatched end tag',
2515 text => 'br', token => $token);
2516 ## As if </noscript>
2517 pop @{$self->{open_elements}};
2518 $self->{insertion_mode} = IN_HEAD_IM;
2519
2520 ## Reprocess in the "in head" insertion mode...
2521 ## As if </head>
2522 pop @{$self->{open_elements}};
2523 $self->{insertion_mode} = AFTER_HEAD_IM;
2524
2525 ## Reprocess in the "after head" insertion mode...
2526 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2527 !!!cp ('t143.4');
2528 #
2529 } else {
2530 die "$0: $self->{insertion_mode}: Unknown insertion mode";
2531 }
2532
2533 ## ISSUE: does not agree with IE7 - it doesn't ignore </br>.
2534 !!!parse-error (type => 'unmatched end tag',
2535 text => 'br', token => $token);
2536 ## Ignore the token
2537 !!!next-token;
2538 next B;
2539 } else {
2540 !!!cp ('t145');
2541 !!!parse-error (type => 'unmatched end tag',
2542 text => $token->{tag_name}, token => $token);
2543 ## Ignore the token
2544 !!!next-token;
2545 next B;
2546 }
2547
2548 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2549 !!!cp ('t146');
2550 ## As if </noscript>
2551 pop @{$self->{open_elements}};
2552 !!!parse-error (type => 'in noscript:/',
2553 text => $token->{tag_name}, token => $token);
2554
2555 ## Reprocess in the "in head" insertion mode...
2556 ## As if </head>
2557 pop @{$self->{open_elements}};
2558
2559 ## Reprocess in the "after head" insertion mode...
2560 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2561 !!!cp ('t147');
2562 ## As if </head>
2563 pop @{$self->{open_elements}};
2564
2565 ## Reprocess in the "after head" insertion mode...
2566 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2567 ## ISSUE: This case cannot be reached?
2568 !!!cp ('t148');
2569 !!!parse-error (type => 'unmatched end tag',
2570 text => $token->{tag_name}, token => $token);
2571 ## Ignore the token ## ISSUE: An issue in the spec.
2572 !!!next-token;
2573 next B;
2574 } else {
2575 !!!cp ('t149');
2576 }
2577
2578 ## "after head" insertion mode
2579 ## As if <body>
2580 !!!insert-element ('body',, $token);
2581 $self->{insertion_mode} = IN_BODY_IM;
2582 ## reprocess
2583 next B;
2584 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
2585 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2586 !!!cp ('t149.1');
2587
2588 ## NOTE: As if <head>
2589 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2590 $self->{open_elements}->[-1]->[0]->append_child
2591 ($self->{head_element});
2592 #push @{$self->{open_elements}},
2593 # [$self->{head_element}, $el_category->{head}];
2594 #$self->{insertion_mode} = IN_HEAD_IM;
2595 ## NOTE: Reprocess.
2596
2597 ## NOTE: As if </head>
2598 #pop @{$self->{open_elements}};
2599 #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
2600 ## NOTE: Reprocess.
2601
2602 #
2603 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2604 !!!cp ('t149.2');
2605
2606 ## NOTE: As if </head>
2607 pop @{$self->{open_elements}};
2608 #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
2609 ## NOTE: Reprocess.
2610
2611 #
2612 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2613 !!!cp ('t149.3');
2614
2615 !!!parse-error (type => 'in noscript:#eof', token => $token);
2616
2617 ## As if </noscript>
2618 pop @{$self->{open_elements}};
2619 #$self->{insertion_mode} = IN_HEAD_IM;
2620 ## NOTE: Reprocess.
2621
2622 ## NOTE: As if </head>
2623 pop @{$self->{open_elements}};
2624 #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
2625 ## NOTE: Reprocess.
2626
2627 #
2628 } else {
2629 !!!cp ('t149.4');
2630 #
2631 }
2632
2633 ## NOTE: As if <body>
2634 !!!insert-element ('body',, $token);
2635 $self->{insertion_mode} = IN_BODY_IM;
2636 ## NOTE: Reprocess.
2637 next B;
2638 } else {
2639 die "$0: $token->{type}: Unknown token type";
2640 }
2641 } elsif ($self->{insertion_mode} & BODY_IMS) {
2642 if ($token->{type} == CHARACTER_TOKEN) {
2643 !!!cp ('t150');
2644 ## NOTE: There is a code clone of "character in body".
2645 $reconstruct_active_formatting_elements->($insert_to_current);
2646
2647 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
2648
2649 !!!next-token;
2650 next B;
2651 } elsif ($token->{type} == START_TAG_TOKEN) {
2652 if ({
2653 caption => 1, col => 1, colgroup => 1, tbody => 1,
2654 td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
2655 }->{$token->{tag_name}}) {
2656 if (($self->{insertion_mode} & IM_MASK) == IN_CELL_IM) {
2657 ## have an element in table scope
2658 for (reverse 0..$#{$self->{open_elements}}) {
2659 my $node = $self->{open_elements}->[$_];
2660 if ($node->[1] == TABLE_CELL_EL) {
2661 !!!cp ('t151');
2662
2663 ## Close the cell
2664 !!!back-token; # <x>
2665 $token = {type => END_TAG_TOKEN,
2666 tag_name => $node->[0]->manakai_local_name,
2667 line => $token->{line},
2668 column => $token->{column}};
2669 next B;
2670 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2671 !!!cp ('t152');
2672 ## ISSUE: This case can never be reached, maybe.
2673 last;
2674 }
2675 }
2676
2677 !!!cp ('t153');
2678 !!!parse-error (type => 'start tag not allowed',
2679 text => $token->{tag_name}, token => $token);
2680 ## Ignore the token
2681 !!!nack ('t153.1');
2682 !!!next-token;
2683 next B;
2684 } elsif (($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
2685 !!!parse-error (type => 'not closed', text => 'caption',
2686 token => $token);
2687
2688 ## NOTE: As if </caption>.
2689 ## have a table element in table scope
2690 my $i;
2691 INSCOPE: {
2692 for (reverse 0..$#{$self->{open_elements}}) {
2693 my $node = $self->{open_elements}->[$_];
2694 if ($node->[1] == CAPTION_EL) {
2695 !!!cp ('t155');
2696 $i = $_;
2697 last INSCOPE;
2698 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2699 !!!cp ('t156');
2700 last;
2701 }
2702 }
2703
2704 !!!cp ('t157');
2705 !!!parse-error (type => 'start tag not allowed',
2706 text => $token->{tag_name}, token => $token);
2707 ## Ignore the token
2708 !!!nack ('t157.1');
2709 !!!next-token;
2710 next B;
2711 } # INSCOPE
2712
2713 ## generate implied end tags
2714 while ($self->{open_elements}->[-1]->[1]
2715 & END_TAG_OPTIONAL_EL) {
2716 !!!cp ('t158');
2717 pop @{$self->{open_elements}};
2718 }
2719
2720 unless ($self->{open_elements}->[-1]->[1] == CAPTION_EL) {
2721 !!!cp ('t159');
2722 !!!parse-error (type => 'not closed',
2723 text => $self->{open_elements}->[-1]->[0]
2724 ->manakai_local_name,
2725 token => $token);
2726 } else {
2727 !!!cp ('t160');
2728 }
2729
2730 splice @{$self->{open_elements}}, $i;
2731
2732 $clear_up_to_marker->();
2733
2734 $self->{insertion_mode} = IN_TABLE_IM;
2735
2736 ## reprocess
2737 !!!ack-later;
2738 next B;
2739 } else {
2740 !!!cp ('t161');
2741 #
2742 }
2743 } else {
2744 !!!cp ('t162');
2745 #
2746 }
2747 } elsif ($token->{type} == END_TAG_TOKEN) {
2748 if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
2749 if (($self->{insertion_mode} & IM_MASK) == IN_CELL_IM) {
2750 ## have an element in table scope
2751 my $i;
2752 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2753 my $node = $self->{open_elements}->[$_];
2754 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
2755 !!!cp ('t163');
2756 $i = $_;
2757 last INSCOPE;
2758 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2759 !!!cp ('t164');
2760 last INSCOPE;
2761 }
2762 } # INSCOPE
2763 unless (defined $i) {
2764 !!!cp ('t165');
2765 !!!parse-error (type => 'unmatched end tag',
2766 text => $token->{tag_name},
2767 token => $token);
2768 ## Ignore the token
2769 !!!next-token;
2770 next B;
2771 }
2772
2773 ## generate implied end tags
2774 while ($self->{open_elements}->[-1]->[1]
2775 & END_TAG_OPTIONAL_EL) {
2776 !!!cp ('t166');
2777 pop @{$self->{open_elements}};
2778 }
2779
2780 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
2781 ne $token->{tag_name}) {
2782 !!!cp ('t167');
2783 !!!parse-error (type => 'not closed',
2784 text => $self->{open_elements}->[-1]->[0]
2785 ->manakai_local_name,
2786 token => $token);
2787 } else {
2788 !!!cp ('t168');
2789 }
2790
2791 splice @{$self->{open_elements}}, $i;
2792
2793 $clear_up_to_marker->();
2794
2795 $self->{insertion_mode} = IN_ROW_IM;
2796
2797 !!!next-token;
2798 next B;
2799 } elsif (($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
2800 !!!cp ('t169');
2801 !!!parse-error (type => 'unmatched end tag',
2802 text => $token->{tag_name}, token => $token);
2803 ## Ignore the token
2804 !!!next-token;
2805 next B;
2806 } else {
2807 !!!cp ('t170');
2808 #
2809 }
2810 } elsif ($token->{tag_name} eq 'caption') {
2811 if (($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
2812 ## have a table element in table scope
2813 my $i;
2814 INSCOPE: {
2815 for (reverse 0..$#{$self->{open_elements}}) {
2816 my $node = $self->{open_elements}->[$_];
2817 if ($node->[1] == CAPTION_EL) {
2818 !!!cp ('t171');
2819 $i = $_;
2820 last INSCOPE;
2821 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2822 !!!cp ('t172');
2823 last;
2824 }
2825 }
2826
2827 !!!cp ('t173');
2828 !!!parse-error (type => 'unmatched end tag',
2829 text => $token->{tag_name}, token => $token);
2830 ## Ignore the token
2831 !!!next-token;
2832 next B;
2833 } # INSCOPE
2834
2835 ## generate implied end tags
2836 while ($self->{open_elements}->[-1]->[1]
2837 & END_TAG_OPTIONAL_EL) {
2838 !!!cp ('t174');
2839 pop @{$self->{open_elements}};
2840 }
2841
2842 unless ($self->{open_elements}->[-1]->[1] == CAPTION_EL) {
2843 !!!cp ('t175');
2844 !!!parse-error (type => 'not closed',
2845 text => $self->{open_elements}->[-1]->[0]
2846 ->manakai_local_name,
2847 token => $token);
2848 } else {
2849 !!!cp ('t176');
2850 }
2851
2852 splice @{$self->{open_elements}}, $i;
2853
2854 $clear_up_to_marker->();
2855
2856 $self->{insertion_mode} = IN_TABLE_IM;
2857
2858 !!!next-token;
2859 next B;
2860 } elsif (($self->{insertion_mode} & IM_MASK) == IN_CELL_IM) {
2861 !!!cp ('t177');
2862 !!!parse-error (type => 'unmatched end tag',
2863 text => $token->{tag_name}, token => $token);
2864 ## Ignore the token
2865 !!!next-token;
2866 next B;
2867 } else {
2868 !!!cp ('t178');
2869 #
2870 }
2871 } elsif ({
2872 table => 1, tbody => 1, tfoot => 1,
2873 thead => 1, tr => 1,
2874 }->{$token->{tag_name}} and
2875 ($self->{insertion_mode} & IM_MASK) == IN_CELL_IM) {
2876 ## have an element in table scope
2877 my $i;
2878 my $tn;
2879 INSCOPE: {
2880 for (reverse 0..$#{$self->{open_elements}}) {
2881 my $node = $self->{open_elements}->[$_];
2882 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
2883 !!!cp ('t179');
2884 $i = $_;
2885
2886 ## Close the cell
2887 !!!back-token; # </x>
2888 $token = {type => END_TAG_TOKEN, tag_name => $tn,
2889 line => $token->{line},
2890 column => $token->{column}};
2891 next B;
2892 } elsif ($node->[1] == TABLE_CELL_EL) {
2893 !!!cp ('t180');
2894 $tn = $node->[0]->manakai_local_name;
2895 ## NOTE: There is exactly one |td| or |th| element
2896 ## in scope in the stack of open elements by definition.
2897 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2898 ## ISSUE: Can this be reached?
2899 !!!cp ('t181');
2900 last;
2901 }
2902 }
2903
2904 !!!cp ('t182');
2905 !!!parse-error (type => 'unmatched end tag',
2906 text => $token->{tag_name}, token => $token);
2907 ## Ignore the token
2908 !!!next-token;
2909 next B;
2910 } # INSCOPE
2911 } elsif ($token->{tag_name} eq 'table' and
2912 ($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
2913 !!!parse-error (type => 'not closed', text => 'caption',
2914 token => $token);
2915
2916 ## As if </caption>
2917 ## have a table element in table scope
2918 my $i;
2919 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2920 my $node = $self->{open_elements}->[$_];
2921 if ($node->[1] == CAPTION_EL) {
2922 !!!cp ('t184');
2923 $i = $_;
2924 last INSCOPE;
2925 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2926 !!!cp ('t185');
2927 last INSCOPE;
2928 }
2929 } # INSCOPE
2930 unless (defined $i) {
2931 !!!cp ('t186');
2932 ## TODO: Wrong error type?
2933 !!!parse-error (type => 'unmatched end tag',
2934 text => 'caption', token => $token);
2935 ## Ignore the token
2936 !!!next-token;
2937 next B;
2938 }
2939
2940 ## generate implied end tags
2941 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
2942 !!!cp ('t187');
2943 pop @{$self->{open_elements}};
2944 }
2945
2946 unless ($self->{open_elements}->[-1]->[1] == CAPTION_EL) {
2947 !!!cp ('t188');
2948 !!!parse-error (type => 'not closed',
2949 text => $self->{open_elements}->[-1]->[0]
2950 ->manakai_local_name,
2951 token => $token);
2952 } else {
2953 !!!cp ('t189');
2954 }
2955
2956 splice @{$self->{open_elements}}, $i;
2957
2958 $clear_up_to_marker->();
2959
2960 $self->{insertion_mode} = IN_TABLE_IM;
2961
2962 ## reprocess
2963 next B;
2964 } elsif ({
2965 body => 1, col => 1, colgroup => 1, html => 1,
2966 }->{$token->{tag_name}}) {
2967 if ($self->{insertion_mode} & BODY_TABLE_IMS) {
2968 !!!cp ('t190');
2969 !!!parse-error (type => 'unmatched end tag',
2970 text => $token->{tag_name}, token => $token);
2971 ## Ignore the token
2972 !!!next-token;
2973 next B;
2974 } else {
2975 !!!cp ('t191');
2976 #
2977 }
2978 } elsif ({
2979 tbody => 1, tfoot => 1,
2980 thead => 1, tr => 1,
2981 }->{$token->{tag_name}} and
2982 ($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
2983 !!!cp ('t192');
2984 !!!parse-error (type => 'unmatched end tag',
2985 text => $token->{tag_name}, token => $token);
2986 ## Ignore the token
2987 !!!next-token;
2988 next B;
2989 } else {
2990 !!!cp ('t193');
2991 #
2992 }
2993 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
2994 for my $entry (@{$self->{open_elements}}) {
2995 unless ($entry->[1] & ALL_END_TAG_OPTIONAL_EL) {
2996 !!!cp ('t75');
2997 !!!parse-error (type => 'in body:#eof', token => $token);
2998 last;
2999 }
3000 }
3001
3002 ## Stop parsing.
3003 last B;
3004 } else {
3005 die "$0: $token->{type}: Unknown token type";
3006 }
3007
3008 $insert = $insert_to_current;
3009 #
3010 } elsif ($self->{insertion_mode} & TABLE_IMS) {
3011 if ($token->{type} == CHARACTER_TOKEN) {
3012 if (not $open_tables->[-1]->[1] and # tainted
3013 $token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
3014 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3015
3016 unless (length $token->{data}) {
3017 !!!cp ('t194');
3018 !!!next-token;
3019 next B;
3020 } else {
3021 !!!cp ('t195');
3022 }
3023 }
3024
3025 !!!parse-error (type => 'in table:#text', token => $token);
3026
3027 ## NOTE: As if in body, but insert into the foster parent element.
3028 $reconstruct_active_formatting_elements->($insert_to_foster);
3029
3030 if ($self->{open_elements}->[-1]->[1] & TABLE_ROWS_EL) {
3031 # MUST
3032 my $foster_parent_element;
3033 my $next_sibling;
3034 my $prev_sibling;
3035 OE: for (reverse 0..$#{$self->{open_elements}}) {
3036 if ($self->{open_elements}->[$_]->[1] == TABLE_EL) {
3037 my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
3038 if (defined $parent and $parent->node_type == 1) {
3039 $foster_parent_element = $parent;
3040 !!!cp ('t196');
3041 $next_sibling = $self->{open_elements}->[$_]->[0];
3042 $prev_sibling = $next_sibling->previous_sibling;
3043 #
3044 } else {
3045 !!!cp ('t197');
3046 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
3047 $prev_sibling = $foster_parent_element->last_child;
3048 #
3049 }
3050 last OE;
3051 }
3052 } # OE
3053 $foster_parent_element = $self->{open_elements}->[0]->[0] and
3054 $prev_sibling = $foster_parent_element->last_child
3055 unless defined $foster_parent_element;
3056 undef $prev_sibling unless $open_tables->[-1]->[2]; # ~node inserted
3057 if (defined $prev_sibling and
3058 $prev_sibling->node_type == 3) {
3059 !!!cp ('t198');
3060 $prev_sibling->manakai_append_text ($token->{data});
3061 } else {
3062 !!!cp ('t199');
3063 $foster_parent_element->insert_before
3064 ($self->{document}->create_text_node ($token->{data}),
3065 $next_sibling);
3066 }
3067 $open_tables->[-1]->[1] = 1; # tainted
3068 $open_tables->[-1]->[2] = 1; # ~node inserted
3069 } else {
3070 ## NOTE: Fragment case or in a foster parent'ed element
3071 ## (e.g. |<table><span>a|). In fragment case, whether the
3072 ## character is appended to existing node or a new node is
3073 ## created is irrelevant, since the foster parent'ed nodes
3074 ## are discarded and fragment parsing does not invoke any
3075 ## script.
3076 !!!cp ('t200');
3077 $self->{open_elements}->[-1]->[0]->manakai_append_text
3078 ($token->{data});
3079 }
3080
3081 !!!next-token;
3082 next B;
3083 } elsif ($token->{type} == START_TAG_TOKEN) {
3084 if ({
3085 tr => (($self->{insertion_mode} & IM_MASK) != IN_ROW_IM),
3086 th => 1, td => 1,
3087 }->{$token->{tag_name}}) {
3088 if (($self->{insertion_mode} & IM_MASK) == IN_TABLE_IM) {
3089 ## Clear back to table context
3090 while (not ($self->{open_elements}->[-1]->[1]
3091 & TABLE_SCOPING_EL)) {
3092 !!!cp ('t201');
3093 pop @{$self->{open_elements}};
3094 }
3095
3096 !!!insert-element ('tbody',, $token);
3097 $self->{insertion_mode} = IN_TABLE_BODY_IM;
3098 ## reprocess in the "in table body" insertion mode...
3099 }
3100
3101 if (($self->{insertion_mode} & IM_MASK) == IN_TABLE_BODY_IM) {
3102 unless ($token->{tag_name} eq 'tr') {
3103 !!!cp ('t202');
3104 !!!parse-error (type => 'missing start tag:tr', token => $token);
3105 }
3106
3107 ## Clear back to table body context
3108 while (not ($self->{open_elements}->[-1]->[1]
3109 & TABLE_ROWS_SCOPING_EL)) {
3110 !!!cp ('t203');
3111 ## ISSUE: Can this case be reached?
3112 pop @{$self->{open_elements}};
3113 }
3114
3115 $self->{insertion_mode} = IN_ROW_IM;
3116 if ($token->{tag_name} eq 'tr') {
3117 !!!cp ('t204');
3118 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3119 $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3120 !!!nack ('t204');
3121 !!!next-token;
3122 next B;
3123 } else {
3124 !!!cp ('t205');
3125 !!!insert-element ('tr',, $token);
3126 ## reprocess in the "in row" insertion mode
3127 }
3128 } else {
3129 !!!cp ('t206');
3130 }
3131
3132 ## Clear back to table row context
3133 while (not ($self->{open_elements}->[-1]->[1]
3134 & TABLE_ROW_SCOPING_EL)) {
3135 !!!cp ('t207');
3136 pop @{$self->{open_elements}};
3137 }
3138
3139 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3140 $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3141 $self->{insertion_mode} = IN_CELL_IM;
3142
3143 push @$active_formatting_elements, ['#marker', ''];
3144
3145 !!!nack ('t207.1');
3146 !!!next-token;
3147 next B;
3148 } elsif ({
3149 caption => 1, col => 1, colgroup => 1,
3150 tbody => 1, tfoot => 1, thead => 1,
3151 tr => 1, # $self->{insertion_mode} == IN_ROW_IM
3152 }->{$token->{tag_name}}) {
3153 if (($self->{insertion_mode} & IM_MASK) == IN_ROW_IM) {
3154 ## As if </tr>
3155 ## have an element in table scope
3156 my $i;
3157 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3158 my $node = $self->{open_elements}->[$_];
3159 if ($node->[1] == TABLE_ROW_EL) {
3160 !!!cp ('t208');
3161 $i = $_;
3162 last INSCOPE;
3163 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3164 !!!cp ('t209');
3165 last INSCOPE;
3166 }
3167 } # INSCOPE
## The implied |</tr>| can only be applied when a |tr| element is in
## table scope.  $i is left undefined when the preceding scan found no
## |tr| before reaching a table scoping element (or the bottom of the
## stack of open elements); in that case the start tag is dropped.
unless (defined $i) {
  !!!cp ('t210');
  ## TODO: This type is wrong.
  !!!parse-error (type => 'unmatched end tag',
                  text => $token->{tag_name}, token => $token);
  ## Ignore the token
  !!!nack ('t210.1');
  !!!next-token;
  next B;
}
3178
3179 ## Clear back to table row context
3180 while (not ($self->{open_elements}->[-1]->[1]
3181 & TABLE_ROW_SCOPING_EL)) {
3182 !!!cp ('t211');
3183 ## ISSUE: Can this case be reached?
3184 pop @{$self->{open_elements}};
3185 }
3186
3187 pop @{$self->{open_elements}}; # tr
3188 $self->{insertion_mode} = IN_TABLE_BODY_IM;
3189 if ($token->{tag_name} eq 'tr') {
3190 !!!cp ('t212');
3191 ## reprocess
3192 !!!ack-later;
3193 next B;
3194 } else {
3195 !!!cp ('t213');
3196 ## reprocess in the "in table body" insertion mode...
3197 }
3198 }
3199
3200 if (($self->{insertion_mode} & IM_MASK) == IN_TABLE_BODY_IM) {
3201 ## have an element in table scope
3202 my $i;
3203 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3204 my $node = $self->{open_elements}->[$_];
3205 if ($node->[1] == TABLE_ROW_GROUP_EL) {
3206 !!!cp ('t214');
3207 $i = $_;
3208 last INSCOPE;
3209 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3210 !!!cp ('t215');
3211 last INSCOPE;
3212 }
3213 } # INSCOPE
3214 unless (defined $i) {
3215 !!!cp ('t216');
3216 ## TODO: This error type is wrong.
3217 !!!parse-error (type => 'unmatched end tag',
3218 text => $token->{tag_name}, token => $token);
3219 ## Ignore the token
3220 !!!nack ('t216.1');
3221 !!!next-token;
3222 next B;
3223 }
3224
3225 ## Clear back to table body context
3226 while (not ($self->{open_elements}->[-1]->[1]
3227 & TABLE_ROWS_SCOPING_EL)) {
3228 !!!cp ('t217');
3229 ## ISSUE: Can this state be reached?
3230 pop @{$self->{open_elements}};
3231 }
3232
3233 ## As if <{current node}>
3234 ## have an element in table scope
3235 ## true by definition
3236
3237 ## Clear back to table body context
3238 ## nop by definition
3239
3240 pop @{$self->{open_elements}};
3241 $self->{insertion_mode} = IN_TABLE_IM;
3242 ## reprocess in "in table" insertion mode...
3243 } else {
3244 !!!cp ('t218');
3245 }
3246
3247 if ($token->{tag_name} eq 'col') {
3248 ## Clear back to table context
3249 while (not ($self->{open_elements}->[-1]->[1]
3250 & TABLE_SCOPING_EL)) {
3251 !!!cp ('t219');
3252 ## ISSUE: Can this state be reached?
3253 pop @{$self->{open_elements}};
3254 }
3255
3256 !!!insert-element ('colgroup',, $token);
3257 $self->{insertion_mode} = IN_COLUMN_GROUP_IM;
3258 ## reprocess
3259 $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3260 !!!ack-later;
3261 next B;
3262 } elsif ({
3263 caption => 1,
3264 colgroup => 1,
3265 tbody => 1, tfoot => 1, thead => 1,
3266 }->{$token->{tag_name}}) {
3267 ## Clear back to table context
3268 while (not ($self->{open_elements}->[-1]->[1]
3269 & TABLE_SCOPING_EL)) {
3270 !!!cp ('t220');
3271 ## ISSUE: Can this state be reached?
3272 pop @{$self->{open_elements}};
3273 }
3274
3275 push @$active_formatting_elements, ['#marker', '']
3276 if $token->{tag_name} eq 'caption';
3277
3278 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3279 $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3280 $self->{insertion_mode} = {
3281 caption => IN_CAPTION_IM,
3282 colgroup => IN_COLUMN_GROUP_IM,
3283 tbody => IN_TABLE_BODY_IM,
3284 tfoot => IN_TABLE_BODY_IM,
3285 thead => IN_TABLE_BODY_IM,
3286 }->{$token->{tag_name}};
3287 !!!next-token;
3288 !!!nack ('t220.1');
3289 next B;
3290 } else {
3291 die "$0: in table: <>: $token->{tag_name}";
3292 }
3293 } elsif ($token->{tag_name} eq 'table') {
3294 !!!parse-error (type => 'not closed',
3295 text => $self->{open_elements}->[-1]->[0]
3296 ->manakai_local_name,
3297 token => $token);
3298
3299 ## As if </table>
3300 ## have a table element in table scope
3301 my $i;
3302 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3303 my $node = $self->{open_elements}->[$_];
3304 if ($node->[1] == TABLE_EL) {
3305 !!!cp ('t221');
3306 $i = $_;
3307 last INSCOPE;
3308 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3309 !!!cp ('t222');
3310 last INSCOPE;
3311 }
3312 } # INSCOPE
3313 unless (defined $i) {
3314 !!!cp ('t223');
3315 ## TODO: The following is wrong, maybe.
3316 !!!parse-error (type => 'unmatched end tag', text => 'table',
3317 token => $token);
3318 ## Ignore tokens </table><table>
3319 !!!nack ('t223.1');
3320 !!!next-token;
3321 next B;
3322 }
3323
3324 ## TODO: Followings are removed from the latest spec.
3325 ## generate implied end tags
3326 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
3327 !!!cp ('t224');
3328 pop @{$self->{open_elements}};
3329 }
3330
3331 unless ($self->{open_elements}->[-1]->[1] == TABLE_EL) {
3332 !!!cp ('t225');
3333 ## NOTE: |<table><tr><table>|
3334 !!!parse-error (type => 'not closed',
3335 text => $self->{open_elements}->[-1]->[0]
3336 ->manakai_local_name,
3337 token => $token);
3338 } else {
3339 !!!cp ('t226');
3340 }
3341
3342 splice @{$self->{open_elements}}, $i;
3343 pop @{$open_tables};
3344
3345 $self->_reset_insertion_mode;
3346
3347 ## reprocess
3348 !!!ack-later;
3349 next B;
3350 } elsif ($token->{tag_name} eq 'style') {
3351 if (not $open_tables->[-1]->[1]) { # tainted
3352 !!!cp ('t227.8');
3353 ## NOTE: This is a "as if in head" code clone.
3354 $parse_rcdata->(CDATA_CONTENT_MODEL);
3355 $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3356 next B;
3357 } else {
3358 !!!cp ('t227.7');
3359 #
3360 }
3361 } elsif ($token->{tag_name} eq 'script') {
3362 if (not $open_tables->[-1]->[1]) { # tainted
3363 !!!cp ('t227.6');
3364 ## NOTE: This is a "as if in head" code clone.
3365 $script_start_tag->();
3366 $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3367 next B;
3368 } else {
3369 !!!cp ('t227.5');
3370 #
3371 }
3372 } elsif ($token->{tag_name} eq 'input') {
3373 if (not $open_tables->[-1]->[1]) { # tainted
3374 if ($token->{attributes}->{type}) { ## TODO: case
3375 my $type = lc $token->{attributes}->{type}->{value};
3376 if ($type eq 'hidden') {
3377 !!!cp ('t227.3');
3378 !!!parse-error (type => 'in table',
3379 text => $token->{tag_name}, token => $token);
3380
3381 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3382 $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3383
3384 ## TODO: form element pointer
3385
3386 pop @{$self->{open_elements}};
3387
3388 !!!next-token;
3389 !!!ack ('t227.2.1');
3390 next B;
3391 } else {
3392 !!!cp ('t227.2');
3393 #
3394 }
3395 } else {
3396 !!!cp ('t227.1');
3397 #
3398 }
3399 } else {
3400 !!!cp ('t227.4');
3401 #
3402 }
3403 } else {
3404 !!!cp ('t227');
3405 #
3406 }
3407
3408 !!!parse-error (type => 'in table', text => $token->{tag_name},
3409 token => $token);
3410
3411 $insert = $insert_to_foster;
3412 #
3413 } elsif ($token->{type} == END_TAG_TOKEN) {
3414 if ($token->{tag_name} eq 'tr' and
3415 ($self->{insertion_mode} & IM_MASK) == IN_ROW_IM) {
3416 ## have an element in table scope
3417 my $i;
3418 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3419 my $node = $self->{open_elements}->[$_];
3420 if ($node->[1] == TABLE_ROW_EL) {
3421 !!!cp ('t228');
3422 $i = $_;
3423 last INSCOPE;
3424 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3425 !!!cp ('t229');
3426 last INSCOPE;
3427 }
3428 } # INSCOPE
3429 unless (defined $i) {
3430 !!!cp ('t230');
3431 !!!parse-error (type => 'unmatched end tag',
3432 text => $token->{tag_name}, token => $token);
3433 ## Ignore the token
3434 !!!nack ('t230.1');
3435 !!!next-token;
3436 next B;
3437 } else {
3438 !!!cp ('t232');
3439 }
3440
3441 ## Clear back to table row context
3442 while (not ($self->{open_elements}->[-1]->[1]
3443 & TABLE_ROW_SCOPING_EL)) {
3444 !!!cp ('t231');
3445 ## ISSUE: Can this state be reached?
3446 pop @{$self->{open_elements}};
3447 }
3448
3449 pop @{$self->{open_elements}}; # tr
3450 $self->{insertion_mode} = IN_TABLE_BODY_IM;
3451 !!!next-token;
3452 !!!nack ('t231.1');
3453 next B;
3454 } elsif ($token->{tag_name} eq 'table') {
3455 if (($self->{insertion_mode} & IM_MASK) == IN_ROW_IM) {
3456 ## As if </tr>
3457 ## have an element in table scope
3458 my $i;
3459 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3460 my $node = $self->{open_elements}->[$_];
3461 if ($node->[1] == TABLE_ROW_EL) {
3462 !!!cp ('t233');
3463 $i = $_;
3464 last INSCOPE;
3465 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3466 !!!cp ('t234');
3467 last INSCOPE;
3468 }
3469 } # INSCOPE
## |</table>| in the "in row" insertion mode acts as if |</tr>| had
## been seen first.  $i is undefined when the preceding scan found no
## |tr| element in table scope, so the implied |</tr>| cannot be
## applied and the |</table>| token is dropped.
unless (defined $i) {
  !!!cp ('t235');
  ## TODO: The following is wrong.
  ## NOTE: The reported text names the implied |</tr>| end tag, as the
  ## sibling "As if </tr>" check for |</tbody>|, |</tfoot>| and
  ## |</thead>| does, rather than the numeric token type.
  !!!parse-error (type => 'unmatched end tag',
                  text => 'tr', token => $token);
  ## Ignore the token
  !!!nack ('t236.1');
  !!!next-token;
  next B;
}
3480
3481 ## Clear back to table row context
3482 while (not ($self->{open_elements}->[-1]->[1]
3483 & TABLE_ROW_SCOPING_EL)) {
3484 !!!cp ('t236');
3485 ## ISSUE: Can this state be reached?
3486 pop @{$self->{open_elements}};
3487 }
3488
3489 pop @{$self->{open_elements}}; # tr
3490 $self->{insertion_mode} = IN_TABLE_BODY_IM;
3491 ## reprocess in the "in table body" insertion mode...
3492 }
3493
3494 if (($self->{insertion_mode} & IM_MASK) == IN_TABLE_BODY_IM) {
3495 ## have an element in table scope
3496 my $i;
3497 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3498 my $node = $self->{open_elements}->[$_];
3499 if ($node->[1] == TABLE_ROW_GROUP_EL) {
3500 !!!cp ('t237');
3501 $i = $_;
3502 last INSCOPE;
3503 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3504 !!!cp ('t238');
3505 last INSCOPE;
3506 }
3507 } # INSCOPE
3508 unless (defined $i) {
3509 !!!cp ('t239');
3510 !!!parse-error (type => 'unmatched end tag',
3511 text => $token->{tag_name}, token => $token);
3512 ## Ignore the token
3513 !!!nack ('t239.1');
3514 !!!next-token;
3515 next B;
3516 }
3517
3518 ## Clear back to table body context
3519 while (not ($self->{open_elements}->[-1]->[1]
3520 & TABLE_ROWS_SCOPING_EL)) {
3521 !!!cp ('t240');
3522 pop @{$self->{open_elements}};
3523 }
3524
3525 ## As if <{current node}>
3526 ## have an element in table scope
3527 ## true by definition
3528
3529 ## Clear back to table body context
3530 ## nop by definition
3531
3532 pop @{$self->{open_elements}};
3533 $self->{insertion_mode} = IN_TABLE_IM;
3534 ## reprocess in the "in table" insertion mode...
3535 }
3536
3537 ## NOTE: </table> in the "in table" insertion mode.
3538 ## When you edit the code fragment below, please ensure that
3539 ## the code for <table> in the "in table" insertion mode
3540 ## is synced with it.
3541
3542 ## have a table element in table scope
3543 my $i;
3544 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3545 my $node = $self->{open_elements}->[$_];
3546 if ($node->[1] == TABLE_EL) {
3547 !!!cp ('t241');
3548 $i = $_;
3549 last INSCOPE;
3550 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3551 !!!cp ('t242');
3552 last INSCOPE;
3553 }
3554 } # INSCOPE
3555 unless (defined $i) {
3556 !!!cp ('t243');
3557 !!!parse-error (type => 'unmatched end tag',
3558 text => $token->{tag_name}, token => $token);
3559 ## Ignore the token
3560 !!!nack ('t243.1');
3561 !!!next-token;
3562 next B;
3563 }
3564
3565 splice @{$self->{open_elements}}, $i;
3566 pop @{$open_tables};
3567
3568 $self->_reset_insertion_mode;
3569
3570 !!!next-token;
3571 next B;
3572 } elsif ({
3573 tbody => 1, tfoot => 1, thead => 1,
3574 }->{$token->{tag_name}} and
3575 $self->{insertion_mode} & ROW_IMS) {
3576 if (($self->{insertion_mode} & IM_MASK) == IN_ROW_IM) {
3577 ## have an element in table scope
3578 my $i;
3579 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3580 my $node = $self->{open_elements}->[$_];
3581 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
3582 !!!cp ('t247');
3583 $i = $_;
3584 last INSCOPE;
3585 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3586 !!!cp ('t248');
3587 last INSCOPE;
3588 }
3589 } # INSCOPE
3590 unless (defined $i) {
3591 !!!cp ('t249');
3592 !!!parse-error (type => 'unmatched end tag',
3593 text => $token->{tag_name}, token => $token);
3594 ## Ignore the token
3595 !!!nack ('t249.1');
3596 !!!next-token;
3597 next B;
3598 }
3599
## As if </tr>
## have an element in table scope
## NOTE: $i was already declared in this block for the row group scan
## above, so it is reset here instead of being redeclared with |my|
## in the same scope.
undef $i;
INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
  my $node = $self->{open_elements}->[$_];
  if ($node->[1] == TABLE_ROW_EL) {
    ## A |tr| element is in table scope; remember its position.
    !!!cp ('t250');
    $i = $_;
    last INSCOPE;
  } elsif ($node->[1] & TABLE_SCOPING_EL) {
    ## Reached a table scoping element first; $i stays undefined.
    !!!cp ('t251');
    last INSCOPE;
  }
} # INSCOPE
3614 unless (defined $i) {
3615 !!!cp ('t252');
3616 !!!parse-error (type => 'unmatched end tag',
3617 text => 'tr', token => $token);
3618 ## Ignore the token
3619 !!!nack ('t252.1');
3620 !!!next-token;
3621 next B;
3622 }
3623
3624 ## Clear back to table row context
3625 while (not ($self->{open_elements}->[-1]->[1]
3626 & TABLE_ROW_SCOPING_EL)) {
3627 !!!cp ('t253');
3628 ## ISSUE: Can this case be reached?
3629 pop @{$self->{open_elements}};
3630 }
3631
3632 pop @{$self->{open_elements}}; # tr
3633 $self->{insertion_mode} = IN_TABLE_BODY_IM;
3634 ## reprocess in the "in table body" insertion mode...
3635 }
3636
3637 ## have an element in table scope
3638 my $i;
3639 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3640 my $node = $self->{open_elements}->[$_];
3641 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
3642 !!!cp ('t254');
3643 $i = $_;
3644 last INSCOPE;
3645 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3646 !!!cp ('t255');
3647 last INSCOPE;
3648 }
3649 } # INSCOPE
3650 unless (defined $i) {
3651 !!!cp ('t256');
3652 !!!parse-error (type => 'unmatched end tag',
3653 text => $token->{tag_name}, token => $token);
3654 ## Ignore the token
3655 !!!nack ('t256.1');
3656 !!!next-token;
3657 next B;
3658 }
3659
3660 ## Clear back to table body context
3661 while (not ($self->{open_elements}->[-1]->[1]
3662 & TABLE_ROWS_SCOPING_EL)) {
3663 !!!cp ('t257');
3664 ## ISSUE: Can this case be reached?
3665 pop @{$self->{open_elements}};
3666 }
3667
3668 pop @{$self->{open_elements}};
3669 $self->{insertion_mode} = IN_TABLE_IM;
3670 !!!nack ('t257.1');
3671 !!!next-token;
3672 next B;
3673 } elsif ({
3674 body => 1, caption => 1, col => 1, colgroup => 1,
3675 html => 1, td => 1, th => 1,
3676 tr => 1, # $self->{insertion_mode} == IN_ROW_IM
3677 tbody => 1, tfoot => 1, thead => 1, # $self->{insertion_mode} == IN_TABLE_IM
3678 }->{$token->{tag_name}}) {
3679 !!!cp ('t258');
3680 !!!parse-error (type => 'unmatched end tag',
3681 text => $token->{tag_name}, token => $token);
3682 ## Ignore the token
3683 !!!nack ('t258.1');
3684 !!!next-token;
3685 next B;
3686 } else {
3687 !!!cp ('t259');
3688 !!!parse-error (type => 'in table:/',
3689 text => $token->{tag_name}, token => $token);
3690
3691 $insert = $insert_to_foster;
3692 #
3693 }
3694 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
3695 unless ($self->{open_elements}->[-1]->[1] == HTML_EL and
3696 @{$self->{open_elements}} == 1) { # redundant, maybe
3697 !!!parse-error (type => 'in body:#eof', token => $token);
3698 !!!cp ('t259.1');
3699 #
3700 } else {
3701 !!!cp ('t259.2');
3702 #
3703 }
3704
3705 ## Stop parsing
3706 last B;
3707 } else {
3708 die "$0: $token->{type}: Unknown token type";
3709 }
3710 } elsif (($self->{insertion_mode} & IM_MASK) == IN_COLUMN_GROUP_IM) {
3711 if ($token->{type} == CHARACTER_TOKEN) {
3712 if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
3713 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3714 unless (length $token->{data}) {
3715 !!!cp ('t260');
3716 !!!next-token;
3717 next B;
3718 }
3719 }
3720
3721 !!!cp ('t261');
3722 #
3723 } elsif ($token->{type} == START_TAG_TOKEN) {
3724 if ($token->{tag_name} eq 'col') {
3725 !!!cp ('t262');
3726 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3727 pop @{$self->{open_elements}};
3728 !!!ack ('t262.1');
3729 !!!next-token;
3730 next B;
3731 } else {
3732 !!!cp ('t263');
3733 #
3734 }
3735 } elsif ($token->{type} == END_TAG_TOKEN) {
3736 if ($token->{tag_name} eq 'colgroup') {
3737 if ($self->{open_elements}->[-1]->[1] == HTML_EL) {
3738 !!!cp ('t264');
3739 !!!parse-error (type => 'unmatched end tag',
3740 text => 'colgroup', token => $token);
3741 ## Ignore the token
3742 !!!next-token;
3743 next B;
3744 } else {
3745 !!!cp ('t265');
3746 pop @{$self->{open_elements}}; # colgroup
3747 $self->{insertion_mode} = IN_TABLE_IM;
3748 !!!next-token;
3749 next B;
3750 }
3751 } elsif ($token->{tag_name} eq 'col') {
3752 !!!cp ('t266');
3753 !!!parse-error (type => 'unmatched end tag',
3754 text => 'col', token => $token);
3755 ## Ignore the token
3756 !!!next-token;
3757 next B;
3758 } else {
3759 !!!cp ('t267');
3760 #
3761 }
3762 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
3763 if ($self->{open_elements}->[-1]->[1] == HTML_EL and
3764 @{$self->{open_elements}} == 1) { # redundant, maybe
3765 !!!cp ('t270.2');
3766 ## Stop parsing.
3767 last B;
3768 } else {
3769 ## NOTE: As if </colgroup>.
3770 !!!cp ('t270.1');
3771 pop @{$self->{open_elements}}; # colgroup
3772 $self->{insertion_mode} = IN_TABLE_IM;
3773 ## Reprocess.
3774 next B;
3775 }
3776 } else {
3777 die "$0: $token->{type}: Unknown token type";
3778 }
3779
3780 ## As if </colgroup>
3781 if ($self->{open_elements}->[-1]->[1] == HTML_EL) {
3782 !!!cp ('t269');
3783 ## TODO: Wrong error type?
3784 !!!parse-error (type => 'unmatched end tag',
3785 text => 'colgroup', token => $token);
3786 ## Ignore the token
3787 !!!nack ('t269.1');
3788 !!!next-token;
3789 next B;
3790 } else {
3791 !!!cp ('t270');
3792 pop @{$self->{open_elements}}; # colgroup
3793 $self->{insertion_mode} = IN_TABLE_IM;
3794 !!!ack-later;
3795 ## reprocess
3796 next B;
3797 }
3798 } elsif ($self->{insertion_mode} & SELECT_IMS) {
3799 if ($token->{type} == CHARACTER_TOKEN) {
3800 !!!cp ('t271');
3801 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3802 !!!next-token;
3803 next B;
3804 } elsif ($token->{type} == START_TAG_TOKEN) {
3805 if ($token->{tag_name} eq 'option') {
3806 if ($self->{open_elements}->[-1]->[1] == OPTION_EL) {
3807 !!!cp ('t272');
3808 ## As if </option>
3809 pop @{$self->{open_elements}};
3810 } else {
3811 !!!cp ('t273');
3812 }
3813
3814 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3815 !!!nack ('t273.1');
3816 !!!next-token;
3817 next B;
3818 } elsif ($token->{tag_name} eq 'optgroup') {
3819 if ($self->{open_elements}->[-1]->[1] == OPTION_EL) {
3820 !!!cp ('t274');
3821 ## As if </option>
3822 pop @{$self->{open_elements}};
3823 } else {
3824 !!!cp ('t275');
3825 }
3826
3827 if ($self->{open_elements}->[-1]->[1] == OPTGROUP_EL) {
3828 !!!cp ('t276');
3829 ## As if </optgroup>
3830 pop @{$self->{open_elements}};
3831 } else {
3832 !!!cp ('t277');
3833 }
3834
3835 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3836 !!!nack ('t277.1');
3837 !!!next-token;
3838 next B;
3839 } elsif ({
3840 select => 1, input => 1, textarea => 1, keygen => 1,
3841 }->{$token->{tag_name}} or
3842 (($self->{insertion_mode} & IM_MASK)
3843 == IN_SELECT_IN_TABLE_IM and
3844 {
3845 caption => 1, table => 1,
3846 tbody => 1, tfoot => 1, thead => 1,
3847 tr => 1, td => 1, th => 1,
3848 }->{$token->{tag_name}})) {
3849 ## TODO: The type below is not good - <select> is replaced by </select>
3850 !!!parse-error (type => 'not closed', text => 'select',
3851 token => $token);
3852 ## NOTE: As if the token were </select> (<select> case) or
3853 ## as if there were </select> (otherwise).
3854 ## have an element in table scope
3855 my $i;
3856 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3857 my $node = $self->{open_elements}->[$_];
3858 if ($node->[1] == SELECT_EL) {
3859 !!!cp ('t278');
3860 $i = $_;
3861 last INSCOPE;
3862 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3863 !!!cp ('t279');
3864 last INSCOPE;
3865 }
3866 } # INSCOPE
3867 unless (defined $i) {
3868 !!!cp ('t280');
3869 !!!parse-error (type => 'unmatched end tag',
3870 text => 'select', token => $token);
3871 ## Ignore the token
3872 !!!nack ('t280.1');
3873 !!!next-token;
3874 next B;
3875 }
3876
3877 !!!cp ('t281');
3878 splice @{$self->{open_elements}}, $i;
3879
3880 $self->_reset_insertion_mode;
3881
3882 if ($token->{tag_name} eq 'select') {
3883 !!!nack ('t281.2');
3884 !!!next-token;
3885 next B;
3886 } else {
3887 !!!cp ('t281.1');
3888 !!!ack-later;
3889 ## Reprocess the token.
3890 next B;
3891 }
3892 } else {
3893 !!!cp ('t282');
3894 !!!parse-error (type => 'in select',
3895 text => $token->{tag_name}, token => $token);
3896 ## Ignore the token
3897 !!!nack ('t282.1');
3898 !!!next-token;
3899 next B;
3900 }
3901 } elsif ($token->{type} == END_TAG_TOKEN) {
3902 if ($token->{tag_name} eq 'optgroup') {
3903 if ($self->{open_elements}->[-1]->[1] == OPTION_EL and
3904 $self->{open_elements}->[-2]->[1] == OPTGROUP_EL) {
3905 !!!cp ('t283');
3906 ## As if </option>
3907 splice @{$self->{open_elements}}, -2;
3908 } elsif ($self->{open_elements}->[-1]->[1] == OPTGROUP_EL) {
3909 !!!cp ('t284');
3910 pop @{$self->{open_elements}};
3911 } else {
3912 !!!cp ('t285');
3913 !!!parse-error (type => 'unmatched end tag',
3914 text => $token->{tag_name}, token => $token);
3915 ## Ignore the token
3916 }
3917 !!!nack ('t285.1');
3918 !!!next-token;
3919 next B;
3920 } elsif ($token->{tag_name} eq 'option') {
3921 if ($self->{open_elements}->[-1]->[1] == OPTION_EL) {
3922 !!!cp ('t286');
3923 pop @{$self->{open_elements}};
3924 } else {
3925 !!!cp ('t287');
3926 !!!parse-error (type => 'unmatched end tag',
3927 text => $token->{tag_name}, token => $token);
3928 ## Ignore the token
3929 }
3930 !!!nack ('t287.1');
3931 !!!next-token;
3932 next B;
3933 } elsif ($token->{tag_name} eq 'select') {
3934 ## have an element in table scope
3935 my $i;
3936 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3937 my $node = $self->{open_elements}->[$_];
3938 if ($node->[1] == SELECT_EL) {
3939 !!!cp ('t288');
3940 $i = $_;
3941 last INSCOPE;
3942 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3943 !!!cp ('t289');
3944 last INSCOPE;
3945 }
3946 } # INSCOPE
3947 unless (defined $i) {
3948 !!!cp ('t290');
3949 !!!parse-error (type => 'unmatched end tag',
3950 text => $token->{tag_name}, token => $token);
3951 ## Ignore the token
3952 !!!nack ('t290.1');
3953 !!!next-token;
3954 next B;
3955 }
3956
3957 !!!cp ('t291');
3958 splice @{$self->{open_elements}}, $i;
3959
3960 $self->_reset_insertion_mode;
3961
3962 !!!nack ('t291.1');
3963 !!!next-token;
3964 next B;
3965 } elsif (($self->{insertion_mode} & IM_MASK)
3966 == IN_SELECT_IN_TABLE_IM and
3967 {
3968 caption => 1, table => 1, tbody => 1,
3969 tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
3970 }->{$token->{tag_name}}) {
3971 ## TODO: The following is wrong?
3972 !!!parse-error (type => 'unmatched end tag',
3973 text => $token->{tag_name}, token => $token);
3974
3975 ## have an element in table scope
3976 my $i;
3977 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3978 my $node = $self->{open_elements}->[$_];
3979 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
3980 !!!cp ('t292');
3981 $i = $_;
3982 last INSCOPE;
3983 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3984 !!!cp ('t293');
3985 last INSCOPE;
3986 }
3987 } # INSCOPE
3988 unless (defined $i) {
3989 !!!cp ('t294');
3990 ## Ignore the token
3991 !!!nack ('t294.1');
3992 !!!next-token;
3993 next B;
3994 }
3995
3996 ## As if </select>
3997 ## have an element in table scope
3998 undef $i;
3999 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4000 my $node = $self->{open_elements}->[$_];
4001 if ($node->[1] == SELECT_EL) {
4002 !!!cp ('t295');
4003 $i = $_;
4004 last INSCOPE;
4005 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4006 ## ISSUE: Can this state be reached?
4007 !!!cp ('t296');
4008 last INSCOPE;
4009 }
4010 } # INSCOPE
4011 unless (defined $i) {
4012 !!!cp ('t297');
4013 ## TODO: The following error type is correct?
4014 !!!parse-error (type => 'unmatched end tag',
4015 text => 'select', token => $token);
4016 ## Ignore the </select> token
4017 !!!nack ('t297.1');
4018 !!!next-token; ## TODO: ok?
4019 next B;
4020 }
4021
4022 !!!cp ('t298');
4023 splice @{$self->{open_elements}}, $i;
4024
4025 $self->_reset_insertion_mode;
4026
4027 !!!ack-later;
4028 ## reprocess
4029 next B;
4030 } else {
4031 !!!cp ('t299');
4032 !!!parse-error (type => 'in select:/',
4033 text => $token->{tag_name}, token => $token);
4034 ## Ignore the token
4035 !!!nack ('t299.3');
4036 !!!next-token;
4037 next B;
4038 }
4039 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4040 unless ($self->{open_elements}->[-1]->[1] == HTML_EL and
4041 @{$self->{open_elements}} == 1) { # redundant, maybe
4042 !!!cp ('t299.1');
4043 !!!parse-error (type => 'in body:#eof', token => $token);
4044 } else {
4045 !!!cp ('t299.2');
4046 }
4047
4048 ## Stop parsing.
4049 last B;
4050 } else {
4051 die "$0: $token->{type}: Unknown token type";
4052 }
4053 } elsif ($self->{insertion_mode} & BODY_AFTER_IMS) {
4054 if ($token->{type} == CHARACTER_TOKEN) {
## Strip leading white space from the character token and insert it
## as if in body; the rest of the token (if any) is handled below.
if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
  ## Save the captured white space right away so that the text appended
  ## below does not depend on $1 still holding it after the call.
  my $data = $1;
  ## As if in body
  $reconstruct_active_formatting_elements->($insert_to_current);

  $self->{open_elements}->[-1]->[0]->manakai_append_text ($data);

  ## Nothing but white space in this token: fetch the next one.
  unless (length $token->{data}) {
    !!!cp ('t300');
    !!!next-token;
    next B;
  }
}
4068
4069 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
4070 !!!cp ('t301');
4071 !!!parse-error (type => 'after html:#text', token => $token);
4072 #
4073 } else {
4074 !!!cp ('t302');
4075 ## "after body" insertion mode
4076 !!!parse-error (type => 'after body:#text', token => $token);
4077 #
4078 }
4079
4080 $self->{insertion_mode} = IN_BODY_IM;
4081 ## reprocess
4082 next B;
4083 } elsif ($token->{type} == START_TAG_TOKEN) {
4084 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
4085 !!!cp ('t303');
4086 !!!parse-error (type => 'after html',
4087 text => $token->{tag_name}, token => $token);
4088 #
4089 } else {
4090 !!!cp ('t304');
4091 ## "after body" insertion mode
4092 !!!parse-error (type => 'after body',
4093 text => $token->{tag_name}, token => $token);
4094 #
4095 }
4096
4097 $self->{insertion_mode} = IN_BODY_IM;
4098 !!!ack-later;
4099 ## reprocess
4100 next B;
4101 } elsif ($token->{type} == END_TAG_TOKEN) {
4102 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
4103 !!!cp ('t305');
4104 !!!parse-error (type => 'after html:/',
4105 text => $token->{tag_name}, token => $token);
4106
4107 $self->{insertion_mode} = IN_BODY_IM;
4108 ## Reprocess.
4109 next B;
4110 } else {
4111 !!!cp ('t306');
4112 }
4113
4114 ## "after body" insertion mode
4115 if ($token->{tag_name} eq 'html') {
4116 if (defined $self->{inner_html_node}) {
4117 !!!cp ('t307');
4118 !!!parse-error (type => 'unmatched end tag',
4119 text => 'html', token => $token);
4120 ## Ignore the token
4121 !!!next-token;
4122 next B;
4123 } else {
4124 !!!cp ('t308');
4125 $self->{insertion_mode} = AFTER_HTML_BODY_IM;
4126 !!!next-token;
4127 next B;
4128 }
4129 } else {
4130 !!!cp ('t309');
4131 !!!parse-error (type => 'after body:/',
4132 text => $token->{tag_name}, token => $token);
4133
4134 $self->{insertion_mode} = IN_BODY_IM;
4135 ## reprocess
4136 next B;
4137 }
4138 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4139 !!!cp ('t309.2');
4140 ## Stop parsing
4141 last B;
4142 } else {
4143 die "$0: $token->{type}: Unknown token type";
4144 }
4145 } elsif ($self->{insertion_mode} & FRAME_IMS) {
4146 if ($token->{type} == CHARACTER_TOKEN) {
4147 if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
4148 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4149
4150 unless (length $token->{data}) {
4151 !!!cp ('t310');
4152 !!!next-token;
4153 next B;
4154 }
4155 }
4156
4157 if ($token->{data} =~ s/^[^\x09\x0A\x0C\x20]+//) {
4158 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
4159 !!!cp ('t311');
4160 !!!parse-error (type => 'in frameset:#text', token => $token);
4161 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
4162 !!!cp ('t312');
4163 !!!parse-error (type => 'after frameset:#text', token => $token);
4164 } else { # "after after frameset"
4165 !!!cp ('t313');
4166 !!!parse-error (type => 'after html:#text', token => $token);
4167 }
4168
4169 ## Ignore the token.
4170 if (length $token->{data}) {
4171 !!!cp ('t314');
4172 ## reprocess the rest of characters
4173 } else {
4174 !!!cp ('t315');
4175 !!!next-token;
4176 }
4177 next B;
4178 }
4179
4180 die qq[$0: Character "$token->{data}"];
4181 } elsif ($token->{type} == START_TAG_TOKEN) {
4182 if ($token->{tag_name} eq 'frameset' and
4183 $self->{insertion_mode} == IN_FRAMESET_IM) {
4184 !!!cp ('t318');
4185 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4186 !!!nack ('t318.1');
4187 !!!next-token;
4188 next B;
4189 } elsif ($token->{tag_name} eq 'frame' and
4190 $self->{insertion_mode} == IN_FRAMESET_IM) {
4191 !!!cp ('t319');
4192 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4193 pop @{$self->{open_elements}};
4194 !!!ack ('t319.1');
4195 !!!next-token;
4196 next B;
4197 } elsif ($token->{tag_name} eq 'noframes') {
4198 !!!cp ('t320');
4199 ## NOTE: As if in head.
4200 $parse_rcdata->(CDATA_CONTENT_MODEL);
4201 next B;
4202
4203 ## NOTE: |<!DOCTYPE HTML><frameset></frameset></html><noframes></noframes>|
4204 ## has no parse error.
4205 } else {
4206 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
4207 !!!cp ('t321');
4208 !!!parse-error (type => 'in frameset',
4209 text => $token->{tag_name}, token => $token);
4210 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
4211 !!!cp ('t322');
4212 !!!parse-error (type => 'after frameset',
4213 text => $token->{tag_name}, token => $token);
4214 } else { # "after after frameset"
4215 !!!cp ('t322.2');
4216 !!!parse-error (type => 'after after frameset',
4217 text => $token->{tag_name}, token => $token);
4218 }
4219 ## Ignore the token
4220 !!!nack ('t322.1');
4221 !!!next-token;
4222 next B;
4223 }
4224 } elsif ($token->{type} == END_TAG_TOKEN) {
4225 if ($token->{tag_name} eq 'frameset' and
4226 $self->{insertion_mode} == IN_FRAMESET_IM) {
4227 if ($self->{open_elements}->[-1]->[1] == HTML_EL and
4228 @{$self->{open_elements}} == 1) {
4229 !!!cp ('t325');
4230 !!!parse-error (type => 'unmatched end tag',
4231 text => $token->{tag_name}, token => $token);
4232 ## Ignore the token
4233 !!!next-token;
4234 } else {
4235 !!!cp ('t326');
4236 pop @{$self->{open_elements}};
4237 !!!next-token;
4238 }
4239
4240 if (not defined $self->{inner_html_node} and
4241 not ($self->{open_elements}->[-1]->[1] == FRAMESET_EL)) {
4242 !!!cp ('t327');
4243 $self->{insertion_mode} = AFTER_FRAMESET_IM;
4244 } else {
4245 !!!cp ('t328');
4246 }
4247 next B;
4248 } elsif ($token->{tag_name} eq 'html' and
4249 $self->{insertion_mode} == AFTER_FRAMESET_IM) {
4250 !!!cp ('t329');
4251 $self->{insertion_mode} = AFTER_HTML_FRAMESET_IM;
4252 !!!next-token;
4253 next B;
4254 } else {
4255 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
4256 !!!cp ('t330');
4257 !!!parse-error (type => 'in frameset:/',
4258 text => $token->{tag_name}, token => $token);
4259 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
4260 !!!cp ('t330.1');
4261 !!!parse-error (type => 'after frameset:/',
4262 text => $token->{tag_name}, token => $token);
4263 } else { # "after after frameset"
4264 !!!cp ('t331');
4265 !!!parse-error (type => 'after after frameset:/',
4266 text => $token->{tag_name}, token => $token);
4267 }
4268 ## Ignore the token
4269 !!!next-token;
4270 next B;
4271 }
4272 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4273 unless ($self->{open_elements}->[-1]->[1] == HTML_EL and
4274 @{$self->{open_elements}} == 1) { # redundant, maybe
4275 !!!cp ('t331.1');
4276 !!!parse-error (type => 'in body:#eof', token => $token);
4277 } else {
4278 !!!cp ('t331.2');
4279 }
4280
4281 ## Stop parsing
4282 last B;
4283 } else {
4284 die "$0: $token->{type}: Unknown token type";
4285 }
4286 } else {
4287 die "$0: $self->{insertion_mode}: Unknown insertion mode";
4288 }
4289
4290 ## "in body" insertion mode
4291 if ($token->{type} == START_TAG_TOKEN) {
4292 if ($token->{tag_name} eq 'script') {
4293 !!!cp ('t332');
4294 ## NOTE: This is an "as if in head" code clone
4295 $script_start_tag->();
4296 next B;
4297 } elsif ($token->{tag_name} eq 'style') {
4298 !!!cp ('t333');
4299 ## NOTE: This is an "as if in head" code clone
4300 $parse_rcdata->(CDATA_CONTENT_MODEL);
4301 next B;
4302 } elsif ({
4303 base => 1, command => 1, eventsource => 1, link => 1,
4304 }->{$token->{tag_name}}) {
4305 !!!cp ('t334');
4306 ## NOTE: This is an "as if in head" code clone, only "-t" differs
4307 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4308 pop @{$self->{open_elements}};
4309 !!!ack ('t334.1');
4310 !!!next-token;
4311 next B;
4312 } elsif ($token->{tag_name} eq 'meta') {
4313 ## NOTE: This is an "as if in head" code clone, only "-t" differs
4314 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4315 my $meta_el = pop @{$self->{open_elements}};
4316
4317 unless ($self->{confident}) {
4318 if ($token->{attributes}->{charset}) {
4319 !!!cp ('t335');
4320 ## NOTE: Whether the encoding is supported or not is handled
4321 ## in the {change_encoding} callback.
4322 $self->{change_encoding}
4323 ->($self, $token->{attributes}->{charset}->{value}, $token);
4324
4325 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
4326 ->set_user_data (manakai_has_reference =>
4327 $token->{attributes}->{charset}
4328 ->{has_reference});
4329 } elsif ($token->{attributes}->{content}) {
4330 if ($token->{attributes}->{content}->{value}
4331 =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
4332 [\x09\x0A\x0C\x0D\x20]*=
4333 [\x09\x0A\x0C\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
4334 ([^"'\x09\x0A\x0C\x0D\x20][^\x09\x0A\x0C\x0D\x20\x3B]*))
4335 /x) {
4336 !!!cp ('t336');
4337 ## NOTE: Whether the encoding is supported or not is handled
4338 ## in the {change_encoding} callback.
4339 $self->{change_encoding}
4340 ->($self, defined $1 ? $1 : defined $2 ? $2 : $3, $token);
4341 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
4342 ->set_user_data (manakai_has_reference =>
4343 $token->{attributes}->{content}
4344 ->{has_reference});
4345 }
4346 }
4347 } else {
4348 if ($token->{attributes}->{charset}) {
4349 !!!cp ('t337');
4350 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
4351 ->set_user_data (manakai_has_reference =>
4352 $token->{attributes}->{charset}
4353 ->{has_reference});
4354 }
4355 if ($token->{attributes}->{content}) {
4356 !!!cp ('t338');
4357 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
4358 ->set_user_data (manakai_has_reference =>
4359 $token->{attributes}->{content}
4360 ->{has_reference});
4361 }
4362 }
4363
4364 !!!ack ('t338.1');
4365 !!!next-token;
4366 next B;
4367 } elsif ($token->{tag_name} eq 'title') {
4368 !!!cp ('t341');
4369 ## NOTE: This is an "as if in head" code clone
4370 $parse_rcdata->(RCDATA_CONTENT_MODEL);
4371 next B;
4372 } elsif ($token->{tag_name} eq 'body') {
4373 !!!parse-error (type => 'in body', text => 'body', token => $token);
4374
4375 if (@{$self->{open_elements}} == 1 or
4376 not ($self->{open_elements}->[1]->[1] == BODY_EL)) {
4377 !!!cp ('t342');
4378 ## Ignore the token
4379 } else {
4380 my $body_el = $self->{open_elements}->[1]->[0];
4381 for my $attr_name (keys %{$token->{attributes}}) {
4382 unless ($body_el->has_attribute_ns (undef, $attr_name)) {
4383 !!!cp ('t343');
4384 $body_el->set_attribute_ns
4385 (undef, [undef, $attr_name],
4386 $token->{attributes}->{$attr_name}->{value});
4387 }
4388 }
4389 }
4390 !!!nack ('t343.1');
4391 !!!next-token;
4392 next B;
4393 } elsif ({
4394 ## NOTE: Start tags for non-phrasing flow content elements
4395
4396 ## NOTE: The normal one
4397 address => 1, article => 1, aside => 1, blockquote => 1,
4398 center => 1, datagrid => 1, details => 1, dialog => 1,
4399 dir => 1, div => 1, dl => 1, fieldset => 1, figure => 1,
4400 footer => 1, h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1,
4401 h6 => 1, header => 1, menu => 1, nav => 1, ol => 1, p => 1,
4402 section => 1, ul => 1,
4403 ## NOTE: As normal, but drops leading newline
4404 pre => 1, listing => 1,
4405 ## NOTE: As normal, but interacts with the form element pointer
4406 form => 1,
4407
4408 table => 1,
4409 hr => 1,
4410 }->{$token->{tag_name}}) {
4411 if ($token->{tag_name} eq 'form' and defined $self->{form_element}) {
4412 !!!cp ('t350');
4413 !!!parse-error (type => 'in form:form', token => $token);
4414 ## Ignore the token
4415 !!!nack ('t350.1');
4416 !!!next-token;
4417 next B;
4418 }
4419
4420 if ($token->{tag_name} ne 'table' or # The Hixie Quirk
4421 $self->{document}->manakai_compat_mode ne 'quirks') {
4422 ## has a p element in scope
4423 INSCOPE: for (reverse @{$self->{open_elements}}) {
4424 if ($_->[1] == P_EL) {
4425 !!!cp ('t344');
4426 !!!back-token; # <form>
4427 $token = {type => END_TAG_TOKEN, tag_name => 'p',
4428 line => $token->{line}, column => $token->{column}};
4429 next B;
4430 } elsif ($_->[1] & SCOPING_EL) {
4431 !!!cp ('t345');
4432 last INSCOPE;
4433 }
4434 } # INSCOPE
4435 }
4436
4437 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4438 if ($token->{tag_name} eq 'pre' or $token->{tag_name} eq 'listing') {
4439 !!!nack ('t346.1');
4440 !!!next-token;
4441 if ($token->{type} == CHARACTER_TOKEN) {
4442 $token->{data} =~ s/^\x0A//;
4443 unless (length $token->{data}) {
4444 !!!cp ('t346');
4445 !!!next-token;
4446 } else {
4447 !!!cp ('t349');
4448 }
4449 } else {
4450 !!!cp ('t348');
4451 }
4452 } elsif ($token->{tag_name} eq 'form') {
4453 !!!cp ('t347.1');
4454 $self->{form_element} = $self->{open_elements}->[-1]->[0];
4455
4456 !!!nack ('t347.2');
4457 !!!next-token;
4458 } elsif ($token->{tag_name} eq 'table') {
4459 !!!cp ('t382');
4460 push @{$open_tables}, [$self->{open_elements}->[-1]->[0]];
4461
4462 $self->{insertion_mode} = IN_TABLE_IM;
4463
4464 !!!nack ('t382.1');
4465 !!!next-token;
4466 } elsif ($token->{tag_name} eq 'hr') {
4467 !!!cp ('t386');
4468 pop @{$self->{open_elements}};
4469
4470 !!!nack ('t386.1');
4471 !!!next-token;
4472 } else {
4473 !!!nack ('t347.1');
4474 !!!next-token;
4475 }
4476 next B;
4477 } elsif ($token->{tag_name} eq 'li') {
4478 ## NOTE: As normal, but imply </li> when there's another <li> ...
4479
4480 ## NOTE: Special, Scope (<li><foo><li> == <li><foo><li/></foo></li>)
4481 ## Interpreted as <li><foo/></li><li/> (non-conforming)
4482 ## blockquote (O9.27), center (O), dd (Fx3, O, S3.1.2, IE7),
4483 ## dt (Fx, O, S, IE), dl (O), fieldset (O, S, IE), form (Fx, O, S),
4484 ## hn (O), pre (O), applet (O, S), button (O, S), marquee (Fx, O, S),
4485 ## object (Fx)
4486 ## Generate non-tree (non-conforming)
4487 ## basefont (IE7 (where basefont is non-void)), center (IE),
4488 ## form (IE), hn (IE)
4489 ## address, div, p (<li><foo><li> == <li><foo/></li><li/>)
4490 ## Interpreted as <li><foo><li/></foo></li> (non-conforming)
4491 ## div (Fx, S)
4492
4493 my $non_optional;
4494 my $i = -1;
4495
4496 ## 1.
4497 for my $node (reverse @{$self->{open_elements}}) {
4498 if ($node->[1] == LI_EL) {
4499 ## 2. (a) As if </li>
4500 {
4501 ## If no </li> - not applied
4502 #
4503
4504 ## Otherwise
4505
4506 ## 1. generate implied end tags, except for </li>
4507 #
4508
4509 ## 2. If current node != "li", parse error
4510 if ($non_optional) {
4511 !!!parse-error (type => 'not closed',
4512 text => $non_optional->[0]->manakai_local_name,
4513 token => $token);
4514 !!!cp ('t355');
4515 } else {
4516 !!!cp ('t356');
4517 }
4518
4519 ## 3. Pop
4520 splice @{$self->{open_elements}}, $i;
4521 }
4522
4523 last; ## 2. (b) goto 5.
4524 } elsif (
4525 ## NOTE: not "formatting" and not "phrasing"
4526 ($node->[1] & SPECIAL_EL or
4527 $node->[1] & SCOPING_EL) and
4528 ## NOTE: "li", "dt", and "dd" are in |SPECIAL_EL|.
4529 (not $node->[1] & ADDRESS_DIV_P_EL)
4530 ) {
4531 ## 3.
4532 !!!cp ('t357');
4533 last; ## goto 5.
4534 } elsif ($node->[1] & END_TAG_OPTIONAL_EL) {
4535 !!!cp ('t358');
4536 #
4537 } else {
4538 !!!cp ('t359');
4539 $non_optional ||= $node;
4540 #
4541 }
4542 ## 4.
4543 ## goto 2.
4544 $i--;
4545 }
4546
4547 ## 5. (a) has a |p| element in scope
4548 INSCOPE: for (reverse @{$self->{open_elements}}) {
4549 if ($_->[1] == P_EL) {
4550 !!!cp ('t353');
4551
4552 ## NOTE: |<p><li>|, for example.
4553
4554 !!!back-token; # <x>
4555 $token = {type => END_TAG_TOKEN, tag_name => 'p',
4556 line => $token->{line}, column => $token->{column}};
4557 next B;
4558 } elsif ($_->[1] & SCOPING_EL) {
4559 !!!cp ('t354');
4560 last INSCOPE;
4561 }
4562 } # INSCOPE
4563
4564 ## 5. (b) insert
4565 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4566 !!!nack ('t359.1');
4567 !!!next-token;
4568 next B;
4569 } elsif ($token->{tag_name} eq 'dt' or
4570 $token->{tag_name} eq 'dd') {
4571 ## NOTE: As normal, but imply </dt> or </dd> when ...
4572
4573 my $non_optional;
4574 my $i = -1;
4575
4576 ## 1.
4577 for my $node (reverse @{$self->{open_elements}}) {
4578 if ($node->[1] == DTDD_EL) {
4579 ## 2. (a) As if </dt> or </dd>
4580 {
4581 ## If no </dt> or </dd> - not applied
4582 #
4583
4584 ## Otherwise
4585
4586 ## 1. generate implied end tags, except for </dt> or </dd>
4587 #
4588
4589 ## 2. If current node != "dt"|"dd", parse error
4590 if ($non_optional) {
4591 !!!parse-error (type => 'not closed',
4592 text => $non_optional->[0]->manakai_local_name,
4593 token => $token);
4594 !!!cp ('t355.1');
4595 } else {
4596 !!!cp ('t356.1');
4597 }
4598
4599 ## 3. Pop
4600 splice @{$self->{open_elements}}, $i;
4601 }
4602
4603 last; ## 2. (b) goto 5.
4604 } elsif (
4605 ## NOTE: not "formatting" and not "phrasing"
4606 ($node->[1] & SPECIAL_EL or
4607 $node->[1] & SCOPING_EL) and
4608 ## NOTE: "li", "dt", and "dd" are in |SPECIAL_EL|.
4609
4610 (not $node->[1] & ADDRESS_DIV_P_EL)
4611 ) {
4612 ## 3.
4613 !!!cp ('t357.1');
4614 last; ## goto 5.
4615 } elsif ($node->[1] & END_TAG_OPTIONAL_EL) {
4616 !!!cp ('t358.1');
4617 #
4618 } else {
4619 !!!cp ('t359.1');
4620 $non_optional ||= $node;
4621 #
4622 }
4623 ## 4.
4624 ## goto 2.
4625 $i--;
4626 }
4627
4628 ## 5. (a) has a |p| element in scope
4629 INSCOPE: for (reverse @{$self->{open_elements}}) {
4630 if ($_->[1] == P_EL) {
4631 !!!cp ('t353.1');
4632 !!!back-token; # <x>
4633 $token = {type => END_TAG_TOKEN, tag_name => 'p',
4634 line => $token->{line}, column => $token->{column}};
4635 next B;
4636 } elsif ($_->[1] & SCOPING_EL) {
4637 !!!cp ('t354.1');
4638 last INSCOPE;
4639 }
4640 } # INSCOPE
4641
4642 ## 5. (b) insert
4643 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4644 !!!nack ('t359.2');
4645 !!!next-token;
4646 next B;
4647 } elsif ($token->{tag_name} eq 'plaintext') {
4648 ## NOTE: As normal, but effectively ends parsing
4649
4650 ## has a p element in scope
4651 INSCOPE: for (reverse @{$self->{open_elements}}) {
4652 if ($_->[1] == P_EL) {
4653 !!!cp ('t367');
4654 !!!back-token; # <plaintext>
4655 $token = {type => END_TAG_TOKEN, tag_name => 'p',
4656 line => $token->{line}, column => $token->{column}};
4657 next B;
4658 } elsif ($_->[1] & SCOPING_EL) {
4659 !!!cp ('t368');
4660 last INSCOPE;
4661 }
4662 } # INSCOPE
4663
4664 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4665
4666 $self->{content_model} = PLAINTEXT_CONTENT_MODEL;
4667
4668 !!!nack ('t368.1');
4669 !!!next-token;
4670 next B;
4671 } elsif ($token->{tag_name} eq 'a') {
4672 AFE: for my $i (reverse 0..$#$active_formatting_elements) {
4673 my $node = $active_formatting_elements->[$i];
4674 if ($node->[1] == A_EL) {
4675 !!!cp ('t371');
4676 !!!parse-error (type => 'in a:a', token => $token);
4677
4678 !!!back-token; # <a>
4679 $token = {type => END_TAG_TOKEN, tag_name => 'a',
4680 line => $token->{line}, column => $token->{column}};
4681 $formatting_end_tag->($token);
4682
4683 AFE2: for (reverse 0..$#$active_formatting_elements) {
4684 if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
4685 !!!cp ('t372');
4686 splice @$active_formatting_elements, $_, 1;
4687 last AFE2;
4688 }
4689 } # AFE2
4690 OE: for (reverse 0..$#{$self->{open_elements}}) {
4691 if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
4692 !!!cp ('t373');
4693 splice @{$self->{open_elements}}, $_, 1;
4694 last OE;
4695 }
4696 } # OE
4697 last AFE;
4698 } elsif ($node->[0] eq '#marker') {
4699 !!!cp ('t374');
4700 last AFE;
4701 }
4702 } # AFE
4703
4704 $reconstruct_active_formatting_elements->($insert_to_current);
4705
4706 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4707 push @$active_formatting_elements, $self->{open_elements}->[-1];
4708
4709 !!!nack ('t374.1');
4710 !!!next-token;
4711 next B;
4712 } elsif ($token->{tag_name} eq 'nobr') {
4713 $reconstruct_active_formatting_elements->($insert_to_current);
4714
4715 ## has a |nobr| element in scope
4716 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4717 my $node = $self->{open_elements}->[$_];
4718 if ($node->[1] == NOBR_EL) {
4719 !!!cp ('t376');
4720 !!!parse-error (type => 'in nobr:nobr', token => $token);
4721 !!!back-token; # <nobr>
4722 $token = {type => END_TAG_TOKEN, tag_name => 'nobr',
4723 line => $token->{line}, column => $token->{column}};
4724 next B;
4725 } elsif ($node->[1] & SCOPING_EL) {
4726 !!!cp ('t377');
4727 last INSCOPE;
4728 }
4729 } # INSCOPE
4730
4731 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4732 push @$active_formatting_elements, $self->{open_elements}->[-1];
4733
4734 !!!nack ('t377.1');
4735 !!!next-token;
4736 next B;
4737 } elsif ($token->{tag_name} eq 'button') {
4738 ## has a button element in scope
4739 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4740 my $node = $self->{open_elements}->[$_];
4741 if ($node->[1] == BUTTON_EL) {
4742 !!!cp ('t378');
4743 !!!parse-error (type => 'in button:button', token => $token);
4744 !!!back-token; # <button>
4745 $token = {type => END_TAG_TOKEN, tag_name => 'button',
4746 line => $token->{line}, column => $token->{column}};
4747 next B;
4748 } elsif ($node->[1] & SCOPING_EL) {
4749 !!!cp ('t379');
4750 last INSCOPE;
4751 }
4752 } # INSCOPE
4753
4754 $reconstruct_active_formatting_elements->($insert_to_current);
4755
4756 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4757
4758 ## TODO: associate with $self->{form_element} if defined
4759
4760 push @$active_formatting_elements, ['#marker', ''];
4761
4762 !!!nack ('t379.1');
4763 !!!next-token;
4764 next B;
4765 } elsif ({
4766 xmp => 1,
4767 iframe => 1,
4768 noembed => 1,
4769 noframes => 1, ## NOTE: This is an "as if in head" code clone.
4770 noscript => 0, ## TODO: 1 if scripting is enabled
4771 }->{$token->{tag_name}}) {
4772 if ($token->{tag_name} eq 'xmp') {
4773 !!!cp ('t381');
4774 $reconstruct_active_formatting_elements->($insert_to_current);
4775 } else {
4776 !!!cp ('t399');
4777 }
4778 ## NOTE: There is an "as if in body" code clone.
4779 $parse_rcdata->(CDATA_CONTENT_MODEL);
4780 next B;
4781 } elsif ($token->{tag_name} eq 'isindex') {
4782 !!!parse-error (type => 'isindex', token => $token);
4783
4784 if (defined $self->{form_element}) {
4785 !!!cp ('t389');
4786 ## Ignore the token
4787 !!!nack ('t389'); ## NOTE: Not acknowledged.
4788 !!!next-token;
4789 next B;
4790 } else {
4791 !!!ack ('t391.1');
4792
4793 my $at = $token->{attributes};
4794 my $form_attrs;
4795 $form_attrs->{action} = $at->{action} if $at->{action};
4796 my $prompt_attr = $at->{prompt};
4797 $at->{name} = {name => 'name', value => 'isindex'};
4798 delete $at->{action};
4799 delete $at->{prompt};
4800 my @tokens = (
4801 {type => START_TAG_TOKEN, tag_name => 'form',
4802 attributes => $form_attrs,
4803 line => $token->{line}, column => $token->{column}},
4804 {type => START_TAG_TOKEN, tag_name => 'hr',
4805 line => $token->{line}, column => $token->{column}},
4806 {type => START_TAG_TOKEN, tag_name => 'label',
4807 line => $token->{line}, column => $token->{column}},
4808 );
4809 if ($prompt_attr) {
4810 !!!cp ('t390');
4811 push @tokens, {type => CHARACTER_TOKEN, data => $prompt_attr->{value},
4812 #line => $token->{line}, column => $token->{column},
4813 };
4814 } else {
4815 !!!cp ('t391');
4816 push @tokens, {type => CHARACTER_TOKEN,
4817 data => 'This is a searchable index. Insert your search keywords here: ',
4818 #line => $token->{line}, column => $token->{column},
4819 }; # SHOULD
4820 ## TODO: make this configurable
4821 }
4822 push @tokens,
4823 {type => START_TAG_TOKEN, tag_name => 'input', attributes => $at,
4824 line => $token->{line}, column => $token->{column}},
4825 #{type => CHARACTER_TOKEN, data => ''}, # SHOULD
4826 {type => END_TAG_TOKEN, tag_name => 'label',
4827 line => $token->{line}, column => $token->{column}},
4828 {type => START_TAG_TOKEN, tag_name => 'hr',
4829 line => $token->{line}, column => $token->{column}},
4830 {type => END_TAG_TOKEN, tag_name => 'form',
4831 line => $token->{line}, column => $token->{column}};
4832 !!!back-token (@tokens);
4833 !!!next-token;
4834 next B;
4835 }
4836 } elsif ($token->{tag_name} eq 'textarea') {
4837 ## Step 1
4838 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4839
4840 ## Step 2
4841 ## TODO: $self->{form_element} if defined
4842
4843 ## Step 3
4844 $self->{ignore_newline} = 1;
4845
4846 ## Step 4
4847 ## ISSUE: This step is wrong. (r2302 enbugged)
4848
4849 ## Step 5
4850 $self->{content_model} = RCDATA_CONTENT_MODEL;
4851 delete $self->{escape}; # MUST
4852
4853 ## Step 6-7
4854 $self->{insertion_mode} |= IN_CDATA_RCDATA_IM;
4855
4856 !!!nack ('t392.1');
4857 !!!next-token;
4858 next B;
4859 } elsif ($token->{tag_name} eq 'optgroup' or
4860 $token->{tag_name} eq 'option') {
4861 ## has an |option| element in scope
4862 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4863 my $node = $self->{open_elements}->[$_];
4864 if ($node->[1] == OPTION_EL) {
4865 !!!cp ('t397.1');
4866 ## NOTE: As if </option>
4867 !!!back-token; # <option> or <optgroup>
4868 $token = {type => END_TAG_TOKEN, tag_name => 'option',
4869 line => $token->{line}, column => $token->{column}};
4870 next B;
4871 } elsif ($node->[1] & SCOPING_EL) {
4872 !!!cp ('t397.2');
4873 last INSCOPE;
4874 }
4875 } # INSCOPE
4876
4877 $reconstruct_active_formatting_elements->($insert_to_current);
4878
4879 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4880
4881 !!!nack ('t397.3');
4882 !!!next-token;
4883 redo B;
4884 } elsif ($token->{tag_name} eq 'rt' or
4885 $token->{tag_name} eq 'rp') {
4886 ## has a |ruby| element in scope
4887 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4888 my $node = $self->{open_elements}->[$_];
4889 if ($node->[1] == RUBY_EL) {
4890 !!!cp ('t398.1');
4891 ## generate implied end tags
4892 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
4893 !!!cp ('t398.2');
4894 pop @{$self->{open_elements}};
4895 }
4896 unless ($self->{open_elements}->[-1]->[1] == RUBY_EL) {
4897 !!!cp ('t398.3');
4898 !!!parse-error (type => 'not closed',
4899 text => $self->{open_elements}->[-1]->[0]
4900 ->manakai_local_name,
4901 token => $token);
4902 pop @{$self->{open_elements}}
4903 while not $self->{open_elements}->[-1]->[1] == RUBY_EL;
4904 }
4905 last INSCOPE;
4906 } elsif ($node->[1] & SCOPING_EL) {
4907 !!!cp ('t398.4');
4908 last INSCOPE;
4909 }
4910 } # INSCOPE
4911
4912 ## TODO: <non-ruby><rt> is not allowed.
4913
4914 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4915
4916 !!!nack ('t398.5');
4917 !!!next-token;
4918 redo B;
4919 } elsif ($token->{tag_name} eq 'math' or
4920 $token->{tag_name} eq 'svg') {
4921 $reconstruct_active_formatting_elements->($insert_to_current);
4922
4923 ## "Adjust MathML attributes" ('math' only) - done in insert-element-f
4924
4925 ## "adjust SVG attributes" ('svg' only) - done in insert-element-f
4926
4927 ## "adjust foreign attributes" - done in insert-element-f
4928
4929 !!!insert-element-f ($token->{tag_name} eq 'math' ? $MML_NS : $SVG_NS, $token->{tag_name}, $token->{attributes}, $token);
4930
4931 if ($self->{self_closing}) {
4932 pop @{$self->{open_elements}};
4933 !!!ack ('t398.6');
4934 } else {
4935 !!!cp ('t398.7');
4936 $self->{insertion_mode} |= IN_FOREIGN_CONTENT_IM;
4937 ## NOTE: |<body><math><mi><svg>| -> "in foreign content" insertion
4938 ## mode, "in body" (not "in foreign content") secondary insertion
4939 ## mode, maybe.
4940 }
4941
4942 !!!next-token;
4943 next B;
4944 } elsif ({
4945 caption => 1, col => 1, colgroup => 1, frame => 1,
4946 frameset => 1, head => 1,
4947 tbody => 1, td => 1, tfoot => 1, th => 1,
4948 thead => 1, tr => 1,
4949 }->{$token->{tag_name}}) {
4950 !!!cp ('t401');
4951 !!!parse-error (type => 'in body',
4952 text => $token->{tag_name}, token => $token);
4953 ## Ignore the token
4954 !!!nack ('t401.1'); ## NOTE: |<col/>| or |<frame/>| here is an error.
4955 !!!next-token;
4956 next B;
4957 } elsif ($token->{tag_name} eq 'param' or
4958 $token->{tag_name} eq 'source') {
4959 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4960 pop @{$self->{open_elements}};
4961
4962 !!!ack ('t398.5');
4963 !!!next-token;
4964 redo B;
4965 } else {
4966 if ($token->{tag_name} eq 'image') {
4967 !!!cp ('t384');
4968 !!!parse-error (type => 'image', token => $token);
4969 $token->{tag_name} = 'img';
4970 } else {
4971 !!!cp ('t385');
4972 }
4973
4974 ## NOTE: There is an "as if <br>" code clone.
4975 $reconstruct_active_formatting_elements->($insert_to_current);
4976
4977 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4978
4979 if ({
4980 applet => 1, marquee => 1, object => 1,
4981 }->{$token->{tag_name}}) {
4982 !!!cp ('t380');
4983 push @$active_formatting_elements, ['#marker', ''];
4984 !!!nack ('t380.1');
4985 } elsif ({
4986 b => 1, big => 1, em => 1, font => 1, i => 1,
4987 s => 1, small => 1, strike => 1,
4988 strong => 1, tt => 1, u => 1,
4989 }->{$token->{tag_name}}) {
4990 !!!cp ('t375');
4991 push @$active_formatting_elements, $self->{open_elements}->[-1];
4992 !!!nack ('t375.1');
4993 } elsif ($token->{tag_name} eq 'input') {
4994 !!!cp ('t388');
4995 ## TODO: associate with $self->{form_element} if defined
4996 pop @{$self->{open_elements}};
4997 !!!ack ('t388.2');
4998 } elsif ({
4999 area => 1, basefont => 1, bgsound => 1, br => 1,
5000 embed => 1, img => 1, spacer => 1, wbr => 1,
5001 }->{$token->{tag_name}}) {
5002 !!!cp ('t388.1');
5003 pop @{$self->{open_elements}};
5004 !!!ack ('t388.3');
5005 } elsif ($token->{tag_name} eq 'select') {
5006 ## TODO: associate with $self->{form_element} if defined
5007
5008 if ($self->{insertion_mode} & TABLE_IMS or
5009 $self->{insertion_mode} & BODY_TABLE_IMS or
5010 ($self->{insertion_mode} & IM_MASK) == IN_COLUMN_GROUP_IM) {
5011 !!!cp ('t400.1');
5012 $self->{insertion_mode} = IN_SELECT_IN_TABLE_IM;
5013 } else {
5014 !!!cp ('t400.2');
5015 $self->{insertion_mode} = IN_SELECT_IM;
5016 }
5017 !!!nack ('t400.3');
5018 } else {
5019 !!!nack ('t402');
5020 }
5021
5022 !!!next-token;
5023 next B;
5024 }
5025 } elsif ($token->{type} == END_TAG_TOKEN) {
5026 if ($token->{tag_name} eq 'body') {
5027 ## has a |body| element in scope
5028 my $i;
5029 INSCOPE: {
5030 for (reverse @{$self->{open_elements}}) {
5031 if ($_->[1] == BODY_EL) {
5032 !!!cp ('t405');
5033 $i = $_;
5034 last INSCOPE;
5035 } elsif ($_->[1] & SCOPING_EL) {
5036 !!!cp ('t405.1');
5037 last;
5038 }
5039 }
5040
5041 ## NOTE: |<marquee></body>|, |<svg><foreignobject></body>|
5042
5043 !!!parse-error (type => 'unmatched end tag',
5044 text => $token->{tag_name}, token => $token);
5045 ## NOTE: Ignore the token.
5046 !!!next-token;
5047 next B;
5048 } # INSCOPE
5049
5050 for (@{$self->{open_elements}}) {
5051 unless ($_->[1] & ALL_END_TAG_OPTIONAL_EL) {
5052 !!!cp ('t403');
5053 !!!parse-error (type => 'not closed',
5054 text => $_->[0]->manakai_local_name,
5055 token => $token);
5056 last;
5057 } else {
5058 !!!cp ('t404');
5059 }
5060 }
5061
5062 $self->{insertion_mode} = AFTER_BODY_IM;
5063 !!!next-token;
5064 next B;
5065 } elsif ($token->{tag_name} eq 'html') {
5066 ## TODO: Update this code. It seems that the code below is not
5067 ## up to date, though it has the same effect as specified.
5068 if (@{$self->{open_elements}} > 1 and
5069 $self->{open_elements}->[1]->[1] == BODY_EL) {
5070 unless ($self->{open_elements}->[-1]->[1] == BODY_EL) {
5071 !!!cp ('t406');
5072 !!!parse-error (type => 'not closed',
5073 text => $self->{open_elements}->[1]->[0]
5074 ->manakai_local_name,
5075 token => $token);
5076 } else {
5077 !!!cp ('t407');
5078 }
5079 $self->{insertion_mode} = AFTER_BODY_IM;
5080 ## reprocess
5081 next B;
5082 } else {
5083 !!!cp ('t408');
5084 !!!parse-error (type => 'unmatched end tag',
5085 text => $token->{tag_name}, token => $token);
5086 ## Ignore the token
5087 !!!next-token;
5088 next B;
5089 }
5090 } elsif ({
5091 ## NOTE: End tags for non-phrasing flow content elements
5092
5093 ## NOTE: The normal ones
5094 address => 1, article => 1, aside => 1, blockquote => 1,
5095 center => 1, datagrid => 1, details => 1, dialog => 1,
5096 dir => 1, div => 1, dl => 1, fieldset => 1, figure => 1,
5097 footer => 1, header => 1, listing => 1, menu => 1, nav => 1,
5098 ol => 1, pre => 1, section => 1, ul => 1,
5099
5100 ## NOTE: As normal, but ... optional tags
5101 dd => 1, dt => 1, li => 1,
5102
5103 applet => 1, button => 1, marquee => 1, object => 1,
5104 }->{$token->{tag_name}}) {
5105 ## NOTE: Code for <li> start tags includes "as if </li>" code.
5106 ## Code for <dt> or <dd> start tags includes "as if </dt> or
5107 ## </dd>" code.
5108
5109 ## has an element in scope
5110 my $i;
5111 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5112 my $node = $self->{open_elements}->[$_];
5113 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
5114 !!!cp ('t410');
5115 $i = $_;
5116 last INSCOPE;
5117 } elsif ($node->[1] & SCOPING_EL) {
5118 !!!cp ('t411');
5119 last INSCOPE;
5120 }
5121 } # INSCOPE
5122
5123 unless (defined $i) { # has an element in scope
5124 !!!cp ('t413');
5125 !!!parse-error (type => 'unmatched end tag',
5126 text => $token->{tag_name}, token => $token);
5127 ## NOTE: Ignore the token.
5128 } else {
5129 ## Step 1. generate implied end tags
5130 while ({
5131 ## END_TAG_OPTIONAL_EL
5132 dd => ($token->{tag_name} ne 'dd'),
5133 dt => ($token->{tag_name} ne 'dt'),
5134 li => ($token->{tag_name} ne 'li'),
5135 option => 1,
5136 optgroup => 1,
5137 p => 1,
5138 rt => 1,
5139 rp => 1,
5140 }->{$self->{open_elements}->[-1]->[0]->manakai_local_name}) {
5141 !!!cp ('t409');
5142 pop @{$self->{open_elements}};
5143 }
5144
5145 ## Step 2.
5146 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
5147 ne $token->{tag_name}) {
5148 !!!cp ('t412');
5149 !!!parse-error (type => 'not closed',
5150 text => $self->{open_elements}->[-1]->[0]
5151 ->manakai_local_name,
5152 token => $token);
5153 } else {
5154 !!!cp ('t414');
5155 }
5156
5157 ## Step 3.
5158 splice @{$self->{open_elements}}, $i;
5159
5160 ## Step 4.
5161 $clear_up_to_marker->()
5162 if {
5163 applet => 1, button => 1, marquee => 1, object => 1,
5164 }->{$token->{tag_name}};
5165 }
5166 !!!next-token;
5167 next B;
5168 } elsif ($token->{tag_name} eq 'form') {
5169 ## NOTE: As normal, but interacts with the form element pointer
5170
5171 undef $self->{form_element};
5172
5173 ## has an element in scope
5174 my $i;
5175 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5176 my $node = $self->{open_elements}->[$_];
5177 if ($node->[1] == FORM_EL) {
5178 !!!cp ('t418');
5179 $i = $_;
5180 last INSCOPE;
5181 } elsif ($node->[1] & SCOPING_EL) {
5182 !!!cp ('t419');
5183 last INSCOPE;
5184 }
5185 } # INSCOPE
5186
5187 unless (defined $i) { # has an element in scope
5188 !!!cp ('t421');
5189 !!!parse-error (type => 'unmatched end tag',
5190 text => $token->{tag_name}, token => $token);
5191 ## NOTE: Ignore the token.
5192 } else {
5193 ## Step 1. generate implied end tags
5194 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
5195 !!!cp ('t417');
5196 pop @{$self->{open_elements}};
5197 }
5198
5199 ## Step 2.
5200 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
5201 ne $token->{tag_name}) {
5202 !!!cp ('t417.1');
5203 !!!parse-error (type => 'not closed',
5204 text => $self->{open_elements}->[-1]->[0]
5205 ->manakai_local_name,
5206 token => $token);
5207 } else {
5208 !!!cp ('t420');
5209 }
5210
5211 ## Step 3.
5212 splice @{$self->{open_elements}}, $i;
5213 }
5214
5215 !!!next-token;
5216 next B;
5217 } elsif ({
5218 ## NOTE: As normal, except acts as a closer for any ...
5219 h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
5220 }->{$token->{tag_name}}) {
5221 ## has an element in scope
5222 my $i;
5223 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5224 my $node = $self->{open_elements}->[$_];
5225 if ($node->[1] == HEADING_EL) {
5226 !!!cp ('t423');
5227 $i = $_;
5228 last INSCOPE;
5229 } elsif ($node->[1] & SCOPING_EL) {
5230 !!!cp ('t424');
5231 last INSCOPE;
5232 }
5233 } # INSCOPE
5234
5235 unless (defined $i) { # has an element in scope
5236 !!!cp ('t425.1');
5237 !!!parse-error (type => 'unmatched end tag',
5238 text => $token->{tag_name}, token => $token);
5239 ## NOTE: Ignore the token.
5240 } else {
5241 ## Step 1. generate implied end tags
5242 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
5243 !!!cp ('t422');
5244 pop @{$self->{open_elements}};
5245 }
5246
5247 ## Step 2.
5248 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
5249 ne $token->{tag_name}) {
5250 !!!cp ('t425');
5251 !!!parse-error (type => 'unmatched end tag',
5252 text => $token->{tag_name}, token => $token);
5253 } else {
5254 !!!cp ('t426');
5255 }
5256
5257 ## Step 3.
5258 splice @{$self->{open_elements}}, $i;
5259 }
5260
5261 !!!next-token;
5262 next B;
5263 } elsif ($token->{tag_name} eq 'p') {
5264 ## NOTE: As normal, except </p> implies <p> and ...
5265
5266 ## has an element in scope
5267 my $non_optional;
5268 my $i;
5269 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5270 my $node = $self->{open_elements}->[$_];
5271 if ($node->[1] == P_EL) {
5272 !!!cp ('t410.1');
5273 $i = $_;
5274 last INSCOPE;
5275 } elsif ($node->[1] & SCOPING_EL) {
5276 !!!cp ('t411.1');
5277 last INSCOPE;
5278 } elsif ($node->[1] & END_TAG_OPTIONAL_EL) {
5279 ## NOTE: |END_TAG_OPTIONAL_EL| includes "p"
5280 !!!cp ('t411.2');
5281 #
5282 } else {
5283 !!!cp ('t411.3');
5284 $non_optional ||= $node;
5285 #
5286 }
5287 } # INSCOPE
5288
5289 if (defined $i) {
5290 ## 1. Generate implied end tags
5291 #
5292
5293 ## 2. If current node != "p", parse error
5294 if ($non_optional) {
5295 !!!cp ('t412.1');
5296 !!!parse-error (type => 'not closed',
5297 text => $non_optional->[0]->manakai_local_name,
5298 token => $token);
5299 } else {
5300 !!!cp ('t414.1');
5301 }
5302
5303 ## 3. Pop
5304 splice @{$self->{open_elements}}, $i;
5305 } else {
5306 !!!cp ('t413.1');
5307 !!!parse-error (type => 'unmatched end tag',
5308 text => $token->{tag_name}, token => $token);
5309
5310 !!!cp ('t415.1');
5311 ## As if <p>, then reprocess the current token
5312 my $el;
5313 !!!create-element ($el, $HTML_NS, 'p',, $token);
5314 $insert->($el);
5315 ## NOTE: Not inserted into |$self->{open_elements}|.
5316 }
5317
5318 !!!next-token;
5319 next B;
5320 } elsif ({
5321 a => 1,
5322 b => 1, big => 1, em => 1, font => 1, i => 1,
5323 nobr => 1, s => 1, small => 1, strike => 1,
5324 strong => 1, tt => 1, u => 1,
5325 }->{$token->{tag_name}}) {
5326 !!!cp ('t427');
5327 $formatting_end_tag->($token);
5328 next B;
5329 } elsif ($token->{tag_name} eq 'br') {
5330 !!!cp ('t428');
5331 !!!parse-error (type => 'unmatched end tag',
5332 text => 'br', token => $token);
5333
5334 ## As if <br>
5335 $reconstruct_active_formatting_elements->($insert_to_current);
5336
5337 my $el;
5338 !!!create-element ($el, $HTML_NS, 'br',, $token);
5339 $insert->($el);
5340
5341 ## Ignore the token.
5342 !!!next-token;
5343 next B;
5344 } else {
5345 if ($token->{tag_name} eq 'sarcasm') {
5346 sleep 0.001; # take a deep breath
5347 }
5348
5349 ## Step 1
5350 my $node_i = -1;
5351 my $node = $self->{open_elements}->[$node_i];
5352
5353 ## Step 2
5354 S2: {
5355 my $node_tag_name = $node->[0]->manakai_local_name;
5356 $node_tag_name =~ tr/A-Z/a-z/; # for SVG camelCase tag names
5357 if ($node_tag_name eq $token->{tag_name}) {
5358 ## Step 1
5359 ## generate implied end tags
5360 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
5361 !!!cp ('t430');
5362 ## NOTE: |<ruby><rt></ruby>|.
5363 ## ISSUE: <ruby><rt></rt> will also take this code path,
5364 ## which seems wrong.
5365 pop @{$self->{open_elements}};
5366 $node_i++;
5367 }
5368
5369 ## Step 2
5370 my $current_tag_name
5371 = $self->{open_elements}->[-1]->[0]->manakai_local_name;
5372 $current_tag_name =~ tr/A-Z/a-z/;
5373 if ($current_tag_name ne $token->{tag_name}) {
5374 !!!cp ('t431');
5375 ## NOTE: <x><y></x>
5376 !!!parse-error (type => 'not closed',
5377 text => $self->{open_elements}->[-1]->[0]
5378 ->manakai_local_name,
5379 token => $token);
5380 } else {
5381 !!!cp ('t432');
5382 }
5383
5384 ## Step 3
5385 splice @{$self->{open_elements}}, $node_i if $node_i < 0;
5386
5387 !!!next-token;
5388 last S2;
5389 } else {
5390 ## Step 3
5391 if (not ($node->[1] & FORMATTING_EL) and
5392 #not $phrasing_category->{$node->[1]} and
5393 ($node->[1] & SPECIAL_EL or
5394 $node->[1] & SCOPING_EL)) {
5395 !!!cp ('t433');
5396 !!!parse-error (type => 'unmatched end tag',
5397 text => $token->{tag_name}, token => $token);
5398 ## Ignore the token
5399 !!!next-token;
5400 last S2;
5401
5402 ## NOTE: |<span><dd></span>a|: In Safari 3.1.2 and Opera
5403 ## 9.27, "a" is a child of <dd> (conforming). In
5404 ## Firefox 3.0.2, "a" is a child of <body>. In WinIE 7,
5405 ## "a" is a child of both <body> and <dd>.
5406 }
5407
5408 !!!cp ('t434');
5409 }
5410
5411 ## Step 4
5412 $node_i--;
5413 $node = $self->{open_elements}->[$node_i];
5414
5415 ## Step 5;
5416 redo S2;
5417 } # S2
5418 next B;
5419 }
5420 }
5421 next B;
5422 } continue { # B
5423 if ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) {
5424 ## NOTE: The code below is executed in cases where it does not have
5425 ## to be, but it is harmless even in those cases.
5426 ## has an element in scope
5427 INSCOPE: {
5428 for (reverse 0..$#{$self->{open_elements}}) {
5429 my $node = $self->{open_elements}->[$_];
5430 if ($node->[1] & FOREIGN_EL) {
5431 last INSCOPE;
5432 } elsif ($node->[1] & SCOPING_EL) {
5433 last;
5434 }
5435 }
5436
5437 ## NOTE: No foreign element in scope.
5438 $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
5439 } # INSCOPE
5440 }
5441 } # B
5442
5443 ## Stop parsing # MUST
5444
5445 ## TODO: script stuffs
5446 } # _tree_construct_main
5447
## set_inner_html ($class, $node, $string, $onerror, $get_wrapper)
##
## Replaces the children of |$node| with the result of parsing
## |$string| as HTML.
##
## Arguments (|$class| and |$node| are shifted off |@_| first, so the
## indices below are the ones used in the body):
##   $_[0] - the string to parse; it is always accessed through |@_|
##           and never copied into a lexical.
##   $_[1] - optional error callback.  In the element case it is called
##           with named arguments (line, column, type, ...); when it is
##           false a default handler that |warn|s is used instead.  In
##           the document case it is handed to |parse_char_string| as is.
##   $_[2] - optional callback that receives the input handle and
##           returns the handle to read from; defaults to the identity.
##
## Behaviour by |$node->node_type|:
##   9 (a document in DOM numbering) - all children are removed and the
##     string is parsed into the node by |parse_char_string|.
##   1 (an element) - the string is parsed in a freshly created document
##     with |$node| as the context element, then the parsed nodes are
##     moved under |$node|.
##   anything else - the sub |die|s.
##
## There is no explicit |return|.
## NOTE(review): The |($$$$;$)| prototype is ignored by Perl if this sub
## is invoked as a method (the leading |$class| argument suggests it is).
5448 sub set_inner_html ($$$$;$) {
5449 my $class = shift;
5450 my $node = shift;
5451 #my $s = \$_[0];
5452 my $onerror = $_[1];
5453 my $get_wrapper = $_[2] || sub ($) { return $_[0] };
5454
5455 ## ISSUE: Should {confident} be true?
5456
5457 my $nt = $node->node_type;
5458 if ($nt == 9) {
5459 # MUST
5460
5461 ## Step 1 # MUST
5462 ## TODO: If the document has an active parser, ...
5463 ## ISSUE: There is an issue in the spec.
5464
5465 ## Step 2 # MUST
## Iterate over a copy of the child list, presumably so that removing
## nodes does not disturb the iteration.
5466 my @cn = @{$node->child_nodes};
5467 for (@cn) {
5468 $node->remove_child ($_);
5469 }
5470
5471 ## Step 3, 4, 5 # MUST
5472 $class->parse_char_string ($_[0] => $node, $onerror, $get_wrapper);
5473 } elsif ($nt == 1) {
5474 ## TODO: If non-html element
5475
5476 ## NOTE: Most of this code is copied from |parse_string|
5477
5478 ## TODO: Support for $get_wrapper
5479
5480 ## Step 1 # MUST
## Parse into a scratch HTML document created from the implementation
## of the context node's owner document, using a new parser object.
5481 my $this_doc = $node->owner_document;
5482 my $doc = $this_doc->implementation->create_document;
5483 $doc->manakai_is_html (1);
5484 my $p = $class->new;
5485 $p->{document} = $doc;
5486
5487 ## Step 8 # MUST
## NOTE(review): |$i| is not referenced again in this sub.
5488 my $i = 0;
5489 $p->{line_prev} = $p->{line} = 1;
5490 $p->{column_prev} = $p->{column} = 0;
5491 require Whatpm::Charset::DecodeHandle;
5492 my $input = Whatpm::Charset::DecodeHandle::CharString->new (\($_[0]));
5493 $input = $get_wrapper->($input);
## |set_nc| loads the code point of the next input character into
## |$self->{nc}|, or -1 when no character could be read.  It keeps the
## line/column counters up to date, turns CR and CR LF into a single
## LF and replaces NULL by U+FFFD.
## NOTE(review): The body uses both its argument |$self| and the
## captured |$p|; presumably they are the same parser object.
5494 $p->{set_nc} = sub {
5495 my $self = shift;
5496
5497 my $char = '';
5498 if (defined $self->{next_nc}) {
## A character read ahead while handling a CR (see below) is
## consumed first.
5499 $char = $self->{next_nc};
5500 delete $self->{next_nc};
5501 $self->{nc} = ord $char;
5502 } else {
5503 $self->{char_buffer} = '';
5504 $self->{char_buffer_pos} = 0;
5505
## Fast path: try to fill the (just emptied) buffer with a run of
## characters that are none of NULL, LF and CR.  Presumably
## |manakai_read_until| returns the number of characters stored.
5506 my $count = $input->manakai_read_until
5507 ($self->{char_buffer}, qr/[^\x00\x0A\x0D]/,
5508 $self->{char_buffer_pos});
5509 if ($count) {
## Only the first buffered character is consumed here; the column
## advances by one and the remainder stays in the buffer.
5510 $self->{line_prev} = $self->{line};
5511 $self->{column_prev} = $self->{column};
5512 $self->{column}++;
5513 $self->{nc}
5514 = ord substr ($self->{char_buffer},
5515 $self->{char_buffer_pos}++, 1);
5516 return;
5517 }
5518
## Slow path: read exactly one character (presumably a NULL, LF or
## CR, since the bulk read above stored nothing).
5519 if ($input->read ($char, 1)) {
5520 $self->{nc} = ord $char;
5521 } else {
5522 $self->{nc} = -1;
5523 return;
5524 }
5525 }
5526
5527 ($p->{line_prev}, $p->{column_prev}) = ($p->{line}, $p->{column});
5528 $p->{column}++;
5529
5530 if ($self->{nc} == 0x000A) { # LF
5531 $p->{line}++;
5532 $p->{column} = 0;
5533 !!!cp ('i1');
5534 } elsif ($self->{nc} == 0x000D) { # CR
5535 ## TODO: support for abort/streaming
## Read one character ahead: an LF directly after the CR is dropped,
## any other character is kept in |next_nc| for the next call.
5536 my $next = '';
5537 if ($input->read ($next, 1) and $next ne "\x0A") {
5538 $self->{next_nc} = $next;
5539 }
5540 $self->{nc} = 0x000A; # LF # MUST
5541 $p->{line}++;
5542 $p->{column} = 0;
5543 !!!cp ('i2');
5544 } elsif ($self->{nc} == 0x0000) { # NULL
5545 !!!cp ('i4');
5546 !!!parse-error (type => 'NULL');
5547 $self->{nc} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
5548 }
5549 };
5550
## |read_until| copies a run of "ordinary" characters into the caller's
## scalar |$_[0]| and returns the number of characters copied (0 when
## nothing could be copied).  |$_[1]| is text placed inside a negated
## character class, i.e. the characters at which the run must stop (in
## addition to NULL, LF and CR); |$_[2]| is the offset in |$_[0]| at
## which the run is stored.
5551 $p->{read_until} = sub {
5552 #my ($scalar, $specials_range, $offset) = @_;
## Nothing is bulk-read while a read-ahead character is pending.
5553 return 0 if defined $p->{next_nc};
5554
5555 my $pattern = qr/[^$_[1]\x00\x0A\x0D]/;
5556 my $offset = $_[2] || 0;
5557
5558 if ($p->{char_buffer_pos} < length $p->{char_buffer}) {
## Unread characters remain in the buffer: take the longest run that
## matches at the current buffer position.  |$-[0]| and |$+[0]| are
## the start and end offsets of the match.
5559 pos ($p->{char_buffer}) = $p->{char_buffer_pos};
5560 if ($p->{char_buffer} =~ /\G(?>$pattern)+/) {
5561 substr ($_[0], $offset)
5562 = substr ($p->{char_buffer}, $-[0], $+[0] - $-[0]);
5563 my $count = $+[0] - $-[0];
5564 if ($count) {
5565 $p->{column} += $count;
5566 $p->{char_buffer_pos} += $count;
5567 $p->{line_prev} = $p->{line};
5568 $p->{column_prev} = $p->{column} - 1;
5569 $p->{nc} = -1;
5570 }
5571 return $count;
5572 } else {
5573 return 0;
5574 }
5575 } else {
## Buffer exhausted: read directly from the input handle.
## NOTE(review): This call passes the raw |$_[2]| rather than
## |$offset|, and, unlike the branch above, adds |$count| to
## |column_prev| and leaves |line_prev| untouched.
5576 my $count = $input->manakai_read_until ($_[0], $pattern, $_[2]);
5577 if ($count) {
5578 $p->{column} += $count;
5579 $p->{column_prev} += $count;
5580 $p->{nc} = -1;
5581 }
5582 return $count;
5583 }
5584 }; # $p->{read_until}
5585
## Error reporting: use the caller's callback if one was given,
## otherwise |warn| with the error type and position.  The position of
## the token is preferred when the token carries a line number.
5586 my $ponerror = $onerror || sub {
5587 my (%opt) = @_;
5588 my $line = $opt{line};
5589 my $column = $opt{column};
5590 if (defined $opt{token} and defined $opt{token}->{line}) {
5591 $line = $opt{token}->{line};
5592 $column = $opt{token}->{column};
5593 }
5594 warn "Parse error ($opt{type}) at line $line column $column\n";
5595 };
## The parser's current line/column are passed first, so |line| or
## |column| given by the reporter come later in the list (and win when
## the list is read into a hash, as the default handler does).
5596 $p->{parse_error} = sub {
5597 $ponerror->(line => $p->{line}, column => $p->{column}, @_);
5598 };
5599
## Errors raised by the input handle are forwarded to the same handler
## with |layer => 'encode'| and the column after the current one.
5600 my $char_onerror = sub {
5601 my (undef, $type, %opt) = @_;
5602 $ponerror->(layer => 'encode',
5603 line => $p->{line}, column => $p->{column} + 1,
5604 %opt, type => $type);
5605 }; # $char_onerror
5606 $input->onerror ($char_onerror);
5607
5608 $p->_initialize_tokenizer;
5609 $p->_initialize_tree_constructor;
5610
5611 ## Step 2
## Choose the tokenizer's initial content model from the local name of
## the context element; names not listed fall back to PCDATA.
5612 my $node_ln = $node->manakai_local_name;
5613 $p->{content_model} = {
5614 title => RCDATA_CONTENT_MODEL,
5615 textarea => RCDATA_CONTENT_MODEL,
5616 style => CDATA_CONTENT_MODEL,
5617 script => CDATA_CONTENT_MODEL,
5618 xmp => CDATA_CONTENT_MODEL,
5619 iframe => CDATA_CONTENT_MODEL,
5620 noembed => CDATA_CONTENT_MODEL,
5621 noframes => CDATA_CONTENT_MODEL,
5622 noscript => CDATA_CONTENT_MODEL,
5623 plaintext => PLAINTEXT_CONTENT_MODEL,
5624 }->{$node_ln};
5625 $p->{content_model} = PCDATA_CONTENT_MODEL
5626 unless defined $p->{content_model};
5627
## Record the context element together with its category, looked up
## in |$el_category| (defined outside this sub).
5628 $p->{inner_html_node} = [$node, $el_category->{$node_ln}];
5629 ## TODO: Foreign element OK?
5630
5631 ## Step 3
5632 my $root = $doc->create_element_ns
5633 ('http://www.w3.org/1999/xhtml', [undef, 'html']);
5634
5635 ## Step 4 # MUST
5636 $doc->append_child ($root);
5637
5638 ## Step 5 # MUST
5639 push @{$p->{open_elements}}, [$root, $el_category->{html}];
5640
5641 undef $p->{head_element};
5642 undef $p->{head_element_inserted};
5643
5644 ## Step 6 # MUST
5645 $p->_reset_insertion_mode;
5646
5647 ## Step 7 # MUST
## Walk from the context node itself up through its ancestors; the
## first element in the XHTML namespace whose local name is |form|
## becomes the parser's form element.
5648 my $anode = $node;
5649 AN: while (defined $anode) {
5650 if ($anode->node_type == 1) {
5651 my $nsuri = $anode->namespace_uri;
5652 if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml') {
5653 if ($anode->manakai_local_name eq 'form') {
5654 !!!cp ('i5');
5655 $p->{form_element} = $anode;
5656 last AN;
5657 }
5658 }
5659 }
5660 $anode = $anode->parent_node;
5661 } # AN
5662
5663 ## Step 9 # MUST
## |!!!next-token| is a macro of this .src file; presumably its
## expansion refers to |$self|, hence the local alias for |$p|.
5664 {
5665 my $self = $p;
5666 !!!next-token;
5667 }
5668 $p->_tree_construction_main;
5669
5670 ## Step 10 # MUST
## Empty the context node, iterating over a copy of its child list.
5671 my @cn = @{$node->child_nodes};
5672 for (@cn) {
5673 $node->remove_child ($_);
5674 }
5675 ## ISSUE: mutation events? read-only?
5676
5677 ## Step 11 # MUST
## Move every child of the scratch root element into the context node,
## adopting it into the context node's document first.
5678 @cn = @{$root->child_nodes};
5679 for (@cn) {
5680 $this_doc->adopt_node ($_);
5681 $node->append_child ($_);
5682 }
5683 ## ISSUE: mutation events?
5684
5685 $p->_terminate_tree_constructor;
5686
## The |parse_error| closure is stored in |$p| and refers to |$p|;
## deleting it breaks that reference cycle.
## NOTE(review): The |set_nc| and |read_until| closures also refer to
## |$p|; whether they are released elsewhere is not visible here.
5687 delete $p->{parse_error}; # delete loop
5688 } else {
5689 die "$0: |set_inner_html| is not defined for node of type $nt";
5690 }
5691 } # set_inner_html
5692
5693 } # tree construction stage
5694
## Whatpm::HTML::RestartParser: a class with no methods of its own that
## inherits from |Error| (imported at the top of this file with
## |use Error qw(:try)|).
## NOTE(review): Presumably thrown and caught to make the parser start
## over; its users are outside this part of the file.
5695 package Whatpm::HTML::RestartParser;
5696 push our @ISA, 'Error';
5697
## True value, so that loading the module with |require|/|use| succeeds.
5698 1;
5699 # $Date: 2009/07/02 23:15:37 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24