/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Contents of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.232 - (show annotations) (download) (as text)
Sun Sep 6 01:30:08 2009 UTC (16 years, 4 months ago) by wakaba
Branch: MAIN
Changes since 1.231: +6 -6 lines
File MIME type: application/x-wais-source
++ whatpm/t/ChangeLog	6 Sep 2009 01:29:37 -0000
	* tree-test-void.dat: Updated test results related to the
	|eventsource| tag (HTML5 revision 2870).

2009-09-06  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	6 Sep 2009 01:27:07 -0000
	* HTML.pm.src: Dropped the |eventsource| element (HTML5 revision
	2870).

2009-09-06  Wakaba  <wakaba@suika.fam.cx>

1 package Whatpm::HTML;
2 use strict;
3 our $VERSION=do{my @r=(q$Revision: 1.213 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 use Error qw(:try);
5
6 use Whatpm::HTML::Tokenizer;
7
8 ## NOTE: This module doesn't check all HTML5 parse errors; character
9 ## encoding related parse errors are expected to be handled by relevant
10 ## modules.
11 ## Parse errors for control characters that are not allowed in HTML5
12 ## documents, for surrogate code points, and for noncharacter code
13 ## points, as well as U+FFFD substitutions for characters whose code points
14 ## are higher than U+10FFFF may be detected by combining the parser with
15 ## the checker implemented by Whatpm::Charset::UnicodeChecker (for its
16 ## usage example, see |t/HTML-tree.t| in the Whatpm package or the
17 ## WebHACC::Language::HTML module in the WebHACC package).
18
19 ## ISSUE:
20 ## var doc = implementation.createDocument (null, null, null);
21 ## doc.write ('');
22 ## alert (doc.compatMode);
23
24 require IO::Handle;
25
26 ## Namespace URLs
27
## Namespace URLs used by the parser, declared as file-scoped lexicals.
my (
  $HTML_NS,
  $MML_NS,
  $SVG_NS,
  $XLINK_NS,
  $XML_NS,
  $XMLNS_NS,
) = (
  'http://www.w3.org/1999/xhtml',
  'http://www.w3.org/1998/Math/MathML',
  'http://www.w3.org/2000/svg',
  'http://www.w3.org/1999/xlink',
  'http://www.w3.org/XML/1998/namespace',
  'http://www.w3.org/2000/xmlns/',
);
34
35 ## Element categories
36
37 ## Bits 12-15
## Each constant below is a single bit, written as |1 << bit number|.
sub SPECIAL_EL () { 1 << 15 }
sub SCOPING_EL () { 1 << 14 }
sub FORMATTING_EL () { 1 << 13 }
sub PHRASING_EL () { 1 << 12 }

## Bits 10-11
#sub FOREIGN_EL () { 1 << 11 } # see Whatpm::HTML::Tokenizer
sub FOREIGN_FLOW_CONTENT_EL () { 1 << 10 }

## Bits 6-9
sub TABLE_SCOPING_EL () { 1 << 9 }
sub TABLE_ROWS_SCOPING_EL () { 1 << 8 }
sub TABLE_ROW_SCOPING_EL () { 1 << 7 }
sub TABLE_ROWS_EL () { 1 << 6 }

## Bit 5
sub ADDRESS_DIV_P_EL () { 1 << 5 }

## Bit 4
## NOTE: Used in </body> and EOF algorithms.
sub ALL_END_TAG_OPTIONAL_EL () { 1 << 4 }

## Bit 3
## NOTE: Used in "generate implied end tags" algorithm.
## NOTE: There is a code where a modified version of
## END_TAG_OPTIONAL_EL is used in "generate implied end tags"
## implementation (search for the algorithm name).
sub END_TAG_OPTIONAL_EL () { 1 << 3 }
66
67 ## Bits 0-2
68
## Composite element categories: one or more of the single-bit flags
## combined with a small number in bits 0-2.

## Categories based on SPECIAL_EL only.
sub MISC_SPECIAL_EL () { SPECIAL_EL | 0 }
sub FORM_EL () { SPECIAL_EL | 1 }
sub FRAMESET_EL () { SPECIAL_EL | 2 }
sub HEADING_EL () { SPECIAL_EL | 3 }
sub SELECT_EL () { SPECIAL_EL | 4 }
sub SCRIPT_EL () { SPECIAL_EL | 5 }

## SPECIAL_EL categories that carry additional flags.
sub ADDRESS_DIV_EL () { SPECIAL_EL | ADDRESS_DIV_P_EL | 1 }
sub BODY_EL () { SPECIAL_EL | ALL_END_TAG_OPTIONAL_EL | 1 }

sub DTDD_EL () {
  SPECIAL_EL | END_TAG_OPTIONAL_EL | ALL_END_TAG_OPTIONAL_EL | 2
}
sub LI_EL () {
  SPECIAL_EL | END_TAG_OPTIONAL_EL | ALL_END_TAG_OPTIONAL_EL | 4
}
sub P_EL () {
  SPECIAL_EL | ADDRESS_DIV_P_EL | END_TAG_OPTIONAL_EL
      | ALL_END_TAG_OPTIONAL_EL | 1
}

sub TABLE_ROW_EL () {
  SPECIAL_EL | TABLE_ROWS_EL | TABLE_ROW_SCOPING_EL
      | ALL_END_TAG_OPTIONAL_EL | 1
}
sub TABLE_ROW_GROUP_EL () {
  SPECIAL_EL | TABLE_ROWS_EL | TABLE_ROWS_SCOPING_EL
      | ALL_END_TAG_OPTIONAL_EL | 1
}

## Categories based on SCOPING_EL.
sub MISC_SCOPING_EL () { SCOPING_EL | 0 }
sub BUTTON_EL () { SCOPING_EL | 1 }
sub CAPTION_EL () { SCOPING_EL | 2 }
sub HTML_EL () {
  SCOPING_EL | TABLE_SCOPING_EL | TABLE_ROWS_SCOPING_EL
      | TABLE_ROW_SCOPING_EL | ALL_END_TAG_OPTIONAL_EL | 1
}
sub TABLE_EL () {
  SCOPING_EL | TABLE_ROWS_EL | TABLE_SCOPING_EL | 1
}
sub TABLE_CELL_EL () {
  SCOPING_EL | TABLE_ROW_SCOPING_EL | ALL_END_TAG_OPTIONAL_EL | 1
}

## Categories based on FORMATTING_EL.
sub MISC_FORMATTING_EL () { FORMATTING_EL | 0 }
sub A_EL () { FORMATTING_EL | 1 }
sub NOBR_EL () { FORMATTING_EL | 2 }

## Categories based on PHRASING_EL.
sub RUBY_EL () { PHRASING_EL | 1 }

## ISSUE: ALL_END_TAG_OPTIONAL_EL?
sub OPTGROUP_EL () { PHRASING_EL | END_TAG_OPTIONAL_EL | 1 }
sub OPTION_EL () { PHRASING_EL | END_TAG_OPTIONAL_EL | 2 }
sub RUBY_COMPONENT_EL () { PHRASING_EL | END_TAG_OPTIONAL_EL | 4 }

## NOTE: FOREIGN_EL is not defined in this file (see
## Whatpm::HTML::Tokenizer).
sub MML_AXML_EL () { PHRASING_EL | FOREIGN_EL | 1 }
150
## Map from HTML element local names to element category constants.
## The table is filled in one category at a time.
## NOTE: |image| has no entry (it is commented out in the spec).
## XXX keygen? (Whether a void element is in Special or not does not
## affect to the processing, however.)
my $el_category = {};

$el_category->{$_} = MISC_SPECIAL_EL for qw(
  area article aside base basefont bgsound blockquote br center col
  colgroup command datagrid details dialog dir dl embed fieldset figure
  footer frame head header hr iframe img input isindex link listing menu
  meta nav noembed noframes noscript ol param plaintext pre script
  section spacer style textarea title ul wbr
);
$el_category->{$_} = FORMATTING_EL for qw(
  b big em font i s small strike strong tt u
);
$el_category->{$_} = MISC_SCOPING_EL for qw(applet marquee object);
$el_category->{$_} = HEADING_EL for qw(h1 h2 h3 h4 h5 h6);
$el_category->{$_} = ADDRESS_DIV_EL for qw(address div);
$el_category->{$_} = DTDD_EL for qw(dd dt);
$el_category->{$_} = RUBY_COMPONENT_EL for qw(rp rt);
$el_category->{$_} = TABLE_ROW_GROUP_EL for qw(tbody tfoot thead);
$el_category->{$_} = TABLE_CELL_EL for qw(td th);

## Elements whose category constant is used by no other element here.
$el_category->{'a'} = A_EL;
$el_category->{'body'} = BODY_EL;
$el_category->{'button'} = BUTTON_EL;
$el_category->{'caption'} = CAPTION_EL;
$el_category->{'form'} = FORM_EL;
$el_category->{'frameset'} = FRAMESET_EL;
$el_category->{'html'} = HTML_EL;
$el_category->{'li'} = LI_EL;
$el_category->{'nobr'} = NOBR_EL;
$el_category->{'optgroup'} = OPTGROUP_EL;
$el_category->{'option'} = OPTION_EL;
$el_category->{'p'} = P_EL;
$el_category->{'ruby'} = RUBY_EL;
$el_category->{'select'} = SELECT_EL;
$el_category->{'table'} = TABLE_EL;
$el_category->{'tr'} = TABLE_ROW_EL;
252
## Element categories for MathML and SVG elements, keyed by namespace
## URL and then by local name.
## NOTE: In addition, FOREIGN_EL is set to non-HTML elements.
my $el_category_f = do {
  ## Flag combination shared by most of the entries below.
  my $foreign_flow = FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL;

  +{
    $MML_NS => {
      'annotation-xml' => MML_AXML_EL,
      (map { ($_ => $foreign_flow) } qw(mi mo mn ms mtext)),
    },
    $SVG_NS => {
      foreignObject => SCOPING_EL | $foreign_flow,
      (map { ($_ => $foreign_flow) } qw(desc title)),
    },
  };
};
269
## Map from lowercased SVG attribute names to their mixed-case forms.
## Every key is simply the lowercase form of its value, so the table
## is derived from the list of mixed-case names.
my $svg_attr_name = {};
$svg_attr_name->{lc $_} = $_ for qw(
  attributeName attributeType baseFrequency baseProfile calcMode
  clipPathUnits contentScriptType contentStyleType diffuseConstant
  edgeMode externalResourcesRequired filterRes filterUnits glyphRef
  gradientTransform gradientUnits kernelMatrix kernelUnitLength
  keyPoints keySplines keyTimes lengthAdjust limitingConeAngle
  markerHeight markerUnits markerWidth maskContentUnits maskUnits
  numOctaves pathLength patternContentUnits patternTransform
  patternUnits pointsAtX pointsAtY pointsAtZ preserveAlpha
  preserveAspectRatio primitiveUnits refX refY repeatCount repeatDur
  requiredExtensions requiredFeatures specularConstant specularExponent
  spreadMethod startOffset stdDeviation stitchTiles surfaceScale
  systemLanguage tableValues targetX targetY textLength viewBox
  viewTarget xChannelSelector yChannelSelector zoomAndPan
);
334
## Map from the qualified names of namespaced attributes on foreign
## elements to |[namespace URL, [prefix, local name]]|.
my $foreign_attr_xname = {};

for my $local_name (qw(actuate arcrole href role show title type)) {
  $foreign_attr_xname->{"xlink:$local_name"}
      = [$XLINK_NS, ['xlink', $local_name]];
}

for my $local_name (qw(base lang space)) {
  $foreign_attr_xname->{"xml:$local_name"}
      = [$XML_NS, ['xml', $local_name]];
}

## The |xmlns| attribute itself has no prefix.
$foreign_attr_xname->{'xmlns'} = [$XMLNS_NS, [undef, 'xmlns']];
$foreign_attr_xname->{'xmlns:xlink'} = [$XMLNS_NS, ['xmlns', 'xlink']];
349
350 ## ISSUE: xmlns:xlink="non-xlink-ns" is not an error.
351
352 ## TODO: Invoke the reset algorithm when a resettable element is
353 ## created (cf. HTML5 revision 2259).
354
## Parse an HTML document given as a string of bytes.
##
## Arguments: the parser (or class), the charset name (or undef), the
## byte string or a reference to it, followed by the remaining
## arguments, which are handed to |parse_byte_stream| unchanged.
##
## The byte string is wrapped in an in-memory read handle.  Dies if
## that handle cannot be opened (for example, when the string contains
## code points above 0xFF and is therefore not a byte string), instead
## of silently continuing with an unopened handle.
##
## Returns whatever |parse_byte_stream| returns.
sub parse_byte_string ($$$$;$) {
  my $self = shift;
  my $charset_name = shift;

  ## Refer to the caller's string rather than copying it.
  my $bytes_ref = ref $_[0] ? $_[0] : \($_[0]);
  open my $input, '<', $bytes_ref
      or die "parse_byte_string: cannot open in-memory handle: $!\n";

  return $self->parse_byte_stream ($charset_name, $input, @_[1..$#_]);
} # parse_byte_string
361
## Parse an HTML document from a byte stream, choosing a character
## encoding first and then handing the decoded character stream to
## |parse_char_stream|.
##
## Arguments (in the order shown by the commented-out list below):
##   $self         - the parser; if the invocant is not a reference, a
##                   new parser is created with |new|
##   $charset_name - charset name supplied by the caller, or undef
##   $byte_stream  - handle from which bytes are read with |getc|
##   $doc          - document, passed on to |parse_char_stream|
##   $onerror      - optional error callback; the default |warn|s
##   $get_wrapper  - optional code reference which receives the decoded
##                   character stream and returns the stream to parse;
##                   the default returns its argument
##
## Returns the value returned by |parse_char_stream|.
362 sub parse_byte_stream ($$$$;$$) {
363 # my ($self, $charset_name, $byte_stream, $doc, $onerror, $get_wrapper) = @_;
364 my $self = ref $_[0] ? shift : shift->new;
365 my $charset_name = shift;
366 my $byte_stream = $_[0];
367
## Default error handler: report only the error type.
368 my $onerror = $_[2] || sub {
369 my (%opt) = @_;
370 warn "Parse error ($opt{type})\n";
371 };
372 $self->{parse_error} = $onerror; # updated later by parse_char_string
373
374 my $get_wrapper = $_[3] || sub ($) {
375 return $_[0]; # $_[0] = byte stream handle, returned = arg to char handle
376 };
377
378 ## HTML5 encoding sniffing algorithm
379 require Message::Charset::Info;
380 my $charset;
381 my $buffer;
382 my ($char_stream, $e_status);
383
## The block is left with |last SNIFFING| as soon as a decode handle
## has been obtained; |$self->{confident}| records whether the choice
## is certain.
384 SNIFFING: {
385 ## NOTE: By setting |allow_fallback| option true when the
386 ## |get_decode_handle| method is invoked, we ignore what the HTML5
387 ## spec requires, i.e. unsupported encoding should be ignored.
388 ## TODO: We should not do this unless the parser is invoked
389 ## in the conformance checking mode, in which this behavior
390 ## would be useful.
391
392 ## Step 1
## Use the charset name given by the caller, if it is supported.
393 if (defined $charset_name) {
394 $charset = Message::Charset::Info->get_by_html_name ($charset_name);
395 ## TODO: Is this ok? Transfer protocol's parameter should be
396 ## interpreted in its semantics?
397
398 ($char_stream, $e_status) = $charset->get_decode_handle
399 ($byte_stream, allow_error_reporting => 1,
400 allow_fallback => 1);
401 if ($char_stream) {
402 $self->{confident} = 1;
403 last SNIFFING;
404 } else {
405 !!!parse-error (type => 'charset:not supported',
406 layer => 'encode',
407 line => 1, column => 1,
408 value => $charset_name,
409 level => $self->{level}->{uncertain});
410 }
411 }
412
413 ## Step 2
## Read up to 1024 bytes; they are examined by the following steps.
414 my $byte_buffer = '';
415 for (1..1024) {
416 my $char = $byte_stream->getc;
417 last unless defined $char;
418 $byte_buffer .= $char;
419 } ## TODO: timeout
420
421 ## Step 3
## A byte order mark at the start of the buffer decides the encoding.
422 if ($byte_buffer =~ /^\xFE\xFF/) {
423 $charset = Message::Charset::Info->get_by_html_name ('utf-16be');
424 ($char_stream, $e_status) = $charset->get_decode_handle
425 ($byte_stream, allow_error_reporting => 1,
426 allow_fallback => 1, byte_buffer => \$byte_buffer);
427 $self->{confident} = 1;
428 last SNIFFING;
429 } elsif ($byte_buffer =~ /^\xFF\xFE/) {
430 $charset = Message::Charset::Info->get_by_html_name ('utf-16le');
431 ($char_stream, $e_status) = $charset->get_decode_handle
432 ($byte_stream, allow_error_reporting => 1,
433 allow_fallback => 1, byte_buffer => \$byte_buffer);
434 $self->{confident} = 1;
435 last SNIFFING;
436 } elsif ($byte_buffer =~ /^\xEF\xBB\xBF/) {
437 $charset = Message::Charset::Info->get_by_html_name ('utf-8');
438 ($char_stream, $e_status) = $charset->get_decode_handle
439 ($byte_stream, allow_error_reporting => 1,
440 allow_fallback => 1, byte_buffer => \$byte_buffer);
441 $self->{confident} = 1;
442 last SNIFFING;
443 }
444
445 ## Step 4
446 ## TODO: <meta charset>
447
448 ## Step 5
449 ## TODO: from history
450
451 ## Step 6
## Ask the charset detector to guess from the buffered bytes.  The
## result is used tentatively (|confident| is set to 0).
452 require Whatpm::Charset::UniversalCharDet;
453 $charset_name = Whatpm::Charset::UniversalCharDet->detect_byte_string
454 ($byte_buffer);
455 if (defined $charset_name) {
456 $charset = Message::Charset::Info->get_by_html_name ($charset_name);
457
458 require Whatpm::Charset::DecodeHandle;
459 $buffer = Whatpm::Charset::DecodeHandle::ByteBuffer->new
460 ($byte_stream);
461 ($char_stream, $e_status) = $charset->get_decode_handle
462 ($buffer, allow_error_reporting => 1,
463 allow_fallback => 1, byte_buffer => \$byte_buffer);
464 if ($char_stream) {
465 $buffer->{buffer} = $byte_buffer;
466 !!!parse-error (type => 'sniffing:chardet',
467 text => $charset_name,
468 level => $self->{level}->{info},
469 layer => 'encode',
470 line => 1, column => 1);
471 $self->{confident} = 0;
472 last SNIFFING;
473 }
474 }
475
476 ## Step 7: default
477 ## TODO: Make this configurable.
478 $charset = Message::Charset::Info->get_by_html_name ('windows-1252');
479 ## NOTE: We choose |windows-1252| here, since |utf-8| should be
480 ## detectable in the step 6.
481 require Whatpm::Charset::DecodeHandle;
482 $buffer = Whatpm::Charset::DecodeHandle::ByteBuffer->new
483 ($byte_stream);
484 ($char_stream, $e_status)
485 = $charset->get_decode_handle ($buffer,
486 allow_error_reporting => 1,
487 allow_fallback => 1,
488 byte_buffer => \$byte_buffer);
489 $buffer->{buffer} = $byte_buffer;
490 !!!parse-error (type => 'sniffing:default',
491 text => 'windows-1252',
492 level => $self->{level}->{info},
493 line => 1, column => 1,
494 layer => 'encode');
495 $self->{confident} = 0;
496 } # SNIFFING
497
## Record the chosen encoding in |$self->{input_encoding}| and report
## when the status flags returned with the decode handle say that a
## fallback implementation is in use or that the implementation is not
## an error-reporting one.
498 if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) {
499 $self->{input_encoding} = $charset->get_iana_name; ## TODO: Should we set actual charset decoder's encoding name?
500 !!!parse-error (type => 'chardecode:fallback',
501 #text => $self->{input_encoding},
502 level => $self->{level}->{uncertain},
503 line => 1, column => 1,
504 layer => 'encode');
505 } elsif (not ($e_status &
506 Message::Charset::Info::ERROR_REPORTING_ENCODING_IMPL ())) {
507 $self->{input_encoding} = $charset->get_iana_name;
508 !!!parse-error (type => 'chardecode:no error',
509 text => $self->{input_encoding},
510 level => $self->{level}->{uncertain},
511 line => 1, column => 1,
512 layer => 'encode');
513 } else {
514 $self->{input_encoding} = $charset->get_iana_name;
515 }
516
## Callback stored in the parser.  It takes the parser, a charset name
## and a token, obtains a decode handle for that charset (updating
## |$charset|, |$char_stream| and |$e_status| of the enclosing
## subroutine) and, unless one of the early returns applies, throws
## Whatpm::HTML::RestartParser.
## NOTE(review): presumably invoked by the tree constructor when a
## charset declaration is found - the call site is not in this part of
## the file; confirm.
517 $self->{change_encoding} = sub {
518 my $self = shift;
519 $charset_name = shift;
520 my $token = shift;
521
522 $charset = Message::Charset::Info->get_by_html_name ($charset_name);
523 ($char_stream, $e_status) = $charset->get_decode_handle
524 ($byte_stream, allow_error_reporting => 1, allow_fallback => 1,
525 byte_buffer => \ $buffer->{buffer});
526
527 if ($char_stream) { # if supported
528 ## "Change the encoding" algorithm:
529
530 ## Step 1
531 if (defined $self->{input_encoding} and
532 $self->{input_encoding} eq $charset_name) {
533 !!!parse-error (type => 'charset label:matching',
534 text => $charset_name,
535 level => $self->{level}->{info});
536 $self->{confident} = 1;
537 return;
538 }
539
540 ## Step 2 (HTML5 revision 3205)
541 if (defined $self->{input_encoding} and
542 Message::Charset::Info->get_by_html_name ($self->{input_encoding})
543 ->{category} & Message::Charset::Info::CHARSET_CATEGORY_UTF16 ()) {
544 $self->{confident} = 1;
545 return;
546 }
547
548 ## Step 3
## A requested charset in the UTF-16 category is replaced by utf-8.
549 if ($charset->{category} &
550 Message::Charset::Info::CHARSET_CATEGORY_UTF16 ()) {
551 $charset = Message::Charset::Info->get_by_html_name ('utf-8');
552 ($char_stream, $e_status) = $charset->get_decode_handle
553 ($byte_stream,
554 byte_buffer => \ $buffer->{buffer});
555 }
556 $charset_name = $charset->get_iana_name;
557
558 !!!parse-error (type => 'charset label detected',
559 text => $self->{input_encoding},
560 value => $charset_name,
561 level => $self->{level}->{warn},
562 token => $token);
563
564 ## Step 4
565 # if (can) {
566 ## change the encoding on the fly.
567 #$self->{confident} = 1;
568 #return;
569 # }
570
571 ## Step 5
572 throw Whatpm::HTML::RestartParser ();
573 }
574 }; # $self->{change_encoding}
575
## Error callback installed on the character stream.  It reports the
## error and, when an |octets| reference is supplied, overwrites the
## referenced value with U+FFFD.
576 my $char_onerror = sub {
577 my (undef, $type, %opt) = @_;
578 !!!parse-error (layer => 'encode',
579 line => $self->{line}, column => $self->{column} + 1,
580 %opt, type => $type);
581 if ($opt{octets}) {
582 ${$opt{octets}} = "\x{FFFD}"; # replacement character
583 }
584 };
585
586 my $wrapped_char_stream = $get_wrapper->($char_stream);
587 $wrapped_char_stream->onerror ($char_onerror);
588
589 my @args = ($_[1], $_[2]); # $doc, $onerror - $get_wrapper = undef;
590 my $return;
## First attempt.  If RestartParser is thrown, the handler below
## parses again with the decode handle prepared by |change_encoding|.
591 try {
592 $return = $self->parse_char_stream ($wrapped_char_stream, @args);
593 } catch Whatpm::HTML::RestartParser with {
594 ## NOTE: Invoked after {change_encoding}.
595
596 if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) {
597 $self->{input_encoding} = $charset->get_iana_name; ## TODO: Should we set actual charset decoder's encoding name?
598 !!!parse-error (type => 'chardecode:fallback',
599 level => $self->{level}->{uncertain},
600 #text => $self->{input_encoding},
601 line => 1, column => 1,
602 layer => 'encode');
603 } elsif (not ($e_status &
604 Message::Charset::Info::ERROR_REPORTING_ENCODING_IMPL ())) {
605 $self->{input_encoding} = $charset->get_iana_name;
606 !!!parse-error (type => 'chardecode:no error',
607 text => $self->{input_encoding},
608 level => $self->{level}->{uncertain},
609 line => 1, column => 1,
610 layer => 'encode');
611 } else {
612 $self->{input_encoding} = $charset->get_iana_name;
613 }
614 $self->{confident} = 1;
615
616 $wrapped_char_stream = $get_wrapper->($char_stream);
617 $wrapped_char_stream->onerror ($char_onerror);
618
619 $return = $self->parse_char_stream ($wrapped_char_stream, @args);
620 };
621 return $return;
622 } # parse_byte_stream
623
624 ## NOTE: HTML5 spec says that the encoding layer MUST NOT strip BOM
625 ## and the HTML layer MUST ignore it. However, we do strip the BOM in
626 ## the encoding layer and the HTML layer does not ignore any U+FEFF,
627 ## because the core part of our HTML parser expects a string of characters,
628 ## not a string of bytes or code units or anything which might contain a BOM.
629 ## Therefore, any parser interface that accepts a string of bytes,
630 ## such as |parse_byte_string| in this module, must ensure that it does
631 ## strip the BOM and never strip any ZWNBSP.
632
## Parse an HTML document given as a string of characters.
##
## Arguments: ($self, $s, $doc, $onerror, $get_wrapper), where $s is
## the string or a reference to it.  The string is wrapped in a
## Whatpm::Charset::DecodeHandle::CharString object and the remaining
## arguments are handed to |parse_char_stream| unchanged.
##
## Returns whatever |parse_char_stream| returns.
sub parse_char_string ($$$;$$) {
  my $self = $_[0];

  ## Refer to the caller's string rather than copying it.
  my $string_ref = ref $_[1] ? $_[1] : \($_[1]);

  require Whatpm::Charset::DecodeHandle;
  my $char_stream
      = Whatpm::Charset::DecodeHandle::CharString->new ($string_ref);

  return $self->parse_char_stream ($char_stream, @_[2..$#_]);
} # parse_char_string

## NOTE: Alias for backward compatibility.
*parse_string = \&parse_char_string;
642
## Parse an HTML document from a character stream into a document.
##
## Arguments, as read from |@_| below:
##   invocant - the parser; if it is not a reference, a new parser is
##              created with |new|
##   $_[0]    - the character stream (|$input|)
##   $_[1]    - the document, whose existing children are removed
##   $_[2]    - optional error callback; the default |warn|s with the
##              line and column of the error
##   $_[3]    - optional code reference which receives |$input| and
##              returns the stream that is actually read
##
## Returns |$self->{document}|.
643 sub parse_char_stream ($$$;$$) {
644 my $self = ref $_[0] ? shift : shift->new;
645 my $input = $_[0];
646 $self->{document} = $_[1];
647 @{$self->{document}->child_nodes} = ();
648
649 ## NOTE: |set_inner_html| copies most of this method's code
650
651 $self->{confident} = 1 unless exists $self->{confident};
652 $self->{document}->input_encoding ($self->{input_encoding})
653 if defined $self->{input_encoding};
654 ## TODO: |{input_encoding}| is needless?
655
656 $self->{line_prev} = $self->{line} = 1;
657 $self->{column_prev} = -1;
658 $self->{column} = 0;
## Callback which sets |$self->{nc}| to the code point of the next
## input character, or to -1 when nothing more can be read.  It keeps
## |line|, |column| and their |_prev| counterparts up to date, turns
## CR and CR LF into a single LF, and replaces U+0000 by U+FFFD after
## reporting a |NULL| error.  The parser is passed as its first
## argument.
659 $self->{set_nc} = sub {
660 my $self = shift;
661
662 my $char = '';
## A character read ahead while handling a CR is returned first.
663 if (defined $self->{next_nc}) {
664 $char = $self->{next_nc};
665 delete $self->{next_nc};
666 $self->{nc} = ord $char;
667 } else {
668 $self->{char_buffer} = '';
669 $self->{char_buffer_pos} = 0;
670
## Fast path: read a run of characters other than NULL, LF and CR
## into |char_buffer| and return its first character.
671 my $count = $input->manakai_read_until
672 ($self->{char_buffer}, qr/[^\x00\x0A\x0D]/, $self->{char_buffer_pos});
673 if ($count) {
674 $self->{line_prev} = $self->{line};
675 $self->{column_prev} = $self->{column};
676 $self->{column}++;
677 $self->{nc}
678 = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
679 return;
680 }
681
682 if ($input->read ($char, 1)) {
683 $self->{nc} = ord $char;
684 } else {
685 $self->{nc} = -1;
686 return;
687 }
688 }
689
690 ($self->{line_prev}, $self->{column_prev})
691 = ($self->{line}, $self->{column});
692 $self->{column}++;
693
694 if ($self->{nc} == 0x000A) { # LF
695 !!!cp ('j1');
696 $self->{line}++;
697 $self->{column} = 0;
698 } elsif ($self->{nc} == 0x000D) { # CR
699 !!!cp ('j2');
700 ## TODO: support for abort/streaming
## Look at the character after the CR: an LF is dropped, anything
## else is kept in |next_nc| for the next call.
701 my $next = '';
702 if ($input->read ($next, 1) and $next ne "\x0A") {
703 $self->{next_nc} = $next;
704 }
705 $self->{nc} = 0x000A; # LF # MUST
706 $self->{line}++;
707 $self->{column} = 0;
708 } elsif ($self->{nc} == 0x0000) { # NULL
709 !!!cp ('j4');
710 !!!parse-error (type => 'NULL');
711 $self->{nc} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
712 }
713 };
714
## Callback which appends to the scalar |$_[0]| a run of characters
## that are neither NULL, LF, CR nor in the character class content
## |$_[1]|.  The run is taken from |char_buffer| while it has unread
## characters and from |$input| otherwise.  Returns the number of
## characters read; when that is non-zero, |column| is advanced and
## |nc| is set to -1.
## NOTE(review): unlike |set_nc|, this closure uses the |$self| of the
## enclosing subroutine and is stored in |$self|, which looks like a
## reference cycle that the |delete| at the end of this subroutine
## does not remove - confirm.
## NOTE(review): |$offset| is used only in the |char_buffer| branch;
## the other branch passes |$_[2]| as is.
715 $self->{read_until} = sub {
716 #my ($scalar, $specials_range, $offset) = @_;
717 return 0 if defined $self->{next_nc};
718
719 my $pattern = qr/[^$_[1]\x00\x0A\x0D]/;
720 my $offset = $_[2] || 0;
721
722 if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
723 pos ($self->{char_buffer}) = $self->{char_buffer_pos};
724 if ($self->{char_buffer} =~ /\G(?>$pattern)+/) {
725 substr ($_[0], $offset)
726 = substr ($self->{char_buffer}, $-[0], $+[0] - $-[0]);
727 my $count = $+[0] - $-[0];
728 if ($count) {
729 $self->{column} += $count;
730 $self->{char_buffer_pos} += $count;
731 $self->{line_prev} = $self->{line};
732 $self->{column_prev} = $self->{column} - 1;
733 $self->{nc} = -1;
734 }
735 return $count;
736 } else {
737 return 0;
738 }
739 } else {
740 my $count = $input->manakai_read_until ($_[0], $pattern, $_[2]);
741 if ($count) {
742 $self->{column} += $count;
743 $self->{line_prev} = $self->{line};
744 $self->{column_prev} = $self->{column} - 1;
745 $self->{nc} = -1;
746 }
747 return $count;
748 }
749 }; # $self->{read_until}
750
## Default error handler: |warn| with the position taken from the
## token when one is given, and from the |line| and |column| options
## otherwise.
751 my $onerror = $_[2] || sub {
752 my (%opt) = @_;
753 my $line = $opt{token} ? $opt{token}->{line} : $opt{line};
754 my $column = $opt{token} ? $opt{token}->{column} : $opt{column};
755 warn "Parse error ($opt{type}) at line $line column $column\n";
756 };
## The stored handler supplies the current position first, so that
## |line| and |column| given by the caller take precedence.
757 $self->{parse_error} = sub {
758 $onerror->(line => $self->{line}, column => $self->{column}, @_);
759 };
760
## Error callback for the character stream.
761 my $char_onerror = sub {
762 my (undef, $type, %opt) = @_;
763 !!!parse-error (layer => 'encode',
764 line => $self->{line}, column => $self->{column} + 1,
765 %opt, type => $type);
766 }; # $char_onerror
767
## With a wrapper, the wrapped stream always gets |$char_onerror|;
## otherwise an error callback already set on |$input| is kept.
768 if ($_[3]) {
769 $input = $_[3]->($input);
770 $input->onerror ($char_onerror);
771 } else {
772 $input->onerror ($char_onerror) unless defined $input->onerror;
773 }
774
775 $self->_initialize_tokenizer;
776 $self->_initialize_tree_constructor;
777 $self->_construct_tree;
778 $self->_terminate_tree_constructor;
779
780 delete $self->{parse_error}; # remove loop
781
782 return $self->{document};
783 } # parse_char_stream
784
## Create a new parser object.
##
## The object holds the table of error level codes and default
## callbacks: |set_nc| sets |$self->{nc}| to -1, and |parse_error|,
## |change_encoding| and |application_cache_selection| do nothing.
sub new ($) {
  my ($class) = @_;

  my %level_code = (
    must => 'm',
    should => 's',
    warn => 'w',
    info => 'i',
    uncertain => 'u',
  );

  my $self = bless {
    level => \%level_code,
    parse_error => sub {
      #
    },
    change_encoding => sub {
      # if ($_[0] is a supported encoding) {
      # run "change the encoding" algorithm;
      # throw Whatpm::HTML::RestartParser (charset => $new_encoding);
      # }
    },
    application_cache_selection => sub {
      #
    },
  }, $class;

  ## This default refers to the object itself, so it can only be
  ## installed once the object exists.
  $self->{set_nc} = sub {
    $self->{nc} = -1;
  };

  return $self;
} # new
811
812 ## Insertion modes
813
## Insertion mode group flags, one bit each.
sub AFTER_HTML_IMS () { 1 << 2 }
sub HEAD_IMS () { 1 << 3 }
sub BODY_IMS () { 1 << 4 }
sub BODY_TABLE_IMS () { 1 << 5 }
sub TABLE_IMS () { 1 << 6 }
sub ROW_IMS () { 1 << 7 }
sub BODY_AFTER_IMS () { 1 << 8 }
sub FRAME_IMS () { 1 << 9 }
sub SELECT_IMS () { 1 << 10 }
#sub IN_FOREIGN_CONTENT_IM () { 1 << 11 } # see Whatpm::HTML::Tokenizer
## NOTE: "in foreign content" insertion mode is special; it is combined
## with the secondary insertion mode. In this parser, they are stored
## together in the bit-or'ed form.
sub IN_CDATA_RCDATA_IM () { 1 << 12 }
## NOTE: "in CDATA/RCDATA" insertion mode is also special; it is
## combined with the original insertion mode. In this parser,
## they are stored together in the bit-or'ed form.

## Bits 0-10.
sub IM_MASK () { (1 << 11) - 1 }

## NOTE: "initial" and "before html" insertion modes have no constants.

## NOTE: "after after body" insertion mode.
sub AFTER_HTML_BODY_IM () { BODY_AFTER_IMS | AFTER_HTML_IMS }

## NOTE: "after after frameset" insertion mode.
sub AFTER_HTML_FRAMESET_IM () { FRAME_IMS | AFTER_HTML_IMS }

## Insertion modes: group flags plus a number in the two lowest bits.
sub IN_HEAD_IM () { HEAD_IMS | 0 }
sub IN_HEAD_NOSCRIPT_IM () { HEAD_IMS | 1 }
sub AFTER_HEAD_IM () { HEAD_IMS | 2 }
sub BEFORE_HEAD_IM () { HEAD_IMS | 3 }
sub IN_BODY_IM () { BODY_IMS }
sub IN_CELL_IM () { BODY_IMS | BODY_TABLE_IMS | 1 }
sub IN_CAPTION_IM () { BODY_IMS | BODY_TABLE_IMS | 2 }
sub IN_ROW_IM () { TABLE_IMS | ROW_IMS | 1 }
sub IN_TABLE_BODY_IM () { TABLE_IMS | ROW_IMS | 2 }
sub IN_TABLE_IM () { TABLE_IMS }
sub AFTER_BODY_IM () { BODY_AFTER_IMS }
sub IN_FRAMESET_IM () { FRAME_IMS | 1 }
sub AFTER_FRAMESET_IM () { FRAME_IMS | 2 }
sub IN_SELECT_IM () { SELECT_IMS | 1 }
sub IN_SELECT_IN_TABLE_IM () { SELECT_IMS | 2 }
sub IN_COLUMN_GROUP_IM () { 2 }
858
## Prepare |$self->{document}| for tree construction: strict error
## checking is turned off, the document is marked as HTML, and the
## source line and column user data are both set to 1.
## NOTE: $self->{document} MUST be specified before this method is called
sub _initialize_tree_constructor ($) {
  my ($self) = @_;
  my $doc = $self->{document};

  $doc->strict_error_checking (0);
  ## TODO: Turn mutation events off # MUST
  ## TODO: Turn loose Document option (manakai extension) on
  $doc->manakai_is_html (1); # MUST
  $doc->set_user_data (manakai_source_line => 1);
  $doc->set_user_data (manakai_source_column => 1);
} # _initialize_tree_constructor
869
## Finish tree construction: turn strict error checking of
## |$self->{document}| back on.
sub _terminate_tree_constructor ($) {
  my ($self) = @_;
  my $doc = $self->{document};
  $doc->strict_error_checking (1);
  ## TODO: Turn mutation events on
} # _terminate_tree_constructor
875
876 ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
877
878 { # tree construction stage
879 my $token;
880
## Run the tree construction stage: fetch the first token, reset the
## tree construction state, then run the "initial" and "before html"
## insertion modes followed by the main loop, which starts in the
## "before head" insertion mode.
sub _construct_tree ($) {
  my $self = $_[0];

  ## When an interactive UA render the $self->{document} available
  ## to the user, or when it begin accepting user input, are
  ## not defined.

  !!!next-token;

  ## Reset the state kept in the parser object.
  $self->{open_elements} = [];
  undef $self->{$_} for qw(
    form_element head_element head_element_inserted
    inner_html_node ignore_newline
  );

  ## NOTE: The "initial" insertion mode.
  $self->_tree_construction_initial; # MUST

  ## NOTE: The "before html" insertion mode.
  $self->_tree_construction_root_element;
  $self->{insertion_mode} = BEFORE_HEAD_IM;

  ## NOTE: The "before head" insertion mode and so on.
  $self->_tree_construction_main;
} # _construct_tree
907
## The "initial" insertion mode of the tree construction stage.
##
## Examines the current token (the file-level |$token|):
##
##   - DOCTYPE token: A document type node is created and appended to
##     the document, parse errors for DOCTYPEs other than the HTML5
##     ones are reported, and the compatibility mode of the document
##     is chosen from the name, the public identifier, and the system
##     identifier of the token.
##   - Start tag, end tag, or end-of-file token, or character token
##     with data other than leading white space: The "no DOCTYPE"
##     parse error is reported and the document is put into the
##     quirks mode.  The token is left to be reprocessed.
##   - Comment token: A comment node is appended to the document.
##   - Leading white space characters are dropped.
##
## The method returns when the parser should go to the "before html"
## insertion mode.
sub _tree_construction_initial ($) {
  my $self = shift;

  ## NOTE: "initial" insertion mode

  INITIAL: {
    if ($token->{type} == DOCTYPE_TOKEN) {
      ## NOTE: Conformance checkers MAY, instead of reporting "not
      ## HTML5" error, switch to a conformance checking mode for
      ## another language.  (We don't support such mode switchings; it
      ## is nonsense to do anything different from what browsers do.)
      my $doctype_name = $token->{name};
      $doctype_name = '' unless defined $doctype_name;
      my $doctype = $self->{document}->create_document_type_definition
          ($doctype_name);

      $doctype_name =~ tr/A-Z/a-z/; # ASCII case-insensitive
      if ($doctype_name ne 'html') {
        !!!cp ('t1');
        !!!parse-error (type => 'not HTML5', token => $token);
      } elsif (defined $token->{pubid}) {
        !!!cp ('t2');
        ## XXX Obsolete permitted DOCTYPEs
        !!!parse-error (type => 'not HTML5', token => $token);
      } elsif (defined $token->{sysid}) {
        if ($token->{sysid} eq 'about:legacy-compat') {
          !!!cp ('t1.2'); ## <!DOCTYPE HTML SYSTEM "about:legacy-compat">
          !!!parse-error (type => 'XSLT-compat', token => $token,
                          level => $self->{level}->{should});
        } else {
          !!!parse-error (type => 'not HTML5', token => $token);
        }
      } else { ## <!DOCTYPE HTML>
        !!!cp ('t3');
        #
      }

      ## NOTE: Default value for both |public_id| and |system_id| attributes
      ## are empty strings, so that we don't set any value in missing cases.
      $doctype->public_id ($token->{pubid}) if defined $token->{pubid};
      $doctype->system_id ($token->{sysid}) if defined $token->{sysid};

      ## NOTE: Other DocumentType attributes are null or empty lists.
      ## In Firefox3, |internalSubset| attribute is set to the empty
      ## string, while |null| is an allowed value for the attribute
      ## according to DOM3 Core.
      $self->{document}->append_child ($doctype);

      if ($token->{quirks} or $doctype_name ne 'html') {
        !!!cp ('t4');
        $self->{document}->manakai_compat_mode ('quirks');
      } elsif (defined $token->{pubid}) {
        ## The public identifier is compared ASCII case-insensitively,
        ## by uppercasing it and comparing with uppercase literals.
        my $pubid = $token->{pubid};
        $pubid =~ tr/a-z/A-Z/;
        my $prefix = [
          "+//SILMARIL//DTD HTML PRO V0R11 19970101//",
          "-//ADVASOFT LTD//DTD HTML 3.0 ASWEDIT + EXTENSIONS//",
          "-//AS//DTD HTML 3.0 ASWEDIT + EXTENSIONS//",
          "-//IETF//DTD HTML 2.0 LEVEL 1//",
          "-//IETF//DTD HTML 2.0 LEVEL 2//",
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 1//",
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 2//",
          "-//IETF//DTD HTML 2.0 STRICT//",
          "-//IETF//DTD HTML 2.0//",
          "-//IETF//DTD HTML 2.1E//",
          "-//IETF//DTD HTML 3.0//",
          "-//IETF//DTD HTML 3.2 FINAL//",
          "-//IETF//DTD HTML 3.2//",
          "-//IETF//DTD HTML 3//",
          "-//IETF//DTD HTML LEVEL 0//",
          "-//IETF//DTD HTML LEVEL 1//",
          "-//IETF//DTD HTML LEVEL 2//",
          "-//IETF//DTD HTML LEVEL 3//",
          "-//IETF//DTD HTML STRICT LEVEL 0//",
          "-//IETF//DTD HTML STRICT LEVEL 1//",
          "-//IETF//DTD HTML STRICT LEVEL 2//",
          "-//IETF//DTD HTML STRICT LEVEL 3//",
          "-//IETF//DTD HTML STRICT//",
          "-//IETF//DTD HTML//",
          "-//METRIUS//DTD METRIUS PRESENTATIONAL//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML STRICT//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 TABLES//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML STRICT//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 TABLES//",
          "-//NETSCAPE COMM. CORP.//DTD HTML//",
          "-//NETSCAPE COMM. CORP.//DTD STRICT HTML//",
          "-//O'REILLY AND ASSOCIATES//DTD HTML 2.0//",
          "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED 1.0//",
          "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED RELAXED 1.0//",
          "-//SOFTQUAD SOFTWARE//DTD HOTMETAL PRO 6.0::19990601::EXTENSIONS TO HTML 4.0//",
          "-//SOFTQUAD//DTD HOTMETAL PRO 4.0::19971010::EXTENSIONS TO HTML 4.0//",
          "-//SPYGLASS//DTD HTML 2.0 EXTENDED//",
          "-//SQ//DTD HTML 2.0 HOTMETAL + EXTENSIONS//",
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA HTML//",
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA STRICT HTML//",
          "-//W3C//DTD HTML 3 1995-03-24//",
          "-//W3C//DTD HTML 3.2 DRAFT//",
          "-//W3C//DTD HTML 3.2 FINAL//",
          "-//W3C//DTD HTML 3.2//",
          "-//W3C//DTD HTML 3.2S DRAFT//",
          "-//W3C//DTD HTML 4.0 FRAMESET//",
          "-//W3C//DTD HTML 4.0 TRANSITIONAL//",
          "-//W3C//DTD HTML EXPERIMETNAL 19960712//",
          "-//W3C//DTD HTML EXPERIMENTAL 970421//",
          "-//W3C//DTD W3 HTML//",
          "-//W3O//DTD W3 HTML 3.0//",
          "-//WEBTECHS//DTD MOZILLA HTML 2.0//",
          "-//WEBTECHS//DTD MOZILLA HTML//",
        ]; # $prefix
        my $match;
        for (@$prefix) {
          ## Test whether the public identifier (not the list
          ## reference) begins with the quirks mode prefix.
          if (substr ($pubid, 0, length $_) eq $_) {
            $match = 1;
            last;
          }
        }
        if ($match or
            $pubid eq "-//W3O//DTD W3 HTML STRICT 3.0//EN//" or
            $pubid eq "-/W3C/DTD HTML 4.0 TRANSITIONAL/EN" or
            $pubid eq "HTML") {
          !!!cp ('t5');
          $self->{document}->manakai_compat_mode ('quirks');
        } elsif ($pubid =~ m[^-//W3C//DTD HTML 4.01 FRAMESET//] or
                 $pubid =~ m[^-//W3C//DTD HTML 4.01 TRANSITIONAL//]) {
          ## HTML 4.01 Frameset and Transitional DOCTYPEs trigger the
          ## quirks mode only when the system identifier is missing;
          ## with a system identifier they trigger the limited quirks
          ## mode.
          if (not defined $token->{sysid}) {
            !!!cp ('t6');
            $self->{document}->manakai_compat_mode ('quirks');
          } else {
            !!!cp ('t7');
            $self->{document}->manakai_compat_mode ('limited quirks');
          }
        } elsif ($pubid =~ m[^-//W3C//DTD XHTML 1.0 FRAMESET//] or
                 $pubid =~ m[^-//W3C//DTD XHTML 1.0 TRANSITIONAL//]) {
          !!!cp ('t8');
          $self->{document}->manakai_compat_mode ('limited quirks');
        } else {
          !!!cp ('t9');
        }
      } else {
        !!!cp ('t10');
      }
      if (defined $token->{sysid}) {
        ## The system identifier is compared ASCII case-insensitively,
        ## by lowercasing it and comparing with a lowercase literal.
        my $sysid = $token->{sysid};
        $sysid =~ tr/A-Z/a-z/;
        if ($sysid eq "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
          ## NOTE: Ensure that |PUBLIC "(limited quirks)" "(quirks)"| is
          ## marked as quirks.
          $self->{document}->manakai_compat_mode ('quirks');
          !!!cp ('t11');
        } else {
          !!!cp ('t12');
        }
      } else {
        !!!cp ('t13');
      }

      ## Go to the "before html" insertion mode.
      !!!next-token;
      return;
    } elsif ({
              START_TAG_TOKEN, 1,
              END_TAG_TOKEN, 1,
              END_OF_FILE_TOKEN, 1,
             }->{$token->{type}}) {
      !!!cp ('t14');
      !!!parse-error (type => 'no DOCTYPE', token => $token);
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the "before html" insertion mode.
      ## reprocess
      !!!ack-later;
      return;
    } elsif ($token->{type} == CHARACTER_TOKEN) {
      if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
        ## Ignore the token

        unless (length $token->{data}) {
          !!!cp ('t15');
          ## Stay in the insertion mode.
          !!!next-token;
          redo INITIAL;
        } else {
          !!!cp ('t16');
        }
      } else {
        !!!cp ('t17');
      }

      ## Data other than white space is left in the token.
      !!!parse-error (type => 'no DOCTYPE', token => $token);
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the "before html" insertion mode.
      ## reprocess
      return;
    } elsif ($token->{type} == COMMENT_TOKEN) {
      !!!cp ('t18');
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);

      ## Stay in the insertion mode.
      !!!next-token;
      redo INITIAL;
    } else {
      die "$0: $token->{type}: Unknown token type";
    }
  } # INITIAL

  die "$0: _tree_construction_initial: This should be never reached";
} # _tree_construction_initial
1117
## The "before html" insertion mode of the tree construction stage.
##
## Examines the current token (the file-level |$token|):
##
##   - DOCTYPE token: A parse error is reported and the token is
##     ignored.
##   - Comment token: A comment node is appended to the document.
##   - Leading white space characters are dropped.
##   - |html| start tag: The root element is created from the token,
##     appended to the document, and pushed onto the stack of open
##     elements.  The |application_cache_selection| callback is
##     invoked with the value of the |manifest| attribute, or with
##     |undef| if there is no such attribute.
##   - Anything else: An |html| root element without attribute is
##     created, appended to the document, and pushed onto the stack of
##     open elements.  The |application_cache_selection| callback is
##     invoked with |undef| and the token is left to be reprocessed.
##
## The method returns when the parser should go to the "before head"
## insertion mode.
sub _tree_construction_root_element ($) {
  my $self = shift;

  ## NOTE: "before html" insertion mode.

  B: {
    if ($token->{type} == DOCTYPE_TOKEN) {
      !!!cp ('t19');
      !!!parse-error (type => 'in html:#DOCTYPE', token => $token);
      ## Ignore the token
      ## Stay in the insertion mode.
      !!!next-token;
      redo B;
    } elsif ($token->{type} == COMMENT_TOKEN) {
      !!!cp ('t20');
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);
      ## Stay in the insertion mode.
      !!!next-token;
      redo B;
    } elsif ($token->{type} == CHARACTER_TOKEN) {
      if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
        ## Ignore the token.

        unless (length $token->{data}) {
          !!!cp ('t21');
          ## Stay in the insertion mode.
          !!!next-token;
          redo B;
        } else {
          !!!cp ('t22');
        }
      } else {
        !!!cp ('t23');
      }

      ## NOTE: The |application_cache_selection| callback is not
      ## invoked here.  It is invoked exactly once, after the implied
      ## |html| element is created at the end of this block.

      #
    } elsif ($token->{type} == START_TAG_TOKEN) {
      if ($token->{tag_name} eq 'html') {
        my $root_element;
        !!!create-element ($root_element, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
        $self->{document}->append_child ($root_element);
        push @{$self->{open_elements}},
            [$root_element, $el_category->{html}];

        if ($token->{attributes}->{manifest}) {
          !!!cp ('t24');
          $self->{application_cache_selection}
              ->($token->{attributes}->{manifest}->{value});
          ## ISSUE: Spec is unclear on relative references.
          ## According to Hixie (#whatwg 2008-03-19), it should be
          ## resolved against the base URI of the document in HTML
          ## or xml:base of the element in XHTML.
        } else {
          !!!cp ('t25');
          $self->{application_cache_selection}->(undef);
        }

        !!!nack ('t25c');

        !!!next-token;
        return; ## Go to the "before head" insertion mode.
      } else {
        !!!cp ('t25.1');
        #
      }
    } elsif ({
              END_TAG_TOKEN, 1,
              END_OF_FILE_TOKEN, 1,
             }->{$token->{type}}) {
      !!!cp ('t26');
      #
    } else {
      die "$0: $token->{type}: Unknown token type";
    }

    ## Anything else: Create the implied root element.
    my $root_element;
    !!!create-element ($root_element, $HTML_NS, 'html',, $token);
    $self->{document}->append_child ($root_element);
    push @{$self->{open_elements}}, [$root_element, $el_category->{html}];

    $self->{application_cache_selection}->(undef);

    ## NOTE: Reprocess the token.
    !!!ack-later;
    return; ## Go to the "before head" insertion mode.
  } # B

  die "$0: _tree_construction_root_element: This should never be reached";
} # _tree_construction_root_element
1210
## Reset the insertion mode appropriately.
##
## Walks the stack of open elements from the last entry towards the
## first one and sets |$self->{insertion_mode}| according to the first
## entry that determines the mode.  When the walk reaches the first
## entry of the stack, |$self->{inner_html_node}| is examined instead
## of that entry; the method dies if |$self->{inner_html_node}| is not
## defined at that point.
sub _reset_insertion_mode ($) {
  my $self = shift;

  ## Step 1
  my $last;

  ## Step 2
  my $i = -1;
  my $node = $self->{open_elements}->[$i];

  ## Step 3
  S3: {
    if ($self->{open_elements}->[0]->[0] eq $node->[0]) {
      $last = 1;
      if (defined $self->{inner_html_node}) {
        !!!cp ('t28');
        $node = $self->{inner_html_node};
      } else {
        die "_reset_insertion_mode: t27";
      }
    }

    ## Step 4..14
    my $new_mode;
    if ($node->[1] & FOREIGN_EL) {
      !!!cp ('t28.1');
      ## NOTE: Strictly speaking, the line below only applies to MathML and
      ## SVG elements.  Currently the HTML syntax supports only MathML and
      ## SVG elements as foreigners.
      $new_mode = IN_BODY_IM | IN_FOREIGN_CONTENT_IM;
    } elsif ($node->[1] == TABLE_CELL_EL) {
      if ($last) {
        !!!cp ('t28.2');
        #
      } else {
        !!!cp ('t28.3');
        $new_mode = IN_CELL_IM;
      }
    } else {
      !!!cp ('t28.4');
      $new_mode = {
        select => IN_SELECT_IM,
        ## NOTE: |option| and |optgroup| do not set
        ## insertion mode to "in select" by themselves.
        tr => IN_ROW_IM,
        tbody => IN_TABLE_BODY_IM,
        thead => IN_TABLE_BODY_IM,
        tfoot => IN_TABLE_BODY_IM,
        caption => IN_CAPTION_IM,
        colgroup => IN_COLUMN_GROUP_IM,
        table => IN_TABLE_IM,
        head => IN_BODY_IM, # not in head!
        body => IN_BODY_IM,
        frameset => IN_FRAMESET_IM,
      }->{$node->[0]->manakai_local_name};
    }

    ## Return whenever a mode was chosen, independently of whether the
    ## value of the chosen mode constant is true or false.
    if (defined $new_mode) {
      $self->{insertion_mode} = $new_mode;
      return;
    }

    ## Step 15
    if ($node->[1] == HTML_EL) {
      unless (defined $self->{head_element}) {
        !!!cp ('t29');
        $self->{insertion_mode} = BEFORE_HEAD_IM;
      } else {
        ## ISSUE: Can this state be reached?
        !!!cp ('t30');
        $self->{insertion_mode} = AFTER_HEAD_IM;
      }
      return;
    } else {
      !!!cp ('t31');
    }

    ## Step 16
    if ($last) {
      $self->{insertion_mode} = IN_BODY_IM;
      return;
    }

    ## Step 17
    $i--;
    $node = $self->{open_elements}->[$i];

    ## Step 18
    redo S3;
  } # S3

  die "$0: _reset_insertion_mode: This line should never be reached";
} # _reset_insertion_mode
1297
1298 sub _tree_construction_main ($) {
1299 my $self = shift;
1300
1301 my $active_formatting_elements = [];
1302
## Reconstruct the active formatting elements.
##
## The only argument is a code reference, which is invoked with each
## newly cloned element node to insert it into the tree.  Nothing is
## done if the list of active formatting elements is empty, or if the
## last entry of the list is a marker or is in the stack of open
## elements.
my $reconstruct_active_formatting_elements = sub { # MUST
  my ($insert_node) = @_;

  ## Step 1: There is nothing to reconstruct in an empty list.
  return unless @$active_formatting_elements;

  ## Step 3: Start from the last entry of the list.
  my $pos = -1;
  my $current = $active_formatting_elements->[$pos];

  ## Step 2: A marker or an element that is still open ends the
  ## algorithm.
  return if $current->[0] eq '#marker';
  for my $open (@{$self->{open_elements}}) {
    if ($current->[0] eq $open->[0]) {
      !!!cp ('t32');
      return;
    }
  }

  REWIND: {
    ## Step 4: Stop rewinding at the first entry of the list.
    last REWIND
        if $active_formatting_elements->[0]->[0] eq $current->[0];

    ## Step 5: Move to the previous entry.
    $pos--;
    $current = $active_formatting_elements->[$pos];

    ## Step 6: Continue rewinding unless the entry is a marker or an
    ## element in the stack of open elements.
    if ($current->[0] eq '#marker') {
      !!!cp ('t33_1');
      #
    } else {
      my $is_open;
      OPEN: for my $open (@{$self->{open_elements}}) {
        if ($current->[0] eq $open->[0]) {
          !!!cp ('t33');
          $is_open = 1;
          last OPEN;
        }
      }
      if ($is_open) {
        !!!cp ('t34');
        #
      } else {
        ## NOTE: <!DOCTYPE HTML><p><b><i><u></p> <p>X
        !!!cp ('t35');
        redo REWIND;
      }
    }

    ## Step 7: Move to the next entry, which is the first entry to be
    ## reconstructed.
    $pos++;
    $current = $active_formatting_elements->[$pos];
  } # REWIND

  CLONE: {
    ## Step 8: Clone the element without its descendants.
    my $clone = [$current->[0]->clone_node (0), $current->[1]];

    ## Step 9: Insert the clone and push it onto the stack of open
    ## elements.
    $insert_node->($clone->[0]);
    push @{$self->{open_elements}}, $clone;

    ## Step 10: Replace the entry in the list by the clone, which is
    ## now the last entry of the stack of open elements.
    $active_formatting_elements->[$pos] = $clone;

    ## Step 11: Repeat until the last entry of the list is replaced.
    unless ($clone->[0] eq $active_formatting_elements->[-1]->[0]) {
      !!!cp ('t36');
      ## Step 7'
      $pos++;
      $current = $active_formatting_elements->[$pos];

      redo CLONE;
    }

    !!!cp ('t37');
  } # CLONE
}; # $reconstruct_active_formatting_elements
1382
## Clear the list of active formatting elements up to the last marker:
## The last marker and all entries after it are removed from the list.
## The list is left unchanged if it contains no marker.
my $clear_up_to_marker = sub {
  my $index = $#$active_formatting_elements;
  while ($index >= 0) {
    if ($active_formatting_elements->[$index]->[0] eq '#marker') {
      !!!cp ('t38');
      ## Two-argument |splice| drops everything from the marker on.
      splice @$active_formatting_elements, $index;
      return;
    }
    $index--;
  }

  !!!cp ('t39');
}; # $clear_up_to_marker
1394
1395 my $insert;
1396
## Insert an element for the current start tag token whose content is
## parsed as CDATA or RCDATA.
##
## The only argument is the content model flag to be set to
## |$self->{content_model}|.  The |IN_CDATA_RCDATA_IM| flag is added
## to the current insertion mode and the next token is fetched.
my $parse_rcdata = sub ($) {
  my ($content_model_flag) = @_;

  ## Step 1
  !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);

  ## Step 2
  $self->{content_model} = $content_model_flag; # CDATA or RCDATA
  delete $self->{escape}; # MUST

  ## Step 3, 4
  $self->{insertion_mode} |= IN_CDATA_RCDATA_IM;

  !!!nack ('t40.1');
  !!!next-token;
}; # $parse_rcdata
1414
## Process a |script| start tag token: A |script| element is created
## from the current token, inserted by the current |$insert| code, and
## pushed onto the stack of open elements.  Then the content model is
## switched to CDATA, the |IN_CDATA_RCDATA_IM| flag is added to the
## current insertion mode, and the next token is fetched.
my $script_start_tag = sub () {
  ## Step 1: Create the element for the token.
  my $script_el;
  !!!create-element ($script_el, $HTML_NS, 'script', $token->{attributes}, $token);

  ## Step 2
  ## TODO: mark as "parser-inserted"

  ## Step 3
  ## TODO: Mark as "already executed", if ...

  ## Step 4 (HTML5 revision 2702): Insert the element and make it the
  ## current node.
  $insert->($script_el);
  my $script_entry = [$script_el, $el_category->{script}];
  push @{$self->{open_elements}}, $script_entry;

  ## Step 5: Tokenize the content of the element as CDATA.
  delete $self->{escape}; # MUST
  $self->{content_model} = CDATA_CONTENT_MODEL;

  ## Step 6-7: Remember the original insertion mode by adding a flag
  ## to it.
  $self->{insertion_mode} = $self->{insertion_mode} | IN_CDATA_RCDATA_IM;

  !!!nack ('t40.2');
  !!!next-token;
}; # $script_start_tag
1440
1441 ## NOTE: $open_tables->[-1]->[0] is the "current table" element node.
1442 ## NOTE: $open_tables->[-1]->[1] is the "tainted" flag (OBSOLETE; unused).
1443 ## NOTE: $open_tables->[-1]->[2] is set false when non-Text node inserted.
1444 my $open_tables = [[$self->{open_elements}->[0]->[0]]];
1445
## Process an end tag token of a formatting element by the adoption
## agency algorithm (AAA).
##
## The only argument is the end tag token.  The formatting element is
## the last entry after the last marker in the list of active
## formatting elements whose local name is equal to the tag name of
## the token.  Parse errors are reported with the token given as the
## argument.  The next token is fetched before the code returns.
my $formatting_end_tag = sub {
  my $end_tag_token = shift;
  my $tag_name = $end_tag_token->{tag_name};

  ## NOTE: The adoption agency algorithm (AAA).

  FET: {
    ## Step 1
    my $formatting_element;
    my $formatting_element_i_in_active;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq '#marker') {
        !!!cp ('t52');
        last AFE;
      } elsif ($active_formatting_elements->[$_]->[0]->manakai_local_name
                   eq $tag_name) {
        !!!cp ('t51');
        $formatting_element = $active_formatting_elements->[$_];
        $formatting_element_i_in_active = $_;
        last AFE;
      }
    } # AFE
    unless (defined $formatting_element) {
      !!!cp ('t53');
      !!!parse-error (type => 'unmatched end tag', text => $tag_name, token => $end_tag_token);
      ## Ignore the token
      !!!next-token;
      return;
    }
    ## has an element in scope
    my $in_scope = 1;
    my $formatting_element_i_in_open;
    INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if ($node->[0] eq $formatting_element->[0]) {
        if ($in_scope) {
          !!!cp ('t54');
          $formatting_element_i_in_open = $_;
          last INSCOPE;
        } else { # in open elements but not in scope
          !!!cp ('t55');
          !!!parse-error (type => 'unmatched end tag',
                          text => $tag_name,
                          token => $end_tag_token);
          ## Ignore the token
          !!!next-token;
          return;
        }
      } elsif ($node->[1] & SCOPING_EL) {
        !!!cp ('t56');
        $in_scope = 0;
      }
    } # INSCOPE
    unless (defined $formatting_element_i_in_open) {
      !!!cp ('t57');
      !!!parse-error (type => 'unmatched end tag',
                      text => $tag_name,
                      token => $end_tag_token);
      ## Remove the formatting element itself from the list.  It is
      ## not necessarily the last entry of the list, since entries
      ## with other local names might follow it.
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      !!!next-token; ## TODO: ok?
      return;
    }
    if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
      !!!cp ('t58');
      !!!parse-error (type => 'not closed',
                      text => $self->{open_elements}->[-1]->[0]
                          ->manakai_local_name,
                      token => $end_tag_token);
    }

    ## Step 2
    ## The stack is scanned from the current node down to the
    ## formatting element, such that the last candidate found is the
    ## one nearest to the formatting element.
    my $furthest_block;
    my $furthest_block_i_in_open;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if (not ($node->[1] & FORMATTING_EL) and
          #not $phrasing_category->{$node->[1]} and
          ($node->[1] & SPECIAL_EL or
           $node->[1] & SCOPING_EL)) { ## Scoping is redundant, maybe
        !!!cp ('t59');
        $furthest_block = $node;
        $furthest_block_i_in_open = $_;
        ## NOTE: The topmost (eldest) node.
      } elsif ($node->[0] eq $formatting_element->[0]) {
        !!!cp ('t60');
        last OE;
      }
    } # OE

    ## Step 3
    ## Without furthest block, the formatting element and everything
    ## after it are popped from the stack of open elements and the
    ## formatting element is removed from the list.
    unless (defined $furthest_block) { # MUST
      !!!cp ('t61');
      splice @{$self->{open_elements}}, $formatting_element_i_in_open;
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      !!!next-token;
      return;
    }

    ## Step 4
    my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1];

    ## Step 5
    my $furthest_block_parent = $furthest_block->[0]->parent_node;
    if (defined $furthest_block_parent) {
      !!!cp ('t62');
      $furthest_block_parent->remove_child ($furthest_block->[0]);
    }

    ## Step 6
    ## NOTE(review): If the formatting element is the first entry of
    ## the list, the index |-1| below refers to the last entry of the
    ## list - confirm that this is intended.
    my $bookmark_prev_el
        = $active_formatting_elements->[$formatting_element_i_in_active - 1]
            ->[0];

    ## Step 7
    my $node = $furthest_block;
    my $node_i_in_open = $furthest_block_i_in_open;
    my $last_node = $furthest_block;
    S7: {
      ## Step 1
      $node_i_in_open--;
      $node = $self->{open_elements}->[$node_i_in_open];

      ## Step 2
      ## A node that is not in the list of active formatting elements
      ## is removed from the stack of open elements and skipped.
      my $node_i_in_active;
      S7S2: {
        for (reverse 0..$#$active_formatting_elements) {
          if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
            !!!cp ('t63');
            $node_i_in_active = $_;
            last S7S2;
          }
        }
        splice @{$self->{open_elements}}, $node_i_in_open, 1;
        redo S7;
      } # S7S2

      ## Step 3
      last S7 if $node->[0] eq $formatting_element->[0];

      ## Step 4
      if ($last_node->[0] eq $furthest_block->[0]) {
        !!!cp ('t64');
        $bookmark_prev_el = $node->[0];
      }

      ## Step 5
      ## A node with children is replaced by a clone of it, both in
      ## the list and in the stack.
      if ($node->[0]->has_child_nodes ()) {
        !!!cp ('t65');
        my $clone = [$node->[0]->clone_node (0), $node->[1]];
        $active_formatting_elements->[$node_i_in_active] = $clone;
        $self->{open_elements}->[$node_i_in_open] = $clone;
        $node = $clone;
      }

      ## Step 6
      $node->[0]->append_child ($last_node->[0]);

      ## Step 7
      $last_node = $node;

      ## Step 8
      redo S7;
    } # S7

    ## Step 8
    ## If the common ancestor is a table related element (an element
    ## with the |TABLE_ROWS_EL| flag), the last node is foster
    ## parented.
    if ($common_ancestor_node->[1] & TABLE_ROWS_EL) {
      my $foster_parent_element;
      my $next_sibling;
      OE: for (reverse 0..$#{$self->{open_elements}}) {
        if ($self->{open_elements}->[$_]->[1] == TABLE_EL) {
          my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
          if (defined $parent and $parent->node_type == 1) {
            !!!cp ('t65.1');
            $foster_parent_element = $parent;
            $next_sibling = $self->{open_elements}->[$_]->[0];
          } else {
            !!!cp ('t65.2');
            $foster_parent_element
                = $self->{open_elements}->[$_ - 1]->[0];
          }
          last OE;
        }
      } # OE
      $foster_parent_element = $self->{open_elements}->[0]->[0]
          unless defined $foster_parent_element;
      $foster_parent_element->insert_before ($last_node->[0], $next_sibling);
      $open_tables->[-1]->[1] = 1; # tainted
    } else {
      !!!cp ('t65.3');
      $common_ancestor_node->[0]->append_child ($last_node->[0]);
    }

    ## Step 9
    my $clone = [$formatting_element->[0]->clone_node (0),
                 $formatting_element->[1]];

    ## Step 10
    ## The list of children is copied before the children are moved.
    my @cn = @{$furthest_block->[0]->child_nodes};
    $clone->[0]->append_child ($_) for @cn;

    ## Step 11
    $furthest_block->[0]->append_child ($clone->[0]);

    ## Step 12
    my $i;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
        !!!cp ('t66');
        splice @$active_formatting_elements, $_, 1;
        $i-- and last AFE if defined $i;
      } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
        !!!cp ('t67');
        $i = $_;
      }
    } # AFE
    splice @$active_formatting_elements, $i + 1, 0, $clone;

    ## Step 13
    undef $i;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
        !!!cp ('t68');
        splice @{$self->{open_elements}}, $_, 1;
        $i-- and last OE if defined $i;
      } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
        !!!cp ('t69');
        $i = $_;
      }
    } # OE
    splice @{$self->{open_elements}}, $i + 1, 0, $clone;

    ## Step 14
    redo FET;
  } # FET
}; # $formatting_end_tag
1681
## Default insertion code: Append the node given as the argument to
## the current node, i.e. the last entry of the stack of open
## elements.  This code is also the initial value of |$insert|.
$insert = my $insert_to_current = sub {
  my ($child) = @_;
  my $current_node = $self->{open_elements}->[-1]->[0];
  $current_node->append_child ($child);
}; # $insert_to_current
1685
## Insertion code with foster parenting: If the current node has the
## |TABLE_ROWS_EL| flag, the node given as the argument is inserted
## into the foster parent element and the current table is marked as
## tainted.  Otherwise, the node is appended to the current node.
my $insert_to_foster = sub {
  my ($child) = @_;

  ## No foster parenting is necessary outside of the table related
  ## elements.
  if (not ($self->{open_elements}->[-1]->[1] & TABLE_ROWS_EL)) {
    !!!cp ('t72');
    return $self->{open_elements}->[-1]->[0]->append_child ($child);
  }

  # MUST
  ## Find the last |table| element in the stack of open elements.
  my $foster_parent;
  my $before;
  TABLE: for my $index (reverse 0..$#{$self->{open_elements}}) {
    my $entry = $self->{open_elements}->[$index];
    next TABLE unless $entry->[1] == TABLE_EL;

    my $table_parent = $entry->[0]->parent_node;
    if (defined $table_parent and $table_parent->node_type == 1) {
      ## The |table| element has a parent element: Insert the node
      ## into the parent, just before the |table| element.
      !!!cp ('t70');
      $foster_parent = $table_parent;
      $before = $entry->[0];
    } else {
      ## Otherwise, use the entry before the |table| element in the
      ## stack.
      !!!cp ('t71');
      $foster_parent = $self->{open_elements}->[$index - 1]->[0];
    }
    last TABLE;
  } # TABLE

  ## Without |table| element in the stack, the first entry of the
  ## stack is the foster parent.
  unless (defined $foster_parent) {
    $foster_parent = $self->{open_elements}->[0]->[0];
  }

  $foster_parent->insert_before ($child, $before);
  $open_tables->[-1]->[1] = 1; # tainted
}; # $insert_to_foster
1717
1718 ## NOTE: Insert a character (MUST): When a character is inserted, if
1719 ## the last node that was inserted by the parser is a Text node and
1720 ## the character has to be inserted after that node, then the
1721 ## character is appended to the Text node. However, if any other
1722 ## node is inserted by the parser, then a new Text node is created
1723 ## and the character is appended as that Text node. If I'm not
1724 ## wrong, for a parser with scripting disabled, there are only two
1725 ## cases where this occurs. One is the case where an element node
1726 ## is inserted to the |head| element. This is covered by using the
1727 ## |$self->{head_element_inserted}| flag. Another is the case where
1728 ## an element or comment is inserted into the |table| subtree while
1729 ## foster parenting happens. This is covered by using the [2] flag
1730 ## of the |$open_tables| structure. All other cases are handled
1731 ## simply by calling |manakai_append_text| method.
1732
1733 ## TODO: |<body><script>document.write("a<br>");
1734 ## document.body.removeChild (document.body.lastChild);
1735 ## document.write ("b")</script>|
1736
1737 B: while (1) {
1738
1739 ## The "in table text" insertion mode.
1740 if ($self->{insertion_mode} & TABLE_IMS and
1741 not $self->{insertion_mode} & IN_FOREIGN_CONTENT_IM and
1742 not $self->{insertion_mode} & IN_CDATA_RCDATA_IM) {
1743 C: {
1744 my $s;
1745 if ($token->{type} == CHARACTER_TOKEN) {
1746 !!!cp ('t194');
1747 $self->{pending_chars} ||= [];
1748 push @{$self->{pending_chars}}, $token;
1749 !!!next-token;
1750 next B;
1751 } else {
1752 if ($self->{pending_chars}) {
1753 $s = join '', map { $_->{data} } @{$self->{pending_chars}};
1754 delete $self->{pending_chars};
1755 if ($s =~ /[^\x09\x0A\x0C\x0D\x20]/) {
1756 !!!cp ('t195');
1757 #
1758 } else {
1759 !!!cp ('t195.1');
1760 #$self->{open_elements}->[-1]->[0]->manakai_append_text ($s);
1761 $self->{open_elements}->[-1]->[0]->append_child
1762 ($self->{document}->create_text_node ($s));
1763 last C;
1764 }
1765 } else {
1766 !!!cp ('t195.2');
1767 last C;
1768 }
1769 }
1770
1771 ## Foster parenting
1772 !!!parse-error (type => 'in table:#text', token => $token);
1773
1774 ## NOTE: As if in body, but insert into the foster parent element.
1775 $reconstruct_active_formatting_elements->($insert_to_foster);
1776
1777 if ($self->{open_elements}->[-1]->[1] & TABLE_ROWS_EL) {
1778 # MUST
1779 my $foster_parent_element;
1780 my $next_sibling;
1781 #my $prev_sibling;
1782 OE: for (reverse 0..$#{$self->{open_elements}}) {
1783 if ($self->{open_elements}->[$_]->[1] == TABLE_EL) {
1784 my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
1785 if (defined $parent and $parent->node_type == 1) {
1786 $foster_parent_element = $parent;
1787 !!!cp ('t196');
1788 $next_sibling = $self->{open_elements}->[$_]->[0];
1789 # $prev_sibling = $next_sibling->previous_sibling;
1790 #
1791 } else {
1792 !!!cp ('t197');
1793 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
1794 # $prev_sibling = $foster_parent_element->last_child;
1795 #
1796 }
1797 last OE;
1798 }
1799 } # OE
1800 $foster_parent_element = $self->{open_elements}->[0]->[0] #and
1801 #$prev_sibling = $foster_parent_element->last_child
1802 unless defined $foster_parent_element;
1803 #undef $prev_sibling unless $open_tables->[-1]->[2]; # ~node inserted
1804 #if (defined $prev_sibling and
1805 # $prev_sibling->node_type == 3) {
1806 # !!! cp ('t198');
1807 # $prev_sibling->manakai_append_text ($s);
1808 #} else {
1809 !!!cp ('t199');
1810 $foster_parent_element->insert_before
1811 ($self->{document}->create_text_node ($s), $next_sibling);
1812 #}
1813 $open_tables->[-1]->[1] = 1; # tainted
1814 $open_tables->[-1]->[2] = 1; # ~node inserted
1815 } else {
1816 ## NOTE: Fragment case or in a foster parent'ed element
1817 ## (e.g. |<table><span>a|). In fragment case, whether the
1818 ## character is appended to existing node or a new node is
1819 ## created is irrelevant, since the foster parent'ed nodes
1820 ## are discarded and fragment parsing does not invoke any
1821 ## script.
1822 !!!cp ('t200');
1823 $self->{open_elements}->[-1]->[0]->manakai_append_text ($s);
1824 }
1825 } # C
1826 } # TABLE_IMS
1827
1828 if ($token->{type} == DOCTYPE_TOKEN) {
1829 !!!cp ('t73');
1830 !!!parse-error (type => 'in html:#DOCTYPE', token => $token);
1831 ## Ignore the token
1832 ## Stay in the phase
1833 !!!next-token;
1834 next B;
1835 } elsif ($token->{type} == START_TAG_TOKEN and
1836 $token->{tag_name} eq 'html') {
1837 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
1838 !!!cp ('t79');
1839 !!!parse-error (type => 'after html', text => 'html', token => $token);
1840 $self->{insertion_mode} = AFTER_BODY_IM;
1841 } elsif ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
1842 !!!cp ('t80');
1843 !!!parse-error (type => 'after html', text => 'html', token => $token);
1844 $self->{insertion_mode} = AFTER_FRAMESET_IM;
1845 } else {
1846 !!!cp ('t81');
1847 }
1848
1849 !!!cp ('t82');
1850 !!!parse-error (type => 'not first start tag', token => $token);
1851 my $top_el = $self->{open_elements}->[0]->[0];
1852 for my $attr_name (keys %{$token->{attributes}}) {
1853 unless ($top_el->has_attribute_ns (undef, $attr_name)) {
1854 !!!cp ('t84');
1855 $top_el->set_attribute_ns
1856 (undef, [undef, $attr_name],
1857 $token->{attributes}->{$attr_name}->{value});
1858 }
1859 }
1860 !!!nack ('t84.1');
1861 !!!next-token;
1862 next B;
1863 } elsif ($token->{type} == COMMENT_TOKEN) {
1864 my $comment = $self->{document}->create_comment ($token->{data});
1865 if ($self->{insertion_mode} & AFTER_HTML_IMS) {
1866 !!!cp ('t85');
1867 $self->{document}->append_child ($comment);
1868 } elsif ($self->{insertion_mode} == AFTER_BODY_IM) {
1869 !!!cp ('t86');
1870 $self->{open_elements}->[0]->[0]->append_child ($comment);
1871 } else {
1872 !!!cp ('t87');
1873 $self->{open_elements}->[-1]->[0]->append_child ($comment);
1874 $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
1875 }
1876 !!!next-token;
1877 next B;
1878 } elsif ($self->{insertion_mode} & IN_CDATA_RCDATA_IM) {
1879 if ($token->{type} == CHARACTER_TOKEN) {
1880 $token->{data} =~ s/^\x0A// if $self->{ignore_newline};
1881 delete $self->{ignore_newline};
1882
1883 if (length $token->{data}) {
1884 !!!cp ('t43');
1885 $self->{open_elements}->[-1]->[0]->manakai_append_text
1886 ($token->{data});
1887 } else {
1888 !!!cp ('t43.1');
1889 }
1890 !!!next-token;
1891 next B;
1892 } elsif ($token->{type} == END_TAG_TOKEN) {
1893 delete $self->{ignore_newline};
1894
1895 if ($token->{tag_name} eq 'script') {
1896 !!!cp ('t50');
1897
1898 ## Para 1-2
1899 my $script = pop @{$self->{open_elements}};
1900
1901 ## Para 3
1902 $self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM;
1903
1904 ## Para 4
1905 ## TODO: $old_insertion_point = $current_insertion_point;
1906 ## TODO: $current_insertion_point = just before $self->{nc};
1907
1908 ## Para 5
1909 ## TODO: Run the $script->[0].
1910
1911 ## Para 6
1912 ## TODO: $current_insertion_point = $old_insertion_point;
1913
1914 ## Para 7
1915 ## TODO: if ($pending_external_script) {
1916 ## TODO: ...
1917 ## TODO: }
1918
1919 !!!next-token;
1920 next B;
1921 } else {
1922 !!!cp ('t42');
1923
1924 pop @{$self->{open_elements}};
1925
1926 $self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM;
1927 !!!next-token;
1928 next B;
1929 }
1930 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
1931 delete $self->{ignore_newline};
1932
1933 !!!cp ('t44');
1934 !!!parse-error (type => 'not closed',
1935 text => $self->{open_elements}->[-1]->[0]
1936 ->manakai_local_name,
1937 token => $token);
1938
1939 #if ($self->{open_elements}->[-1]->[1] == SCRIPT_EL) {
1940 # ## TODO: Mark as "already executed"
1941 #}
1942
1943 pop @{$self->{open_elements}};
1944
1945 $self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM;
1946 ## Reprocess.
1947 next B;
1948 } else {
1949 die "$0: $token->{type}: In CDATA/RCDATA: Unknown token type";
1950 }
1951 } elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) {
1952 if ($token->{type} == CHARACTER_TOKEN) {
1953 !!!cp ('t87.1');
1954 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
1955 !!!next-token;
1956 next B;
1957 } elsif ($token->{type} == START_TAG_TOKEN) {
1958 if ((not {mglyph => 1, malignmark => 1}->{$token->{tag_name}} and
1959 $self->{open_elements}->[-1]->[1] & FOREIGN_FLOW_CONTENT_EL) or
1960 not ($self->{open_elements}->[-1]->[1] & FOREIGN_EL) or
1961 ($token->{tag_name} eq 'svg' and
1962 $self->{open_elements}->[-1]->[1] == MML_AXML_EL)) {
1963 ## NOTE: "using the rules for secondary insertion mode"then"continue"
1964 !!!cp ('t87.2');
1965 #
1966 } elsif ({
1967 b => 1, big => 1, blockquote => 1, body => 1, br => 1,
1968 center => 1, code => 1, dd => 1, div => 1, dl => 1, dt => 1,
1969 em => 1, embed => 1, h1 => 1, h2 => 1, h3 => 1,
1970 h4 => 1, h5 => 1, h6 => 1, head => 1, hr => 1, i => 1,
1971 img => 1, li => 1, listing => 1, menu => 1, meta => 1,
1972 nobr => 1, ol => 1, p => 1, pre => 1, ruby => 1, s => 1,
1973 small => 1, span => 1, strong => 1, strike => 1, sub => 1,
1974 sup => 1, table => 1, tt => 1, u => 1, ul => 1, var => 1,
1975 }->{$token->{tag_name}} or
1976 ($token->{tag_name} eq 'font' and
1977 ($token->{attributes}->{color} or
1978 $token->{attributes}->{face} or
1979 $token->{attributes}->{size}))) {
1980 !!!cp ('t87.2');
1981 !!!parse-error (type => 'not closed',
1982 text => $self->{open_elements}->[-1]->[0]
1983 ->manakai_local_name,
1984 token => $token);
1985
1986 pop @{$self->{open_elements}}
1987 while $self->{open_elements}->[-1]->[1] & FOREIGN_EL;
1988
1989 $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
1990 ## Reprocess.
1991 next B;
1992 } else {
1993 my $nsuri = $self->{open_elements}->[-1]->[0]->namespace_uri;
1994 my $tag_name = $token->{tag_name};
1995 if ($nsuri eq $SVG_NS) {
1996 $tag_name = {
1997 altglyph => 'altGlyph',
1998 altglyphdef => 'altGlyphDef',
1999 altglyphitem => 'altGlyphItem',
2000 animatecolor => 'animateColor',
2001 animatemotion => 'animateMotion',
2002 animatetransform => 'animateTransform',
2003 clippath => 'clipPath',
2004 feblend => 'feBlend',
2005 fecolormatrix => 'feColorMatrix',
2006 fecomponenttransfer => 'feComponentTransfer',
2007 fecomposite => 'feComposite',
2008 feconvolvematrix => 'feConvolveMatrix',
2009 fediffuselighting => 'feDiffuseLighting',
2010 fedisplacementmap => 'feDisplacementMap',
2011 fedistantlight => 'feDistantLight',
2012 feflood => 'feFlood',
2013 fefunca => 'feFuncA',
2014 fefuncb => 'feFuncB',
2015 fefuncg => 'feFuncG',
2016 fefuncr => 'feFuncR',
2017 fegaussianblur => 'feGaussianBlur',
2018 feimage => 'feImage',
2019 femerge => 'feMerge',
2020 femergenode => 'feMergeNode',
2021 femorphology => 'feMorphology',
2022 feoffset => 'feOffset',
2023 fepointlight => 'fePointLight',
2024 fespecularlighting => 'feSpecularLighting',
2025 fespotlight => 'feSpotLight',
2026 fetile => 'feTile',
2027 feturbulence => 'feTurbulence',
2028 foreignobject => 'foreignObject',
2029 glyphref => 'glyphRef',
2030 lineargradient => 'linearGradient',
2031 radialgradient => 'radialGradient',
2032 #solidcolor => 'solidColor', ## NOTE: Commented in spec (SVG1.2)
2033 textpath => 'textPath',
2034 }->{$tag_name} || $tag_name;
2035 }
2036
2037 ## "adjust SVG attributes" (SVG only) - done in insert-element-f
2038
2039 ## "adjust foreign attributes" - done in insert-element-f
2040
2041 !!!insert-element-f ($nsuri, $tag_name, $token->{attributes}, $token);
2042
2043 if ($self->{self_closing}) {
2044 pop @{$self->{open_elements}};
2045 !!!ack ('t87.3');
2046 } else {
2047 !!!cp ('t87.4');
2048 }
2049
2050 !!!next-token;
2051 next B;
2052 }
2053 } elsif ($token->{type} == END_TAG_TOKEN) {
2054 ## NOTE: "using the rules for secondary insertion mode" then "continue"
2055 if ($token->{tag_name} eq 'script') {
2056 !!!cp ('t87.41');
2057 #
2058 ## XXXscript: Execute script here.
2059 } else {
2060 !!!cp ('t87.5');
2061 #
2062 }
2063 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
2064 !!!cp ('t87.6');
2065 !!!parse-error (type => 'not closed',
2066 text => $self->{open_elements}->[-1]->[0]
2067 ->manakai_local_name,
2068 token => $token);
2069
2070 pop @{$self->{open_elements}}
2071 while $self->{open_elements}->[-1]->[1] & FOREIGN_EL;
2072
2073 ## NOTE: |<span><svg>| ... two parse errors, |<svg>| ... a parse error.
2074
2075 $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
2076 ## Reprocess.
2077 next B;
2078 } else {
2079 die "$0: $token->{type}: Unknown token type";
2080 }
2081 }
2082
2083 if ($self->{insertion_mode} & HEAD_IMS) {
2084 if ($token->{type} == CHARACTER_TOKEN) {
2085 if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
2086 unless ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2087 if ($self->{head_element_inserted}) {
2088 !!!cp ('t88.3');
2089 $self->{open_elements}->[-1]->[0]->append_child
2090 ($self->{document}->create_text_node ($1));
2091 delete $self->{head_element_inserted};
2092 ## NOTE: |</head> <link> |
2093 #
2094 } else {
2095 !!!cp ('t88.2');
2096 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
2097 ## NOTE: |</head> &#x20;|
2098 #
2099 }
2100 } else {
2101 !!!cp ('t88.1');
2102 ## Ignore the token.
2103 #
2104 }
2105 unless (length $token->{data}) {
2106 !!!cp ('t88');
2107 !!!next-token;
2108 next B;
2109 }
2110 ## TODO: set $token->{column} appropriately
2111 }
2112
2113 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2114 !!!cp ('t89');
2115 ## As if <head>
2116 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2117 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2118 push @{$self->{open_elements}},
2119 [$self->{head_element}, $el_category->{head}];
2120
2121 ## Reprocess in the "in head" insertion mode...
2122 pop @{$self->{open_elements}};
2123
2124 ## Reprocess in the "after head" insertion mode...
2125 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2126 !!!cp ('t90');
2127 ## As if </noscript>
2128 pop @{$self->{open_elements}};
2129 !!!parse-error (type => 'in noscript:#text', token => $token);
2130
2131 ## Reprocess in the "in head" insertion mode...
2132 ## As if </head>
2133 pop @{$self->{open_elements}};
2134
2135 ## Reprocess in the "after head" insertion mode...
2136 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2137 !!!cp ('t91');
2138 pop @{$self->{open_elements}};
2139
2140 ## Reprocess in the "after head" insertion mode...
2141 } else {
2142 !!!cp ('t92');
2143 }
2144
2145 ## "after head" insertion mode
2146 ## As if <body>
2147 !!!insert-element ('body',, $token);
2148 $self->{insertion_mode} = IN_BODY_IM;
2149 ## reprocess
2150 next B;
2151 } elsif ($token->{type} == START_TAG_TOKEN) {
2152 if ($token->{tag_name} eq 'head') {
2153 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2154 !!!cp ('t93');
2155 !!!create-element ($self->{head_element}, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
2156 $self->{open_elements}->[-1]->[0]->append_child
2157 ($self->{head_element});
2158 push @{$self->{open_elements}},
2159 [$self->{head_element}, $el_category->{head}];
2160 $self->{insertion_mode} = IN_HEAD_IM;
2161 !!!nack ('t93.1');
2162 !!!next-token;
2163 next B;
2164 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2165 !!!cp ('t93.2');
2166 !!!parse-error (type => 'after head', text => 'head',
2167 token => $token);
2168 ## Ignore the token
2169 !!!nack ('t93.3');
2170 !!!next-token;
2171 next B;
2172 } else {
2173 !!!cp ('t95');
2174 !!!parse-error (type => 'in head:head',
2175 token => $token); # or in head noscript
2176 ## Ignore the token
2177 !!!nack ('t95.1');
2178 !!!next-token;
2179 next B;
2180 }
2181 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2182 !!!cp ('t96');
2183 ## As if <head>
2184 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2185 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2186 push @{$self->{open_elements}},
2187 [$self->{head_element}, $el_category->{head}];
2188
2189 $self->{insertion_mode} = IN_HEAD_IM;
2190 ## Reprocess in the "in head" insertion mode...
2191 } else {
2192 !!!cp ('t97');
2193 }
2194
2195 if ($token->{tag_name} eq 'base') {
2196 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2197 !!!cp ('t98');
2198 ## As if </noscript>
2199 pop @{$self->{open_elements}};
2200 !!!parse-error (type => 'in noscript', text => 'base',
2201 token => $token);
2202
2203 $self->{insertion_mode} = IN_HEAD_IM;
2204 ## Reprocess in the "in head" insertion mode...
2205 } else {
2206 !!!cp ('t99');
2207 }
2208
2209 ## NOTE: There is a "as if in head" code clone.
2210 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
2211 !!!cp ('t100');
2212 !!!parse-error (type => 'after head',
2213 text => $token->{tag_name}, token => $token);
2214 push @{$self->{open_elements}},
2215 [$self->{head_element}, $el_category->{head}];
2216 $self->{head_element_inserted} = 1;
2217 } else {
2218 !!!cp ('t101');
2219 }
2220 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2221 pop @{$self->{open_elements}};
2222 pop @{$self->{open_elements}} # <head>
2223 if $self->{insertion_mode} == AFTER_HEAD_IM;
2224 !!!nack ('t101.1');
2225 !!!next-token;
2226 next B;
2227 } elsif ($token->{tag_name} eq 'link') {
2228 ## NOTE: There is a "as if in head" code clone.
2229 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
2230 !!!cp ('t102');
2231 !!!parse-error (type => 'after head',
2232 text => $token->{tag_name}, token => $token);
2233 push @{$self->{open_elements}},
2234 [$self->{head_element}, $el_category->{head}];
2235 $self->{head_element_inserted} = 1;
2236 } else {
2237 !!!cp ('t103');
2238 }
2239 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2240 pop @{$self->{open_elements}};
2241 pop @{$self->{open_elements}} # <head>
2242 if $self->{insertion_mode} == AFTER_HEAD_IM;
2243 !!!ack ('t103.1');
2244 !!!next-token;
2245 next B;
2246 } elsif ($token->{tag_name} eq 'command') {
2247 if ($self->{insertion_mode} == IN_HEAD_IM) {
2248 ## NOTE: If the insertion mode at the time of the emission
2249 ## of the token was "before head", $self->{insertion_mode}
2250 ## is already changed to |IN_HEAD_IM|.
2251
2252 ## NOTE: There is a "as if in head" code clone.
2253 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2254 pop @{$self->{open_elements}};
2255 pop @{$self->{open_elements}} # <head>
2256 if $self->{insertion_mode} == AFTER_HEAD_IM;
2257 !!!ack ('t103.2');
2258 !!!next-token;
2259 next B;
2260 } else {
2261 ## NOTE: "in head noscript" or "after head" insertion mode
2262 ## - in these cases, the tag is treated the same as
2263 ## a normal in-body tag.
2264 !!!cp ('t103.3');
2265 #
2266 }
2267 } elsif ($token->{tag_name} eq 'meta') {
2268 ## NOTE: There is a "as if in head" code clone.
2269 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
2270 !!!cp ('t104');
2271 !!!parse-error (type => 'after head',
2272 text => $token->{tag_name}, token => $token);
2273 push @{$self->{open_elements}},
2274 [$self->{head_element}, $el_category->{head}];
2275 $self->{head_element_inserted} = 1;
2276 } else {
2277 !!!cp ('t105');
2278 }
2279 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2280 my $meta_el = pop @{$self->{open_elements}};
2281
2282 unless ($self->{confident}) {
2283 if ($token->{attributes}->{charset}) {
2284 !!!cp ('t106');
2285 ## NOTE: Whether the encoding is supported or not is handled
2286 ## in the {change_encoding} callback.
2287 $self->{change_encoding}
2288 ->($self, $token->{attributes}->{charset}->{value},
2289 $token);
2290
2291 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
2292 ->set_user_data (manakai_has_reference =>
2293 $token->{attributes}->{charset}
2294 ->{has_reference});
2295 } elsif ($token->{attributes}->{content}) {
2296 if ($token->{attributes}->{content}->{value}
2297 =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
2298 [\x09\x0A\x0C\x0D\x20]*=
2299 [\x09\x0A\x0C\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
2300 ([^"'\x09\x0A\x0C\x0D\x20]
2301 [^\x09\x0A\x0C\x0D\x20\x3B]*))/x) {
2302 !!!cp ('t107');
2303 ## NOTE: Whether the encoding is supported or not is handled
2304 ## in the {change_encoding} callback.
2305 $self->{change_encoding}
2306 ->($self, defined $1 ? $1 : defined $2 ? $2 : $3,
2307 $token);
2308 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
2309 ->set_user_data (manakai_has_reference =>
2310 $token->{attributes}->{content}
2311 ->{has_reference});
2312 } else {
2313 !!!cp ('t108');
2314 }
2315 }
2316 } else {
2317 if ($token->{attributes}->{charset}) {
2318 !!!cp ('t109');
2319 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
2320 ->set_user_data (manakai_has_reference =>
2321 $token->{attributes}->{charset}
2322 ->{has_reference});
2323 }
2324 if ($token->{attributes}->{content}) {
2325 !!!cp ('t110');
2326 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
2327 ->set_user_data (manakai_has_reference =>
2328 $token->{attributes}->{content}
2329 ->{has_reference});
2330 }
2331 }
2332
2333 pop @{$self->{open_elements}} # <head>
2334 if $self->{insertion_mode} == AFTER_HEAD_IM;
2335 !!!ack ('t110.1');
2336 !!!next-token;
2337 next B;
2338 } elsif ($token->{tag_name} eq 'title') {
2339 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2340 !!!cp ('t111');
2341 ## As if </noscript>
2342 pop @{$self->{open_elements}};
2343 !!!parse-error (type => 'in noscript', text => 'title',
2344 token => $token);
2345
2346 $self->{insertion_mode} = IN_HEAD_IM;
2347 ## Reprocess in the "in head" insertion mode...
2348 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2349 !!!cp ('t112');
2350 !!!parse-error (type => 'after head',
2351 text => $token->{tag_name}, token => $token);
2352 push @{$self->{open_elements}},
2353 [$self->{head_element}, $el_category->{head}];
2354 $self->{head_element_inserted} = 1;
2355 } else {
2356 !!!cp ('t113');
2357 }
2358
2359 ## NOTE: There is a "as if in head" code clone.
2360 $parse_rcdata->(RCDATA_CONTENT_MODEL);
2361
2362 ## NOTE: At this point the stack of open elements contains
2363 ## the |head| element (index == -2) and the |title| element
2364 ## (index == -1). In the "after head" insertion mode the
2365 ## |head| element is inserted only for the purpose of
2366 ## providing the context for the |title| element, and
2367 ## therefore we can, and have to, remove it from the stack
2368 ## now.
2369 splice @{$self->{open_elements}}, -2, 1, () # <head>
2370 if ($self->{insertion_mode} & IM_MASK) == AFTER_HEAD_IM;
2371 next B;
2372 } elsif ($token->{tag_name} eq 'style' or
2373 $token->{tag_name} eq 'noframes') {
2374 ## NOTE: Or (scripting is enabled and tag_name eq 'noscript' and
2375 ## insertion mode IN_HEAD_IM)
2376 ## NOTE: There is a "as if in head" code clone.
2377 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
2378 !!!cp ('t114');
2379 !!!parse-error (type => 'after head',
2380 text => $token->{tag_name}, token => $token);
2381 push @{$self->{open_elements}},
2382 [$self->{head_element}, $el_category->{head}];
2383 $self->{head_element_inserted} = 1;
2384 } else {
2385 !!!cp ('t115');
2386 }
2387 $parse_rcdata->(CDATA_CONTENT_MODEL);
2388 ## ISSUE: A spec bug [Bug 6038]
2389 splice @{$self->{open_elements}}, -2, 1, () # <head>
2390 if ($self->{insertion_mode} & IM_MASK) == AFTER_HEAD_IM;
2391 next B;
2392 } elsif ($token->{tag_name} eq 'noscript') {
2393 if ($self->{insertion_mode} == IN_HEAD_IM) {
2394 !!!cp ('t116');
2395 ## NOTE: and scripting is disabled
2396 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2397 $self->{insertion_mode} = IN_HEAD_NOSCRIPT_IM;
2398 !!!nack ('t116.1');
2399 !!!next-token;
2400 next B;
2401 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2402 !!!cp ('t117');
2403 !!!parse-error (type => 'in noscript', text => 'noscript',
2404 token => $token);
2405 ## Ignore the token
2406 !!!nack ('t117.1');
2407 !!!next-token;
2408 next B;
2409 } else {
2410 !!!cp ('t118');
2411 #
2412 }
2413 } elsif ($token->{tag_name} eq 'script') {
2414 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2415 !!!cp ('t119');
2416 ## As if </noscript>
2417 pop @{$self->{open_elements}};
2418 !!!parse-error (type => 'in noscript', text => 'script',
2419 token => $token);
2420
2421 $self->{insertion_mode} = IN_HEAD_IM;
2422 ## Reprocess in the "in head" insertion mode...
2423 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2424 !!!cp ('t120');
2425 !!!parse-error (type => 'after head',
2426 text => $token->{tag_name}, token => $token);
2427 push @{$self->{open_elements}},
2428 [$self->{head_element}, $el_category->{head}];
2429 $self->{head_element_inserted} = 1;
2430 } else {
2431 !!!cp ('t121');
2432 }
2433
2434 ## NOTE: There is a "as if in head" code clone.
2435 $script_start_tag->();
2436 ## ISSUE: A spec bug [Bug 6038]
2437 splice @{$self->{open_elements}}, -2, 1 # <head>
2438 if ($self->{insertion_mode} & IM_MASK) == AFTER_HEAD_IM;
2439 next B;
2440 } elsif ($token->{tag_name} eq 'body' or
2441 $token->{tag_name} eq 'frameset') {
2442 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2443 !!!cp ('t122');
2444 ## As if </noscript>
2445 pop @{$self->{open_elements}};
2446 !!!parse-error (type => 'in noscript',
2447 text => $token->{tag_name}, token => $token);
2448
2449 ## Reprocess in the "in head" insertion mode...
2450 ## As if </head>
2451 pop @{$self->{open_elements}};
2452
2453 ## Reprocess in the "after head" insertion mode...
2454 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2455 !!!cp ('t124');
2456 pop @{$self->{open_elements}};
2457
2458 ## Reprocess in the "after head" insertion mode...
2459 } else {
2460 !!!cp ('t125');
2461 }
2462
2463 ## "after head" insertion mode
2464 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2465 if ($token->{tag_name} eq 'body') {
2466 !!!cp ('t126');
2467 $self->{insertion_mode} = IN_BODY_IM;
2468 } elsif ($token->{tag_name} eq 'frameset') {
2469 !!!cp ('t127');
2470 $self->{insertion_mode} = IN_FRAMESET_IM;
2471 } else {
2472 die "$0: tag name: $self->{tag_name}";
2473 }
2474 !!!nack ('t127.1');
2475 !!!next-token;
2476 next B;
2477 } else {
2478 !!!cp ('t128');
2479 #
2480 }
2481
2482 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2483 !!!cp ('t129');
2484 ## As if </noscript>
2485 pop @{$self->{open_elements}};
2486 !!!parse-error (type => 'in noscript:/',
2487 text => $token->{tag_name}, token => $token);
2488
2489 ## Reprocess in the "in head" insertion mode...
2490 ## As if </head>
2491 pop @{$self->{open_elements}};
2492
2493 ## Reprocess in the "after head" insertion mode...
2494 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2495 !!!cp ('t130');
2496 ## As if </head>
2497 pop @{$self->{open_elements}};
2498
2499 ## Reprocess in the "after head" insertion mode...
2500 } else {
2501 !!!cp ('t131');
2502 }
2503
2504 ## "after head" insertion mode
2505 ## As if <body>
2506 !!!insert-element ('body',, $token);
2507 $self->{insertion_mode} = IN_BODY_IM;
2508 ## reprocess
2509 !!!ack-later;
2510 next B;
2511 } elsif ($token->{type} == END_TAG_TOKEN) {
2512 if ($token->{tag_name} eq 'head') {
2513 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2514 !!!cp ('t132');
2515 ## As if <head>
2516 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2517 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2518 push @{$self->{open_elements}},
2519 [$self->{head_element}, $el_category->{head}];
2520
2521 ## Reprocess in the "in head" insertion mode...
2522 pop @{$self->{open_elements}};
2523 $self->{insertion_mode} = AFTER_HEAD_IM;
2524 !!!next-token;
2525 next B;
2526 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2527 !!!cp ('t133');
2528 ## As if </noscript>
2529 pop @{$self->{open_elements}};
2530 !!!parse-error (type => 'in noscript:/',
2531 text => 'head', token => $token);
2532
2533 ## Reprocess in the "in head" insertion mode...
2534 pop @{$self->{open_elements}};
2535 $self->{insertion_mode} = AFTER_HEAD_IM;
2536 !!!next-token;
2537 next B;
2538 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2539 !!!cp ('t134');
2540 pop @{$self->{open_elements}};
2541 $self->{insertion_mode} = AFTER_HEAD_IM;
2542 !!!next-token;
2543 next B;
2544 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2545 !!!cp ('t134.1');
2546 !!!parse-error (type => 'unmatched end tag', text => 'head',
2547 token => $token);
2548 ## Ignore the token
2549 !!!next-token;
2550 next B;
2551 } else {
2552 die "$0: $self->{insertion_mode}: Unknown insertion mode";
2553 }
2554 } elsif ($token->{tag_name} eq 'noscript') {
2555 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2556 !!!cp ('t136');
2557 pop @{$self->{open_elements}};
2558 $self->{insertion_mode} = IN_HEAD_IM;
2559 !!!next-token;
2560 next B;
2561 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM or
2562 $self->{insertion_mode} == AFTER_HEAD_IM) {
2563 !!!cp ('t137');
2564 !!!parse-error (type => 'unmatched end tag',
2565 text => 'noscript', token => $token);
2566 ## Ignore the token ## ISSUE: An issue in the spec.
2567 !!!next-token;
2568 next B;
2569 } else {
2570 !!!cp ('t138');
2571 #
2572 }
2573 } elsif ({
2574 body => 1, html => 1,
2575 }->{$token->{tag_name}}) {
2576 ## TODO: This branch is entirely redundant.
2577 if ($self->{insertion_mode} == BEFORE_HEAD_IM or
2578 $self->{insertion_mode} == IN_HEAD_IM or
2579 $self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2580 !!!cp ('t140');
2581 !!!parse-error (type => 'unmatched end tag',
2582 text => $token->{tag_name}, token => $token);
2583 ## Ignore the token
2584 !!!next-token;
2585 next B;
2586 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2587 !!!cp ('t140.1');
2588 !!!parse-error (type => 'unmatched end tag',
2589 text => $token->{tag_name}, token => $token);
2590 ## Ignore the token
2591 !!!next-token;
2592 next B;
2593 } else {
2594 die "$0: $self->{insertion_mode}: Unknown insertion mode";
2595 }
2596 } elsif ($token->{tag_name} eq 'p') {
2597 !!!cp ('t142');
2598 !!!parse-error (type => 'unmatched end tag',
2599 text => $token->{tag_name}, token => $token);
2600 ## Ignore the token
2601 !!!next-token;
2602 next B;
2603 } elsif ($token->{tag_name} eq 'br') {
2604 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2605 !!!cp ('t142.2');
2606 ## (before head) as if <head>, (in head) as if </head>
2607 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2608 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2609 $self->{insertion_mode} = AFTER_HEAD_IM;
2610
2611 ## Reprocess in the "after head" insertion mode...
2612 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2613 !!!cp ('t143.2');
2614 ## As if </head>
2615 pop @{$self->{open_elements}};
2616 $self->{insertion_mode} = AFTER_HEAD_IM;
2617
2618 ## Reprocess in the "after head" insertion mode...
2619 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2620 !!!cp ('t143.3');
2621 ## NOTE: Two parse errors for <head><noscript></br>
2622 !!!parse-error (type => 'unmatched end tag',
2623 text => 'br', token => $token);
2624 ## As if </noscript>
2625 pop @{$self->{open_elements}};
2626 $self->{insertion_mode} = IN_HEAD_IM;
2627
2628 ## Reprocess in the "in head" insertion mode...
2629 ## As if </head>
2630 pop @{$self->{open_elements}};
2631 $self->{insertion_mode} = AFTER_HEAD_IM;
2632
2633 ## Reprocess in the "after head" insertion mode...
2634 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2635 !!!cp ('t143.4');
2636 #
2637 } else {
2638 die "$0: $self->{insertion_mode}: Unknown insertion mode";
2639 }
2640
2641 #
2642 } else { ## Other end tags
2643 !!!cp ('t145');
2644 !!!parse-error (type => 'unmatched end tag',
2645 text => $token->{tag_name}, token => $token);
2646 ## Ignore the token
2647 !!!next-token;
2648 next B;
2649 }
2650
2651 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2652 !!!cp ('t146');
2653 ## As if </noscript>
2654 pop @{$self->{open_elements}};
2655 !!!parse-error (type => 'in noscript:/',
2656 text => $token->{tag_name}, token => $token);
2657
2658 ## Reprocess in the "in head" insertion mode...
2659 ## As if </head>
2660 pop @{$self->{open_elements}};
2661
2662 ## Reprocess in the "after head" insertion mode...
2663 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2664 !!!cp ('t147');
2665 ## As if </head>
2666 pop @{$self->{open_elements}};
2667
2668 ## Reprocess in the "after head" insertion mode...
2669 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2670 ## ISSUE: This case cannot be reached?
2671 !!!cp ('t148');
2672 !!!parse-error (type => 'unmatched end tag',
2673 text => $token->{tag_name}, token => $token);
2674 ## Ignore the token ## ISSUE: An issue in the spec.
2675 !!!next-token;
2676 next B;
2677 } else {
2678 !!!cp ('t149');
2679 }
2680
2681 ## "after head" insertion mode
2682 ## As if <body>
2683 !!!insert-element ('body',, $token);
2684 $self->{insertion_mode} = IN_BODY_IM;
2685 ## reprocess
2686 next B;
2687 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
2688 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2689 !!!cp ('t149.1');
2690
2691 ## NOTE: As if <head>
2692 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2693 $self->{open_elements}->[-1]->[0]->append_child
2694 ($self->{head_element});
2695 #push @{$self->{open_elements}},
2696 # [$self->{head_element}, $el_category->{head}];
2697 #$self->{insertion_mode} = IN_HEAD_IM;
2698 ## NOTE: Reprocess.
2699
2700 ## NOTE: As if </head>
2701 #pop @{$self->{open_elements}};
2702 #$self->{insertion_mode} = AFTER_HEAD_IM;
2703 ## NOTE: Reprocess.
2704
2705 #
2706 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2707 !!!cp ('t149.2');
2708
2709 ## NOTE: As if </head>
2710 pop @{$self->{open_elements}};
2711 #$self->{insertion_mode} = AFTER_HEAD_IM;
2712 ## NOTE: Reprocess.
2713
2714 #
2715 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2716 !!!cp ('t149.3');
2717
2718 !!!parse-error (type => 'in noscript:#eof', token => $token);
2719
2720 ## As if </noscript>
2721 pop @{$self->{open_elements}};
2722 #$self->{insertion_mode} = IN_HEAD_IM;
2723 ## NOTE: Reprocess.
2724
2725 ## NOTE: As if </head>
2726 pop @{$self->{open_elements}};
2727 #$self->{insertion_mode} = AFTER_HEAD_IM;
2728 ## NOTE: Reprocess.
2729
2730 #
2731 } else {
2732 !!!cp ('t149.4');
2733 #
2734 }
2735
2736 ## NOTE: As if <body>
2737 !!!insert-element ('body',, $token);
2738 $self->{insertion_mode} = IN_BODY_IM;
2739 ## NOTE: Reprocess.
2740 next B;
2741 } else {
2742 die "$0: $token->{type}: Unknown token type";
2743 }
2744 } elsif ($self->{insertion_mode} & BODY_IMS) {
2745 if ($token->{type} == CHARACTER_TOKEN) {
2746 !!!cp ('t150');
2747 ## NOTE: There is a code clone of "character in body".
2748 $reconstruct_active_formatting_elements->($insert_to_current);
2749
2750 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
2751
2752 !!!next-token;
2753 next B;
2754 } elsif ($token->{type} == START_TAG_TOKEN) {
2755 if ({
2756 caption => 1, col => 1, colgroup => 1, tbody => 1,
2757 td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
2758 }->{$token->{tag_name}}) {
2759 if (($self->{insertion_mode} & IM_MASK) == IN_CELL_IM) {
2760 ## have an element in table scope
2761 for (reverse 0..$#{$self->{open_elements}}) {
2762 my $node = $self->{open_elements}->[$_];
2763 if ($node->[1] == TABLE_CELL_EL) {
2764 !!!cp ('t151');
2765
2766 ## Close the cell
2767 !!!back-token; # <x>
2768 $token = {type => END_TAG_TOKEN,
2769 tag_name => $node->[0]->manakai_local_name,
2770 line => $token->{line},
2771 column => $token->{column}};
2772 next B;
2773 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2774 !!!cp ('t152');
2775 ## ISSUE: This case can never be reached, maybe.
2776 last;
2777 }
2778 }
2779
2780 !!!cp ('t153');
2781 !!!parse-error (type => 'start tag not allowed',
2782 text => $token->{tag_name}, token => $token);
2783 ## Ignore the token
2784 !!!nack ('t153.1');
2785 !!!next-token;
2786 next B;
2787 } elsif (($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
2788 !!!parse-error (type => 'not closed', text => 'caption',
2789 token => $token);
2790
2791 ## NOTE: As if </caption>.
2792 ## have a |caption| element in table scope
2793 my $i;
2794 INSCOPE: {
2795 for (reverse 0..$#{$self->{open_elements}}) {
2796 my $node = $self->{open_elements}->[$_];
2797 if ($node->[1] == CAPTION_EL) {
2798 !!!cp ('t155');
2799 $i = $_;
2800 last INSCOPE;
2801 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2802 !!!cp ('t156');
2803 last;
2804 }
2805 }
2806
2807 !!!cp ('t157');
2808 !!!parse-error (type => 'start tag not allowed',
2809 text => $token->{tag_name}, token => $token);
2810 ## Ignore the token
2811 !!!nack ('t157.1');
2812 !!!next-token;
2813 next B;
2814 } # INSCOPE
2815
2816 ## generate implied end tags
2817 while ($self->{open_elements}->[-1]->[1]
2818 & END_TAG_OPTIONAL_EL) {
2819 !!!cp ('t158');
2820 pop @{$self->{open_elements}};
2821 }
2822
2823 unless ($self->{open_elements}->[-1]->[1] == CAPTION_EL) {
2824 !!!cp ('t159');
2825 !!!parse-error (type => 'not closed',
2826 text => $self->{open_elements}->[-1]->[0]
2827 ->manakai_local_name,
2828 token => $token);
2829 } else {
2830 !!!cp ('t160');
2831 }
2832
2833 splice @{$self->{open_elements}}, $i;
2834
2835 $clear_up_to_marker->();
2836
2837 $self->{insertion_mode} = IN_TABLE_IM;
2838
2839 ## reprocess
2840 !!!ack-later;
2841 next B;
2842 } else {
2843 !!!cp ('t161');
2844 #
2845 }
2846 } else {
2847 !!!cp ('t162');
2848 #
2849 }
2850 } elsif ($token->{type} == END_TAG_TOKEN) {
2851 if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
2852 if (($self->{insertion_mode} & IM_MASK) == IN_CELL_IM) {
2853 ## have an element in table scope
2854 my $i;
2855 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2856 my $node = $self->{open_elements}->[$_];
2857 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
2858 !!!cp ('t163');
2859 $i = $_;
2860 last INSCOPE;
2861 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2862 !!!cp ('t164');
2863 last INSCOPE;
2864 }
2865 } # INSCOPE
2866 unless (defined $i) {
2867 !!!cp ('t165');
2868 !!!parse-error (type => 'unmatched end tag',
2869 text => $token->{tag_name},
2870 token => $token);
2871 ## Ignore the token
2872 !!!next-token;
2873 next B;
2874 }
2875
2876 ## generate implied end tags
2877 while ($self->{open_elements}->[-1]->[1]
2878 & END_TAG_OPTIONAL_EL) {
2879 !!!cp ('t166');
2880 pop @{$self->{open_elements}};
2881 }
2882
2883 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
2884 ne $token->{tag_name}) {
2885 !!!cp ('t167');
2886 !!!parse-error (type => 'not closed',
2887 text => $self->{open_elements}->[-1]->[0]
2888 ->manakai_local_name,
2889 token => $token);
2890 } else {
2891 !!!cp ('t168');
2892 }
2893
2894 splice @{$self->{open_elements}}, $i;
2895
2896 $clear_up_to_marker->();
2897
2898 $self->{insertion_mode} = IN_ROW_IM;
2899
2900 !!!next-token;
2901 next B;
2902 } elsif (($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
2903 !!!cp ('t169');
2904 !!!parse-error (type => 'unmatched end tag',
2905 text => $token->{tag_name}, token => $token);
2906 ## Ignore the token
2907 !!!next-token;
2908 next B;
2909 } else {
2910 !!!cp ('t170');
2911 #
2912 }
2913 } elsif ($token->{tag_name} eq 'caption') {
2914 if (($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
2915 ## have a |caption| element in table scope
2916 my $i;
2917 INSCOPE: {
2918 for (reverse 0..$#{$self->{open_elements}}) {
2919 my $node = $self->{open_elements}->[$_];
2920 if ($node->[1] == CAPTION_EL) {
2921 !!!cp ('t171');
2922 $i = $_;
2923 last INSCOPE;
2924 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2925 !!!cp ('t172');
2926 last;
2927 }
2928 }
2929
2930 !!!cp ('t173');
2931 !!!parse-error (type => 'unmatched end tag',
2932 text => $token->{tag_name}, token => $token);
2933 ## Ignore the token
2934 !!!next-token;
2935 next B;
2936 } # INSCOPE
2937
2938 ## generate implied end tags
2939 while ($self->{open_elements}->[-1]->[1]
2940 & END_TAG_OPTIONAL_EL) {
2941 !!!cp ('t174');
2942 pop @{$self->{open_elements}};
2943 }
2944
2945 unless ($self->{open_elements}->[-1]->[1] == CAPTION_EL) {
2946 !!!cp ('t175');
2947 !!!parse-error (type => 'not closed',
2948 text => $self->{open_elements}->[-1]->[0]
2949 ->manakai_local_name,
2950 token => $token);
2951 } else {
2952 !!!cp ('t176');
2953 }
2954
2955 splice @{$self->{open_elements}}, $i;
2956
2957 $clear_up_to_marker->();
2958
2959 $self->{insertion_mode} = IN_TABLE_IM;
2960
2961 !!!next-token;
2962 next B;
2963 } elsif (($self->{insertion_mode} & IM_MASK) == IN_CELL_IM) {
2964 !!!cp ('t177');
2965 !!!parse-error (type => 'unmatched end tag',
2966 text => $token->{tag_name}, token => $token);
2967 ## Ignore the token
2968 !!!next-token;
2969 next B;
2970 } else {
2971 !!!cp ('t178');
2972 #
2973 }
2974 } elsif ({
2975 table => 1, tbody => 1, tfoot => 1,
2976 thead => 1, tr => 1,
2977 }->{$token->{tag_name}} and
2978 ($self->{insertion_mode} & IM_MASK) == IN_CELL_IM) {
2979 ## have an element in table scope
2980 my $i;
2981 my $tn;
2982 INSCOPE: {
2983 for (reverse 0..$#{$self->{open_elements}}) {
2984 my $node = $self->{open_elements}->[$_];
2985 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
2986 !!!cp ('t179');
2987 $i = $_;
2988
2989 ## Close the cell
2990 !!!back-token; # </x>
2991 $token = {type => END_TAG_TOKEN, tag_name => $tn,
2992 line => $token->{line},
2993 column => $token->{column}};
2994 next B;
2995 } elsif ($node->[1] == TABLE_CELL_EL) {
2996 !!!cp ('t180');
2997 $tn = $node->[0]->manakai_local_name;
2998 ## NOTE: There is exactly one |td| or |th| element
2999 ## in scope in the stack of open elements by definition.
3000 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3001 ## ISSUE: Can this be reached?
3002 !!!cp ('t181');
3003 last;
3004 }
3005 }
3006
3007 !!!cp ('t182');
3008 !!!parse-error (type => 'unmatched end tag',
3009 text => $token->{tag_name}, token => $token);
3010 ## Ignore the token
3011 !!!next-token;
3012 next B;
3013 } # INSCOPE
3014 } elsif ($token->{tag_name} eq 'table' and
3015 ($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
3016 !!!parse-error (type => 'not closed', text => 'caption',
3017 token => $token);
3018
3019 ## As if </caption>
3020 ## have a caption element in table scope
3021 my $i;
3022 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3023 my $node = $self->{open_elements}->[$_];
3024 if ($node->[1] == CAPTION_EL) {
3025 !!!cp ('t184');
3026 $i = $_;
3027 last INSCOPE;
3028 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3029 !!!cp ('t185');
3030 last INSCOPE;
3031 }
3032 } # INSCOPE
3033 unless (defined $i) {
3034 !!!cp ('t186');
3035 ## TODO: Wrong error type?
3036 !!!parse-error (type => 'unmatched end tag',
3037 text => 'caption', token => $token);
3038 ## Ignore the token
3039 !!!next-token;
3040 next B;
3041 }
3042
3043 ## generate implied end tags
3044 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
3045 !!!cp ('t187');
3046 pop @{$self->{open_elements}};
3047 }
3048
3049 unless ($self->{open_elements}->[-1]->[1] == CAPTION_EL) {
3050 !!!cp ('t188');
3051 !!!parse-error (type => 'not closed',
3052 text => $self->{open_elements}->[-1]->[0]
3053 ->manakai_local_name,
3054 token => $token);
3055 } else {
3056 !!!cp ('t189');
3057 }
3058
3059 splice @{$self->{open_elements}}, $i;
3060
3061 $clear_up_to_marker->();
3062
3063 $self->{insertion_mode} = IN_TABLE_IM;
3064
3065 ## reprocess
3066 next B;
3067 } elsif ({
3068 body => 1, col => 1, colgroup => 1, html => 1,
3069 }->{$token->{tag_name}}) {
3070 if ($self->{insertion_mode} & BODY_TABLE_IMS) {
3071 !!!cp ('t190');
3072 !!!parse-error (type => 'unmatched end tag',
3073 text => $token->{tag_name}, token => $token);
3074 ## Ignore the token
3075 !!!next-token;
3076 next B;
3077 } else {
3078 !!!cp ('t191');
3079 #
3080 }
3081 } elsif ({
3082 tbody => 1, tfoot => 1,
3083 thead => 1, tr => 1,
3084 }->{$token->{tag_name}} and
3085 ($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
3086 !!!cp ('t192');
3087 !!!parse-error (type => 'unmatched end tag',
3088 text => $token->{tag_name}, token => $token);
3089 ## Ignore the token
3090 !!!next-token;
3091 next B;
3092 } else {
3093 !!!cp ('t193');
3094 #
3095 }
3096 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
3097 for my $entry (@{$self->{open_elements}}) {
3098 unless ($entry->[1] & ALL_END_TAG_OPTIONAL_EL) {
3099 !!!cp ('t75');
3100 !!!parse-error (type => 'in body:#eof', token => $token);
3101 last;
3102 }
3103 }
3104
3105 ## Stop parsing.
3106 last B;
3107 } else {
3108 die "$0: $token->{type}: Unknown token type";
3109 }
3110
3111 $insert = $insert_to_current;
3112 #
3113 } elsif ($self->{insertion_mode} & TABLE_IMS) {
3114 if ($token->{type} == START_TAG_TOKEN) {
3115 if ({
3116 tr => (($self->{insertion_mode} & IM_MASK) != IN_ROW_IM),
3117 th => 1, td => 1,
3118 }->{$token->{tag_name}}) {
3119 if (($self->{insertion_mode} & IM_MASK) == IN_TABLE_IM) {
3120 ## Clear back to table context
3121 while (not ($self->{open_elements}->[-1]->[1]
3122 & TABLE_SCOPING_EL)) {
3123 !!!cp ('t201');
3124 pop @{$self->{open_elements}};
3125 }
3126
3127 !!!insert-element ('tbody',, $token);
3128 $self->{insertion_mode} = IN_TABLE_BODY_IM;
3129 ## reprocess in the "in table body" insertion mode...
3130 }
3131
3132 if (($self->{insertion_mode} & IM_MASK) == IN_TABLE_BODY_IM) {
3133 unless ($token->{tag_name} eq 'tr') {
3134 !!!cp ('t202');
3135 !!!parse-error (type => 'missing start tag:tr', token => $token);
3136 }
3137
3138 ## Clear back to table body context
3139 while (not ($self->{open_elements}->[-1]->[1]
3140 & TABLE_ROWS_SCOPING_EL)) {
3141 !!!cp ('t203');
3142 ## ISSUE: Can this case be reached?
3143 pop @{$self->{open_elements}};
3144 }
3145
3146 $self->{insertion_mode} = IN_ROW_IM;
3147 if ($token->{tag_name} eq 'tr') {
3148 !!!cp ('t204');
3149 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3150 $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3151 !!!nack ('t204');
3152 !!!next-token;
3153 next B;
3154 } else {
3155 !!!cp ('t205');
3156 !!!insert-element ('tr',, $token);
3157 ## reprocess in the "in row" insertion mode
3158 }
3159 } else {
3160 !!!cp ('t206');
3161 }
3162
3163 ## Clear back to table row context
3164 while (not ($self->{open_elements}->[-1]->[1]
3165 & TABLE_ROW_SCOPING_EL)) {
3166 !!!cp ('t207');
3167 pop @{$self->{open_elements}};
3168 }
3169
3170 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3171 $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3172 $self->{insertion_mode} = IN_CELL_IM;
3173
3174 push @$active_formatting_elements, ['#marker', ''];
3175
3176 !!!nack ('t207.1');
3177 !!!next-token;
3178 next B;
3179 } elsif ({
3180 caption => 1, col => 1, colgroup => 1,
3181 tbody => 1, tfoot => 1, thead => 1,
3182 tr => 1, # $self->{insertion_mode} == IN_ROW_IM
3183 }->{$token->{tag_name}}) {
3184 if (($self->{insertion_mode} & IM_MASK) == IN_ROW_IM) {
3185 ## As if </tr>
3186 ## have an element in table scope
3187 my $i;
3188 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3189 my $node = $self->{open_elements}->[$_];
3190 if ($node->[1] == TABLE_ROW_EL) {
3191 !!!cp ('t208');
3192 $i = $_;
3193 last INSCOPE;
3194 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3195 !!!cp ('t209');
3196 last INSCOPE;
3197 }
3198 } # INSCOPE
3199 unless (defined $i) {
3200 !!!cp ('t210');
3201 ## No |tr| found in table scope for the implied </tr>.  TODO: This type is wrong.
3202 !!!parse-error (type => 'unmatched end tag',
3203 text => $token->{tag_name}, token => $token);
3204 ## Ignore the token
3205 !!!nack ('t210.1');
3206 !!!next-token;
3207 next B;
3208 }
3209
3210 ## Clear back to table row context
3211 while (not ($self->{open_elements}->[-1]->[1]
3212 & TABLE_ROW_SCOPING_EL)) {
3213 !!!cp ('t211');
3214 ## ISSUE: Can this case be reached?
3215 pop @{$self->{open_elements}};
3216 }
3217
3218 pop @{$self->{open_elements}}; # tr
3219 $self->{insertion_mode} = IN_TABLE_BODY_IM;
3220 if ($token->{tag_name} eq 'tr') {
3221 !!!cp ('t212');
3222 ## reprocess
3223 !!!ack-later;
3224 next B;
3225 } else {
3226 !!!cp ('t213');
3227 ## reprocess in the "in table body" insertion mode...
3228 }
3229 }
3230
3231 if (($self->{insertion_mode} & IM_MASK) == IN_TABLE_BODY_IM) {
3232 ## have an element in table scope
3233 my $i;
3234 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3235 my $node = $self->{open_elements}->[$_];
3236 if ($node->[1] == TABLE_ROW_GROUP_EL) {
3237 !!!cp ('t214');
3238 $i = $_;
3239 last INSCOPE;
3240 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3241 !!!cp ('t215');
3242 last INSCOPE;
3243 }
3244 } # INSCOPE
3245 unless (defined $i) {
3246 !!!cp ('t216');
3247 ## TODO: This error type is wrong.
3248 !!!parse-error (type => 'unmatched end tag',
3249 text => $token->{tag_name}, token => $token);
3250 ## Ignore the token
3251 !!!nack ('t216.1');
3252 !!!next-token;
3253 next B;
3254 }
3255
3256 ## Clear back to table body context
3257 while (not ($self->{open_elements}->[-1]->[1]
3258 & TABLE_ROWS_SCOPING_EL)) {
3259 !!!cp ('t217');
3260 ## ISSUE: Can this state be reached?
3261 pop @{$self->{open_elements}};
3262 }
3263
3264 ## As if </{current node}>
3265 ## have an element in table scope
3266 ## true by definition
3267
3268 ## Clear back to table body context
3269 ## nop by definition
3270
3271 pop @{$self->{open_elements}};
3272 $self->{insertion_mode} = IN_TABLE_IM;
3273 ## reprocess in "in table" insertion mode...
3274 } else {
3275 !!!cp ('t218');
3276 }
3277
3278 if ($token->{tag_name} eq 'col') {
3279 ## Clear back to table context
3280 while (not ($self->{open_elements}->[-1]->[1]
3281 & TABLE_SCOPING_EL)) {
3282 !!!cp ('t219');
3283 ## ISSUE: Can this state be reached?
3284 pop @{$self->{open_elements}};
3285 }
3286
3287 !!!insert-element ('colgroup',, $token);
3288 $self->{insertion_mode} = IN_COLUMN_GROUP_IM;
3289 ## reprocess
3290 $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3291 !!!ack-later;
3292 next B;
3293 } elsif ({
3294 caption => 1,
3295 colgroup => 1,
3296 tbody => 1, tfoot => 1, thead => 1,
3297 }->{$token->{tag_name}}) {
3298 ## Clear back to table context
3299 while (not ($self->{open_elements}->[-1]->[1]
3300 & TABLE_SCOPING_EL)) {
3301 !!!cp ('t220');
3302 ## ISSUE: Can this state be reached?
3303 pop @{$self->{open_elements}};
3304 }
3305
3306 push @$active_formatting_elements, ['#marker', '']
3307 if $token->{tag_name} eq 'caption';
3308
3309 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3310 $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3311 $self->{insertion_mode} = {
3312 caption => IN_CAPTION_IM,
3313 colgroup => IN_COLUMN_GROUP_IM,
3314 tbody => IN_TABLE_BODY_IM,
3315 tfoot => IN_TABLE_BODY_IM,
3316 thead => IN_TABLE_BODY_IM,
3317 }->{$token->{tag_name}};
3318 !!!next-token;
3319 !!!nack ('t220.1');
3320 next B;
3321 } else {
3322 die "$0: in table: <>: $token->{tag_name}";
3323 }
3324 } elsif ($token->{tag_name} eq 'table') {
3325 !!!parse-error (type => 'not closed',
3326 text => $self->{open_elements}->[-1]->[0]
3327 ->manakai_local_name,
3328 token => $token);
3329
3330 ## As if </table>
3331 ## have a table element in table scope
3332 my $i;
3333 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3334 my $node = $self->{open_elements}->[$_];
3335 if ($node->[1] == TABLE_EL) {
3336 !!!cp ('t221');
3337 $i = $_;
3338 last INSCOPE;
3339 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3340 !!!cp ('t222');
3341 last INSCOPE;
3342 }
3343 } # INSCOPE
3344 unless (defined $i) {
3345 !!!cp ('t223');
3346 ## TODO: The following is wrong, maybe.
3347 !!!parse-error (type => 'unmatched end tag', text => 'table',
3348 token => $token);
3349 ## Ignore tokens </table><table>
3350 !!!nack ('t223.1');
3351 !!!next-token;
3352 next B;
3353 }
3354
3355 ## TODO: The following steps have been removed from the latest spec.
3356 ## generate implied end tags
3357 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
3358 !!!cp ('t224');
3359 pop @{$self->{open_elements}};
3360 }
3361
3362 unless ($self->{open_elements}->[-1]->[1] == TABLE_EL) {
3363 !!!cp ('t225');
3364 ## NOTE: |<table><tr><table>|
3365 !!!parse-error (type => 'not closed',
3366 text => $self->{open_elements}->[-1]->[0]
3367 ->manakai_local_name,
3368 token => $token);
3369 } else {
3370 !!!cp ('t226');
3371 }
3372
3373 splice @{$self->{open_elements}}, $i;
3374 pop @{$open_tables};
3375
3376 $self->_reset_insertion_mode;
3377
3378 ## reprocess
3379 !!!ack-later;
3380 next B;
3381 } elsif ($token->{tag_name} eq 'style') {
3382 if (not $open_tables->[-1]->[1]) { # tainted
3383 !!!cp ('t227.8');
3384 ## NOTE: This is a "as if in head" code clone.
3385 $parse_rcdata->(CDATA_CONTENT_MODEL);
3386 $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3387 next B;
3388 } else {
3389 !!!cp ('t227.7');
3390 #
3391 }
3392 } elsif ($token->{tag_name} eq 'script') {
3393 if (not $open_tables->[-1]->[1]) { # tainted
3394 !!!cp ('t227.6');
3395 ## NOTE: This is a "as if in head" code clone.
3396 $script_start_tag->();
3397 $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3398 next B;
3399 } else {
3400 !!!cp ('t227.5');
3401 #
3402 }
3403 } elsif ($token->{tag_name} eq 'input') {
3404 if (not $open_tables->[-1]->[1]) { # tainted
3405 if ($token->{attributes}->{type}) { ## TODO: case
3406 my $type = lc $token->{attributes}->{type}->{value};
3407 if ($type eq 'hidden') {
3408 !!!cp ('t227.3');
3409 !!!parse-error (type => 'in table',
3410 text => $token->{tag_name}, token => $token);
3411
3412 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3413 $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3414
3415 ## TODO: form element pointer
3416
3417 pop @{$self->{open_elements}};
3418
3419 !!!next-token;
3420 !!!ack ('t227.2.1');
3421 next B;
3422 } else {
3423 !!!cp ('t227.2');
3424 #
3425 }
3426 } else {
3427 !!!cp ('t227.1');
3428 #
3429 }
3430 } else {
3431 !!!cp ('t227.4');
3432 #
3433 }
3434 } else {
3435 !!!cp ('t227');
3436 #
3437 }
3438
3439 !!!parse-error (type => 'in table', text => $token->{tag_name},
3440 token => $token);
3441
3442 $insert = $insert_to_foster;
3443 #
3444 } elsif ($token->{type} == END_TAG_TOKEN) {
3445 if ($token->{tag_name} eq 'tr' and
3446 ($self->{insertion_mode} & IM_MASK) == IN_ROW_IM) {
3447 ## have an element in table scope
3448 my $i;
3449 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3450 my $node = $self->{open_elements}->[$_];
3451 if ($node->[1] == TABLE_ROW_EL) {
3452 !!!cp ('t228');
3453 $i = $_;
3454 last INSCOPE;
3455 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3456 !!!cp ('t229');
3457 last INSCOPE;
3458 }
3459 } # INSCOPE
3460 unless (defined $i) {
3461 !!!cp ('t230');
3462 !!!parse-error (type => 'unmatched end tag',
3463 text => $token->{tag_name}, token => $token);
3464 ## Ignore the token
3465 !!!nack ('t230.1');
3466 !!!next-token;
3467 next B;
3468 } else {
3469 !!!cp ('t232');
3470 }
3471
3472 ## Clear back to table row context
3473 while (not ($self->{open_elements}->[-1]->[1]
3474 & TABLE_ROW_SCOPING_EL)) {
3475 !!!cp ('t231');
3476 ## ISSUE: Can this state be reached?
3477 pop @{$self->{open_elements}};
3478 }
3479
3480 pop @{$self->{open_elements}}; # tr
3481 $self->{insertion_mode} = IN_TABLE_BODY_IM;
3482 !!!next-token;
3483 !!!nack ('t231.1');
3484 next B;
3485 } elsif ($token->{tag_name} eq 'table') {
3486 if (($self->{insertion_mode} & IM_MASK) == IN_ROW_IM) {
3487 ## As if </tr>
3488 ## have an element in table scope
3489 my $i;
3490 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3491 my $node = $self->{open_elements}->[$_];
3492 if ($node->[1] == TABLE_ROW_EL) {
3493 !!!cp ('t233');
3494 $i = $_;
3495 last INSCOPE;
3496 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3497 !!!cp ('t234');
3498 last INSCOPE;
3499 }
3500 } # INSCOPE
3501 unless (defined $i) {
3502 !!!cp ('t235');
3503 ## No |tr| found in table scope for the implied </tr>.  TODO: The following is wrong.
3504 !!!parse-error (type => 'unmatched end tag',
3505 text => $token->{tag_name}, token => $token);
3506 ## Ignore the token
3507 !!!nack ('t236.1');
3508 !!!next-token;
3509 next B;
3510 }
3511
3512 ## Clear back to table row context
3513 while (not ($self->{open_elements}->[-1]->[1]
3514 & TABLE_ROW_SCOPING_EL)) {
3515 !!!cp ('t236');
3516 ## ISSUE: Can this state be reached?
3517 pop @{$self->{open_elements}};
3518 }
3519
3520 pop @{$self->{open_elements}}; # tr
3521 $self->{insertion_mode} = IN_TABLE_BODY_IM;
3522 ## reprocess in the "in table body" insertion mode...
3523 }
3524
3525 if (($self->{insertion_mode} & IM_MASK) == IN_TABLE_BODY_IM) {
3526 ## have an element in table scope
3527 my $i;
3528 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3529 my $node = $self->{open_elements}->[$_];
3530 if ($node->[1] == TABLE_ROW_GROUP_EL) {
3531 !!!cp ('t237');
3532 $i = $_;
3533 last INSCOPE;
3534 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3535 !!!cp ('t238');
3536 last INSCOPE;
3537 }
3538 } # INSCOPE
3539 unless (defined $i) {
3540 !!!cp ('t239');
3541 !!!parse-error (type => 'unmatched end tag',
3542 text => $token->{tag_name}, token => $token);
3543 ## Ignore the token
3544 !!!nack ('t239.1');
3545 !!!next-token;
3546 next B;
3547 }
3548
3549 ## Clear back to table body context
3550 while (not ($self->{open_elements}->[-1]->[1]
3551 & TABLE_ROWS_SCOPING_EL)) {
3552 !!!cp ('t240');
3553 pop @{$self->{open_elements}};
3554 }
3555
3556 ## As if </{current node}>
3557 ## have an element in table scope
3558 ## true by definition
3559
3560 ## Clear back to table body context
3561 ## nop by definition
3562
3563 pop @{$self->{open_elements}};
3564 $self->{insertion_mode} = IN_TABLE_IM;
3565 ## reprocess in the "in table" insertion mode...
3566 }
3567
3568 ## NOTE: </table> in the "in table" insertion mode.
3569 ## When you edit the code fragment below, please ensure that
3570 ## the code for <table> in the "in table" insertion mode
3571 ## is synced with it.
3572
3573 ## have a table element in table scope
3574 my $i;
3575 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3576 my $node = $self->{open_elements}->[$_];
3577 if ($node->[1] == TABLE_EL) {
3578 !!!cp ('t241');
3579 $i = $_;
3580 last INSCOPE;
3581 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3582 !!!cp ('t242');
3583 last INSCOPE;
3584 }
3585 } # INSCOPE
3586 unless (defined $i) {
3587 !!!cp ('t243');
3588 !!!parse-error (type => 'unmatched end tag',
3589 text => $token->{tag_name}, token => $token);
3590 ## Ignore the token
3591 !!!nack ('t243.1');
3592 !!!next-token;
3593 next B;
3594 }
3595
3596 splice @{$self->{open_elements}}, $i;
3597 pop @{$open_tables};
3598
3599 $self->_reset_insertion_mode;
3600
3601 !!!next-token;
3602 next B;
3603 } elsif ({
3604 tbody => 1, tfoot => 1, thead => 1,
3605 }->{$token->{tag_name}} and
3606 $self->{insertion_mode} & ROW_IMS) {
3607 if (($self->{insertion_mode} & IM_MASK) == IN_ROW_IM) {
3608 ## have an element in table scope
3609 my $i;
3610 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3611 my $node = $self->{open_elements}->[$_];
3612 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
3613 !!!cp ('t247');
3614 $i = $_;
3615 last INSCOPE;
3616 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3617 !!!cp ('t248');
3618 last INSCOPE;
3619 }
3620 } # INSCOPE
3621 unless (defined $i) {
3622 !!!cp ('t249');
3623 !!!parse-error (type => 'unmatched end tag',
3624 text => $token->{tag_name}, token => $token);
3625 ## Ignore the token
3626 !!!nack ('t249.1');
3627 !!!next-token;
3628 next B;
3629 }
3630
3631 ## As if </tr>
3632 ## have an element in table scope (search for the |tr| element)
3633 undef $i; # reset the $i declared earlier in this block instead of redeclaring it
3634 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3635 my $node = $self->{open_elements}->[$_];
3636 if ($node->[1] == TABLE_ROW_EL) {
3637 !!!cp ('t250');
3638 $i = $_;
3639 last INSCOPE;
3640 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3641 !!!cp ('t251');
3642 last INSCOPE;
3643 }
3644 } # INSCOPE
3645 unless (defined $i) {
3646 !!!cp ('t252');
3647 !!!parse-error (type => 'unmatched end tag',
3648 text => 'tr', token => $token);
3649 ## Ignore the token
3650 !!!nack ('t252.1');
3651 !!!next-token;
3652 next B;
3653 }
3654
3655 ## Clear back to table row context
3656 while (not ($self->{open_elements}->[-1]->[1]
3657 & TABLE_ROW_SCOPING_EL)) {
3658 !!!cp ('t253');
3659 ## ISSUE: Can this case be reached?
3660 pop @{$self->{open_elements}};
3661 }
3662
3663 pop @{$self->{open_elements}}; # tr
3664 $self->{insertion_mode} = IN_TABLE_BODY_IM;
3665 ## reprocess in the "in table body" insertion mode...
3666 }
3667
3668 ## have an element in table scope
3669 my $i;
3670 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3671 my $node = $self->{open_elements}->[$_];
3672 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
3673 !!!cp ('t254');
3674 $i = $_;
3675 last INSCOPE;
3676 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3677 !!!cp ('t255');
3678 last INSCOPE;
3679 }
3680 } # INSCOPE
3681 unless (defined $i) {
3682 !!!cp ('t256');
3683 !!!parse-error (type => 'unmatched end tag',
3684 text => $token->{tag_name}, token => $token);
3685 ## Ignore the token
3686 !!!nack ('t256.1');
3687 !!!next-token;
3688 next B;
3689 }
3690
3691 ## Clear back to table body context
3692 while (not ($self->{open_elements}->[-1]->[1]
3693 & TABLE_ROWS_SCOPING_EL)) {
3694 !!!cp ('t257');
3695 ## ISSUE: Can this case be reached?
3696 pop @{$self->{open_elements}};
3697 }
3698
3699 pop @{$self->{open_elements}};
3700 $self->{insertion_mode} = IN_TABLE_IM;
3701 !!!nack ('t257.1');
3702 !!!next-token;
3703 next B;
3704 } elsif ({
3705 body => 1, caption => 1, col => 1, colgroup => 1,
3706 html => 1, td => 1, th => 1,
3707 tr => 1, # $self->{insertion_mode} == IN_ROW_IM
3708 tbody => 1, tfoot => 1, thead => 1, # $self->{insertion_mode} == IN_TABLE_IM
3709 }->{$token->{tag_name}}) {
3710 !!!cp ('t258');
3711 !!!parse-error (type => 'unmatched end tag',
3712 text => $token->{tag_name}, token => $token);
3713 ## Ignore the token
3714 !!!nack ('t258.1');
3715 !!!next-token;
3716 next B;
3717 } else {
3718 !!!cp ('t259');
3719 !!!parse-error (type => 'in table:/',
3720 text => $token->{tag_name}, token => $token);
3721
3722 $insert = $insert_to_foster;
3723 #
3724 }
3725 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
3726 unless ($self->{open_elements}->[-1]->[1] == HTML_EL and
3727 @{$self->{open_elements}} == 1) { # redundant, maybe
3728 !!!parse-error (type => 'in body:#eof', token => $token);
3729 !!!cp ('t259.1');
3730 #
3731 } else {
3732 !!!cp ('t259.2');
3733 #
3734 }
3735
3736 ## Stop parsing
3737 last B;
3738 } else {
3739 die "$0: $token->{type}: Unknown token type";
3740 }
3741 } elsif (($self->{insertion_mode} & IM_MASK) == IN_COLUMN_GROUP_IM) {
3742 if ($token->{type} == CHARACTER_TOKEN) {
3743 if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
3744 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3745 unless (length $token->{data}) {
3746 !!!cp ('t260');
3747 !!!next-token;
3748 next B;
3749 }
3750 }
3751
3752 !!!cp ('t261');
3753 #
3754 } elsif ($token->{type} == START_TAG_TOKEN) {
3755 if ($token->{tag_name} eq 'col') {
3756 !!!cp ('t262');
3757 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3758 pop @{$self->{open_elements}};
3759 !!!ack ('t262.1');
3760 !!!next-token;
3761 next B;
3762 } else {
3763 !!!cp ('t263');
3764 #
3765 }
3766 } elsif ($token->{type} == END_TAG_TOKEN) {
3767 if ($token->{tag_name} eq 'colgroup') {
3768 if ($self->{open_elements}->[-1]->[1] == HTML_EL) {
3769 !!!cp ('t264');
3770 !!!parse-error (type => 'unmatched end tag',
3771 text => 'colgroup', token => $token);
3772 ## Ignore the token
3773 !!!next-token;
3774 next B;
3775 } else {
3776 !!!cp ('t265');
3777 pop @{$self->{open_elements}}; # colgroup
3778 $self->{insertion_mode} = IN_TABLE_IM;
3779 !!!next-token;
3780 next B;
3781 }
3782 } elsif ($token->{tag_name} eq 'col') {
3783 !!!cp ('t266');
3784 !!!parse-error (type => 'unmatched end tag',
3785 text => 'col', token => $token);
3786 ## Ignore the token
3787 !!!next-token;
3788 next B;
3789 } else {
3790 !!!cp ('t267');
3791 #
3792 }
3793 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
3794 if ($self->{open_elements}->[-1]->[1] == HTML_EL and
3795 @{$self->{open_elements}} == 1) { # redundant, maybe
3796 !!!cp ('t270.2');
3797 ## Stop parsing.
3798 last B;
3799 } else {
3800 ## NOTE: As if </colgroup>.
3801 !!!cp ('t270.1');
3802 pop @{$self->{open_elements}}; # colgroup
3803 $self->{insertion_mode} = IN_TABLE_IM;
3804 ## Reprocess.
3805 next B;
3806 }
3807 } else {
3808 die "$0: $token->{type}: Unknown token type";
3809 }
3810
3811 ## As if </colgroup>
3812 if ($self->{open_elements}->[-1]->[1] == HTML_EL) {
3813 !!!cp ('t269');
3814 ## TODO: Wrong error type?
3815 !!!parse-error (type => 'unmatched end tag',
3816 text => 'colgroup', token => $token);
3817 ## Ignore the token
3818 !!!nack ('t269.1');
3819 !!!next-token;
3820 next B;
3821 } else {
3822 !!!cp ('t270');
3823 pop @{$self->{open_elements}}; # colgroup
3824 $self->{insertion_mode} = IN_TABLE_IM;
3825 !!!ack-later;
3826 ## reprocess
3827 next B;
3828 }
3829 } elsif ($self->{insertion_mode} & SELECT_IMS) {
3830 if ($token->{type} == CHARACTER_TOKEN) {
3831 !!!cp ('t271');
3832 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3833 !!!next-token;
3834 next B;
3835 } elsif ($token->{type} == START_TAG_TOKEN) {
3836 if ($token->{tag_name} eq 'option') {
3837 if ($self->{open_elements}->[-1]->[1] == OPTION_EL) {
3838 !!!cp ('t272');
3839 ## As if </option>
3840 pop @{$self->{open_elements}};
3841 } else {
3842 !!!cp ('t273');
3843 }
3844
3845 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3846 !!!nack ('t273.1');
3847 !!!next-token;
3848 next B;
3849 } elsif ($token->{tag_name} eq 'optgroup') {
3850 if ($self->{open_elements}->[-1]->[1] == OPTION_EL) {
3851 !!!cp ('t274');
3852 ## As if </option>
3853 pop @{$self->{open_elements}};
3854 } else {
3855 !!!cp ('t275');
3856 }
3857
3858 if ($self->{open_elements}->[-1]->[1] == OPTGROUP_EL) {
3859 !!!cp ('t276');
3860 ## As if </optgroup>
3861 pop @{$self->{open_elements}};
3862 } else {
3863 !!!cp ('t277');
3864 }
3865
3866 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3867 !!!nack ('t277.1');
3868 !!!next-token;
3869 next B;
3870 } elsif ({
3871 select => 1, input => 1, textarea => 1, keygen => 1,
3872 }->{$token->{tag_name}} or
3873 (($self->{insertion_mode} & IM_MASK)
3874 == IN_SELECT_IN_TABLE_IM and
3875 {
3876 caption => 1, table => 1,
3877 tbody => 1, tfoot => 1, thead => 1,
3878 tr => 1, td => 1, th => 1,
3879 }->{$token->{tag_name}})) {
3880
3881 ## 1. Parse error.
3882 if ($token->{tag_name} eq 'select') {
3883 !!!parse-error (type => 'select in select', ## XXX: documentation
3884 token => $token);
3885 } else {
3886 !!!parse-error (type => 'not closed', text => 'select',
3887 token => $token);
3888 }
3889
3890 ## 2./<select>-1. Unless "have an element in table scope" (select):
3891 my $i;
3892 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3893 my $node = $self->{open_elements}->[$_];
3894 if ($node->[1] == SELECT_EL) {
3895 !!!cp ('t278');
3896 $i = $_;
3897 last INSCOPE;
3898 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3899 !!!cp ('t279');
3900 last INSCOPE;
3901 }
3902 } # INSCOPE
3903 unless (defined $i) {
3904 !!!cp ('t280');
3905 if ($token->{tag_name} eq 'select') {
3906 ## NOTE: This error would be raised when
3907 ## |select.innerHTML = '<select>'| is executed; in this
3908 ## case two errors, "select in select" and "unmatched
3909 ## end tags" are reported to the user, the latter might
3910 ## be confusing but this is what the spec requires.
3911 !!!parse-error (type => 'unmatched end tag',
3912 text => 'select',
3913 token => $token);
3914 }
3915 ## Ignore the token.
3916 !!!nack ('t280.1');
3917 !!!next-token;
3918 next B;
3919 }
3920
3921 ## 3. Otherwise, as if there were </select>:
3922
3923 !!!cp ('t281');
3924 splice @{$self->{open_elements}}, $i;
3925
3926 $self->_reset_insertion_mode;
3927
3928 if ($token->{tag_name} eq 'select') {
3929 !!!nack ('t281.2');
3930 !!!next-token;
3931 next B;
3932 } else {
3933 !!!cp ('t281.1');
3934 !!!ack-later;
3935 ## Reprocess the token.
3936 next B;
3937 }
3938 } elsif ($token->{tag_name} eq 'script') {
3939 !!!cp ('t281.3');
3940 ## NOTE: This is an "as if in head" code clone
3941 $script_start_tag->();
3942 next B;
3943 } else {
3944 !!!cp ('t282');
3945 !!!parse-error (type => 'in select',
3946 text => $token->{tag_name}, token => $token);
3947 ## Ignore the token
3948 !!!nack ('t282.1');
3949 !!!next-token;
3950 next B;
3951 }
3952 } elsif ($token->{type} == END_TAG_TOKEN) {
3953 if ($token->{tag_name} eq 'optgroup') {
3954 if ($self->{open_elements}->[-1]->[1] == OPTION_EL and
3955 $self->{open_elements}->[-2]->[1] == OPTGROUP_EL) {
3956 !!!cp ('t283');
3957 ## As if </option>
3958 splice @{$self->{open_elements}}, -2;
3959 } elsif ($self->{open_elements}->[-1]->[1] == OPTGROUP_EL) {
3960 !!!cp ('t284');
3961 pop @{$self->{open_elements}};
3962 } else {
3963 !!!cp ('t285');
3964 !!!parse-error (type => 'unmatched end tag',
3965 text => $token->{tag_name}, token => $token);
3966 ## Ignore the token
3967 }
3968 !!!nack ('t285.1');
3969 !!!next-token;
3970 next B;
3971 } elsif ($token->{tag_name} eq 'option') {
3972 if ($self->{open_elements}->[-1]->[1] == OPTION_EL) {
3973 !!!cp ('t286');
3974 pop @{$self->{open_elements}};
3975 } else {
3976 !!!cp ('t287');
3977 !!!parse-error (type => 'unmatched end tag',
3978 text => $token->{tag_name}, token => $token);
3979 ## Ignore the token
3980 }
3981 !!!nack ('t287.1');
3982 !!!next-token;
3983 next B;
3984 } elsif ($token->{tag_name} eq 'select') {
3985 ## have an element in table scope
3986 my $i;
3987 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3988 my $node = $self->{open_elements}->[$_];
3989 if ($node->[1] == SELECT_EL) {
3990 !!!cp ('t288');
3991 $i = $_;
3992 last INSCOPE;
3993 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3994 !!!cp ('t289');
3995 last INSCOPE;
3996 }
3997 } # INSCOPE
3998 unless (defined $i) {
3999 !!!cp ('t290');
4000 !!!parse-error (type => 'unmatched end tag',
4001 text => $token->{tag_name}, token => $token);
4002 ## Ignore the token
4003 !!!nack ('t290.1');
4004 !!!next-token;
4005 next B;
4006 }
4007
4008 !!!cp ('t291');
4009 splice @{$self->{open_elements}}, $i;
4010
4011 $self->_reset_insertion_mode;
4012
4013 !!!nack ('t291.1');
4014 !!!next-token;
4015 next B;
4016 } elsif (($self->{insertion_mode} & IM_MASK)
4017 == IN_SELECT_IN_TABLE_IM and
4018 {
4019 caption => 1, table => 1, tbody => 1,
4020 tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
4021 }->{$token->{tag_name}}) {
4022 ## TODO: The following is wrong?
4023 !!!parse-error (type => 'unmatched end tag',
4024 text => $token->{tag_name}, token => $token);
4025
4026 ## have an element in table scope
4027 my $i;
4028 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4029 my $node = $self->{open_elements}->[$_];
4030 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
4031 !!!cp ('t292');
4032 $i = $_;
4033 last INSCOPE;
4034 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4035 !!!cp ('t293');
4036 last INSCOPE;
4037 }
4038 } # INSCOPE
4039 unless (defined $i) {
4040 !!!cp ('t294');
4041 ## Ignore the token
4042 !!!nack ('t294.1');
4043 !!!next-token;
4044 next B;
4045 }
4046
4047 ## As if </select>
4048 ## have an element in table scope
4049 undef $i;
4050 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4051 my $node = $self->{open_elements}->[$_];
4052 if ($node->[1] == SELECT_EL) {
4053 !!!cp ('t295');
4054 $i = $_;
4055 last INSCOPE;
4056 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4057 ## ISSUE: Can this state be reached?
4058 !!!cp ('t296');
4059 last INSCOPE;
4060 }
4061 } # INSCOPE
4062 unless (defined $i) {
4063 !!!cp ('t297');
4064 ## TODO: Is the following error type correct?
4065 !!!parse-error (type => 'unmatched end tag',
4066 text => 'select', token => $token);
4067 ## Ignore the </select> token
4068 !!!nack ('t297.1');
4069 !!!next-token; ## TODO: ok?
4070 next B;
4071 }
4072
4073 !!!cp ('t298');
4074 splice @{$self->{open_elements}}, $i;
4075
4076 $self->_reset_insertion_mode;
4077
4078 !!!ack-later;
4079 ## reprocess
4080 next B;
4081 } else {
4082 !!!cp ('t299');
4083 !!!parse-error (type => 'in select:/',
4084 text => $token->{tag_name}, token => $token);
4085 ## Ignore the token
4086 !!!nack ('t299.3');
4087 !!!next-token;
4088 next B;
4089 }
4090 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4091 unless ($self->{open_elements}->[-1]->[1] == HTML_EL and
4092 @{$self->{open_elements}} == 1) { # redundant, maybe
4093 !!!cp ('t299.1');
4094 !!!parse-error (type => 'in body:#eof', token => $token);
4095 } else {
4096 !!!cp ('t299.2');
4097 }
4098
4099 ## Stop parsing.
4100 last B;
4101 } else {
4102 die "$0: $token->{type}: Unknown token type";
4103 }
4104 } elsif ($self->{insertion_mode} & BODY_AFTER_IMS) {
4105 if ($token->{type} == CHARACTER_TOKEN) {
4106 if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
4107 my $data = $1; # keep the stripped leading whitespace; do not rely on $1 below
4108 ## As if in body
4109 $reconstruct_active_formatting_elements->($insert_to_current);
4110
4111 $self->{open_elements}->[-1]->[0]->manakai_append_text ($data);
4112
4113 unless (length $token->{data}) { # token was whitespace only
4114 !!!cp ('t300');
4115 !!!next-token;
4116 next B;
4117 }
4118 }
4119
4120 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
4121 !!!cp ('t301');
4122 !!!parse-error (type => 'after html:#text', token => $token);
4123 #
4124 } else {
4125 !!!cp ('t302');
4126 ## "after body" insertion mode
4127 !!!parse-error (type => 'after body:#text', token => $token);
4128 #
4129 }
4130
4131 $self->{insertion_mode} = IN_BODY_IM;
4132 ## reprocess
4133 next B;
4134 } elsif ($token->{type} == START_TAG_TOKEN) {
4135 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
4136 !!!cp ('t303');
4137 !!!parse-error (type => 'after html',
4138 text => $token->{tag_name}, token => $token);
4139 #
4140 } else {
4141 !!!cp ('t304');
4142 ## "after body" insertion mode
4143 !!!parse-error (type => 'after body',
4144 text => $token->{tag_name}, token => $token);
4145 #
4146 }
4147
4148 $self->{insertion_mode} = IN_BODY_IM;
4149 !!!ack-later;
4150 ## reprocess
4151 next B;
4152 } elsif ($token->{type} == END_TAG_TOKEN) {
4153 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
4154 !!!cp ('t305');
4155 !!!parse-error (type => 'after html:/',
4156 text => $token->{tag_name}, token => $token);
4157
4158 $self->{insertion_mode} = IN_BODY_IM;
4159 ## Reprocess.
4160 next B;
4161 } else {
4162 !!!cp ('t306');
4163 }
4164
4165 ## "after body" insertion mode
4166 if ($token->{tag_name} eq 'html') {
4167 if (defined $self->{inner_html_node}) {
4168 !!!cp ('t307');
4169 !!!parse-error (type => 'unmatched end tag',
4170 text => 'html', token => $token);
4171 ## Ignore the token
4172 !!!next-token;
4173 next B;
4174 } else {
4175 !!!cp ('t308');
4176 $self->{insertion_mode} = AFTER_HTML_BODY_IM;
4177 !!!next-token;
4178 next B;
4179 }
4180 } else {
4181 !!!cp ('t309');
4182 !!!parse-error (type => 'after body:/',
4183 text => $token->{tag_name}, token => $token);
4184
4185 $self->{insertion_mode} = IN_BODY_IM;
4186 ## reprocess
4187 next B;
4188 }
4189 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4190 !!!cp ('t309.2');
4191 ## Stop parsing
4192 last B;
4193 } else {
4194 die "$0: $token->{type}: Unknown token type";
4195 }
4196 } elsif ($self->{insertion_mode} & FRAME_IMS) {
4197 if ($token->{type} == CHARACTER_TOKEN) {
4198 if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
4199 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4200
4201 unless (length $token->{data}) {
4202 !!!cp ('t310');
4203 !!!next-token;
4204 next B;
4205 }
4206 }
4207
4208 if ($token->{data} =~ s/^[^\x09\x0A\x0C\x20]+//) {
4209 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
4210 !!!cp ('t311');
4211 !!!parse-error (type => 'in frameset:#text', token => $token);
4212 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
4213 !!!cp ('t312');
4214 !!!parse-error (type => 'after frameset:#text', token => $token);
4215 } else { # "after after frameset"
4216 !!!cp ('t313');
4217 !!!parse-error (type => 'after html:#text', token => $token);
4218 }
4219
4220 ## Ignore the token.
4221 if (length $token->{data}) {
4222 !!!cp ('t314');
4223 ## reprocess the rest of characters
4224 } else {
4225 !!!cp ('t315');
4226 !!!next-token;
4227 }
4228 next B;
4229 }
4230
4231 die qq[$0: Character "$token->{data}"];
4232 } elsif ($token->{type} == START_TAG_TOKEN) {
4233 if ($token->{tag_name} eq 'frameset' and
4234 $self->{insertion_mode} == IN_FRAMESET_IM) {
4235 !!!cp ('t318');
4236 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4237 !!!nack ('t318.1');
4238 !!!next-token;
4239 next B;
4240 } elsif ($token->{tag_name} eq 'frame' and
4241 $self->{insertion_mode} == IN_FRAMESET_IM) {
4242 !!!cp ('t319');
4243 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4244 pop @{$self->{open_elements}};
4245 !!!ack ('t319.1');
4246 !!!next-token;
4247 next B;
4248 } elsif ($token->{tag_name} eq 'noframes') {
4249 !!!cp ('t320');
4250 ## NOTE: As if in head.
4251 $parse_rcdata->(CDATA_CONTENT_MODEL);
4252 next B;
4253
4254 ## NOTE: |<!DOCTYPE HTML><frameset></frameset></html><noframes></noframes>|
4255 ## has no parse error.
4256 } else {
4257 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
4258 !!!cp ('t321');
4259 !!!parse-error (type => 'in frameset',
4260 text => $token->{tag_name}, token => $token);
4261 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
4262 !!!cp ('t322');
4263 !!!parse-error (type => 'after frameset',
4264 text => $token->{tag_name}, token => $token);
4265 } else { # "after after frameset"
4266 !!!cp ('t322.2');
4267 !!!parse-error (type => 'after after frameset',
4268 text => $token->{tag_name}, token => $token);
4269 }
4270 ## Ignore the token
4271 !!!nack ('t322.1');
4272 !!!next-token;
4273 next B;
4274 }
4275 } elsif ($token->{type} == END_TAG_TOKEN) {
4276 if ($token->{tag_name} eq 'frameset' and
4277 $self->{insertion_mode} == IN_FRAMESET_IM) {
4278 if ($self->{open_elements}->[-1]->[1] == HTML_EL and
4279 @{$self->{open_elements}} == 1) {
4280 !!!cp ('t325');
4281 !!!parse-error (type => 'unmatched end tag',
4282 text => $token->{tag_name}, token => $token);
4283 ## Ignore the token
4284 !!!next-token;
4285 } else {
4286 !!!cp ('t326');
4287 pop @{$self->{open_elements}};
4288 !!!next-token;
4289 }
4290
4291 if (not defined $self->{inner_html_node} and
4292 not ($self->{open_elements}->[-1]->[1] == FRAMESET_EL)) {
4293 !!!cp ('t327');
4294 $self->{insertion_mode} = AFTER_FRAMESET_IM;
4295 } else {
4296 !!!cp ('t328');
4297 }
4298 next B;
4299 } elsif ($token->{tag_name} eq 'html' and
4300 $self->{insertion_mode} == AFTER_FRAMESET_IM) {
4301 !!!cp ('t329');
4302 $self->{insertion_mode} = AFTER_HTML_FRAMESET_IM;
4303 !!!next-token;
4304 next B;
4305 } else {
4306 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
4307 !!!cp ('t330');
4308 !!!parse-error (type => 'in frameset:/',
4309 text => $token->{tag_name}, token => $token);
4310 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
4311 !!!cp ('t330.1');
4312 !!!parse-error (type => 'after frameset:/',
4313 text => $token->{tag_name}, token => $token);
4314 } else { # "after after frameset"
4315 !!!cp ('t331');
4316 !!!parse-error (type => 'after after frameset:/',
4317 text => $token->{tag_name}, token => $token);
4318 }
4319 ## Ignore the token
4320 !!!next-token;
4321 next B;
4322 }
4323 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4324 unless ($self->{open_elements}->[-1]->[1] == HTML_EL and
4325 @{$self->{open_elements}} == 1) { # redundant, maybe
4326 !!!cp ('t331.1');
4327 !!!parse-error (type => 'in body:#eof', token => $token);
4328 } else {
4329 !!!cp ('t331.2');
4330 }
4331
4332 ## Stop parsing
4333 last B;
4334 } else {
4335 die "$0: $token->{type}: Unknown token type";
4336 }
4337 } else {
4338 die "$0: $self->{insertion_mode}: Unknown insertion mode";
4339 }
4340
4341 ## "in body" insertion mode
4342 if ($token->{type} == START_TAG_TOKEN) {
4343 if ($token->{tag_name} eq 'script') {
4344 !!!cp ('t332');
4345 ## NOTE: This is an "as if in head" code clone
4346 $script_start_tag->();
4347 next B;
4348 } elsif ($token->{tag_name} eq 'style') {
4349 !!!cp ('t333');
4350 ## NOTE: This is an "as if in head" code clone
4351 $parse_rcdata->(CDATA_CONTENT_MODEL);
4352 next B;
4353 } elsif ({
4354 base => 1, command => 1, link => 1,
4355 }->{$token->{tag_name}}) {
4356 !!!cp ('t334');
4357 ## NOTE: This is an "as if in head" code clone, only "-t" differs
4358 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4359 pop @{$self->{open_elements}};
4360 !!!ack ('t334.1');
4361 !!!next-token;
4362 next B;
4363 } elsif ($token->{tag_name} eq 'meta') {
4364 ## NOTE: This is an "as if in head" code clone, only "-t" differs
4365 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4366 my $meta_el = pop @{$self->{open_elements}};
4367
4368 unless ($self->{confident}) {
4369 if ($token->{attributes}->{charset}) {
4370 !!!cp ('t335');
4371 ## NOTE: Whether the encoding is supported or not is handled
4372 ## in the {change_encoding} callback.
4373 $self->{change_encoding}
4374 ->($self, $token->{attributes}->{charset}->{value}, $token);
4375
4376 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
4377 ->set_user_data (manakai_has_reference =>
4378 $token->{attributes}->{charset}
4379 ->{has_reference});
4380 } elsif ($token->{attributes}->{content}) {
4381 if ($token->{attributes}->{content}->{value}
4382 =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
4383 [\x09\x0A\x0C\x0D\x20]*=
4384 [\x09\x0A\x0C\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
4385 ([^"'\x09\x0A\x0C\x0D\x20][^\x09\x0A\x0C\x0D\x20\x3B]*))
4386 /x) {
4387 !!!cp ('t336');
4388 ## NOTE: Whether the encoding is supported or not is handled
4389 ## in the {change_encoding} callback.
4390 $self->{change_encoding}
4391 ->($self, defined $1 ? $1 : defined $2 ? $2 : $3, $token);
4392 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
4393 ->set_user_data (manakai_has_reference =>
4394 $token->{attributes}->{content}
4395 ->{has_reference});
4396 }
4397 }
4398 } else {
4399 if ($token->{attributes}->{charset}) {
4400 !!!cp ('t337');
4401 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
4402 ->set_user_data (manakai_has_reference =>
4403 $token->{attributes}->{charset}
4404 ->{has_reference});
4405 }
4406 if ($token->{attributes}->{content}) {
4407 !!!cp ('t338');
4408 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
4409 ->set_user_data (manakai_has_reference =>
4410 $token->{attributes}->{content}
4411 ->{has_reference});
4412 }
4413 }
4414
4415 !!!ack ('t338.1');
4416 !!!next-token;
4417 next B;
4418 } elsif ($token->{tag_name} eq 'title') {
4419 !!!cp ('t341');
4420 ## NOTE: This is an "as if in head" code clone
4421 $parse_rcdata->(RCDATA_CONTENT_MODEL);
4422 next B;
4423 } elsif ($token->{tag_name} eq 'body') {
4424 !!!parse-error (type => 'in body', text => 'body', token => $token);
4425
4426 if (@{$self->{open_elements}} == 1 or
4427 not ($self->{open_elements}->[1]->[1] == BODY_EL)) {
4428 !!!cp ('t342');
4429 ## Ignore the token
4430 } else {
4431 my $body_el = $self->{open_elements}->[1]->[0];
4432 for my $attr_name (keys %{$token->{attributes}}) {
4433 unless ($body_el->has_attribute_ns (undef, $attr_name)) {
4434 !!!cp ('t343');
4435 $body_el->set_attribute_ns
4436 (undef, [undef, $attr_name],
4437 $token->{attributes}->{$attr_name}->{value});
4438 }
4439 }
4440 }
4441 !!!nack ('t343.1');
4442 !!!next-token;
4443 next B;
4444 } elsif ({
4445 ## NOTE: Start tags for non-phrasing flow content elements
4446
4447 ## NOTE: The normal one
4448 address => 1, article => 1, aside => 1, blockquote => 1,
4449 center => 1, datagrid => 1, details => 1, dialog => 1,
4450 dir => 1, div => 1, dl => 1, fieldset => 1, figure => 1,
4451 footer => 1, h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1,
4452 h6 => 1, header => 1, menu => 1, nav => 1, ol => 1, p => 1,
4453 section => 1, ul => 1,
4454 ## NOTE: As normal, but drops leading newline
4455 pre => 1, listing => 1,
4456 ## NOTE: As normal, but interacts with the form element pointer
4457 form => 1,
4458
4459 table => 1,
4460 hr => 1,
4461 }->{$token->{tag_name}}) {
4462
4463 ## 1. When there is an opening |form| element:
4464 if ($token->{tag_name} eq 'form' and defined $self->{form_element}) {
4465 !!!cp ('t350');
4466 !!!parse-error (type => 'in form:form', token => $token);
4467 ## Ignore the token
4468 !!!nack ('t350.1');
4469 !!!next-token;
4470 next B;
4471 }
4472
4473 ## 2. Close the |p| element, if any.
4474 if ($token->{tag_name} ne 'table' or # The Hixie Quirk
4475 $self->{document}->manakai_compat_mode ne 'quirks') {
4476 ## has a p element in scope
4477 INSCOPE: for (reverse @{$self->{open_elements}}) {
4478 if ($_->[1] == P_EL) {
4479 !!!cp ('t344');
4480 !!!back-token; # <form>
4481 $token = {type => END_TAG_TOKEN, tag_name => 'p',
4482 line => $token->{line}, column => $token->{column}};
4483 next B;
4484 } elsif ($_->[1] & SCOPING_EL) {
4485 !!!cp ('t345');
4486 last INSCOPE;
4487 }
4488 } # INSCOPE
4489 }
4490
4491 ## 3. Close the opening <hn> element, if any.
4492 if ({h1 => 1, h2 => 1, h3 => 1,
4493 h4 => 1, h5 => 1, h6 => 1}->{$token->{tag_name}}) {
4494 if ($self->{open_elements}->[-1]->[1] == HEADING_EL) {
4495 !!!parse-error (type => 'not closed',
4496 text => $self->{open_elements}->[-1]->[0]->manakai_local_name,
4497 token => $token);
4498 pop @{$self->{open_elements}};
4499 }
4500 }
4501
4502 ## 4. Insertion.
4503 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4504 if ($token->{tag_name} eq 'pre' or $token->{tag_name} eq 'listing') {
4505 !!!nack ('t346.1');
4506 !!!next-token;
4507 if ($token->{type} == CHARACTER_TOKEN) {
4508 $token->{data} =~ s/^\x0A//;
4509 unless (length $token->{data}) {
4510 !!!cp ('t346');
4511 !!!next-token;
4512 } else {
4513 !!!cp ('t349');
4514 }
4515 } else {
4516 !!!cp ('t348');
4517 }
4518 } elsif ($token->{tag_name} eq 'form') {
4519 !!!cp ('t347.1');
4520 $self->{form_element} = $self->{open_elements}->[-1]->[0];
4521
4522 !!!nack ('t347.2');
4523 !!!next-token;
4524 } elsif ($token->{tag_name} eq 'table') {
4525 !!!cp ('t382');
4526 push @{$open_tables}, [$self->{open_elements}->[-1]->[0]];
4527
4528 $self->{insertion_mode} = IN_TABLE_IM;
4529
4530 !!!nack ('t382.1');
4531 !!!next-token;
4532 } elsif ($token->{tag_name} eq 'hr') {
4533 !!!cp ('t386');
4534 pop @{$self->{open_elements}};
4535
4536 !!!nack ('t386.1');
4537 !!!next-token;
4538 } else {
4539 !!!nack ('t347.1');
4540 !!!next-token;
4541 }
4542 next B;
4543 } elsif ($token->{tag_name} eq 'li') {
4544 ## NOTE: As normal, but imply </li> when there's another <li> ...
4545
4546 ## NOTE: Special, Scope (<li><foo><li> == <li><foo><li/></foo></li>)::
4547 ## Interpreted as <li><foo/></li><li/> (non-conforming):
4548 ## blockquote (O9.27), center (O), dd (Fx3, O, S3.1.2, IE7),
4549 ## dt (Fx, O, S, IE), dl (O), fieldset (O, S, IE), form (Fx, O, S),
4550 ## hn (O), pre (O), applet (O, S), button (O, S), marquee (Fx, O, S),
4551 ## object (Fx)
4552 ## Generate non-tree (non-conforming):
4553 ## basefont (IE7 (where basefont is non-void)), center (IE),
4554 ## form (IE), hn (IE)
4555 ## address, div, p (<li><foo><li> == <li><foo/></li><li/>)::
4556 ## Interpreted as <li><foo><li/></foo></li> (non-conforming):
4557 ## div (Fx, S)
4558
4559 my $non_optional;
4560 my $i = -1;
4561
4562 ## 1.
4563 for my $node (reverse @{$self->{open_elements}}) {
4564 if ($node->[1] == LI_EL) {
4565 ## 2. (a) As if </li>
4566 {
4567 ## If no </li> - not applied
4568 #
4569
4570 ## Otherwise
4571
4572 ## 1. generate implied end tags, except for </li>
4573 #
4574
4575 ## 2. If current node != "li", parse error
4576 if ($non_optional) {
4577 !!!parse-error (type => 'not closed',
4578 text => $non_optional->[0]->manakai_local_name,
4579 token => $token);
4580 !!!cp ('t355');
4581 } else {
4582 !!!cp ('t356');
4583 }
4584
4585 ## 3. Pop
4586 splice @{$self->{open_elements}}, $i;
4587 }
4588
4589 last; ## 2. (b) goto 5.
4590 } elsif (
4591 ## NOTE: not "formatting" and not "phrasing"
4592 ($node->[1] & SPECIAL_EL or
4593 $node->[1] & SCOPING_EL) and
4594 ## NOTE: "li", "dt", and "dd" are in |SPECIAL_EL|.
4595 (not $node->[1] & ADDRESS_DIV_P_EL)
4596 ) {
4597 ## 3.
4598 !!!cp ('t357');
4599 last; ## goto 5.
4600 } elsif ($node->[1] & END_TAG_OPTIONAL_EL) {
4601 !!!cp ('t358');
4602 #
4603 } else {
4604 !!!cp ('t359');
4605 $non_optional ||= $node;
4606 #
4607 }
4608 ## 4.
4609 ## goto 2.
4610 $i--;
4611 }
4612
4613 ## 5. (a) has a |p| element in scope
4614 INSCOPE: for (reverse @{$self->{open_elements}}) {
4615 if ($_->[1] == P_EL) {
4616 !!!cp ('t353');
4617
4618 ## NOTE: |<p><li>|, for example.
4619
4620 !!!back-token; # <x>
4621 $token = {type => END_TAG_TOKEN, tag_name => 'p',
4622 line => $token->{line}, column => $token->{column}};
4623 next B;
4624 } elsif ($_->[1] & SCOPING_EL) {
4625 !!!cp ('t354');
4626 last INSCOPE;
4627 }
4628 } # INSCOPE
4629
4630 ## 5. (b) insert
4631 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4632 !!!nack ('t359.1');
4633 !!!next-token;
4634 next B;
4635 } elsif ($token->{tag_name} eq 'dt' or
4636 $token->{tag_name} eq 'dd') {
4637 ## NOTE: As normal, but imply </dt> or </dd> when ...
4638
4639 my $non_optional;
4640 my $i = -1;
4641
4642 ## 1.
4643 for my $node (reverse @{$self->{open_elements}}) {
4644 if ($node->[1] == DTDD_EL) {
4645 ## 2. (a) As if </dt> or </dd>
4646 {
4647 ## If no </dt> or </dd> - not applied
4648 #
4649
4650 ## Otherwise
4651
4652 ## 1. generate implied end tags, except for </dt> or </dd>
4653 #
4654
4655 ## 2. If current node != "dt"|"dd", parse error
4656 if ($non_optional) {
4657 !!!parse-error (type => 'not closed',
4658 text => $non_optional->[0]->manakai_local_name,
4659 token => $token);
4660 !!!cp ('t355.1');
4661 } else {
4662 !!!cp ('t356.1');
4663 }
4664
4665 ## 3. Pop
4666 splice @{$self->{open_elements}}, $i;
4667 }
4668
4669 last; ## 2. (b) goto 5.
4670 } elsif (
4671 ## NOTE: not "formatting" and not "phrasing"
4672 ($node->[1] & SPECIAL_EL or
4673 $node->[1] & SCOPING_EL) and
4674 ## NOTE: "li", "dt", and "dd" are in |SPECIAL_EL|.
4675
4676 (not $node->[1] & ADDRESS_DIV_P_EL)
4677 ) {
4678 ## 3.
4679 !!!cp ('t357.1');
4680 last; ## goto 5.
4681 } elsif ($node->[1] & END_TAG_OPTIONAL_EL) {
4682 !!!cp ('t358.1');
4683 #
4684 } else {
4685 !!!cp ('t359.1');
4686 $non_optional ||= $node;
4687 #
4688 }
4689 ## 4.
4690 ## goto 2.
4691 $i--;
4692 }
4693
4694 ## 5. (a) has a |p| element in scope
4695 INSCOPE: for (reverse @{$self->{open_elements}}) {
4696 if ($_->[1] == P_EL) {
4697 !!!cp ('t353.1');
4698 !!!back-token; # <x>
4699 $token = {type => END_TAG_TOKEN, tag_name => 'p',
4700 line => $token->{line}, column => $token->{column}};
4701 next B;
4702 } elsif ($_->[1] & SCOPING_EL) {
4703 !!!cp ('t354.1');
4704 last INSCOPE;
4705 }
4706 } # INSCOPE
4707
4708 ## 5. (b) insert
4709 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4710 !!!nack ('t359.2');
4711 !!!next-token;
4712 next B;
4713 } elsif ($token->{tag_name} eq 'plaintext') {
4714 ## NOTE: As normal, but effectively ends parsing
4715
4716 ## has a p element in scope
4717 INSCOPE: for (reverse @{$self->{open_elements}}) {
4718 if ($_->[1] == P_EL) {
4719 !!!cp ('t367');
4720 !!!back-token; # <plaintext>
4721 $token = {type => END_TAG_TOKEN, tag_name => 'p',
4722 line => $token->{line}, column => $token->{column}};
4723 next B;
4724 } elsif ($_->[1] & SCOPING_EL) {
4725 !!!cp ('t368');
4726 last INSCOPE;
4727 }
4728 } # INSCOPE
4729
4730 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4731
4732 $self->{content_model} = PLAINTEXT_CONTENT_MODEL;
4733
4734 !!!nack ('t368.1');
4735 !!!next-token;
4736 next B;
4737 } elsif ($token->{tag_name} eq 'a') {
4738 AFE: for my $i (reverse 0..$#$active_formatting_elements) {
4739 my $node = $active_formatting_elements->[$i];
4740 if ($node->[1] == A_EL) {
4741 !!!cp ('t371');
4742 !!!parse-error (type => 'in a:a', token => $token);
4743
4744 !!!back-token; # <a>
4745 $token = {type => END_TAG_TOKEN, tag_name => 'a',
4746 line => $token->{line}, column => $token->{column}};
4747 $formatting_end_tag->($token);
4748
4749 AFE2: for (reverse 0..$#$active_formatting_elements) {
4750 if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
4751 !!!cp ('t372');
4752 splice @$active_formatting_elements, $_, 1;
4753 last AFE2;
4754 }
4755 } # AFE2
4756 OE: for (reverse 0..$#{$self->{open_elements}}) {
4757 if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
4758 !!!cp ('t373');
4759 splice @{$self->{open_elements}}, $_, 1;
4760 last OE;
4761 }
4762 } # OE
4763 last AFE;
4764 } elsif ($node->[0] eq '#marker') {
4765 !!!cp ('t374');
4766 last AFE;
4767 }
4768 } # AFE
4769
4770 $reconstruct_active_formatting_elements->($insert_to_current);
4771
4772 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4773 push @$active_formatting_elements, $self->{open_elements}->[-1];
4774
4775 !!!nack ('t374.1');
4776 !!!next-token;
4777 next B;
4778 } elsif ($token->{tag_name} eq 'nobr') {
4779 $reconstruct_active_formatting_elements->($insert_to_current);
4780
4781 ## has a |nobr| element in scope
4782 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4783 my $node = $self->{open_elements}->[$_];
4784 if ($node->[1] == NOBR_EL) {
4785 !!!cp ('t376');
4786 !!!parse-error (type => 'in nobr:nobr', token => $token);
4787 !!!back-token; # <nobr>
4788 $token = {type => END_TAG_TOKEN, tag_name => 'nobr',
4789 line => $token->{line}, column => $token->{column}};
4790 next B;
4791 } elsif ($node->[1] & SCOPING_EL) {
4792 !!!cp ('t377');
4793 last INSCOPE;
4794 }
4795 } # INSCOPE
4796
4797 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4798 push @$active_formatting_elements, $self->{open_elements}->[-1];
4799
4800 !!!nack ('t377.1');
4801 !!!next-token;
4802 next B;
4803 } elsif ($token->{tag_name} eq 'button') {
4804 ## has a button element in scope
4805 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4806 my $node = $self->{open_elements}->[$_];
4807 if ($node->[1] == BUTTON_EL) {
4808 !!!cp ('t378');
4809 !!!parse-error (type => 'in button:button', token => $token);
4810 !!!back-token; # <button>
4811 $token = {type => END_TAG_TOKEN, tag_name => 'button',
4812 line => $token->{line}, column => $token->{column}};
4813 next B;
4814 } elsif ($node->[1] & SCOPING_EL) {
4815 !!!cp ('t379');
4816 last INSCOPE;
4817 }
4818 } # INSCOPE
4819
4820 $reconstruct_active_formatting_elements->($insert_to_current);
4821
4822 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4823
4824 ## TODO: associate with $self->{form_element} if defined
4825
4826 push @$active_formatting_elements, ['#marker', ''];
4827
4828 !!!nack ('t379.1');
4829 !!!next-token;
4830 next B;
4831 } elsif ({
4832 xmp => 1,
4833 iframe => 1,
4834 noembed => 1,
4835 noframes => 1, ## NOTE: This is an "as if in head" code clone.
4836 noscript => 0, ## TODO: 1 if scripting is enabled
4837 }->{$token->{tag_name}}) {
4838 if ($token->{tag_name} eq 'xmp') {
4839 !!!cp ('t381');
4840 $reconstruct_active_formatting_elements->($insert_to_current);
4841 } else {
4842 !!!cp ('t399');
4843 }
4844 ## NOTE: There is an "as if in body" code clone.
4845 $parse_rcdata->(CDATA_CONTENT_MODEL);
4846 next B;
4847 } elsif ($token->{tag_name} eq 'isindex') {
4848 !!!parse-error (type => 'isindex', token => $token);
4849
4850 if (defined $self->{form_element}) {
4851 !!!cp ('t389');
4852 ## Ignore the token
4853 !!!nack ('t389'); ## NOTE: Not acknowledged.
4854 !!!next-token;
4855 next B;
4856 } else {
4857 !!!ack ('t391.1');
4858
4859 my $at = $token->{attributes};
4860 my $form_attrs;
4861 $form_attrs->{action} = $at->{action} if $at->{action};
4862 my $prompt_attr = $at->{prompt};
4863 $at->{name} = {name => 'name', value => 'isindex'};
4864 delete $at->{action};
4865 delete $at->{prompt};
4866 my @tokens = (
4867 {type => START_TAG_TOKEN, tag_name => 'form',
4868 attributes => $form_attrs,
4869 line => $token->{line}, column => $token->{column}},
4870 {type => START_TAG_TOKEN, tag_name => 'hr',
4871 line => $token->{line}, column => $token->{column}},
4872 {type => START_TAG_TOKEN, tag_name => 'label',
4873 line => $token->{line}, column => $token->{column}},
4874 );
4875 if ($prompt_attr) {
4876 !!!cp ('t390');
4877 push @tokens, {type => CHARACTER_TOKEN, data => $prompt_attr->{value},
4878 #line => $token->{line}, column => $token->{column},
4879 };
4880 } else {
4881 !!!cp ('t391');
4882 push @tokens, {type => CHARACTER_TOKEN,
4883 data => 'This is a searchable index. Insert your search keywords here: ',
4884 #line => $token->{line}, column => $token->{column},
4885 }; # SHOULD
4886 ## TODO: make this configurable
4887 }
4888 push @tokens,
4889 {type => START_TAG_TOKEN, tag_name => 'input', attributes => $at,
4890 line => $token->{line}, column => $token->{column}},
4891 #{type => CHARACTER_TOKEN, data => ''}, # SHOULD
4892 {type => END_TAG_TOKEN, tag_name => 'label',
4893 line => $token->{line}, column => $token->{column}},
4894 {type => START_TAG_TOKEN, tag_name => 'hr',
4895 line => $token->{line}, column => $token->{column}},
4896 {type => END_TAG_TOKEN, tag_name => 'form',
4897 line => $token->{line}, column => $token->{column}};
4898 !!!back-token (@tokens);
4899 !!!next-token;
4900 next B;
4901 }
4902 } elsif ($token->{tag_name} eq 'textarea') {
4903 ## 1. Insert
4904 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4905
4906 ## Step 2 # XXX
4907 ## TODO: $self->{form_element} if defined
4908
4909 ## 2. Drop U+000A LINE FEED
4910 $self->{ignore_newline} = 1;
4911
4912 ## 3. RCDATA
4913 $self->{content_model} = RCDATA_CONTENT_MODEL;
4914 delete $self->{escape}; # MUST
4915
4916 ## 4., 6. Insertion mode
4917 $self->{insertion_mode} |= IN_CDATA_RCDATA_IM;
4918
4919 ## XXX: 5. frameset-ok flag
4920
4921 !!!nack ('t392.1');
4922 !!!next-token;
4923 next B;
4924 } elsif ($token->{tag_name} eq 'optgroup' or
4925 $token->{tag_name} eq 'option') {
4926 ## has an |option| element in scope
4927 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4928 my $node = $self->{open_elements}->[$_];
4929 if ($node->[1] == OPTION_EL) {
4930 !!!cp ('t397.1');
4931 ## NOTE: As if </option>
4932 !!!back-token; # <option> or <optgroup>
4933 $token = {type => END_TAG_TOKEN, tag_name => 'option',
4934 line => $token->{line}, column => $token->{column}};
4935 next B;
4936 } elsif ($node->[1] & SCOPING_EL) {
4937 !!!cp ('t397.2');
4938 last INSCOPE;
4939 }
4940 } # INSCOPE
4941
4942 $reconstruct_active_formatting_elements->($insert_to_current);
4943
4944 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4945
4946 !!!nack ('t397.3');
4947 !!!next-token;
4948 redo B;
4949 } elsif ($token->{tag_name} eq 'rt' or
4950 $token->{tag_name} eq 'rp') {
4951 ## has a |ruby| element in scope
4952 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4953 my $node = $self->{open_elements}->[$_];
4954 if ($node->[1] == RUBY_EL) {
4955 !!!cp ('t398.1');
4956 ## generate implied end tags
4957 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
4958 !!!cp ('t398.2');
4959 pop @{$self->{open_elements}};
4960 }
4961 unless ($self->{open_elements}->[-1]->[1] == RUBY_EL) {
4962 !!!cp ('t398.3');
4963 !!!parse-error (type => 'not closed',
4964 text => $self->{open_elements}->[-1]->[0]
4965 ->manakai_local_name,
4966 token => $token);
4967 pop @{$self->{open_elements}}
4968 while not $self->{open_elements}->[-1]->[1] == RUBY_EL;
4969 }
4970 last INSCOPE;
4971 } elsif ($node->[1] & SCOPING_EL) {
4972 !!!cp ('t398.4');
4973 last INSCOPE;
4974 }
4975 } # INSCOPE
4976
4977 ## TODO: <non-ruby><rt> is not allowed.
4978
4979 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4980
4981 !!!nack ('t398.5');
4982 !!!next-token;
4983 redo B;
4984 } elsif ($token->{tag_name} eq 'math' or
4985 $token->{tag_name} eq 'svg') {
4986 $reconstruct_active_formatting_elements->($insert_to_current);
4987
4988 ## "Adjust MathML attributes" ('math' only) - done in insert-element-f
4989
4990 ## "adjust SVG attributes" ('svg' only) - done in insert-element-f
4991
4992 ## "adjust foreign attributes" - done in insert-element-f
4993
4994 !!!insert-element-f ($token->{tag_name} eq 'math' ? $MML_NS : $SVG_NS, $token->{tag_name}, $token->{attributes}, $token);
4995
4996 if ($self->{self_closing}) {
4997 pop @{$self->{open_elements}};
4998 !!!ack ('t398.6');
4999 } else {
5000 !!!cp ('t398.7');
5001 $self->{insertion_mode} |= IN_FOREIGN_CONTENT_IM;
5002 ## NOTE: |<body><math><mi><svg>| -> "in foreign content" insertion
5003 ## mode, "in body" (not "in foreign content") secondary insertion
5004 ## mode, maybe.
5005 }
5006
5007 !!!next-token;
5008 next B;
5009 } elsif ({
5010 caption => 1, col => 1, colgroup => 1, frame => 1,
5011 frameset => 1, head => 1,
5012 tbody => 1, td => 1, tfoot => 1, th => 1,
5013 thead => 1, tr => 1,
5014 }->{$token->{tag_name}}) {
5015 !!!cp ('t401');
5016 !!!parse-error (type => 'in body',
5017 text => $token->{tag_name}, token => $token);
5018 ## Ignore the token
5019 !!!nack ('t401.1'); ## NOTE: |<col/>| or |<frame/>| here is an error.
5020 !!!next-token;
5021 next B;
5022 } elsif ($token->{tag_name} eq 'param' or
5023 $token->{tag_name} eq 'source') {
5024 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
5025 pop @{$self->{open_elements}};
5026
5027 !!!ack ('t398.5');
5028 !!!next-token;
5029 redo B;
5030 } else {
5031 if ($token->{tag_name} eq 'image') {
5032 !!!cp ('t384');
5033 !!!parse-error (type => 'image', token => $token);
5034 $token->{tag_name} = 'img';
5035 } else {
5036 !!!cp ('t385');
5037 }
5038
5039 ## NOTE: There is an "as if <br>" code clone.
5040 $reconstruct_active_formatting_elements->($insert_to_current);
5041
5042 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
5043
5044 if ({
5045 applet => 1, marquee => 1, object => 1,
5046 }->{$token->{tag_name}}) {
5047 !!!cp ('t380');
5048 push @$active_formatting_elements, ['#marker', ''];
5049 !!!nack ('t380.1');
5050 } elsif ({
5051 b => 1, big => 1, em => 1, font => 1, i => 1,
5052 s => 1, small => 1, strike => 1,
5053 strong => 1, tt => 1, u => 1,
5054 }->{$token->{tag_name}}) {
5055 !!!cp ('t375');
5056 push @$active_formatting_elements, $self->{open_elements}->[-1];
5057 !!!nack ('t375.1');
5058 } elsif ($token->{tag_name} eq 'input') {
5059 !!!cp ('t388');
5060 ## TODO: associate with $self->{form_element} if defined
5061 pop @{$self->{open_elements}};
5062 !!!ack ('t388.2');
5063 } elsif ({
5064 area => 1, basefont => 1, bgsound => 1, br => 1,
5065 embed => 1, img => 1, spacer => 1, wbr => 1,
5066 keygen => 1,
5067 }->{$token->{tag_name}}) {
5068 !!!cp ('t388.1');
5069 pop @{$self->{open_elements}};
5070 !!!ack ('t388.3');
5071 } elsif ($token->{tag_name} eq 'select') {
5072 ## TODO: associate with $self->{form_element} if defined
5073
5074 if ($self->{insertion_mode} & TABLE_IMS or
5075 $self->{insertion_mode} & BODY_TABLE_IMS or
5076 ($self->{insertion_mode} & IM_MASK) == IN_COLUMN_GROUP_IM) {
5077 !!!cp ('t400.1');
5078 $self->{insertion_mode} = IN_SELECT_IN_TABLE_IM;
5079 } else {
5080 !!!cp ('t400.2');
5081 $self->{insertion_mode} = IN_SELECT_IM;
5082 }
5083 !!!nack ('t400.3');
5084 } else {
5085 !!!nack ('t402');
5086 }
5087
5088 !!!next-token;
5089 next B;
5090 }
5091 } elsif ($token->{type} == END_TAG_TOKEN) {
5092 if ($token->{tag_name} eq 'body') {
5093
5094 ## 1. If not "have an element in scope":
5095 ## "has a |body| element in scope"
5096 my $i;
5097 INSCOPE: {
5098 for (reverse @{$self->{open_elements}}) {
5099 if ($_->[1] == BODY_EL) {
5100 !!!cp ('t405');
5101 $i = $_;
5102 last INSCOPE;
5103 } elsif ($_->[1] & SCOPING_EL) {
5104 !!!cp ('t405.1');
5105 last;
5106 }
5107 }
5108
5109 ## NOTE: |<marquee></body>|, |<svg><foreignobject></body>|
5110
5111 !!!parse-error (type => 'unmatched end tag',
5112 text => $token->{tag_name}, token => $token);
5113 ## NOTE: Ignore the token.
5114 !!!next-token;
5115 next B;
5116 } # INSCOPE
5117
5118 ## 2. If unclosed elements:
5119 for (@{$self->{open_elements}}) {
5120 unless ($_->[1] & ALL_END_TAG_OPTIONAL_EL ||
5121 $_->[1] == OPTGROUP_EL ||
5122 $_->[1] == OPTION_EL ||
5123 $_->[1] == RUBY_COMPONENT_EL) {
5124 !!!cp ('t403');
5125 !!!parse-error (type => 'not closed',
5126 text => $_->[0]->manakai_local_name,
5127 token => $token);
5128 last;
5129 } else {
5130 !!!cp ('t404');
5131 }
5132 }
5133
5134 ## 3. Switch the insertion mode.
5135 $self->{insertion_mode} = AFTER_BODY_IM;
5136 !!!next-token;
5137 next B;
5138 } elsif ($token->{tag_name} eq 'html') {
5139 ## TODO: Update this code. It seems that the code below is not
5140 ## up-to-date, though it has the same effect as specified.
5141 if (@{$self->{open_elements}} > 1 and
5142 $self->{open_elements}->[1]->[1] == BODY_EL) {
5143 unless ($self->{open_elements}->[-1]->[1] == BODY_EL) {
5144 !!!cp ('t406');
5145 !!!parse-error (type => 'not closed',
5146 text => $self->{open_elements}->[1]->[0]
5147 ->manakai_local_name,
5148 token => $token);
5149 } else {
5150 !!!cp ('t407');
5151 }
5152 $self->{insertion_mode} = AFTER_BODY_IM;
5153 ## reprocess
5154 next B;
5155 } else {
5156 !!!cp ('t408');
5157 !!!parse-error (type => 'unmatched end tag',
5158 text => $token->{tag_name}, token => $token);
5159 ## Ignore the token
5160 !!!next-token;
5161 next B;
5162 }
5163 } elsif ({
5164 ## NOTE: End tags for non-phrasing flow content elements
5165
5166 ## NOTE: The normal ones
5167 address => 1, article => 1, aside => 1, blockquote => 1,
5168 center => 1, datagrid => 1, details => 1, dialog => 1,
5169 dir => 1, div => 1, dl => 1, fieldset => 1, figure => 1,
5170 footer => 1, header => 1, listing => 1, menu => 1, nav => 1,
5171 ol => 1, pre => 1, section => 1, ul => 1,
5172
5173 ## NOTE: As normal, but ... optional tags
5174 dd => 1, dt => 1, li => 1,
5175
5176 applet => 1, button => 1, marquee => 1, object => 1,
5177 }->{$token->{tag_name}}) {
5178 ## NOTE: Code for <li> start tags includes "as if </li>" code.
5179 ## Code for <dt> or <dd> start tags includes "as if </dt> or
5180 ## </dd>" code.
5181
5182 ## has an element in scope
5183 my $i;
5184 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5185 my $node = $self->{open_elements}->[$_];
5186 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
5187 !!!cp ('t410');
5188 $i = $_;
5189 last INSCOPE;
5190 } elsif ($node->[1] & SCOPING_EL) {
5191 !!!cp ('t411');
5192 last INSCOPE;
5193 }
5194 } # INSCOPE
5195
5196 unless (defined $i) { # has an element in scope
5197 !!!cp ('t413');
5198 !!!parse-error (type => 'unmatched end tag',
5199 text => $token->{tag_name}, token => $token);
5200 ## NOTE: Ignore the token.
5201 } else {
5202 ## Step 1. generate implied end tags
5203 while ({
5204 ## END_TAG_OPTIONAL_EL
5205 dd => ($token->{tag_name} ne 'dd'),
5206 dt => ($token->{tag_name} ne 'dt'),
5207 li => ($token->{tag_name} ne 'li'),
5208 option => 1,
5209 optgroup => 1,
5210 p => 1,
5211 rt => 1,
5212 rp => 1,
5213 }->{$self->{open_elements}->[-1]->[0]->manakai_local_name}) {
5214 !!!cp ('t409');
5215 pop @{$self->{open_elements}};
5216 }
5217
5218 ## Step 2.
5219 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
5220 ne $token->{tag_name}) {
5221 !!!cp ('t412');
5222 !!!parse-error (type => 'not closed',
5223 text => $self->{open_elements}->[-1]->[0]
5224 ->manakai_local_name,
5225 token => $token);
5226 } else {
5227 !!!cp ('t414');
5228 }
5229
5230 ## Step 3.
5231 splice @{$self->{open_elements}}, $i;
5232
5233 ## Step 4.
5234 $clear_up_to_marker->()
5235 if {
5236 applet => 1, button => 1, marquee => 1, object => 1,
5237 }->{$token->{tag_name}};
5238 }
5239 !!!next-token;
5240 next B;
5241 } elsif ($token->{tag_name} eq 'form') {
5242 ## NOTE: As normal, but interacts with the form element pointer
5243
5244 undef $self->{form_element};
5245
5246 ## has an element in scope
5247 my $i;
5248 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5249 my $node = $self->{open_elements}->[$_];
5250 if ($node->[1] == FORM_EL) {
5251 !!!cp ('t418');
5252 $i = $_;
5253 last INSCOPE;
5254 } elsif ($node->[1] & SCOPING_EL) {
5255 !!!cp ('t419');
5256 last INSCOPE;
5257 }
5258 } # INSCOPE
5259
5260 unless (defined $i) { # has an element in scope
5261 !!!cp ('t421');
5262 !!!parse-error (type => 'unmatched end tag',
5263 text => $token->{tag_name}, token => $token);
5264 ## NOTE: Ignore the token.
5265 } else {
5266 ## Step 1. generate implied end tags
5267 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
5268 !!!cp ('t417');
5269 pop @{$self->{open_elements}};
5270 }
5271
5272 ## Step 2.
5273 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
5274 ne $token->{tag_name}) {
5275 !!!cp ('t417.1');
5276 !!!parse-error (type => 'not closed',
5277 text => $self->{open_elements}->[-1]->[0]
5278 ->manakai_local_name,
5279 token => $token);
5280 } else {
5281 !!!cp ('t420');
5282 }
5283
5284 ## Step 3.
5285 splice @{$self->{open_elements}}, $i;
5286 }
5287
5288 !!!next-token;
5289 next B;
5290 } elsif ({
5291 ## NOTE: As normal, except acts as a closer for any ...
5292 h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
5293 }->{$token->{tag_name}}) {
5294 ## has an element in scope
5295 my $i;
5296 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5297 my $node = $self->{open_elements}->[$_];
5298 if ($node->[1] == HEADING_EL) {
5299 !!!cp ('t423');
5300 $i = $_;
5301 last INSCOPE;
5302 } elsif ($node->[1] & SCOPING_EL) {
5303 !!!cp ('t424');
5304 last INSCOPE;
5305 }
5306 } # INSCOPE
5307
5308 unless (defined $i) { # has an element in scope
5309 !!!cp ('t425.1');
5310 !!!parse-error (type => 'unmatched end tag',
5311 text => $token->{tag_name}, token => $token);
5312 ## NOTE: Ignore the token.
5313 } else {
5314 ## Step 1. generate implied end tags
5315 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
5316 !!!cp ('t422');
5317 pop @{$self->{open_elements}};
5318 }
5319
5320 ## Step 2.
5321 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
5322 ne $token->{tag_name}) {
5323 !!!cp ('t425');
5324 !!!parse-error (type => 'unmatched end tag',
5325 text => $token->{tag_name}, token => $token);
5326 } else {
5327 !!!cp ('t426');
5328 }
5329
5330 ## Step 3.
5331 splice @{$self->{open_elements}}, $i;
5332 }
5333
5334 !!!next-token;
5335 next B;
5336 } elsif ($token->{tag_name} eq 'p') {
5337 ## NOTE: As normal, except </p> implies <p> and ...
5338
5339 ## has an element in scope
5340 my $non_optional;
5341 my $i;
5342 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5343 my $node = $self->{open_elements}->[$_];
5344 if ($node->[1] == P_EL) {
5345 !!!cp ('t410.1');
5346 $i = $_;
5347 last INSCOPE;
5348 } elsif ($node->[1] & SCOPING_EL) {
5349 !!!cp ('t411.1');
5350 last INSCOPE;
5351 } elsif ($node->[1] & END_TAG_OPTIONAL_EL) {
5352 ## NOTE: |END_TAG_OPTIONAL_EL| includes "p"
5353 !!!cp ('t411.2');
5354 #
5355 } else {
5356 !!!cp ('t411.3');
5357 $non_optional ||= $node;
5358 #
5359 }
5360 } # INSCOPE
5361
5362 if (defined $i) {
5363 ## 1. Generate implied end tags
5364 #
5365
5366 ## 2. If current node != "p", parse error
5367 if ($non_optional) {
5368 !!!cp ('t412.1');
5369 !!!parse-error (type => 'not closed',
5370 text => $non_optional->[0]->manakai_local_name,
5371 token => $token);
5372 } else {
5373 !!!cp ('t414.1');
5374 }
5375
5376 ## 3. Pop
5377 splice @{$self->{open_elements}}, $i;
5378 } else {
5379 !!!cp ('t413.1');
5380 !!!parse-error (type => 'unmatched end tag',
5381 text => $token->{tag_name}, token => $token);
5382
5383 !!!cp ('t415.1');
5384 ## As if <p>, then reprocess the current token
5385 my $el;
5386 !!!create-element ($el, $HTML_NS, 'p',, $token);
5387 $insert->($el);
5388 ## NOTE: Not inserted into |$self->{open_elements}|.
5389 }
5390
5391 !!!next-token;
5392 next B;
5393 } elsif ({
5394 a => 1,
5395 b => 1, big => 1, em => 1, font => 1, i => 1,
5396 nobr => 1, s => 1, small => 1, strike => 1,
5397 strong => 1, tt => 1, u => 1,
5398 }->{$token->{tag_name}}) {
5399 !!!cp ('t427');
5400 $formatting_end_tag->($token);
5401 next B;
5402 } elsif ($token->{tag_name} eq 'br') {
5403 !!!cp ('t428');
5404 !!!parse-error (type => 'unmatched end tag',
5405 text => 'br', token => $token);
5406
5407 ## As if <br>
5408 $reconstruct_active_formatting_elements->($insert_to_current);
5409
5410 my $el;
5411 !!!create-element ($el, $HTML_NS, 'br',, $token);
5412 $insert->($el);
5413
5414 ## Ignore the token.
5415 !!!next-token;
5416 next B;
5417 } else {
5418 if ($token->{tag_name} eq 'sarcasm') {
5419 sleep 0.001; # take a deep breath
5420 }
5421
5422 ## Step 1
5423 my $node_i = -1;
5424 my $node = $self->{open_elements}->[$node_i];
5425
5426 ## Step 2
5427 S2: {
5428 my $node_tag_name = $node->[0]->manakai_local_name;
5429 $node_tag_name =~ tr/A-Z/a-z/; # for SVG camelCase tag names
5430 if ($node_tag_name eq $token->{tag_name}) {
5431 ## Step 1
5432 ## generate implied end tags
5433 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
5434 !!!cp ('t430');
5435 ## NOTE: |<ruby><rt></ruby>|.
5436 ## ISSUE: <ruby><rt></rt> will also take this code path,
5437 ## which seems wrong.
5438 pop @{$self->{open_elements}};
5439 $node_i++;
5440 }
5441
5442 ## Step 2
5443 my $current_tag_name
5444 = $self->{open_elements}->[-1]->[0]->manakai_local_name;
5445 $current_tag_name =~ tr/A-Z/a-z/;
5446 if ($current_tag_name ne $token->{tag_name}) {
5447 !!!cp ('t431');
5448 ## NOTE: <x><y></x>
5449 !!!parse-error (type => 'not closed',
5450 text => $self->{open_elements}->[-1]->[0]
5451 ->manakai_local_name,
5452 token => $token);
5453 } else {
5454 !!!cp ('t432');
5455 }
5456
5457 ## Step 3
5458 splice @{$self->{open_elements}}, $node_i if $node_i < 0;
5459
5460 !!!next-token;
5461 last S2;
5462 } else {
5463 ## Step 3
5464 if (not ($node->[1] & FORMATTING_EL) and
5465 #not $phrasing_category->{$node->[1]} and
5466 ($node->[1] & SPECIAL_EL or
5467 $node->[1] & SCOPING_EL)) {
5468 !!!cp ('t433');
5469 !!!parse-error (type => 'unmatched end tag',
5470 text => $token->{tag_name}, token => $token);
5471 ## Ignore the token
5472 !!!next-token;
5473 last S2;
5474
5475 ## NOTE: |<span><dd></span>a|: In Safari 3.1.2 and Opera
5476 ## 9.27, "a" is a child of <dd> (conforming). In
5477 ## Firefox 3.0.2, "a" is a child of <body>. In WinIE 7,
5478 ## "a" is a child of both <body> and <dd>.
5479 }
5480
5481 !!!cp ('t434');
5482 }
5483
5484 ## Step 4
5485 $node_i--;
5486 $node = $self->{open_elements}->[$node_i];
5487
5488 ## Step 5;
5489 redo S2;
5490 } # S2
5491 next B;
5492 }
5493 }
5494 next B;
5495 } continue { # B
5496 if ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) {
5497 ## NOTE: The code below is executed in cases where it does not have
5498 ## to be, but it is harmless even in those cases.
5499 ## has an element in scope
5500 INSCOPE: {
5501 for (reverse 0..$#{$self->{open_elements}}) {
5502 my $node = $self->{open_elements}->[$_];
5503 if ($node->[1] & FOREIGN_EL) {
5504 last INSCOPE;
5505 } elsif ($node->[1] & SCOPING_EL) {
5506 last;
5507 }
5508 }
5509
5510 ## NOTE: No foreign element in scope.
5511 $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
5512 } # INSCOPE
5513 }
5514 } # B
5515
5516 ## Stop parsing # MUST
5517
5518 ## TODO: script stuffs
5519 } # _tree_construct_main
5520
5521 ## XXX: How this method is organized is somewhat out of date, although
5522 ## it still does what the current spec documents.
## set_inner_html ($class, $node, $string, $onerror, $get_wrapper)
##
## Replaces the children of |$node| (the /context/) with the result of
## parsing the markup string given as the third argument.
##
##   $node        - A Document (node type 9) or an Element (node type
##                  1).  Any other node type makes the method |die|.
##   $string      - The markup.  It is never copied into a lexical; it
##                  is always read through |$_[0]| after the two shifts.
##   $onerror     - Optional code reference that receives error
##                  details as key/value pairs.  In the Element case a
##                  handler that |warn|s is used when it is omitted.
##   $get_wrapper - Optional code reference applied to the input
##                  handle; defaults to a sub returning its argument.
##
## There is no explicit |return| statement in this method.
5523 sub set_inner_html ($$$$;$) {
5524 my $class = shift;
5525 my $node = shift; # /context/
## After the two shifts above, |$_[0]| is the string, |$_[1]| the error
## handler and |$_[2]| the wrapper.
5526 #my $s = \$_[0];
5527 my $onerror = $_[1];
5528 my $get_wrapper = $_[2] || sub ($) { return $_[0] };
5529
5530 ## ISSUE: Should {confident} be true?
5531
5532 my $nt = $node->node_type;
5533 if ($nt == 9) { # Document (invoke the algorithm with no /context/ element)
5534 # MUST
5535
5536 ## Step 1 # MUST
5537 ## TODO: If the document has an active parser, ...
5538 ## ISSUE: There is an issue in the spec.
5539
5540 ## Step 2 # MUST
## Detach every current child.  The child list is copied into |@cn|
## before removal, presumably because |child_nodes| is a live list
## -- confirm.
5541 my @cn = @{$node->child_nodes};
5542 for (@cn) {
5543 $node->remove_child ($_);
5544 }
5545
5546 ## Step 3, 4, 5 # MUST
## The whole job is delegated to |parse_char_string|, which receives the
## document itself as the second argument.  |$onerror| is passed as is
## (possibly undefined).
5547 $class->parse_char_string ($_[0] => $node, $onerror, $get_wrapper);
5548 } elsif ($nt == 1) { # Element (invoke the algorithm with /context/ element)
5549 ## TODO: If non-html element
5550
5551 ## NOTE: Most of this code is copied from |parse_string|
5552
5553 ## TODO: Support for $get_wrapper
## NOTE(review): |$get_wrapper| is in fact applied to |$input| further
## below, so the TODO above may be stale -- confirm.
5554
5555 ## F1. Create an HTML document.
## The fragment is parsed into a scratch document created from the
## implementation of the owner document of the context node.
5556 my $this_doc = $node->owner_document;
5557 my $doc = $this_doc->implementation->create_document;
5558 $doc->manakai_is_html (1);
5559
5560 ## F2. Propagate quirkness flag
## NOTE(review): |$node_doc| is obtained by the same call as
## |$this_doc| above.
5561 my $node_doc = $node->owner_document;
5562 $doc->manakai_compat_mode ($node_doc->manakai_compat_mode);
5563
5564 ## F3. Create an HTML parser
5565 my $p = $class->new;
5566 $p->{document} = $doc;
5567
5568 ## Step 8 # MUST
## NOTE(review): |$i| is not referenced again in the visible code of
## this method.
5569 my $i = 0;
## Position tracking starts at line 1, column 0.
5570 $p->{line_prev} = $p->{line} = 1;
5571 $p->{column_prev} = $p->{column} = 0;
5572 require Whatpm::Charset::DecodeHandle;
## Wrap the string (by reference) in a character handle, then let the
## caller-supplied wrapper substitute or decorate that handle.
5573 my $input = Whatpm::Charset::DecodeHandle::CharString->new (\($_[0]));
5574 $input = $get_wrapper->($input);
## |set_nc|: stores the next input character in |$self->{nc}| as a code
## point, or -1 at the end of the input, and maintains the line/column
## counters.  The parser object is expected as the first argument.
## NOTE(review): the closure mixes |$self| (its argument) and |$p| (the
## captured parser); they are presumably the same object -- confirm.
5575 $p->{set_nc} = sub {
5576 my $self = shift;
5577
5578 my $char = '';
## A character read ahead during CR handling (see below) is consumed
## first.
5579 if (defined $self->{next_nc}) {
5580 $char = $self->{next_nc};
5581 delete $self->{next_nc};
5582 $self->{nc} = ord $char;
5583 } else {
5584 $self->{char_buffer} = '';
5585 $self->{char_buffer_pos} = 0;
5586
## Fast path: try to refill |char_buffer| with characters other than
## NUL, LF and CR.  (Assumes |manakai_read_until| returns the number of
## characters it stored -- TODO confirm.)  If it stored any, the first
## one becomes |nc| and no special-character handling is needed.
5587 my $count = $input->manakai_read_until
5588 ($self->{char_buffer}, qr/[^\x00\x0A\x0D]/,
5589 $self->{char_buffer_pos});
5590 if ($count) {
5591 $self->{line_prev} = $self->{line};
5592 $self->{column_prev} = $self->{column};
5593 $self->{column}++;
5594 $self->{nc}
5595 = ord substr ($self->{char_buffer},
5596 $self->{char_buffer_pos}++, 1);
5597 return;
5598 }
5599
## Slow path: read a single character; a failed read means end of input.
5600 if ($input->read ($char, 1)) {
5601 $self->{nc} = ord $char;
5602 } else {
5603 $self->{nc} = -1;
5604 return;
5605 }
5606 }
5607
## Remember the previous position, then advance by one column.
5608 ($p->{line_prev}, $p->{column_prev}) = ($p->{line}, $p->{column});
5609 $p->{column}++;
5610
5611 if ($self->{nc} == 0x000A) { # LF
5612 $p->{line}++;
5613 $p->{column} = 0;
5614 !!!cp ('i1');
5615 } elsif ($self->{nc} == 0x000D) { # CR
5616 ## TODO: support for abort/streaming
## Peek at the character after CR.  An LF is dropped (so CR LF counts
## as one newline); anything else is kept in |next_nc| for the next
## call.  In both cases the CR itself is reported as LF.
5617 my $next = '';
5618 if ($input->read ($next, 1) and $next ne "\x0A") {
5619 $self->{next_nc} = $next;
5620 }
5621 $self->{nc} = 0x000A; # LF # MUST
5622 $p->{line}++;
5623 $p->{column} = 0;
5624 !!!cp ('i2');
5625 } elsif ($self->{nc} == 0x0000) { # NULL
5626 !!!cp ('i4');
5627 !!!parse-error (type => 'NULL');
5628 $self->{nc} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
5629 }
5630 };
5631
## |read_until|: bulk read.  Stores into |$_[0]| a run of characters
## that are neither listed in |$_[1]| (interpolated into a negated
## character class) nor NUL, LF or CR, and returns the length of that
## run.  |nc| is set to -1 whenever at least one character is consumed.
5632 $p->{read_until} = sub {
5633 #my ($scalar, $specials_range, $offset) = @_;
## Nothing can be read in bulk while a read-ahead character is pending.
5634 return 0 if defined $p->{next_nc};
5635
5636 my $pattern = qr/[^$_[1]\x00\x0A\x0D]/;
5637 my $offset = $_[2] || 0;
5638
## Serve from |char_buffer| while it still holds unread characters;
## otherwise read from the input handle directly.
5639 if ($p->{char_buffer_pos} < length $p->{char_buffer}) {
5640 pos ($p->{char_buffer}) = $p->{char_buffer_pos};
5641 if ($p->{char_buffer} =~ /\G(?>$pattern)+/) {
5642 substr ($_[0], $offset)
5643 = substr ($p->{char_buffer}, $-[0], $+[0] - $-[0]);
5644 my $count = $+[0] - $-[0];
5645 if ($count) {
5646 $p->{column} += $count;
5647 $p->{char_buffer_pos} += $count;
5648 $p->{line_prev} = $p->{line};
5649 $p->{column_prev} = $p->{column} - 1;
5650 $p->{nc} = -1;
5651 }
5652 return $count;
5653 } else {
5654 return 0;
5655 }
5656 } else {
## NOTE(review): this branch passes the raw |$_[2]| rather than the
## defaulted |$offset| computed above -- confirm that an undefined
## offset is acceptable to |manakai_read_until|.
5657 my $count = $input->manakai_read_until ($_[0], $pattern, $_[2]);
5658 if ($count) {
5659 $p->{column} += $count;
5660 $p->{column_prev} += $count;
5661 $p->{nc} = -1;
5662 }
5663 return $count;
5664 }
5665 }; # $p->{read_until}
5666
## Error reporting.  Without a caller-supplied handler, errors are
## written with |warn|, using the position of the token when the token
## carries one and the |line|/|column| arguments otherwise.
5667 my $ponerror = $onerror || sub {
5668 my (%opt) = @_;
5669 my $line = $opt{line};
5670 my $column = $opt{column};
5671 if (defined $opt{token} and defined $opt{token}->{line}) {
5672 $line = $opt{token}->{line};
5673 $column = $opt{token}->{column};
5674 }
5675 warn "Parse error ($opt{type}) at line $line column $column\n";
5676 };
## The parser-level hook prepends the current position; the arguments
## of the caller follow it in the list passed to the handler.
5677 $p->{parse_error} = sub {
5678 $ponerror->(line => $p->{line}, column => $p->{column}, @_);
5679 };
5680
## Errors raised by the input handle are forwarded to the same handler,
## tagged with |layer => encode| and with the column advanced by one.
5681 my $char_onerror = sub {
5682 my (undef, $type, %opt) = @_;
5683 $ponerror->(layer => 'encode',
5684 line => $p->{line}, column => $p->{column} + 1,
5685 %opt, type => $type);
5686 }; # $char_onerror
5687 $input->onerror ($char_onerror);
5688
5689 $p->_initialize_tokenizer;
5690 $p->_initialize_tree_constructor;
5691
5692 ## F4. If /context/ is not undef...
5693
5694 ## F4.1. content model flag
## The initial content model is chosen from the local name of the
## context element; names missing from the table get PCDATA.
5695 my $node_ln = $node->manakai_local_name;
5696 $p->{content_model} = {
5697 title => RCDATA_CONTENT_MODEL,
5698 textarea => RCDATA_CONTENT_MODEL,
5699 style => CDATA_CONTENT_MODEL,
5700 script => CDATA_CONTENT_MODEL,
5701 xmp => CDATA_CONTENT_MODEL,
5702 iframe => CDATA_CONTENT_MODEL,
5703 noembed => CDATA_CONTENT_MODEL,
5704 noframes => CDATA_CONTENT_MODEL,
5705 noscript => CDATA_CONTENT_MODEL,
5706 plaintext => PLAINTEXT_CONTENT_MODEL,
5707 }->{$node_ln};
5708 $p->{content_model} = PCDATA_CONTENT_MODEL
5709 unless defined $p->{content_model};
5710
## The context element is recorded as a pair of the node and its
## category looked up in |$el_category| by local name.
5711 $p->{inner_html_node} = [$node, $el_category->{$node_ln}];
5712 ## TODO: Foreign element OK?
5713
5714 ## F4.2. Root |html| element
5715 my $root = $doc->create_element_ns
5716 ('http://www.w3.org/1999/xhtml', [undef, 'html']);
5717
5718 ## F4.3.
5719 $doc->append_child ($root);
5720
5721 ## F4.4.
## The root element becomes the first entry of the stack of open
## elements.
5722 push @{$p->{open_elements}}, [$root, $el_category->{html}];
5723
5724 undef $p->{head_element};
5725 undef $p->{head_element_inserted};
5726
5727 ## F4.5.
5728 $p->_reset_insertion_mode;
5729
5730 ## F4.6.
## Walk from the context node up through its ancestors and make the
## nearest element named |form| in the XHTML namespace the form element
## pointer of the parser.
5731 my $anode = $node;
5732 AN: while (defined $anode) {
5733 if ($anode->node_type == 1) {
5734 my $nsuri = $anode->namespace_uri;
5735 if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml') {
5736 if ($anode->manakai_local_name eq 'form') {
5737 !!!cp ('i5');
5738 $p->{form_element} = $anode;
5739 last AN;
5740 }
5741 }
5742 }
5743 $anode = $anode->parent_node;
5744 } # AN
5745
5746 ## F.6. Start the parser.
## NOTE(review): the |!!!next-token| macro presumably expands to code
## that refers to a variable named |$self|, hence the local alias --
## confirm against the macro definition.
5747 {
5748 my $self = $p;
5749 !!!next-token;
5750 }
## NOTE(review): the closing comment of the sub that precedes this
## method reads |_tree_construct_main|; confirm that the method name
## called here is the one actually defined.
5751 $p->_tree_construction_main;
5752
5753 ## F.7.
## Remove the existing children of the context node.
5754 my @cn = @{$node->child_nodes};
5755 for (@cn) {
5756 $node->remove_child ($_);
5757 }
5758 ## ISSUE: mutation events? read-only?
5759
5760 ## Step 11 # MUST
## Move each child of the scratch root element under the context node,
## adopting it into the original document first.
5761 @cn = @{$root->child_nodes};
5762 for (@cn) {
5763 $this_doc->adopt_node ($_);
5764 $node->append_child ($_);
5765 }
5766 ## ISSUE: mutation events?
5767
5768 $p->_terminate_tree_constructor;
5769
## NOTE(review): the |set_nc| and |read_until| closures also refer to
## |$p| and remain stored in |$p| after this point; this looks like
## reference cycles that are left in place -- confirm whether they have
## to be deleted as well.
5770 delete $p->{parse_error}; # delete loop
5771 } else {
5772 die "$0: |set_inner_html| is not defined for node of type $nt";
5773 }
5774 } # set_inner_html
5775
5776 } # tree construction stage
5777
## Exception class that inherits from |Error| (the file loads |Error|
## with the |:try| import near its top).
## NOTE(review): judging by the name, it is presumably thrown to make
## the parser restart -- confirm at the throw and catch sites.
5778 package Whatpm::HTML::RestartParser;
5779 push our @ISA, 'Error';
5780
## True value so that loading this file reports success.
5781 1;
5782 # $Date: 2009/09/06 01:21:44 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24