/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Contents of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.244 - (show annotations) (download) (as text)
Sun Sep 6 23:32:06 2009 UTC (15 years, 2 months ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
Changes since 1.243: +31 -15 lines
File MIME type: application/x-wais-source
++ whatpm/t/ChangeLog	6 Sep 2009 23:31:19 -0000
2009-09-07  Wakaba  <wakaba@suika.fam.cx>

	* tree-test-1.dat: Added new test data on obsolete permitted
	DOCTYPEs (HTML5 revision 3378).

++ whatpm/Whatpm/ChangeLog	6 Sep 2009 23:31:49 -0000
2009-09-07  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm.src (_tree_construction_initial): Implemented "obsolete
	permitted DOCTYPEs" (HTML5 revision 3378).

1 package Whatpm::HTML;
2 use strict;
3 our $VERSION=do{my @r=(q$Revision: 1.243 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 use Error qw(:try);
5
6 use Whatpm::HTML::Tokenizer;
7
8 ## NOTE: This module does not check all HTML5 parse errors; character
9 ## encoding related parse errors are expected to be handled by relevant
10 ## modules.
11 ## Parse errors for control characters that are not allowed in HTML5
12 ## documents, for surrogate code points, and for noncharacter code
13 ## points, as well as U+FFFD substitutions for characters whose code points
14 ## are higher than U+10FFFF, may be detected by combining the parser with
15 ## the checker implemented by Whatpm::Charset::UnicodeChecker (for its
16 ## usage example, see |t/HTML-tree.t| in the Whatpm package or the
17 ## WebHACC::Language::HTML module in the WebHACC package).
18
19 ## ISSUE:
20 ## var doc = implementation.createDocument (null, null, null);
21 ## doc.write ('');
22 ## alert (doc.compatMode);
23
24 require IO::Handle;
25
26 ## Namespace URLs
27
my $HTML_NS  = 'http://www.w3.org/1999/xhtml';         # HTML
my $MML_NS   = 'http://www.w3.org/1998/Math/MathML';   # MathML
my $SVG_NS   = 'http://www.w3.org/2000/svg';           # SVG
my $XLINK_NS = 'http://www.w3.org/1999/xlink';         # XLink
my $XML_NS   = 'http://www.w3.org/XML/1998/namespace'; # xml: prefix
my $XMLNS_NS = 'http://www.w3.org/2000/xmlns/';        # xmlns declarations
34
35 ## Element categories
36
37 ## Bits 12-15
## Each element category is an integer.  Its high bits are one or more
## of the single-bit flags below; its low three bits (bits 0-2) tell
## apart categories that carry the same set of flags.

## Bits 12-15
sub SPECIAL_EL () { 1 << 15 }
sub SCOPING_EL () { 1 << 14 }
sub FORMATTING_EL () { 1 << 13 }
sub PHRASING_EL () { 1 << 12 }

## Bits 10-11
#sub FOREIGN_EL () { 1 << 11 } # see Whatpm::HTML::Tokenizer
sub FOREIGN_FLOW_CONTENT_EL () { 1 << 10 }

## Bits 6-9
sub TABLE_SCOPING_EL () { 1 << 9 }
sub TABLE_ROWS_SCOPING_EL () { 1 << 8 }
sub TABLE_ROW_SCOPING_EL () { 1 << 7 }
sub TABLE_ROWS_EL () { 1 << 6 }

## Bit 5
sub ADDRESS_DIV_P_EL () { 1 << 5 }

## NOTE: Used in </body> and EOF algorithms.
## Bit 4
sub ALL_END_TAG_OPTIONAL_EL () { 1 << 4 }

## NOTE: Used in "generate implied end tags" algorithm.
## NOTE: There is a code where a modified version of
## END_TAG_OPTIONAL_EL is used in "generate implied end tags"
## implementation (search for the algorithm name).
## Bit 3
sub END_TAG_OPTIONAL_EL () { 1 << 3 }

## Bits 0-2

## "Special" categories
sub MISC_SPECIAL_EL () { SPECIAL_EL | 0 }
sub FORM_EL () { SPECIAL_EL | 1 }
sub FRAMESET_EL () { SPECIAL_EL | 2 }
sub HEADING_EL () { SPECIAL_EL | 3 }
sub SELECT_EL () { SPECIAL_EL | 4 }
sub SCRIPT_EL () { SPECIAL_EL | 5 }

sub ADDRESS_DIV_EL () { SPECIAL_EL | ADDRESS_DIV_P_EL | 1 }
sub BODY_EL () { SPECIAL_EL | ALL_END_TAG_OPTIONAL_EL | 1 }

sub DTDD_EL () {
  SPECIAL_EL | END_TAG_OPTIONAL_EL | ALL_END_TAG_OPTIONAL_EL | 2
}
sub LI_EL () {
  SPECIAL_EL | END_TAG_OPTIONAL_EL | ALL_END_TAG_OPTIONAL_EL | 4
}
sub P_EL () {
  SPECIAL_EL | ADDRESS_DIV_P_EL |
  END_TAG_OPTIONAL_EL | ALL_END_TAG_OPTIONAL_EL | 1
}

sub TABLE_ROW_EL () {
  SPECIAL_EL | TABLE_ROWS_EL | TABLE_ROW_SCOPING_EL |
  ALL_END_TAG_OPTIONAL_EL | 1
}
sub TABLE_ROW_GROUP_EL () {
  SPECIAL_EL | TABLE_ROWS_EL | TABLE_ROWS_SCOPING_EL |
  ALL_END_TAG_OPTIONAL_EL | 1
}

## "Scoping" categories
sub MISC_SCOPING_EL () { SCOPING_EL | 0 }
sub BUTTON_EL () { SCOPING_EL | 1 }
sub CAPTION_EL () { SCOPING_EL | 2 }
sub HTML_EL () {
  SCOPING_EL |
  TABLE_SCOPING_EL | TABLE_ROWS_SCOPING_EL | TABLE_ROW_SCOPING_EL |
  ALL_END_TAG_OPTIONAL_EL | 1
}
sub TABLE_EL () {
  SCOPING_EL | TABLE_ROWS_EL | TABLE_SCOPING_EL | 1
}
sub TABLE_CELL_EL () {
  SCOPING_EL | TABLE_ROW_SCOPING_EL | ALL_END_TAG_OPTIONAL_EL | 1
}

## "Formatting" categories
sub MISC_FORMATTING_EL () { FORMATTING_EL | 0 }
sub A_EL () { FORMATTING_EL | 1 }
sub NOBR_EL () { FORMATTING_EL | 2 }

## "Phrasing" categories
sub RUBY_EL () { PHRASING_EL | 1 }

## ISSUE: ALL_END_TAG_OPTIONAL_EL?
sub OPTGROUP_EL () { PHRASING_EL | END_TAG_OPTIONAL_EL | 1 }
sub OPTION_EL () { PHRASING_EL | END_TAG_OPTIONAL_EL | 2 }
sub RUBY_COMPONENT_EL () { PHRASING_EL | END_TAG_OPTIONAL_EL | 4 }

## NOTE: FOREIGN_EL is not defined in this file (see the commented-out
## definition above).
sub MML_AXML_EL () { PHRASING_EL | FOREIGN_EL | 1 }
150
## Maps an HTML element local name to its element category constant.
## Entries are grouped by category; names within a group are in
## alphabetical order.
my $el_category = {
  ## Elements with no category-specific handling beyond "special".
  ## NOTE: |image| is commented out in the spec and therefore not listed.
  ## XXX keygen? (Whether a void element is in Special or not does not
  ## affect to the processing, however.)
  (map { ($_ => MISC_SPECIAL_EL) } qw(
    area article aside base basefont bgsound blockquote br center col
    colgroup command datagrid details dialog dir dl embed fieldset
    figure footer frame head header hgroup hr iframe img input isindex
    link listing menu meta nav noembed noframes noscript ol param
    plaintext pre script section spacer style textarea title ul wbr xmp
  )),

  ## Other "special" elements
  (map { ($_ => ADDRESS_DIV_EL) } qw(address div)),
  (map { ($_ => DTDD_EL) } qw(dd dt)),
  (map { ($_ => HEADING_EL) } qw(h1 h2 h3 h4 h5 h6)),
  (map { ($_ => TABLE_ROW_GROUP_EL) } qw(tbody tfoot thead)),
  body => BODY_EL,
  form => FORM_EL,
  frameset => FRAMESET_EL,
  li => LI_EL,
  p => P_EL,
  select => SELECT_EL,
  tr => TABLE_ROW_EL,

  ## "Scoping" elements
  (map { ($_ => MISC_SCOPING_EL) } qw(applet marquee object)),
  (map { ($_ => TABLE_CELL_EL) } qw(td th)),
  button => BUTTON_EL,
  caption => CAPTION_EL,
  html => HTML_EL,
  table => TABLE_EL,

  ## "Formatting" elements
  (map { ($_ => FORMATTING_EL) } qw(
    b big em font i s small strike strong tt u
  )),
  a => A_EL,
  nobr => NOBR_EL,

  ## "Phrasing" elements with their own categories
  (map { ($_ => RUBY_COMPONENT_EL) } qw(rp rt)),
  optgroup => OPTGROUP_EL,
  option => OPTION_EL,
  ruby => RUBY_EL,
};
254
## Maps namespace URL => local name => element category for those
## MathML and SVG elements that have a category of their own.
my $el_category_f = do {
  ## Category shared by most of the entries below.
  my $foreign_flow = FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL;
  my %mml = (
    'annotation-xml' => MML_AXML_EL,
    (map { ($_ => $foreign_flow) } qw(mi mo mn ms mtext)),
  );
  my %svg = (
    foreignObject => SCOPING_EL | FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
    (map { ($_ => $foreign_flow) } qw(desc title)),
  );
  ## NOTE: In addition, FOREIGN_EL is set to non-HTML elements.
  +{$MML_NS => \%mml, $SVG_NS => \%svg};
};
271
## Maps the all-lowercase form of an SVG attribute name to its
## mixed-case spelling.  Every key is simply the lowercased value, so
## only the mixed-case spellings are listed.
my $svg_attr_name = {
  map { (lc ($_), $_) } qw(
    attributeName attributeType
    baseFrequency baseProfile
    calcMode clipPathUnits contentScriptType contentStyleType
    diffuseConstant
    edgeMode externalResourcesRequired
    filterRes filterUnits
    glyphRef gradientTransform gradientUnits
    kernelMatrix kernelUnitLength keyPoints keySplines keyTimes
    lengthAdjust limitingConeAngle
    markerHeight markerUnits markerWidth maskContentUnits maskUnits
    numOctaves
    pathLength patternContentUnits patternTransform patternUnits
    pointsAtX pointsAtY pointsAtZ
    preserveAlpha preserveAspectRatio primitiveUnits
    refX refY repeatCount repeatDur
    requiredExtensions requiredFeatures
    specularConstant specularExponent spreadMethod startOffset
    stdDeviation stitchTiles surfaceScale systemLanguage
    tableValues targetX targetY textLength
    viewBox viewTarget
    xChannelSelector
    yChannelSelector
    zoomAndPan
  )
};
336
## Maps a qualified attribute name as written in the source to
## [namespace URL, [prefix, local name]].
my $foreign_attr_xname = {
  (map { ("xlink:$_" => [$XLINK_NS, ['xlink', $_]]) }
       qw(actuate arcrole href role show title type)),
  (map { ("xml:$_" => [$XML_NS, ['xml', $_]]) } qw(base lang space)),
  ## The default namespace declaration has no prefix.
  'xmlns' => [$XMLNS_NS, [undef, 'xmlns']],
  'xmlns:xlink' => [$XMLNS_NS, ['xmlns', 'xlink']],
};
351
352 ## ISSUE: xmlns:xlink="non-xlink-ns" is not an error.
353
354 ## TODO: Invoke the reset algorithm when a resettable element is
355 ## created (cf. HTML5 revision 2259).
356
## Parses an HTML document given as a string of bytes.
##
## Arguments: ($self, $charset_name, $bytes, ...).  |$bytes| is either
## the byte string itself or a reference to it.  Any further arguments
## are passed through unchanged to |parse_byte_stream|, after the
## charset name and a read handle opened on the string.
##
## Returns whatever |parse_byte_stream| returns.
##
## Croaks if the string cannot be opened as an in-memory file handle.
## Previously the result of |open| was ignored and the failure only
## showed up later as an error on an undefined handle.
sub parse_byte_string ($$$$;$) {
  my $self = shift;
  my $charset_name = shift;
  my $bytes_ref = ref ($_[0]) ? $_[0] : \($_[0]);
  open my $input, '<', $bytes_ref or do {
    require Carp;
    Carp::croak
        ("parse_byte_string: cannot open the byte string as a handle: $!");
  };
  return $self->parse_byte_stream ($charset_name, $input, @_[1..$#_]);
} # parse_byte_string
363
## Parses an HTML document read from a byte stream handle.
##
## The character encoding is chosen by, in order: the |$charset_name|
## argument (if it is defined and a decode handle can be obtained for
## it), a UTF-16BE / UTF-16LE / UTF-8 byte order mark at the start of
## the first (up to) 1024 bytes, the guess made by
## Whatpm::Charset::UniversalCharDet, and finally windows-1252.  The
## resulting character stream is handed to |parse_char_stream|.  If
## Whatpm::HTML::RestartParser is thrown during that parse (see the
## {change_encoding} callback installed below), the document is parsed
## once more using the decode handle selected by the callback.
##
## Arguments: see the commented-out list on the first line of the body.
## When invoked as a class method, a new parser object is created.
## Returns the value returned by |parse_char_stream|.
364 sub parse_byte_stream ($$$$;$$) {
365 # my ($self, $charset_name, $byte_stream, $doc, $onerror, $get_wrapper) = @_;
366 my $self = ref $_[0] ? shift : shift->new;
367 my $charset_name = shift;
368 my $byte_stream = $_[0];
369
## Default error handler: print the error type as a warning.
370 my $onerror = $_[2] || sub {
371 my (%opt) = @_;
372 warn "Parse error ($opt{type})\n";
373 };
374 $self->{parse_error} = $onerror; # updated later by parse_char_stream
375
## Default wrapper: use the decode handle as it is.
376 my $get_wrapper = $_[3] || sub ($) {
377 return $_[0]; # $_[0] = byte stream handle, returned = arg to char handle
378 };
379
380 ## HTML5 encoding sniffing algorithm
381 require Message::Charset::Info;
382 my $charset;
383 my $buffer;
384 my ($char_stream, $e_status);
385
386 SNIFFING: {
387 ## NOTE: By setting |allow_fallback| option true when the
388 ## |get_decode_handle| method is invoked, we ignore what the HTML5
389 ## spec requires, i.e. unsupported encoding should be ignored.
390 ## TODO: We should not do this unless the parser is invoked
391 ## in the conformance checking mode, in which this behavior
392 ## would be useful.
393
394 ## Step 1
395 if (defined $charset_name) {
396 $charset = Message::Charset::Info->get_by_html_name ($charset_name);
397 ## TODO: Is this ok? Transfer protocol's parameter should be
398 ## interpreted in its semantics?
399
400 ($char_stream, $e_status) = $charset->get_decode_handle
401 ($byte_stream, allow_error_reporting => 1,
402 allow_fallback => 1);
403 if ($char_stream) {
404 $self->{confident} = 1;
405 last SNIFFING;
406 } else {
407 !!!parse-error (type => 'charset:not supported',
408 layer => 'encode',
409 line => 1, column => 1,
410 value => $charset_name,
411 level => $self->{level}->{uncertain});
412 }
413 }
414
415 ## Step 2
## Read up to 1024 bytes ahead; they are examined by steps 3 and 6 and
## passed to the decode handle as |byte_buffer|.
416 my $byte_buffer = '';
417 for (1..1024) {
418 my $char = $byte_stream->getc;
419 last unless defined $char;
420 $byte_buffer .= $char;
421 } ## TODO: timeout
422
423 ## Step 3
424 if ($byte_buffer =~ /^\xFE\xFF/) {
425 $charset = Message::Charset::Info->get_by_html_name ('utf-16be');
426 ($char_stream, $e_status) = $charset->get_decode_handle
427 ($byte_stream, allow_error_reporting => 1,
428 allow_fallback => 1, byte_buffer => \$byte_buffer);
429 $self->{confident} = 1;
430 last SNIFFING;
431 } elsif ($byte_buffer =~ /^\xFF\xFE/) {
432 $charset = Message::Charset::Info->get_by_html_name ('utf-16le');
433 ($char_stream, $e_status) = $charset->get_decode_handle
434 ($byte_stream, allow_error_reporting => 1,
435 allow_fallback => 1, byte_buffer => \$byte_buffer);
436 $self->{confident} = 1;
437 last SNIFFING;
438 } elsif ($byte_buffer =~ /^\xEF\xBB\xBF/) {
439 $charset = Message::Charset::Info->get_by_html_name ('utf-8');
440 ($char_stream, $e_status) = $charset->get_decode_handle
441 ($byte_stream, allow_error_reporting => 1,
442 allow_fallback => 1, byte_buffer => \$byte_buffer);
443 $self->{confident} = 1;
444 last SNIFFING;
445 }
446
447 ## Step 4
448 ## TODO: <meta charset>
449
450 ## Step 5
451 ## TODO: from history
452
453 ## Step 6
454 require Whatpm::Charset::UniversalCharDet;
455 $charset_name = Whatpm::Charset::UniversalCharDet->detect_byte_string
456 ($byte_buffer);
457 if (defined $charset_name) {
458 $charset = Message::Charset::Info->get_by_html_name ($charset_name);
459
460 require Whatpm::Charset::DecodeHandle;
461 $buffer = Whatpm::Charset::DecodeHandle::ByteBuffer->new
462 ($byte_stream);
463 ($char_stream, $e_status) = $charset->get_decode_handle
464 ($buffer, allow_error_reporting => 1,
465 allow_fallback => 1, byte_buffer => \$byte_buffer);
466 if ($char_stream) {
467 $buffer->{buffer} = $byte_buffer;
468 !!!parse-error (type => 'sniffing:chardet',
469 text => $charset_name,
470 level => $self->{level}->{info},
471 layer => 'encode',
472 line => 1, column => 1);
473 $self->{confident} = 0;
474 last SNIFFING;
475 }
476 }
477
478 ## Step 7: default
479 ## TODO: Make this configurable.
480 $charset = Message::Charset::Info->get_by_html_name ('windows-1252');
481 ## NOTE: We choose |windows-1252| here, since |utf-8| should be
482 ## detectable in the step 6.
483 require Whatpm::Charset::DecodeHandle;
484 $buffer = Whatpm::Charset::DecodeHandle::ByteBuffer->new
485 ($byte_stream);
486 ($char_stream, $e_status)
487 = $charset->get_decode_handle ($buffer,
488 allow_error_reporting => 1,
489 allow_fallback => 1,
490 byte_buffer => \$byte_buffer);
491 $buffer->{buffer} = $byte_buffer;
492 !!!parse-error (type => 'sniffing:default',
493 text => 'windows-1252',
494 level => $self->{level}->{info},
495 line => 1, column => 1,
496 layer => 'encode');
497 $self->{confident} = 0;
498 } # SNIFFING
499
## Record the selected encoding and report, based on the status flags
## returned with the decode handle, whether a fallback decoder is in use
## or the decoder does not report errors.
500 if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) {
501 $self->{input_encoding} = $charset->get_iana_name; ## TODO: Should we set actual charset decoder's encoding name?
502 !!!parse-error (type => 'chardecode:fallback',
503 #text => $self->{input_encoding},
504 level => $self->{level}->{uncertain},
505 line => 1, column => 1,
506 layer => 'encode');
507 } elsif (not ($e_status &
508 Message::Charset::Info::ERROR_REPORTING_ENCODING_IMPL ())) {
509 $self->{input_encoding} = $charset->get_iana_name;
510 !!!parse-error (type => 'chardecode:no error',
511 text => $self->{input_encoding},
512 level => $self->{level}->{uncertain},
513 line => 1, column => 1,
514 layer => 'encode');
515 } else {
516 $self->{input_encoding} = $charset->get_iana_name;
517 }
518
## Callback implementing the "change the encoding" algorithm.  It
## replaces |$charset|, |$char_stream| and |$e_status| of the enclosing
## scope and, unless it returns early, throws
## Whatpm::HTML::RestartParser, which is caught at the end of this method.
## NOTE(review): |$buffer| is only assigned in steps 6 and 7 above, i.e.
## when {confident} is false; presumably this callback is only invoked in
## that case - confirm against the caller.
519 $self->{change_encoding} = sub {
520 my $self = shift;
521 $charset_name = shift;
522 my $token = shift;
523
524 $charset = Message::Charset::Info->get_by_html_name ($charset_name);
525 ($char_stream, $e_status) = $charset->get_decode_handle
526 ($byte_stream, allow_error_reporting => 1, allow_fallback => 1,
527 byte_buffer => \ $buffer->{buffer});
528
529 if ($char_stream) { # if supported
530 ## "Change the encoding" algorithm:
531
532 ## Step 1
533 if (defined $self->{input_encoding} and
534 $self->{input_encoding} eq $charset_name) {
535 !!!parse-error (type => 'charset label:matching',
536 text => $charset_name,
537 level => $self->{level}->{info});
538 $self->{confident} = 1;
539 return;
540 }
541
542 ## Step 2 (HTML5 revision 3205)
543 if (defined $self->{input_encoding} and
544 Message::Charset::Info->get_by_html_name ($self->{input_encoding})
545 ->{category} & Message::Charset::Info::CHARSET_CATEGORY_UTF16 ()) {
546 $self->{confident} = 1;
547 return;
548 }
549
550 ## Step 3
551 if ($charset->{category} &
552 Message::Charset::Info::CHARSET_CATEGORY_UTF16 ()) {
553 $charset = Message::Charset::Info->get_by_html_name ('utf-8');
554 ($char_stream, $e_status) = $charset->get_decode_handle
555 ($byte_stream,
556 byte_buffer => \ $buffer->{buffer});
557 }
558 $charset_name = $charset->get_iana_name;
559
560 !!!parse-error (type => 'charset label detected',
561 text => $self->{input_encoding},
562 value => $charset_name,
563 level => $self->{level}->{warn},
564 token => $token);
565
566 ## Step 4
567 # if (can) {
568 ## change the encoding on the fly.
569 #$self->{confident} = 1;
570 #return;
571 # }
572
573 ## Step 5
574 throw Whatpm::HTML::RestartParser ();
575 }
576 }; # $self->{change_encoding}
577
## Error handler installed on the (wrapped) character stream.  It
## reports the error as an 'encode' layer parse error and, when an
## |octets| reference is supplied, overwrites the referenced value
## with U+FFFD.
578 my $char_onerror = sub {
579 my (undef, $type, %opt) = @_;
580 !!!parse-error (layer => 'encode',
581 line => $self->{line}, column => $self->{column} + 1,
582 %opt, type => $type);
583 if ($opt{octets}) {
584 ${$opt{octets}} = "\x{FFFD}"; # replacement character
585 }
586 };
587
588 my $wrapped_char_stream = $get_wrapper->($char_stream);
589 $wrapped_char_stream->onerror ($char_onerror);
590
591 my @args = ($_[1], $_[2]); # $doc, $onerror - $get_wrapper = undef;
592 my $return;
593 try {
594 $return = $self->parse_char_stream ($wrapped_char_stream, @args);
595 } catch Whatpm::HTML::RestartParser with {
596 ## NOTE: Invoked after {change_encoding}.
597
598 if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) {
599 $self->{input_encoding} = $charset->get_iana_name; ## TODO: Should we set actual charset decoder's encoding name?
600 !!!parse-error (type => 'chardecode:fallback',
601 level => $self->{level}->{uncertain},
602 #text => $self->{input_encoding},
603 line => 1, column => 1,
604 layer => 'encode');
605 } elsif (not ($e_status &
606 Message::Charset::Info::ERROR_REPORTING_ENCODING_IMPL ())) {
607 $self->{input_encoding} = $charset->get_iana_name;
608 !!!parse-error (type => 'chardecode:no error',
609 text => $self->{input_encoding},
610 level => $self->{level}->{uncertain},
611 line => 1, column => 1,
612 layer => 'encode');
613 } else {
614 $self->{input_encoding} = $charset->get_iana_name;
615 }
616 $self->{confident} = 1;
617
## Parse again from the decode handle chosen by {change_encoding}.
618 $wrapped_char_stream = $get_wrapper->($char_stream);
619 $wrapped_char_stream->onerror ($char_onerror);
620
621 $return = $self->parse_char_stream ($wrapped_char_stream, @args);
622 };
623 return $return;
624 } # parse_byte_stream
625
626 ## NOTE: HTML5 spec says that the encoding layer MUST NOT strip BOM
627 ## and the HTML layer MUST ignore it. However, we do strip BOM in
628 ## the encoding layer and the HTML layer does not ignore any U+FEFF,
629 ## because the core part of our HTML parser expects a string of characters,
630 ## not a string of bytes or code units or anything which might contain a BOM.
631 ## Therefore, any parser interface that accepts a string of bytes,
632 ## such as |parse_byte_string| in this module, must ensure that it does
633 ## strip the BOM and never strip any ZWNBSP.
634
## Parses an HTML document given as a string of characters.
##
## Arguments: ($self, $s, $doc, $onerror, $get_wrapper).  |$s| is
## either the character string itself or a reference to it.  The string
## is wrapped in a Whatpm::Charset::DecodeHandle::CharString handle and
## that handle, together with the remaining arguments, is passed to
## |parse_char_stream|, whose return value is returned.
sub parse_char_string ($$$;$$) {
  my $self = shift;
  require Whatpm::Charset::DecodeHandle;

  my $string_ref;
  if (ref $_[0]) {
    $string_ref = $_[0];
  } else {
    $string_ref = \($_[0]);
  }

  my $handle = Whatpm::Charset::DecodeHandle::CharString->new ($string_ref);
  return $self->parse_char_stream ($handle, @_[1..$#_]);
} # parse_char_string

## NOTE: Alias for backward compatibility.
*parse_string = \&parse_char_string;
644
## Parses an HTML document read from a character stream handle into
## the document object given as the second argument, and returns that
## document.  Child nodes already in the document are removed first.
##
## Arguments: ($self, $input, $document, $onerror, $get_wrapper).
## |$onerror| (optional) is called with key/value pairs describing each
## parse error; the default prints a warning.  |$get_wrapper|
## (optional) is called with |$input| and its return value replaces
## |$input|.  When invoked as a class method, a new parser object is
## created.
645 sub parse_char_stream ($$$;$$) {
646 my $self = ref $_[0] ? shift : shift->new;
647 my $input = $_[0];
648 $self->{document} = $_[1];
649 @{$self->{document}->child_nodes} = ();
650
651 ## NOTE: |set_inner_html| copies most of this method's code
652
653 ## Confidence: irrelevant.
654 $self->{confident} = 1 unless exists $self->{confident};
655
656 $self->{document}->input_encoding ($self->{input_encoding})
657 if defined $self->{input_encoding};
658 ## TODO: |{input_encoding}| is needless?
659
660 $self->{line_prev} = $self->{line} = 1;
661 $self->{column_prev} = -1;
662 $self->{column} = 0;
## {set_nc}: stores the code point of the next input character in {nc}
## (-1 at the end of the input) and updates the line/column counters.
## CR and CR LF are both delivered as a single LF; NUL is delivered as
## U+FFFD together with a parse error.
663 $self->{set_nc} = sub {
664 my $self = shift;
665
666 my $char = '';
667 if (defined $self->{next_nc}) {
## A character was read ahead while handling a CR; deliver it now.
668 $char = $self->{next_nc};
669 delete $self->{next_nc};
670 $self->{nc} = ord $char;
671 } else {
672 $self->{char_buffer} = '';
673 $self->{char_buffer_pos} = 0;
674
## Try to buffer a run of characters other than NUL, LF and CR.
675 my $count = $input->manakai_read_until
676 ($self->{char_buffer}, qr/[^\x00\x0A\x0D]/, $self->{char_buffer_pos});
677 if ($count) {
678 $self->{line_prev} = $self->{line};
679 $self->{column_prev} = $self->{column};
680 $self->{column}++;
681 $self->{nc}
682 = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
683 return;
684 }
685
## Otherwise read a single character, which is handled below.
686 if ($input->read ($char, 1)) {
687 $self->{nc} = ord $char;
688 } else {
689 $self->{nc} = -1;
690 return;
691 }
692 }
693
694 ($self->{line_prev}, $self->{column_prev})
695 = ($self->{line}, $self->{column});
696 $self->{column}++;
697
698 if ($self->{nc} == 0x000A) { # LF
699 !!!cp ('j1');
700 $self->{line}++;
701 $self->{column} = 0;
702 } elsif ($self->{nc} == 0x000D) { # CR
703 !!!cp ('j2');
704 ## TODO: support for abort/streaming
## Look one character ahead: an LF directly after the CR is dropped,
## anything else is kept in {next_nc} for the next call.
705 my $next = '';
706 if ($input->read ($next, 1) and $next ne "\x0A") {
707 $self->{next_nc} = $next;
708 }
709 $self->{nc} = 0x000A; # LF # MUST
710 $self->{line}++;
711 $self->{column} = 0;
712 } elsif ($self->{nc} == 0x0000) { # NULL
713 !!!cp ('j4');
714 !!!parse-error (type => 'NULL');
715 $self->{nc} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
716 }
717 };
718
## {read_until}: reads a run of characters that are neither listed in
## the second argument nor NUL, LF or CR into the scalar given as the
## first argument, and returns the number of characters read.  It is
## served from {char_buffer} when that still holds unread characters,
## otherwise directly from |$input|.  Returns 0 without reading when a
## read-ahead character is pending in {next_nc}.
719 $self->{read_until} = sub {
720 #my ($scalar, $specials_range, $offset) = @_;
721 return 0 if defined $self->{next_nc};
722
723 my $pattern = qr/[^$_[1]\x00\x0A\x0D]/;
724 my $offset = $_[2] || 0;
725
726 if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
727 pos ($self->{char_buffer}) = $self->{char_buffer_pos};
728 if ($self->{char_buffer} =~ /\G(?>$pattern)+/) {
729 substr ($_[0], $offset)
730 = substr ($self->{char_buffer}, $-[0], $+[0] - $-[0]);
731 my $count = $+[0] - $-[0];
732 if ($count) {
733 $self->{column} += $count;
734 $self->{char_buffer_pos} += $count;
735 $self->{line_prev} = $self->{line};
736 $self->{column_prev} = $self->{column} - 1;
737 $self->{nc} = -1;
738 }
739 return $count;
740 } else {
741 return 0;
742 }
743 } else {
744 my $count = $input->manakai_read_until ($_[0], $pattern, $_[2]);
745 if ($count) {
746 $self->{column} += $count;
747 $self->{line_prev} = $self->{line};
748 $self->{column_prev} = $self->{column} - 1;
749 $self->{nc} = -1;
750 }
751 return $count;
752 }
753 }; # $self->{read_until}
754
## Default error handler: print the error type and its position (taken
## from the token if one is given) as a warning.
755 my $onerror = $_[2] || sub {
756 my (%opt) = @_;
757 my $line = $opt{token} ? $opt{token}->{line} : $opt{line};
758 my $column = $opt{token} ? $opt{token}->{column} : $opt{column};
759 warn "Parse error ($opt{type}) at line $line column $column\n";
760 };
## The current line and column are passed first, so that |line| and
## |column| given by the reporter come later in the argument list.
761 $self->{parse_error} = sub {
762 $onerror->(line => $self->{line}, column => $self->{column}, @_);
763 };
764
## Error handler for the input handle: reports the error as an
## 'encode' layer parse error.
765 my $char_onerror = sub {
766 my (undef, $type, %opt) = @_;
767 !!!parse-error (layer => 'encode',
768 line => $self->{line}, column => $self->{column} + 1,
769 %opt, type => $type);
770 }; # $char_onerror
771
772 if ($_[3]) {
773 $input = $_[3]->($input);
774 $input->onerror ($char_onerror);
775 } else {
## Keep an error handler that has already been set on the handle.
776 $input->onerror ($char_onerror) unless defined $input->onerror;
777 }
778
779 $self->_initialize_tokenizer;
780 $self->_initialize_tree_constructor;
781 $self->_construct_tree;
782 $self->_terminate_tree_constructor;
783
784 delete $self->{parse_error}; # remove loop
785
786 return $self->{document};
787 } # parse_char_stream
788
## Creates a new parser object.
##
## The object starts out with the table of error level codes in
## {level}, a {set_nc} callback that just sets {nc} to -1, and
## {parse_error}, {change_encoding} and {application_cache_selection}
## callbacks that do nothing.
sub new ($) {
  my ($class) = @_;

  my %level = (
    must => 'm',
    should => 's',
    obc => 's', ## Obsolete but conforming, # XXX distinguish from "should"
    warn => 'w',
    info => 'i',
    uncertain => 'u',
  );
  my $self = bless {level => \%level}, $class;

  $self->{set_nc} = sub {
    $self->{nc} = -1;
  };

  $self->{parse_error} = sub {
    # Does nothing.
  };

  $self->{change_encoding} = sub {
    # if ($_[0] is a supported encoding) {
    # run "change the encoding" algorithm;
    # throw Whatpm::HTML::RestartParser (charset => $new_encoding);
    # }
  };

  $self->{application_cache_selection} = sub {
    # Does nothing.
  };

  return $self;
} # new
818
819 ## Insertion modes
820
## Insertion mode group flags, one bit each.
sub AFTER_HTML_IMS () { 1 << 2 }
sub HEAD_IMS () { 1 << 3 }
sub BODY_IMS () { 1 << 4 }
sub BODY_TABLE_IMS () { 1 << 5 }
sub TABLE_IMS () { 1 << 6 }
sub ROW_IMS () { 1 << 7 }
sub BODY_AFTER_IMS () { 1 << 8 }
sub FRAME_IMS () { 1 << 9 }
sub SELECT_IMS () { 1 << 10 }
#sub IN_FOREIGN_CONTENT_IM () { 1 << 11 } # see Whatpm::HTML::Tokenizer
## NOTE: "in foreign content" insertion mode is special; it is combined
## with the secondary insertion mode. In this parser, they are stored
## together in the bit-or'ed form.
sub IN_CDATA_RCDATA_IM () { 1 << 12 }
## NOTE: "in CDATA/RCDATA" insertion mode is also special; it is
## combined with the original insertion mode. In this parser,
## they are stored together in the bit-or'ed form.

sub IM_MASK () { 0b11111111111 }

## NOTE: "initial" and "before html" insertion modes have no constants.

## NOTE: "after after body" insertion mode.
sub AFTER_HTML_BODY_IM () { AFTER_HTML_IMS | BODY_AFTER_IMS }

## NOTE: "after after frameset" insertion mode.
sub AFTER_HTML_FRAMESET_IM () { AFTER_HTML_IMS | FRAME_IMS }

## Head-related insertion modes
sub IN_HEAD_IM () { HEAD_IMS | 0 }
sub IN_HEAD_NOSCRIPT_IM () { HEAD_IMS | 1 }
sub AFTER_HEAD_IM () { HEAD_IMS | 2 }
sub BEFORE_HEAD_IM () { HEAD_IMS | 3 }

## Body-related insertion modes
sub IN_BODY_IM () { BODY_IMS }
sub IN_CELL_IM () { BODY_IMS | BODY_TABLE_IMS | 1 }
sub IN_CAPTION_IM () { BODY_IMS | BODY_TABLE_IMS | 2 }
sub AFTER_BODY_IM () { BODY_AFTER_IMS }

## Table-related insertion modes
sub IN_TABLE_IM () { TABLE_IMS }
sub IN_ROW_IM () { TABLE_IMS | ROW_IMS | 1 }
sub IN_TABLE_BODY_IM () { TABLE_IMS | ROW_IMS | 2 }
sub IN_COLUMN_GROUP_IM () { 2 }

## Frameset-related insertion modes
sub IN_FRAMESET_IM () { FRAME_IMS | 1 }
sub AFTER_FRAMESET_IM () { FRAME_IMS | 2 }

## Select-related insertion modes
sub IN_SELECT_IM () { SELECT_IMS | 1 }
sub IN_SELECT_IN_TABLE_IM () { SELECT_IMS | 2 }
865
## Prepares the document and the parser state for tree construction:
## turns off strict error checking on the document, marks it as an
## HTML document, records line 1 / column 1 as its source position,
## and sets the {frameset_ok} flag.
## NOTE: $self->{document} MUST be specified before this method is called
sub _initialize_tree_constructor ($) {
  my ($self) = @_;
  my $doc = $self->{document};

  $doc->strict_error_checking (0);
  ## TODO: Turn mutation events off # MUST
  ## TODO: Turn loose Document option (manakai extension) on
  $doc->manakai_is_html (1); # MUST
  for my $key (qw(manakai_source_line manakai_source_column)) {
    $doc->set_user_data ($key => 1);
  }

  $self->{frameset_ok} = 1;
} # _initialize_tree_constructor
878
## Finishes tree construction by turning strict error checking on the
## document back on.  The result of that call is returned.
sub _terminate_tree_constructor ($) {
  my ($self) = @_;
  ## TODO: Turn mutation events on
  return $self->{document}->strict_error_checking (1);
} # _terminate_tree_constructor
884
885 ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
886
887 { # tree construction stage
888 my $token;
889
## Runs the tree construction stage: resets the per-parse state kept
## in the parser object, runs the "initial" and "before html" insertion
## modes, then switches to "before head" and runs the main loop.
## NOTE(review): |!!!next-token| is a source macro; presumably it is
## expanded at build time to fetch the next token into |$token| -
## confirm against the preprocessor.
890 sub _construct_tree ($) {
891 my ($self) = @_;
892
893 ## When an interactive UA render the $self->{document} available
894 ## to the user, or when it begin accepting user input, are
895 ## not defined.
896
897 !!!next-token;
898
## Reset the state left over from any previous parse.
899 undef $self->{form_element};
900 undef $self->{head_element};
901 undef $self->{head_element_inserted};
902 $self->{open_elements} = [];
903 undef $self->{inner_html_node};
904 undef $self->{ignore_newline};
905
906 ## NOTE: The "initial" insertion mode.
907 $self->_tree_construction_initial; # MUST
908
909 ## NOTE: The "before html" insertion mode.
910 $self->_tree_construction_root_element;
911 $self->{insertion_mode} = BEFORE_HEAD_IM;
912
913 ## NOTE: The "before head" insertion mode and so on.
914 $self->_tree_construction_main;
915 } # _construct_tree
916
## Run the "initial" insertion mode of the tree construction stage.
##
## Works on the current token held in |$token| (a variable declared
## outside this subroutine).  Leading white space in character tokens
## is dropped and comment tokens are appended to the document.  A
## DOCTYPE token is checked for conformance, turned into a document
## type definition node that is appended to the document, and used to
## choose the document's compatibility mode ("quirks" or "limited
## quirks").  Any other token reports the 'no DOCTYPE' parse error and
## puts the document into quirks mode.
##
## Returns (without a value) when the parser has to go on to the
## "before html" insertion mode.  Dies if the token type is unknown.
sub _tree_construction_initial ($) {
  my $self = shift;

  ## NOTE: "initial" insertion mode

  INITIAL: {
    if ($token->{type} == DOCTYPE_TOKEN) {
      ## NOTE: Conformance checkers MAY, instead of reporting "not
      ## HTML5" error, switch to a conformance checking mode for
      ## another language.  (We don't support such mode switchings; it
      ## is nonsense to do anything different from what browsers do.)
      my $doctype_name = $token->{name};
      $doctype_name = '' unless defined $doctype_name;
      my $doctype = $self->{document}->create_document_type_definition
          ($doctype_name);

      $doctype_name =~ tr/A-Z/a-z/; # ASCII case-insensitive.
      if ($doctype_name ne 'html') {
        !!!cp ('t1');
        !!!parse-error (type => 'not HTML5', token => $token);
      } elsif (defined $token->{pubid}) {
        ## Obsolete permitted DOCTYPEs (case-sensitive).  The value is
        ## the only system identifier allowed with the public
        ## identifier (the system identifier may also be omitted).
        my $xsysid = {
          '-//W3C//DTD HTML 4.0//EN' => 'http://www.w3.org/TR/REC-html40/strict.dtd',
          '-//W3C//DTD HTML 4.01//EN' => 'http://www.w3.org/TR/html4/strict.dtd',
          '-//W3C//DTD XHTML 1.0 Strict//EN' => 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd',
          '-//W3C//DTD XHTML 1.1//EN' => 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd',
        }->{$token->{pubid}};
        if (defined $xsysid and
            (not defined $token->{sysid} or $token->{sysid} eq $xsysid)) {
          !!!cp ('t2');
          !!!parse-error (type => 'obs DOCTYPE', token => $token,
                          level => $self->{level}->{obc}); ## XXX error type
        } else {
          !!!cp ('t2.1');
          !!!parse-error (type => 'not HTML5', token => $token);
        }
      } elsif (defined $token->{sysid}) {
        if ($token->{sysid} eq 'about:legacy-compat') {
          !!!cp ('t1.2'); ## <!DOCTYPE HTML SYSTEM "about:legacy-compat">
          !!!parse-error (type => 'XSLT-compat', token => $token,
                          level => $self->{level}->{should});
        } else {
          !!!parse-error (type => 'not HTML5', token => $token);
        }
      } else { ## <!DOCTYPE HTML>
        !!!cp ('t3');
        #
      }

      ## NOTE: Default value for both |public_id| and |system_id| attributes
      ## are empty strings, so that we don't set any value in missing cases.
      $doctype->public_id ($token->{pubid}) if defined $token->{pubid};
      $doctype->system_id ($token->{sysid}) if defined $token->{sysid};

      ## NOTE: Other DocumentType attributes are null or empty lists.
      ## In Firefox3, |internalSubset| attribute is set to the empty
      ## string, while |null| is an allowed value for the attribute
      ## according to DOM3 Core.
      $self->{document}->append_child ($doctype);

      if ($token->{quirks} or $doctype_name ne 'html') {
        !!!cp ('t4');
        $self->{document}->manakai_compat_mode ('quirks');
      } elsif (defined $token->{pubid}) {
        my $pubid = $token->{pubid};
        $pubid =~ tr/a-z/A-Z/; ## ASCII case-insensitive.

        ## Public identifier prefixes (uppercased, to be compared with
        ## the uppercased |$pubid|) that select the quirks mode.
        my $prefix = [
          "+//SILMARIL//DTD HTML PRO V0R11 19970101//",
          "-//ADVASOFT LTD//DTD HTML 3.0 ASWEDIT + EXTENSIONS//",
          "-//AS//DTD HTML 3.0 ASWEDIT + EXTENSIONS//",
          "-//IETF//DTD HTML 2.0 LEVEL 1//",
          "-//IETF//DTD HTML 2.0 LEVEL 2//",
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 1//",
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 2//",
          "-//IETF//DTD HTML 2.0 STRICT//",
          "-//IETF//DTD HTML 2.0//",
          "-//IETF//DTD HTML 2.1E//",
          "-//IETF//DTD HTML 3.0//",
          "-//IETF//DTD HTML 3.2 FINAL//",
          "-//IETF//DTD HTML 3.2//",
          "-//IETF//DTD HTML 3//",
          "-//IETF//DTD HTML LEVEL 0//",
          "-//IETF//DTD HTML LEVEL 1//",
          "-//IETF//DTD HTML LEVEL 2//",
          "-//IETF//DTD HTML LEVEL 3//",
          "-//IETF//DTD HTML STRICT LEVEL 0//",
          "-//IETF//DTD HTML STRICT LEVEL 1//",
          "-//IETF//DTD HTML STRICT LEVEL 2//",
          "-//IETF//DTD HTML STRICT LEVEL 3//",
          "-//IETF//DTD HTML STRICT//",
          "-//IETF//DTD HTML//",
          "-//METRIUS//DTD METRIUS PRESENTATIONAL//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML STRICT//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 TABLES//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML STRICT//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 TABLES//",
          "-//NETSCAPE COMM. CORP.//DTD HTML//",
          "-//NETSCAPE COMM. CORP.//DTD STRICT HTML//",
          "-//O'REILLY AND ASSOCIATES//DTD HTML 2.0//",
          "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED 1.0//",
          "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED RELAXED 1.0//",
          "-//SOFTQUAD SOFTWARE//DTD HOTMETAL PRO 6.0::19990601::EXTENSIONS TO HTML 4.0//",
          "-//SOFTQUAD//DTD HOTMETAL PRO 4.0::19971010::EXTENSIONS TO HTML 4.0//",
          "-//SPYGLASS//DTD HTML 2.0 EXTENDED//",
          "-//SQ//DTD HTML 2.0 HOTMETAL + EXTENSIONS//",
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA HTML//",
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA STRICT HTML//",
          "-//W3C//DTD HTML 3 1995-03-24//",
          "-//W3C//DTD HTML 3.2 DRAFT//",
          "-//W3C//DTD HTML 3.2 FINAL//",
          "-//W3C//DTD HTML 3.2//",
          "-//W3C//DTD HTML 3.2S DRAFT//",
          "-//W3C//DTD HTML 4.0 FRAMESET//",
          "-//W3C//DTD HTML 4.0 TRANSITIONAL//",
          "-//W3C//DTD HTML EXPERIMENTAL 19960712//",
          "-//W3C//DTD HTML EXPERIMENTAL 970421//",
          "-//W3C//DTD W3 HTML//",
          "-//W3O//DTD W3 HTML 3.0//",
          "-//WEBTECHS//DTD MOZILLA HTML 2.0//",
          "-//WEBTECHS//DTD MOZILLA HTML//",
        ]; # $prefix

        ## The public identifier itself (not the list reference) has to
        ## be tested against each prefix.
        my $match;
        for my $quirks_prefix (@$prefix) {
          if (substr ($pubid, 0, length $quirks_prefix) eq $quirks_prefix) {
            $match = 1;
            last;
          }
        }
        if ($match or
            $pubid eq "-//W3O//DTD W3 HTML STRICT 3.0//EN//" or
            $pubid eq "-/W3C/DTD HTML 4.0 TRANSITIONAL/EN" or
            $pubid eq "HTML") {
          !!!cp ('t5');
          $self->{document}->manakai_compat_mode ('quirks');
        } elsif ($pubid =~ m[^-//W3C//DTD HTML 4\.01 FRAMESET//] or
                 $pubid =~ m[^-//W3C//DTD HTML 4\.01 TRANSITIONAL//]) {
          ## A missing system identifier selects the quirks mode; with
          ## a system identifier the limited quirks mode is selected.
          if (not defined $token->{sysid}) {
            !!!cp ('t6');
            $self->{document}->manakai_compat_mode ('quirks');
          } else {
            !!!cp ('t7');
            $self->{document}->manakai_compat_mode ('limited quirks');
          }
        } elsif ($pubid =~ m[^-//W3C//DTD XHTML 1\.0 FRAMESET//] or
                 $pubid =~ m[^-//W3C//DTD XHTML 1\.0 TRANSITIONAL//]) {
          !!!cp ('t8');
          $self->{document}->manakai_compat_mode ('limited quirks');
        } else {
          !!!cp ('t9');
        }
      } else {
        !!!cp ('t10');
      }
      if (defined $token->{sysid}) {
        my $sysid = $token->{sysid};
        $sysid =~ tr/A-Z/a-z/; ## ASCII case-insensitive.
        if ($sysid eq "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
          ## NOTE: Ensure that |PUBLIC "(limited quirks)" "(quirks)"|
          ## is signaled as in quirks mode!
          $self->{document}->manakai_compat_mode ('quirks');
          !!!cp ('t11');
        } else {
          !!!cp ('t12');
        }
      } else {
        !!!cp ('t13');
      }

      ## Go to the "before html" insertion mode.
      !!!next-token;
      return;
    } elsif ({
              START_TAG_TOKEN, 1,
              END_TAG_TOKEN, 1,
              END_OF_FILE_TOKEN, 1,
             }->{$token->{type}}) {
      !!!cp ('t14');
      !!!parse-error (type => 'no DOCTYPE', token => $token);
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the "before html" insertion mode.
      ## reprocess
      !!!ack-later;
      return;
    } elsif ($token->{type} == CHARACTER_TOKEN) {
      if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
        ## Ignore the token (its leading white space has been removed).

        unless (length $token->{data}) {
          !!!cp ('t15');
          ## Stay in the insertion mode.
          !!!next-token;
          redo INITIAL;
        } else {
          !!!cp ('t16');
        }
      } else {
        !!!cp ('t17');
      }

      !!!parse-error (type => 'no DOCTYPE', token => $token);
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the "before html" insertion mode.
      ## reprocess
      return;
    } elsif ($token->{type} == COMMENT_TOKEN) {
      !!!cp ('t18');
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);

      ## Stay in the insertion mode.
      !!!next-token;
      redo INITIAL;
    } else {
      die "$0: $token->{type}: Unknown token type";
    }
  } # INITIAL

  die "$0: _tree_construction_initial: This should be never reached";
} # _tree_construction_initial
1139
## Run the "before html" insertion mode of the tree construction stage.
##
## Works on the current token held in |$token| (a variable declared
## outside this subroutine).  DOCTYPE tokens are reported and ignored,
## comments are appended to the document, and leading white space in
## character tokens is dropped.  An |html| start tag creates the root
## element from the token; any other token creates an attribute-less
## |html| root element and is left to be reprocessed.  In both cases the
## root element is pushed onto the stack of open elements and the
## |application_cache_selection| callback is invoked exactly once (with
## the |manifest| attribute value, or |undef| when there is none).
##
## Returns (without a value) when the parser has to go on to the
## "before head" insertion mode.  Dies if the token type is unknown.
sub _tree_construction_root_element ($) {
  my $self = shift;

  ## NOTE: "before html" insertion mode.

  B: {
    if ($token->{type} == DOCTYPE_TOKEN) {
      !!!cp ('t19');
      !!!parse-error (type => 'in html:#DOCTYPE', token => $token);
      ## Ignore the token
      ## Stay in the insertion mode.
      !!!next-token;
      redo B;
    } elsif ($token->{type} == COMMENT_TOKEN) {
      !!!cp ('t20');
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);
      ## Stay in the insertion mode.
      !!!next-token;
      redo B;
    } elsif ($token->{type} == CHARACTER_TOKEN) {
      if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
        ## Ignore the token.

        unless (length $token->{data}) {
          !!!cp ('t21');
          ## Stay in the insertion mode.
          !!!next-token;
          redo B;
        } else {
          !!!cp ('t22');
        }
      } else {
        !!!cp ('t23');
      }

      ## NOTE: The application cache selection callback is invoked by
      ## the common "anything else" code below; it must not be invoked
      ## here as well, or it would run twice for a character token.
      #
    } elsif ($token->{type} == START_TAG_TOKEN) {
      if ($token->{tag_name} eq 'html') {
        my $root_element;
        !!!create-element ($root_element, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
        $self->{document}->append_child ($root_element);
        push @{$self->{open_elements}},
            [$root_element, $el_category->{html}];

        if ($token->{attributes}->{manifest}) {
          !!!cp ('t24');
          $self->{application_cache_selection}
              ->($token->{attributes}->{manifest}->{value});
          ## ISSUE: Spec is unclear on relative references.
          ## According to Hixie (#whatwg 2008-03-19), it should be
          ## resolved against the base URI of the document in HTML
          ## or xml:base of the element in XHTML.
        } else {
          !!!cp ('t25');
          $self->{application_cache_selection}->(undef);
        }

        !!!nack ('t25c');

        !!!next-token;
        return; ## Go to the "before head" insertion mode.
      } else {
        !!!cp ('t25.1');
        #
      }
    } elsif ({
              END_TAG_TOKEN, 1,
              END_OF_FILE_TOKEN, 1,
             }->{$token->{type}}) {
      !!!cp ('t26');
      #
    } else {
      die "$0: $token->{type}: Unknown token type";
    }

    ## "Anything else": imply the root element and reprocess the token.
    my $root_element;
    !!!create-element ($root_element, $HTML_NS, 'html',, $token);
    $self->{document}->append_child ($root_element);
    push @{$self->{open_elements}}, [$root_element, $el_category->{html}];

    $self->{application_cache_selection}->(undef);

    ## NOTE: Reprocess the token.
    !!!ack-later;
    return; ## Go to the "before head" insertion mode.
  } # B

  die "$0: _tree_construction_root_element: This should never be reached";
} # _tree_construction_root_element
1232
## Reset |$self->{insertion_mode}| according to the stack of open
## elements.
##
## The stack (|$self->{open_elements}|, whose items are
## |[node, category]| pairs) is walked from its last item toward its
## first item until an item that determines the insertion mode is
## found.  When the first item of the stack is reached,
## |$self->{inner_html_node}| is examined in its place; the subroutine
## dies if that value is not defined.
##
## Returns no value.
sub _reset_insertion_mode ($) {
  my $self = shift;

  ## Step 1
  my $last;

  ## Step 2
  my $i = -1;
  my $node = $self->{open_elements}->[$i];

  ## Step 3
  S3: {
    if ($self->{open_elements}->[0]->[0] eq $node->[0]) {
      $last = 1;
      if (defined $self->{inner_html_node}) {
        !!!cp ('t28');
        $node = $self->{inner_html_node};
      } else {
        die "_reset_insertion_mode: t27";
      }
    }

    ## Step 4..14
    my $new_mode;
    if ($node->[1] & FOREIGN_EL) {
      !!!cp ('t28.1');
      ## NOTE: Strictly speaking, the line below only applies to MathML and
      ## SVG elements.  Currently the HTML syntax supports only MathML and
      ## SVG elements as foreigners.
      $new_mode = IN_BODY_IM | IN_FOREIGN_CONTENT_IM;
    } elsif ($node->[1] == TABLE_CELL_EL) {
      if ($last) {
        !!!cp ('t28.2');
        #
      } else {
        !!!cp ('t28.3');
        $new_mode = IN_CELL_IM;
      }
    } else {
      !!!cp ('t28.4');
      $new_mode = {
        select => IN_SELECT_IM,
        ## NOTE: |option| and |optgroup| do not set
        ## insertion mode to "in select" by themselves.
        tr => IN_ROW_IM,
        tbody => IN_TABLE_BODY_IM,
        thead => IN_TABLE_BODY_IM,
        tfoot => IN_TABLE_BODY_IM,
        caption => IN_CAPTION_IM,
        colgroup => IN_COLUMN_GROUP_IM,
        table => IN_TABLE_IM,
        head => IN_BODY_IM, # not in head!
        body => IN_BODY_IM,
        frameset => IN_FRAMESET_IM,
      }->{$node->[0]->manakai_local_name};
    }

    ## The assignment and the return are separate statements so that
    ## the return does not depend on the truth value of the mode.
    if (defined $new_mode) {
      $self->{insertion_mode} = $new_mode;
      return;
    }

    ## Step 15
    if ($node->[1] == HTML_EL) {
      unless (defined $self->{head_element}) {
        !!!cp ('t29');
        $self->{insertion_mode} = BEFORE_HEAD_IM;
      } else {
        ## ISSUE: Can this state be reached?
        !!!cp ('t30');
        $self->{insertion_mode} = AFTER_HEAD_IM;
      }
      return;
    } else {
      !!!cp ('t31');
    }

    ## Step 16
    if ($last) {
      $self->{insertion_mode} = IN_BODY_IM;
      return;
    }

    ## Step 17
    $i--;
    $node = $self->{open_elements}->[$i];

    ## Step 18
    redo S3;
  } # S3

  die "$0: _reset_insertion_mode: This line should never be reached";
} # _reset_insertion_mode
1319
1320 sub _tree_construction_main ($) {
1321 my $self = shift;
1322
1323 my $active_formatting_elements = [];
1324
## Reconstruct the active formatting elements.
##
## The only argument is a code reference that is called with each newly
## cloned node in order to insert it into the tree.  Items of the list
## of active formatting elements are |[node, category]| pairs, or have
## the string |#marker| as their first member.
my $reconstruct_active_formatting_elements = sub { # MUST
  my $insert_node = shift;

  ## Step 1: Nothing to do if the list is empty.
  return unless @$active_formatting_elements;

  ## Step 3: Start from the last item of the list.
  my $pos = -1;
  my $item = $active_formatting_elements->[$pos];

  ## Step 2: Nothing to do if the last item is a marker or is still in
  ## the stack of open elements.
  return if $item->[0] eq '#marker';
  for my $open (@{$self->{open_elements}}) {
    if ($item->[0] eq $open->[0]) {
      !!!cp ('t32');
      return;
    }
  }

  S4: {
    ## Step 4: Stop rewinding at the first item of the list.
    last S4 if $active_formatting_elements->[0]->[0] eq $item->[0];

    ## Step 5: Go to the previous item.
    $pos--;
    $item = $active_formatting_elements->[$pos];

    ## Step 6: Keep rewinding unless the item is a marker or is in the
    ## stack of open elements.
    if ($item->[0] eq '#marker') {
      !!!cp ('t33_1');
      #
    } else {
      my $is_open;
      OE: for my $open (@{$self->{open_elements}}) {
        if ($item->[0] eq $open->[0]) {
          !!!cp ('t33');
          $is_open = 1;
          last OE;
        }
      }
      if (not $is_open) {
        ## NOTE: <!DOCTYPE HTML><p><b><i><u></p> <p>X
        !!!cp ('t35');
        redo S4;
      }
      !!!cp ('t34');
      #
    }

    ## Step 7: Advance to the item that follows.
    $pos++;
    $item = $active_formatting_elements->[$pos];
  } # S4

  S7: {
    ## Step 8: Clone the node (|clone_node| is called with |0|); the
    ## category value is carried over.
    my $copy = [$item->[0]->clone_node (0), $item->[1]];

    ## Step 9: Insert the clone and push it onto the stack.
    $insert_node->($copy->[0]);
    push @{$self->{open_elements}}, $copy;

    ## Step 10: Replace the list item by the clone.
    $active_formatting_elements->[$pos] = $self->{open_elements}->[-1];

    ## Step 11: Continue until the last item of the list is replaced.
    unless ($copy->[0] eq $active_formatting_elements->[-1]->[0]) {
      !!!cp ('t36');
      ## Step 7'
      $pos++;
      $item = $active_formatting_elements->[$pos];

      redo S7;
    }

    !!!cp ('t37');
  } # S7
}; # $reconstruct_active_formatting_elements
1404
## Clear the list of active formatting elements up to the last marker:
## the last |#marker| item and every item after it are removed.  The
## list is left untouched when it contains no marker.
my $clear_up_to_marker = sub {
  my $pos = $#$active_formatting_elements;
  while ($pos >= 0) {
    if ($active_formatting_elements->[$pos]->[0] eq '#marker') {
      !!!cp ('t38');
      ## Two-argument |splice|: drops everything from |$pos| on.
      splice @$active_formatting_elements, $pos;
      return;
    }
    $pos--;
  }

  !!!cp ('t39');
}; # $clear_up_to_marker
1416
1417 my $insert;
1418
## Handle a start tag whose content is parsed as CDATA or RCDATA.
##
## The only argument is the content model flag stored into
## |$self->{content_model}|.  An element is inserted for the current
## start tag token |$token|, the |IN_CDATA_RCDATA_IM| bit is added to
## the insertion mode, and the next token is requested.
my $parse_rcdata = sub ($) {
  my $content_model_flag = $_[0];

  ## Step 1: Insert an element for the start tag token.
  my $start_tag_name = $token->{tag_name};
  !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);

  ## Step 2: Switch the content model (CDATA or RCDATA).
  $self->{content_model} = $content_model_flag;
  delete $self->{escape}; # MUST

  ## Step 3, 4: The current insertion mode is kept; only a flag bit is
  ## added to it.
  $self->{insertion_mode} |= IN_CDATA_RCDATA_IM;

  !!!nack ('t40.1');
  !!!next-token;
}; # $parse_rcdata
1436
## Handle a |script| start tag (the current token |$token|).
##
## A |script| element is created in the HTML namespace with the
## attributes of the token, inserted by means of the current |$insert|
## code reference, and pushed onto the stack of open elements.  The
## content model is then switched to CDATA, the |IN_CDATA_RCDATA_IM|
## bit is added to the insertion mode, and the next token is requested.
my $script_start_tag = sub () {
  ## Step 1: Create the element.
  my $script_el;
  !!!create-element ($script_el, $HTML_NS, 'script', $token->{attributes}, $token);

  ## Step 2
  ## TODO: mark as "parser-inserted"

  ## Step 3
  ## TODO: Mark as "already executed", if ...

  ## Step 4 (HTML5 revision 2702): Insert the element and make it the
  ## current node.
  $insert->($script_el);
  my $stack_item = [$script_el, $el_category->{script}];
  push @{$self->{open_elements}}, $stack_item;

  ## Step 5: The content of the element is tokenized as CDATA.
  $self->{content_model} = CDATA_CONTENT_MODEL;
  delete $self->{escape}; # MUST

  ## Step 6-7: The current insertion mode is kept; only a flag bit is
  ## added to it.
  $self->{insertion_mode} |= IN_CDATA_RCDATA_IM;

  !!!nack ('t40.2');
  !!!next-token;
}; # $script_start_tag
1462
1463 ## NOTE: $open_tables->[-1]->[0] is the "current table" element node.
1464 ## NOTE: $open_tables->[-1]->[1] is the "tainted" flag (OBSOLETE; unused).
1465 ## NOTE: $open_tables->[-1]->[2] is set false when non-Text node inserted.
1466 my $open_tables = [[$self->{open_elements}->[0]->[0]]];
1467
## Process an end tag of a formatting element by the adoption agency
## algorithm (AAA).
##
## The only argument is the end tag token; its |tag_name| selects the
## formatting element in the list of active formatting elements.  The
## whole algorithm is repeated (|redo FET|) until one of its steps
## returns.  Every returning path requests the next token.
##
## Items of both the list of active formatting elements and the stack
## of open elements are |[node, category]| pairs (markers in the former
## have the string |#marker| as their first member).
my $formatting_end_tag = sub {
  my $end_tag_token = shift;
  my $tag_name = $end_tag_token->{tag_name};

  ## NOTE: The adoption agency algorithm (AAA).

  FET: {
    ## Step 1: Find the formatting element, i.e. the last item after
    ## the last marker whose local name is the tag name.
    my $formatting_element;
    my $formatting_element_i_in_active;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq '#marker') {
        !!!cp ('t52');
        last AFE;
      } elsif ($active_formatting_elements->[$_]->[0]->manakai_local_name
                   eq $tag_name) {
        !!!cp ('t51');
        $formatting_element = $active_formatting_elements->[$_];
        $formatting_element_i_in_active = $_;
        last AFE;
      }
    } # AFE
    unless (defined $formatting_element) {
      !!!cp ('t53');
      !!!parse-error (type => 'unmatched end tag', text => $tag_name, token => $end_tag_token);
      ## Ignore the token
      !!!next-token;
      return;
    }
    ## has an element in scope
    my $in_scope = 1;
    my $formatting_element_i_in_open;
    INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if ($node->[0] eq $formatting_element->[0]) {
        if ($in_scope) {
          !!!cp ('t54');
          $formatting_element_i_in_open = $_;
          last INSCOPE;
        } else { # in open elements but not in scope
          !!!cp ('t55');
          !!!parse-error (type => 'unmatched end tag',
                          text => $tag_name,
                          token => $end_tag_token);
          ## Ignore the token
          !!!next-token;
          return;
        }
      } elsif ($node->[1] & SCOPING_EL) {
        !!!cp ('t56');
        $in_scope = 0;
      }
    } # INSCOPE
    unless (defined $formatting_element_i_in_open) {
      !!!cp ('t57');
      !!!parse-error (type => 'unmatched end tag',
                      text => $tag_name,
                      token => $end_tag_token);
      ## Remove the formatting element itself from the list; it is not
      ## necessarily the last item of the list.
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      !!!next-token; ## TODO: ok?
      return;
    }
    if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
      !!!cp ('t58');
      !!!parse-error (type => 'not closed',
                      text => $self->{open_elements}->[-1]->[0]
                          ->manakai_local_name,
                      token => $end_tag_token);
    }

    ## Step 2: Find the furthest block.  The stack is scanned from its
    ## end down to the formatting element, so that the last candidate
    ## recorded is the one nearest to the formatting element.
    my $furthest_block;
    my $furthest_block_i_in_open;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if (not ($node->[1] & FORMATTING_EL) and
          #not $phrasing_category->{$node->[1]} and
          ($node->[1] & SPECIAL_EL or
           $node->[1] & SCOPING_EL)) { ## Scoping is redundant, maybe
        !!!cp ('t59');
        $furthest_block = $node;
        $furthest_block_i_in_open = $_;
        ## NOTE: The topmost (eldest) node.
      } elsif ($node->[0] eq $formatting_element->[0]) {
        !!!cp ('t60');
        last OE;
      }
    } # OE

    ## Step 3: Without a furthest block, pop the stack up to and
    ## including the formatting element and remove it from the list.
    unless (defined $furthest_block) { # MUST
      !!!cp ('t61');
      splice @{$self->{open_elements}}, $formatting_element_i_in_open;
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      !!!next-token;
      return;
    }

    ## Step 4: The common ancestor is the stack item just before the
    ## formatting element.
    my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1];

    ## Step 5: Detach the furthest block from its parent, if any.
    my $furthest_block_parent = $furthest_block->[0]->parent_node;
    if (defined $furthest_block_parent) {
      !!!cp ('t62');
      $furthest_block_parent->remove_child ($furthest_block->[0]);
    }

    ## Step 6: The bookmark is kept as the node of the list item that
    ## precedes the formatting element.
    my $bookmark_prev_el
        = $active_formatting_elements->[$formatting_element_i_in_active - 1]
            ->[0];

    ## Step 7
    my $node = $furthest_block;
    my $node_i_in_open = $furthest_block_i_in_open;
    my $last_node = $furthest_block;
    S7: {
      ## Step 1: Go to the previous item of the stack.
      $node_i_in_open--;
      $node = $self->{open_elements}->[$node_i_in_open];

      ## Step 2: A node that is not in the list of active formatting
      ## elements is removed from the stack and skipped.
      my $node_i_in_active;
      S7S2: {
        for (reverse 0..$#$active_formatting_elements) {
          if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
            !!!cp ('t63');
            $node_i_in_active = $_;
            last S7S2;
          }
        }
        splice @{$self->{open_elements}}, $node_i_in_open, 1;
        redo S7;
      } # S7S2

      ## Step 3: Stop at the formatting element.
      last S7 if $node->[0] eq $formatting_element->[0];

      ## Step 4: Move the bookmark after the node.
      if ($last_node->[0] eq $furthest_block->[0]) {
        !!!cp ('t64');
        $bookmark_prev_el = $node->[0];
      }

      ## Step 5: A node with children is replaced, in both the list and
      ## the stack, by a clone made with |clone_node (0)|.
      if ($node->[0]->has_child_nodes ()) {
        !!!cp ('t65');
        my $clone = [$node->[0]->clone_node (0), $node->[1]];
        $active_formatting_elements->[$node_i_in_active] = $clone;
        $self->{open_elements}->[$node_i_in_open] = $clone;
        $node = $clone;
      }

      ## Step 6
      $node->[0]->append_child ($last_node->[0]);

      ## Step 7
      $last_node = $node;

      ## Step 8
      redo S7;
    } # S7

    ## Step 8: Insert the last node into the common ancestor, or into
    ## the foster parent if the common ancestor is a table-ish element.
    if ($common_ancestor_node->[1] & TABLE_ROWS_EL) {
      ## Foster parenting.
      my $foster_parent_element;
      my $next_sibling;
      OE: for (reverse 0..$#{$self->{open_elements}}) {
        if ($self->{open_elements}->[$_]->[1] == TABLE_EL) {
          !!!cp ('t65.2');
          $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
          $next_sibling = $self->{open_elements}->[$_]->[0];
          undef $next_sibling
              unless $next_sibling->parent_node eq $foster_parent_element;
          last OE;
        }
      } # OE
      $foster_parent_element ||= $self->{open_elements}->[0]->[0];

      $foster_parent_element->insert_before ($last_node->[0], $next_sibling);
      $open_tables->[-1]->[1] = 1; # tainted
    } else {
      !!!cp ('t65.3');
      $common_ancestor_node->[0]->append_child ($last_node->[0]);
    }

    ## Step 9: Clone the formatting element.
    my $clone = [$formatting_element->[0]->clone_node (0),
                 $formatting_element->[1]];

    ## Step 10: Move the children of the furthest block to the clone.
    ## The child list is copied first, since it is modified by the
    ## moves.
    my @cn = @{$furthest_block->[0]->child_nodes};
    $clone->[0]->append_child ($_) for @cn;

    ## Step 11
    $furthest_block->[0]->append_child ($clone->[0]);

    ## Step 12: In the list of active formatting elements, remove the
    ## formatting element and insert the clone after the bookmark.
    my $i;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
        !!!cp ('t66');
        splice @$active_formatting_elements, $_, 1;
        $i-- and last AFE if defined $i;
      } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
        !!!cp ('t67');
        $i = $_;
      }
    } # AFE
    splice @$active_formatting_elements, $i + 1, 0, $clone;

    ## Step 13: In the stack of open elements, remove the formatting
    ## element and insert the clone after the furthest block.
    undef $i;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
        !!!cp ('t68');
        splice @{$self->{open_elements}}, $_, 1;
        $i-- and last OE if defined $i;
      } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
        !!!cp ('t69');
        $i = $_;
      }
    } # OE
    splice @{$self->{open_elements}}, $i + 1, 0, $clone;

    ## Step 14
    redo FET;
  } # FET
}; # $formatting_end_tag
1699
## Default insertion code (also the initial value of |$insert|): append
## the given node to the current node, i.e. the node of the last item
## of the stack of open elements.
$insert = my $insert_to_current = sub {
  my $child = shift;
  my $current_node = $self->{open_elements}->[-1]->[0];
  $current_node->append_child ($child);
}; # $insert_to_current
1703
1704 ## Foster parenting. Note that there are three pieces of "foster
1705 ## parenting" code in the parser: for elements (this one), for texts,
1706 ## and for elements in the AAA code.
## Insert the given node with foster parenting.
##
## If the current node is a table-ish element (|TABLE_ROWS_EL|), the
## node is inserted into the foster parent element: the node of the
## stack item just before the last |table| item, in front of that
## |table| element if the |table| element is its child.  If there is no
## |table| item in the stack, the node of the first stack item is used.
## Otherwise the node is simply appended to the current node.
my $insert_to_foster = sub {
  my $child = shift;
  my $stack = $self->{open_elements};

  unless ($stack->[-1]->[1] & TABLE_ROWS_EL) {
    !!!cp ('t72');
    $stack->[-1]->[0]->append_child ($child);
    return;
  }

  # MUST
  my $foster_parent_element;
  my $next_sibling;
  OE: for my $idx (reverse 0..$#$stack) {
    next OE unless $stack->[$idx]->[1] == TABLE_EL;

    !!!cp ('t71');
    $foster_parent_element = $stack->[$idx - 1]->[0];
    $next_sibling = $stack->[$idx]->[0];
    ## The |table| element is the reference child only if it is
    ## actually a child of the foster parent element.
    undef $next_sibling
        unless $next_sibling->parent_node eq $foster_parent_element;
    last OE;
  } # OE
  $foster_parent_element ||= $stack->[0]->[0];

  $foster_parent_element->insert_before ($child, $next_sibling);
  $open_tables->[-1]->[1] = 1; # tainted
}; # $insert_to_foster
1732
1733 ## NOTE: Insert a character (MUST): When a character is inserted, if
1734 ## the last node that was inserted by the parser is a Text node and
1735 ## the character has to be inserted after that node, then the
1736 ## character is appended to the Text node. However, if any other
1737 ## node is inserted by the parser, then a new Text node is created
1738 ## and the character is appended as that Text node. If I'm not
1739 ## wrong, for a parser with scripting disabled, there are only two
1740 ## cases where this occurs. One is the case where an element node
1741 ## is inserted to the |head| element. This is covered by using the
1742 ## |$self->{head_element_inserted}| flag. Another is the case where
1743 ## an element or comment is inserted into the |table| subtree while
1744 ## foster parenting happens. This is covered by using the [2] flag
1745 ## of the |$open_tables| structure. All other cases are handled
1746 ## simply by calling |manakai_append_text| method.
1747
1748 ## TODO: |<body><script>document.write("a<br>");
1749 ## document.body.removeChild (document.body.lastChild);
1750 ## document.write ("b")</script>|
1751
1752 B: while (1) {
1753
1754 ## The "in table text" insertion mode.
1755 if ($self->{insertion_mode} & TABLE_IMS and
1756 not $self->{insertion_mode} & IN_FOREIGN_CONTENT_IM and
1757 not $self->{insertion_mode} & IN_CDATA_RCDATA_IM) {
1758 C: {
1759 my $s;
1760 if ($token->{type} == CHARACTER_TOKEN) {
1761 !!!cp ('t194');
1762 $self->{pending_chars} ||= [];
1763 push @{$self->{pending_chars}}, $token;
1764 !!!next-token;
1765 next B;
1766 } else {
1767 if ($self->{pending_chars}) {
1768 $s = join '', map { $_->{data} } @{$self->{pending_chars}};
1769 delete $self->{pending_chars};
1770 if ($s =~ /[^\x09\x0A\x0C\x0D\x20]/) {
1771 !!!cp ('t195');
1772 #
1773 } else {
1774 !!!cp ('t195.1');
1775 #$self->{open_elements}->[-1]->[0]->manakai_append_text ($s);
1776 $self->{open_elements}->[-1]->[0]->append_child
1777 ($self->{document}->create_text_node ($s));
1778 last C;
1779 }
1780 } else {
1781 !!!cp ('t195.2');
1782 last C;
1783 }
1784 }
1785
1786 ## Foster parenting.
1787 !!!parse-error (type => 'in table:#text', token => $token);
1788
1789 ## NOTE: As if in body, but insert into the foster parent element.
1790 $reconstruct_active_formatting_elements->($insert_to_foster);
1791
1792 if ($self->{open_elements}->[-1]->[1] & TABLE_ROWS_EL) {
1793 # MUST
1794 my $foster_parent_element;
1795 my $next_sibling;
1796 OE: for (reverse 0..$#{$self->{open_elements}}) {
1797 if ($self->{open_elements}->[$_]->[1] == TABLE_EL) {
1798 !!!cp ('t197');
1799 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
1800 $next_sibling = $self->{open_elements}->[$_]->[0];
1801 undef $next_sibling
1802 unless $next_sibling->parent_node eq $foster_parent_element;
1803 last OE;
1804 }
1805 } # OE
1806 $foster_parent_element ||= $self->{open_elements}->[0]->[0];
1807
1808 !!!cp ('t199');
1809 $foster_parent_element->insert_before
1810 ($self->{document}->create_text_node ($s), $next_sibling);
1811
1812 $open_tables->[-1]->[1] = 1; # tainted
1813 $open_tables->[-1]->[2] = 1; # ~node inserted
1814 } else {
1815 ## NOTE: Fragment case or in a foster parent'ed element
1816 ## (e.g. |<table><span>a|). In fragment case, whether the
1817 ## character is appended to existing node or a new node is
1818 ## created is irrelevant, since the foster parent'ed nodes
1819 ## are discarded and fragment parsing does not invoke any
1820 ## script.
1821 !!!cp ('t200');
1822 $self->{open_elements}->[-1]->[0]->manakai_append_text ($s);
1823 }
1824 } # C
1825 } # TABLE_IMS
1826
1827 if ($token->{type} == DOCTYPE_TOKEN) {
1828 !!!cp ('t73');
1829 !!!parse-error (type => 'in html:#DOCTYPE', token => $token);
1830 ## Ignore the token
1831 ## Stay in the phase
1832 !!!next-token;
1833 next B;
1834 } elsif ($token->{type} == START_TAG_TOKEN and
1835 $token->{tag_name} eq 'html') {
1836 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
1837 !!!cp ('t79');
1838 !!!parse-error (type => 'after html', text => 'html', token => $token);
1839 $self->{insertion_mode} = AFTER_BODY_IM;
1840 } elsif ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
1841 !!!cp ('t80');
1842 !!!parse-error (type => 'after html', text => 'html', token => $token);
1843 $self->{insertion_mode} = AFTER_FRAMESET_IM;
1844 } else {
1845 !!!cp ('t81');
1846 }
1847
1848 !!!cp ('t82');
1849 !!!parse-error (type => 'not first start tag', token => $token);
1850 my $top_el = $self->{open_elements}->[0]->[0];
1851 for my $attr_name (keys %{$token->{attributes}}) {
1852 unless ($top_el->has_attribute_ns (undef, $attr_name)) {
1853 !!!cp ('t84');
1854 $top_el->set_attribute_ns
1855 (undef, [undef, $attr_name],
1856 $token->{attributes}->{$attr_name}->{value});
1857 }
1858 }
1859 !!!nack ('t84.1');
1860 !!!next-token;
1861 next B;
1862 } elsif ($token->{type} == COMMENT_TOKEN) {
1863 my $comment = $self->{document}->create_comment ($token->{data});
1864 if ($self->{insertion_mode} & AFTER_HTML_IMS) {
1865 !!!cp ('t85');
1866 $self->{document}->append_child ($comment);
1867 } elsif ($self->{insertion_mode} == AFTER_BODY_IM) {
1868 !!!cp ('t86');
1869 $self->{open_elements}->[0]->[0]->append_child ($comment);
1870 } else {
1871 !!!cp ('t87');
1872 $self->{open_elements}->[-1]->[0]->append_child ($comment);
1873 $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
1874 }
1875 !!!next-token;
1876 next B;
1877 } elsif ($self->{insertion_mode} & IN_CDATA_RCDATA_IM) {
1878 if ($token->{type} == CHARACTER_TOKEN) {
1879 $token->{data} =~ s/^\x0A// if $self->{ignore_newline};
1880 delete $self->{ignore_newline};
1881
1882 if (length $token->{data}) {
1883 !!!cp ('t43');
1884 $self->{open_elements}->[-1]->[0]->manakai_append_text
1885 ($token->{data});
1886 } else {
1887 !!!cp ('t43.1');
1888 }
1889 !!!next-token;
1890 next B;
1891 } elsif ($token->{type} == END_TAG_TOKEN) {
1892 delete $self->{ignore_newline};
1893
1894 if ($token->{tag_name} eq 'script') {
1895 !!!cp ('t50');
1896
1897 ## Para 1-2
1898 my $script = pop @{$self->{open_elements}};
1899
1900 ## Para 3
1901 $self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM;
1902
1903 ## Para 4
1904 ## TODO: $old_insertion_point = $current_insertion_point;
1905 ## TODO: $current_insertion_point = just before $self->{nc};
1906
1907 ## Para 5
1908 ## TODO: Run the $script->[0].
1909
1910 ## Para 6
1911 ## TODO: $current_insertion_point = $old_insertion_point;
1912
1913 ## Para 7
1914 ## TODO: if ($pending_external_script) {
1915 ## TODO: ...
1916 ## TODO: }
1917
1918 !!!next-token;
1919 next B;
1920 } else {
1921 !!!cp ('t42');
1922
1923 pop @{$self->{open_elements}};
1924
1925 $self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM;
1926 !!!next-token;
1927 next B;
1928 }
1929 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
1930 delete $self->{ignore_newline};
1931
1932 !!!cp ('t44');
1933 !!!parse-error (type => 'not closed',
1934 text => $self->{open_elements}->[-1]->[0]
1935 ->manakai_local_name,
1936 token => $token);
1937
1938 #if ($self->{open_elements}->[-1]->[1] == SCRIPT_EL) {
1939 # ## TODO: Mark as "already executed"
1940 #}
1941
1942 pop @{$self->{open_elements}};
1943
1944 $self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM;
1945 ## Reprocess.
1946 next B;
1947 } else {
1948 die "$0: $token->{type}: In CDATA/RCDATA: Unknown token type";
1949 }
1950 } elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) {
1951 if ($token->{type} == CHARACTER_TOKEN) {
1952 !!!cp ('t87.1');
1953
1954 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
1955
1956 if ($token->{data} =~ /[^\x09\x0A\x0C\x0D\x20]/) {
1957 delete $self->{frameset_ok};
1958 }
1959
1960 !!!next-token;
1961 next B;
1962 } elsif ($token->{type} == START_TAG_TOKEN) {
1963 if ((not {mglyph => 1, malignmark => 1}->{$token->{tag_name}} and
1964 $self->{open_elements}->[-1]->[1] & FOREIGN_FLOW_CONTENT_EL) or
1965 not ($self->{open_elements}->[-1]->[1] & FOREIGN_EL) or
1966 ($token->{tag_name} eq 'svg' and
1967 $self->{open_elements}->[-1]->[1] == MML_AXML_EL)) {
1968 ## NOTE: "using the rules for secondary insertion mode"then"continue"
1969 !!!cp ('t87.2');
1970 #
1971 } elsif ({
1972 b => 1, big => 1, blockquote => 1, body => 1, br => 1,
1973 center => 1, code => 1, dd => 1, div => 1, dl => 1, dt => 1,
1974 em => 1, embed => 1, h1 => 1, h2 => 1, h3 => 1,
1975 h4 => 1, h5 => 1, h6 => 1, head => 1, hr => 1, i => 1,
1976 img => 1, li => 1, listing => 1, menu => 1, meta => 1,
1977 nobr => 1, ol => 1, p => 1, pre => 1, ruby => 1, s => 1,
1978 small => 1, span => 1, strong => 1, strike => 1, sub => 1,
1979 sup => 1, table => 1, tt => 1, u => 1, ul => 1, var => 1,
1980 }->{$token->{tag_name}} or
1981 ($token->{tag_name} eq 'font' and
1982 ($token->{attributes}->{color} or
1983 $token->{attributes}->{face} or
1984 $token->{attributes}->{size}))) {
1985 !!!cp ('t87.2');
1986 !!!parse-error (type => 'not closed',
1987 text => $self->{open_elements}->[-1]->[0]
1988 ->manakai_local_name,
1989 token => $token);
1990
1991 pop @{$self->{open_elements}}
1992 while $self->{open_elements}->[-1]->[1] & FOREIGN_EL;
1993
1994 $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
1995 ## Reprocess.
1996 next B;
1997 } else {
1998 my $nsuri = $self->{open_elements}->[-1]->[0]->namespace_uri;
1999 my $tag_name = $token->{tag_name};
2000 if ($nsuri eq $SVG_NS) {
2001 $tag_name = {
2002 altglyph => 'altGlyph',
2003 altglyphdef => 'altGlyphDef',
2004 altglyphitem => 'altGlyphItem',
2005 animatecolor => 'animateColor',
2006 animatemotion => 'animateMotion',
2007 animatetransform => 'animateTransform',
2008 clippath => 'clipPath',
2009 feblend => 'feBlend',
2010 fecolormatrix => 'feColorMatrix',
2011 fecomponenttransfer => 'feComponentTransfer',
2012 fecomposite => 'feComposite',
2013 feconvolvematrix => 'feConvolveMatrix',
2014 fediffuselighting => 'feDiffuseLighting',
2015 fedisplacementmap => 'feDisplacementMap',
2016 fedistantlight => 'feDistantLight',
2017 feflood => 'feFlood',
2018 fefunca => 'feFuncA',
2019 fefuncb => 'feFuncB',
2020 fefuncg => 'feFuncG',
2021 fefuncr => 'feFuncR',
2022 fegaussianblur => 'feGaussianBlur',
2023 feimage => 'feImage',
2024 femerge => 'feMerge',
2025 femergenode => 'feMergeNode',
2026 femorphology => 'feMorphology',
2027 feoffset => 'feOffset',
2028 fepointlight => 'fePointLight',
2029 fespecularlighting => 'feSpecularLighting',
2030 fespotlight => 'feSpotLight',
2031 fetile => 'feTile',
2032 feturbulence => 'feTurbulence',
2033 foreignobject => 'foreignObject',
2034 glyphref => 'glyphRef',
2035 lineargradient => 'linearGradient',
2036 radialgradient => 'radialGradient',
2037 #solidcolor => 'solidColor', ## NOTE: Commented in spec (SVG1.2)
2038 textpath => 'textPath',
2039 }->{$tag_name} || $tag_name;
2040 }
2041
2042 ## "adjust SVG attributes" (SVG only) - done in insert-element-f
2043
2044 ## "adjust foreign attributes" - done in insert-element-f
2045
2046 !!!insert-element-f ($nsuri, $tag_name, $token->{attributes}, $token);
2047
2048 if ($self->{self_closing}) {
2049 pop @{$self->{open_elements}};
2050 !!!ack ('t87.3');
2051 } else {
2052 !!!cp ('t87.4');
2053 }
2054
2055 !!!next-token;
2056 next B;
2057 }
2058 } elsif ($token->{type} == END_TAG_TOKEN) {
2059 ## NOTE: "using the rules for secondary insertion mode" then "continue"
2060 if ($token->{tag_name} eq 'script') {
2061 !!!cp ('t87.41');
2062 #
2063 ## XXXscript: Execute script here.
2064 } else {
2065 !!!cp ('t87.5');
2066 #
2067 }
2068 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
2069 !!!cp ('t87.6');
2070 !!!parse-error (type => 'not closed',
2071 text => $self->{open_elements}->[-1]->[0]
2072 ->manakai_local_name,
2073 token => $token);
2074
2075 pop @{$self->{open_elements}}
2076 while $self->{open_elements}->[-1]->[1] & FOREIGN_EL;
2077
2078 ## NOTE: |<span><svg>| ... two parse errors, |<svg>| ... a parse error.
2079
2080 $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
2081 ## Reprocess.
2082 next B;
2083 } else {
2084 die "$0: $token->{type}: Unknown token type";
2085 }
2086 }
2087
2088 if ($self->{insertion_mode} & HEAD_IMS) {
2089 if ($token->{type} == CHARACTER_TOKEN) {
2090 if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
2091 unless ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2092 if ($self->{head_element_inserted}) {
2093 !!!cp ('t88.3');
2094 $self->{open_elements}->[-1]->[0]->append_child
2095 ($self->{document}->create_text_node ($1));
2096 delete $self->{head_element_inserted};
2097 ## NOTE: |</head> <link> |
2098 #
2099 } else {
2100 !!!cp ('t88.2');
2101 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
2102 ## NOTE: |</head> &#x20;|
2103 #
2104 }
2105 } else {
2106 !!!cp ('t88.1');
2107 ## Ignore the token.
2108 #
2109 }
2110 unless (length $token->{data}) {
2111 !!!cp ('t88');
2112 !!!next-token;
2113 next B;
2114 }
2115 ## TODO: set $token->{column} appropriately
2116 }
2117
2118 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2119 !!!cp ('t89');
2120 ## As if <head>
2121 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2122 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2123 push @{$self->{open_elements}},
2124 [$self->{head_element}, $el_category->{head}];
2125
2126 ## Reprocess in the "in head" insertion mode...
2127 pop @{$self->{open_elements}};
2128
2129 ## Reprocess in the "after head" insertion mode...
2130 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2131 !!!cp ('t90');
2132 ## As if </noscript>
2133 pop @{$self->{open_elements}};
2134 !!!parse-error (type => 'in noscript:#text', token => $token);
2135
2136 ## Reprocess in the "in head" insertion mode...
2137 ## As if </head>
2138 pop @{$self->{open_elements}};
2139
2140 ## Reprocess in the "after head" insertion mode...
2141 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2142 !!!cp ('t91');
2143 pop @{$self->{open_elements}};
2144
2145 ## Reprocess in the "after head" insertion mode...
2146 } else {
2147 !!!cp ('t92');
2148 }
2149
2150 ## "after head" insertion mode
2151 ## As if <body>
2152 !!!insert-element ('body',, $token);
2153 $self->{insertion_mode} = IN_BODY_IM;
2154 ## The "frameset-ok" flag is left unchanged in this case.
2155 ## Reprocess the token.
2156 next B;
2157 } elsif ($token->{type} == START_TAG_TOKEN) {
2158 if ($token->{tag_name} eq 'head') {
2159 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2160 !!!cp ('t93');
2161 !!!create-element ($self->{head_element}, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
2162 $self->{open_elements}->[-1]->[0]->append_child
2163 ($self->{head_element});
2164 push @{$self->{open_elements}},
2165 [$self->{head_element}, $el_category->{head}];
2166 $self->{insertion_mode} = IN_HEAD_IM;
2167 !!!nack ('t93.1');
2168 !!!next-token;
2169 next B;
2170 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2171 !!!cp ('t93.2');
2172 !!!parse-error (type => 'after head', text => 'head',
2173 token => $token);
2174 ## Ignore the token
2175 !!!nack ('t93.3');
2176 !!!next-token;
2177 next B;
2178 } else {
2179 !!!cp ('t95');
2180 !!!parse-error (type => 'in head:head',
2181 token => $token); # or in head noscript
2182 ## Ignore the token
2183 !!!nack ('t95.1');
2184 !!!next-token;
2185 next B;
2186 }
2187 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2188 !!!cp ('t96');
2189 ## As if <head>
2190 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2191 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2192 push @{$self->{open_elements}},
2193 [$self->{head_element}, $el_category->{head}];
2194
2195 $self->{insertion_mode} = IN_HEAD_IM;
2196 ## Reprocess in the "in head" insertion mode...
2197 } else {
2198 !!!cp ('t97');
2199 }
2200
2201 if ($token->{tag_name} eq 'base') {
2202 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2203 !!!cp ('t98');
2204 ## As if </noscript>
2205 pop @{$self->{open_elements}};
2206 !!!parse-error (type => 'in noscript', text => 'base',
2207 token => $token);
2208
2209 $self->{insertion_mode} = IN_HEAD_IM;
2210 ## Reprocess in the "in head" insertion mode...
2211 } else {
2212 !!!cp ('t99');
2213 }
2214
2215 ## NOTE: There is a "as if in head" code clone.
2216 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
2217 !!!cp ('t100');
2218 !!!parse-error (type => 'after head',
2219 text => $token->{tag_name}, token => $token);
2220 push @{$self->{open_elements}},
2221 [$self->{head_element}, $el_category->{head}];
2222 $self->{head_element_inserted} = 1;
2223 } else {
2224 !!!cp ('t101');
2225 }
2226 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2227 pop @{$self->{open_elements}};
2228 pop @{$self->{open_elements}} # <head>
2229 if $self->{insertion_mode} == AFTER_HEAD_IM;
2230 !!!nack ('t101.1');
2231 !!!next-token;
2232 next B;
2233 } elsif ($token->{tag_name} eq 'link') {
2234 ## NOTE: There is a "as if in head" code clone.
2235 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
2236 !!!cp ('t102');
2237 !!!parse-error (type => 'after head',
2238 text => $token->{tag_name}, token => $token);
2239 push @{$self->{open_elements}},
2240 [$self->{head_element}, $el_category->{head}];
2241 $self->{head_element_inserted} = 1;
2242 } else {
2243 !!!cp ('t103');
2244 }
2245 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2246 pop @{$self->{open_elements}};
2247 pop @{$self->{open_elements}} # <head>
2248 if $self->{insertion_mode} == AFTER_HEAD_IM;
2249 !!!ack ('t103.1');
2250 !!!next-token;
2251 next B;
2252 } elsif ($token->{tag_name} eq 'command') {
2253 if ($self->{insertion_mode} == IN_HEAD_IM) {
2254 ## NOTE: If the insertion mode at the time of the emission
2255 ## of the token was "before head", $self->{insertion_mode}
2256 ## is already changed to |IN_HEAD_IM|.
2257
2258 ## NOTE: There is a "as if in head" code clone.
2259 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2260 pop @{$self->{open_elements}};
2261 pop @{$self->{open_elements}} # <head>
2262 if $self->{insertion_mode} == AFTER_HEAD_IM;
2263 !!!ack ('t103.2');
2264 !!!next-token;
2265 next B;
2266 } else {
2267 ## NOTE: "in head noscript" or "after head" insertion mode
2268 ## - in these cases, these tags are treated the same as
2269 ## normal in-body tags.
2270 !!!cp ('t103.3');
2271 #
2272 }
2273 } elsif ($token->{tag_name} eq 'meta') {
2274 ## NOTE: There is a "as if in head" code clone.
2275 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
2276 !!!cp ('t104');
2277 !!!parse-error (type => 'after head',
2278 text => $token->{tag_name}, token => $token);
2279 push @{$self->{open_elements}},
2280 [$self->{head_element}, $el_category->{head}];
2281 $self->{head_element_inserted} = 1;
2282 } else {
2283 !!!cp ('t105');
2284 }
2285 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2286 my $meta_el = pop @{$self->{open_elements}};
2287
2288 unless ($self->{confident}) {
2289 if ($token->{attributes}->{charset}) {
2290 !!!cp ('t106');
2291 ## NOTE: Whether the encoding is supported or not is handled
2292 ## in the {change_encoding} callback.
2293 $self->{change_encoding}
2294 ->($self, $token->{attributes}->{charset}->{value},
2295 $token);
2296
2297 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
2298 ->set_user_data (manakai_has_reference =>
2299 $token->{attributes}->{charset}
2300 ->{has_reference});
2301 } elsif ($token->{attributes}->{content}) {
2302 if ($token->{attributes}->{content}->{value}
2303 =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
2304 [\x09\x0A\x0C\x0D\x20]*=
2305 [\x09\x0A\x0C\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
2306 ([^"'\x09\x0A\x0C\x0D\x20]
2307 [^\x09\x0A\x0C\x0D\x20\x3B]*))/x) {
2308 !!!cp ('t107');
2309 ## NOTE: Whether the encoding is supported or not is handled
2310 ## in the {change_encoding} callback.
2311 $self->{change_encoding}
2312 ->($self, defined $1 ? $1 : defined $2 ? $2 : $3,
2313 $token);
2314 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
2315 ->set_user_data (manakai_has_reference =>
2316 $token->{attributes}->{content}
2317 ->{has_reference});
2318 } else {
2319 !!!cp ('t108');
2320 }
2321 }
2322 } else {
2323 if ($token->{attributes}->{charset}) {
2324 !!!cp ('t109');
2325 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
2326 ->set_user_data (manakai_has_reference =>
2327 $token->{attributes}->{charset}
2328 ->{has_reference});
2329 }
2330 if ($token->{attributes}->{content}) {
2331 !!!cp ('t110');
2332 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
2333 ->set_user_data (manakai_has_reference =>
2334 $token->{attributes}->{content}
2335 ->{has_reference});
2336 }
2337 }
2338
2339 pop @{$self->{open_elements}} # <head>
2340 if $self->{insertion_mode} == AFTER_HEAD_IM;
2341 !!!ack ('t110.1');
2342 !!!next-token;
2343 next B;
2344 } elsif ($token->{tag_name} eq 'title') {
2345 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2346 !!!cp ('t111');
2347 ## As if </noscript>
2348 pop @{$self->{open_elements}};
2349 !!!parse-error (type => 'in noscript', text => 'title',
2350 token => $token);
2351
2352 $self->{insertion_mode} = IN_HEAD_IM;
2353 ## Reprocess in the "in head" insertion mode...
2354 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2355 !!!cp ('t112');
2356 !!!parse-error (type => 'after head',
2357 text => $token->{tag_name}, token => $token);
2358 push @{$self->{open_elements}},
2359 [$self->{head_element}, $el_category->{head}];
2360 $self->{head_element_inserted} = 1;
2361 } else {
2362 !!!cp ('t113');
2363 }
2364
2365 ## NOTE: There is a "as if in head" code clone.
2366 $parse_rcdata->(RCDATA_CONTENT_MODEL);
2367
2368 ## NOTE: At this point the stack of open elements contains
2369 ## the |head| element (index == -2) and the |title| element
2370 ## (index == -1). In the "after head" insertion mode the
2371 ## |head| element is inserted only for the purpose of
2372 ## providing the context for the |title| element, and
2373 ## therefore we can, and have to, remove the |head| element
2374 ## from the stack now.
2375 splice @{$self->{open_elements}}, -2, 1, () # <head>
2376 if ($self->{insertion_mode} & IM_MASK) == AFTER_HEAD_IM;
2377 next B;
2378 } elsif ($token->{tag_name} eq 'style' or
2379 $token->{tag_name} eq 'noframes') {
2380 ## NOTE: Or (scripting is enabled and tag_name eq 'noscript' and
2381 ## insertion mode IN_HEAD_IM)
2382 ## NOTE: There is a "as if in head" code clone.
2383 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
2384 !!!cp ('t114');
2385 !!!parse-error (type => 'after head',
2386 text => $token->{tag_name}, token => $token);
2387 push @{$self->{open_elements}},
2388 [$self->{head_element}, $el_category->{head}];
2389 $self->{head_element_inserted} = 1;
2390 } else {
2391 !!!cp ('t115');
2392 }
2393 $parse_rcdata->(CDATA_CONTENT_MODEL);
2394 ## ISSUE: A spec bug [Bug 6038]
2395 splice @{$self->{open_elements}}, -2, 1, () # <head>
2396 if ($self->{insertion_mode} & IM_MASK) == AFTER_HEAD_IM;
2397 next B;
2398 } elsif ($token->{tag_name} eq 'noscript') {
2399 if ($self->{insertion_mode} == IN_HEAD_IM) {
2400 !!!cp ('t116');
2401 ## NOTE: and scripting is disabled
2402 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2403 $self->{insertion_mode} = IN_HEAD_NOSCRIPT_IM;
2404 !!!nack ('t116.1');
2405 !!!next-token;
2406 next B;
2407 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2408 !!!cp ('t117');
2409 !!!parse-error (type => 'in noscript', text => 'noscript',
2410 token => $token);
2411 ## Ignore the token
2412 !!!nack ('t117.1');
2413 !!!next-token;
2414 next B;
2415 } else {
2416 !!!cp ('t118');
2417 #
2418 }
2419 } elsif ($token->{tag_name} eq 'script') {
2420 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2421 !!!cp ('t119');
2422 ## As if </noscript>
2423 pop @{$self->{open_elements}};
2424 !!!parse-error (type => 'in noscript', text => 'script',
2425 token => $token);
2426
2427 $self->{insertion_mode} = IN_HEAD_IM;
2428 ## Reprocess in the "in head" insertion mode...
2429 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2430 !!!cp ('t120');
2431 !!!parse-error (type => 'after head',
2432 text => $token->{tag_name}, token => $token);
2433 push @{$self->{open_elements}},
2434 [$self->{head_element}, $el_category->{head}];
2435 $self->{head_element_inserted} = 1;
2436 } else {
2437 !!!cp ('t121');
2438 }
2439
2440 ## NOTE: There is a "as if in head" code clone.
2441 $script_start_tag->();
2442 ## ISSUE: A spec bug [Bug 6038]
2443 splice @{$self->{open_elements}}, -2, 1 # <head>
2444 if ($self->{insertion_mode} & IM_MASK) == AFTER_HEAD_IM;
2445 next B;
2446 } elsif ($token->{tag_name} eq 'body' or
2447 $token->{tag_name} eq 'frameset') {
2448 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2449 !!!cp ('t122');
2450 ## As if </noscript>
2451 pop @{$self->{open_elements}};
2452 !!!parse-error (type => 'in noscript',
2453 text => $token->{tag_name}, token => $token);
2454
2455 ## Reprocess in the "in head" insertion mode...
2456 ## As if </head>
2457 pop @{$self->{open_elements}};
2458
2459 ## Reprocess in the "after head" insertion mode...
2460 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2461 !!!cp ('t124');
2462 pop @{$self->{open_elements}};
2463
2464 ## Reprocess in the "after head" insertion mode...
2465 } else {
2466 !!!cp ('t125');
2467 }
2468
2469 ## "after head" insertion mode
2470 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2471 if ($token->{tag_name} eq 'body') {
2472 !!!cp ('t126');
2473 delete $self->{frameset_ok};
2474 $self->{insertion_mode} = IN_BODY_IM;
2475 } elsif ($token->{tag_name} eq 'frameset') {
2476 !!!cp ('t127');
2477 $self->{insertion_mode} = IN_FRAMESET_IM;
2478 } else {
2479 die "$0: tag name: $self->{tag_name}";
2480 }
2481 !!!nack ('t127.1');
2482 !!!next-token;
2483 next B;
2484 } else {
2485 !!!cp ('t128');
2486 #
2487 }
2488
2489 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2490 !!!cp ('t129');
2491 ## As if </noscript>
2492 pop @{$self->{open_elements}};
2493 !!!parse-error (type => 'in noscript:/',
2494 text => $token->{tag_name}, token => $token);
2495
2496 ## Reprocess in the "in head" insertion mode...
2497 ## As if </head>
2498 pop @{$self->{open_elements}};
2499
2500 ## Reprocess in the "after head" insertion mode...
2501 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2502 !!!cp ('t130');
2503 ## As if </head>
2504 pop @{$self->{open_elements}};
2505
2506 ## Reprocess in the "after head" insertion mode...
2507 } else {
2508 !!!cp ('t131');
2509 }
2510
2511 ## "after head" insertion mode
2512 ## As if <body>
2513 !!!insert-element ('body',, $token);
2514 $self->{insertion_mode} = IN_BODY_IM;
2515 ## The "frameset-ok" flag is not changed in this case.
2516 ## Reprocess the token.
2517 !!!ack-later;
2518 next B;
2519 } elsif ($token->{type} == END_TAG_TOKEN) {
2520 ## "Before head", "in head", and "after head" insertion modes
2521 ## ignore most end tags. Exceptions are "body", "html",
2522 ## and "br" end tags. "Before head" and "in head" insertion
2523 ## modes also recognize the "head" end tag. The "in head noscript"
2524 ## insertion mode ignores end tags except for "noscript" and
2525 ## "br".
2526
2527 if ($token->{tag_name} eq 'head') {
2528 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2529 !!!cp ('t132');
2530 ## As if <head>
2531 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2532 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2533 push @{$self->{open_elements}},
2534 [$self->{head_element}, $el_category->{head}];
2535
2536 ## Reprocess in the "in head" insertion mode...
2537 pop @{$self->{open_elements}};
2538 $self->{insertion_mode} = AFTER_HEAD_IM;
2539 !!!next-token;
2540 next B;
2541 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2542 !!!cp ('t133');
2543 #
2544 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2545 !!!cp ('t134');
2546 pop @{$self->{open_elements}};
2547 $self->{insertion_mode} = AFTER_HEAD_IM;
2548 !!!next-token;
2549 next B;
2550 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2551 !!!cp ('t134.1');
2552 #
2553 } else {
2554 die "$0: $self->{insertion_mode}: Unknown insertion mode";
2555 }
2556 } elsif ($token->{tag_name} eq 'noscript') {
2557 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2558 !!!cp ('t136');
2559 pop @{$self->{open_elements}};
2560 $self->{insertion_mode} = IN_HEAD_IM;
2561 !!!next-token;
2562 next B;
2563 } else {
2564 !!!cp ('t138');
2565 #
2566 }
2567 } elsif ({
2568 body => ($self->{insertion_mode} != IN_HEAD_NOSCRIPT_IM),
2569 html => ($self->{insertion_mode} != IN_HEAD_NOSCRIPT_IM),
2570 br => 1,
2571 }->{$token->{tag_name}}) {
2572 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2573 !!!cp ('t142.2');
2574 ## (before head) as if <head>, (in head) as if </head>
2575 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2576 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2577 $self->{insertion_mode} = AFTER_HEAD_IM;
2578
2579 ## Reprocess in the "after head" insertion mode...
2580 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2581 !!!cp ('t143.2');
2582 ## As if </head>
2583 pop @{$self->{open_elements}};
2584 $self->{insertion_mode} = AFTER_HEAD_IM;
2585
2586 ## Reprocess in the "after head" insertion mode...
2587 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2588 !!!cp ('t143.3');
2589 ## NOTE: Two parse errors for <head><noscript></br>
2590 !!!parse-error (type => 'unmatched end tag',
2591 text => $token->{tag_name}, token => $token);
2592 ## As if </noscript>
2593 pop @{$self->{open_elements}};
2594 $self->{insertion_mode} = IN_HEAD_IM;
2595
2596 ## Reprocess in the "in head" insertion mode...
2597 ## As if </head>
2598 pop @{$self->{open_elements}};
2599 $self->{insertion_mode} = AFTER_HEAD_IM;
2600
2601 ## Reprocess in the "after head" insertion mode...
2602 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2603 !!!cp ('t143.4');
2604 #
2605 } else {
2606 die "$0: $self->{insertion_mode}: Unknown insertion mode";
2607 }
2608
2609 ## "after head" insertion mode
2610 ## As if <body>
2611 !!!insert-element ('body',, $token);
2612 $self->{insertion_mode} = IN_BODY_IM;
2613 ## The "frameset-ok" flag is left unchanged in this case.
2614 ## Reprocess the token.
2615 next B;
2616 }
2617
2618 ## End tags are ignored by default.
2619 !!!cp ('t145');
2620 !!!parse-error (type => 'unmatched end tag',
2621 text => $token->{tag_name}, token => $token);
2622 ## Ignore the token.
2623 !!!next-token;
2624 next B;
2625 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
2626 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2627 !!!cp ('t149.1');
2628
2629 ## NOTE: As if <head>
2630 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2631 $self->{open_elements}->[-1]->[0]->append_child
2632 ($self->{head_element});
2633 #push @{$self->{open_elements}},
2634 # [$self->{head_element}, $el_category->{head}];
2635 #$self->{insertion_mode} = IN_HEAD_IM;
2636 ## NOTE: Reprocess.
2637
2638 ## NOTE: As if </head>
2639 #pop @{$self->{open_elements}};
2640 #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
2641 ## NOTE: Reprocess.
2642
2643 #
2644 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2645 !!!cp ('t149.2');
2646
2647 ## NOTE: As if </head>
2648 pop @{$self->{open_elements}};
2649 #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
2650 ## NOTE: Reprocess.
2651
2652 #
2653 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2654 !!!cp ('t149.3');
2655
2656 !!!parse-error (type => 'in noscript:#eof', token => $token);
2657
2658 ## As if </noscript>
2659 pop @{$self->{open_elements}};
2660 #$self->{insertion_mode} = IN_HEAD_IM;
2661 ## NOTE: Reprocess.
2662
2663 ## NOTE: As if </head>
2664 pop @{$self->{open_elements}};
2665 #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
2666 ## NOTE: Reprocess.
2667
2668 #
2669 } else {
2670 !!!cp ('t149.4');
2671 #
2672 }
2673
2674 ## NOTE: As if <body>
2675 !!!insert-element ('body',, $token);
2676 $self->{insertion_mode} = IN_BODY_IM;
2677 ## The "frameset-ok" flag is left unchanged in this case.
2678 ## Reprocess the token.
2679 next B;
2680 } else {
2681 die "$0: $token->{type}: Unknown token type";
2682 }
2683 } elsif ($self->{insertion_mode} & BODY_IMS) {
2684 if ($token->{type} == CHARACTER_TOKEN) {
2685 !!!cp ('t150');
2686 $reconstruct_active_formatting_elements->($insert_to_current);
2687
2688 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
2689
2690 if ($token->{data} =~ /[^\x09\x0A\x0C\x0D\x20]/) {
2691 delete $self->{frameset_ok};
2692 }
2693
2694 !!!next-token;
2695 next B;
2696 } elsif ($token->{type} == START_TAG_TOKEN) {
2697 if ({
2698 caption => 1, col => 1, colgroup => 1, tbody => 1,
2699 td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
2700 }->{$token->{tag_name}}) {
2701 if (($self->{insertion_mode} & IM_MASK) == IN_CELL_IM) {
2702 ## have an element in table scope
2703 for (reverse 0..$#{$self->{open_elements}}) {
2704 my $node = $self->{open_elements}->[$_];
2705 if ($node->[1] == TABLE_CELL_EL) {
2706 !!!cp ('t151');
2707
2708 ## Close the cell
2709 !!!back-token; # <x>
2710 $token = {type => END_TAG_TOKEN,
2711 tag_name => $node->[0]->manakai_local_name,
2712 line => $token->{line},
2713 column => $token->{column}};
2714 next B;
2715 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2716 !!!cp ('t152');
2717 ## ISSUE: This case can never be reached, maybe.
2718 last;
2719 }
2720 }
2721
2722 !!!cp ('t153');
2723 !!!parse-error (type => 'start tag not allowed',
2724 text => $token->{tag_name}, token => $token);
2725 ## Ignore the token
2726 !!!nack ('t153.1');
2727 !!!next-token;
2728 next B;
2729 } elsif (($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
2730 !!!parse-error (type => 'not closed', text => 'caption',
2731 token => $token);
2732
2733 ## NOTE: As if </caption>.
2734 ## have a table element in table scope
2735 my $i;
2736 INSCOPE: {
2737 for (reverse 0..$#{$self->{open_elements}}) {
2738 my $node = $self->{open_elements}->[$_];
2739 if ($node->[1] == CAPTION_EL) {
2740 !!!cp ('t155');
2741 $i = $_;
2742 last INSCOPE;
2743 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2744 !!!cp ('t156');
2745 last;
2746 }
2747 }
2748
2749 !!!cp ('t157');
2750 !!!parse-error (type => 'start tag not allowed',
2751 text => $token->{tag_name}, token => $token);
2752 ## Ignore the token
2753 !!!nack ('t157.1');
2754 !!!next-token;
2755 next B;
2756 } # INSCOPE
2757
2758 ## generate implied end tags
2759 while ($self->{open_elements}->[-1]->[1]
2760 & END_TAG_OPTIONAL_EL) {
2761 !!!cp ('t158');
2762 pop @{$self->{open_elements}};
2763 }
2764
2765 unless ($self->{open_elements}->[-1]->[1] == CAPTION_EL) {
2766 !!!cp ('t159');
2767 !!!parse-error (type => 'not closed',
2768 text => $self->{open_elements}->[-1]->[0]
2769 ->manakai_local_name,
2770 token => $token);
2771 } else {
2772 !!!cp ('t160');
2773 }
2774
2775 splice @{$self->{open_elements}}, $i;
2776
2777 $clear_up_to_marker->();
2778
2779 $self->{insertion_mode} = IN_TABLE_IM;
2780
2781 ## reprocess
2782 !!!ack-later;
2783 next B;
2784 } else {
2785 !!!cp ('t161');
2786 #
2787 }
2788 } else {
2789 !!!cp ('t162');
2790 #
2791 }
2792 } elsif ($token->{type} == END_TAG_TOKEN) {
2793 if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
2794 if (($self->{insertion_mode} & IM_MASK) == IN_CELL_IM) {
2795 ## have an element in table scope
2796 my $i;
2797 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2798 my $node = $self->{open_elements}->[$_];
2799 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
2800 !!!cp ('t163');
2801 $i = $_;
2802 last INSCOPE;
2803 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2804 !!!cp ('t164');
2805 last INSCOPE;
2806 }
2807 } # INSCOPE
2808 unless (defined $i) {
2809 !!!cp ('t165');
2810 !!!parse-error (type => 'unmatched end tag',
2811 text => $token->{tag_name},
2812 token => $token);
2813 ## Ignore the token
2814 !!!next-token;
2815 next B;
2816 }
2817
2818 ## generate implied end tags
2819 while ($self->{open_elements}->[-1]->[1]
2820 & END_TAG_OPTIONAL_EL) {
2821 !!!cp ('t166');
2822 pop @{$self->{open_elements}};
2823 }
2824
2825 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
2826 ne $token->{tag_name}) {
2827 !!!cp ('t167');
2828 !!!parse-error (type => 'not closed',
2829 text => $self->{open_elements}->[-1]->[0]
2830 ->manakai_local_name,
2831 token => $token);
2832 } else {
2833 !!!cp ('t168');
2834 }
2835
2836 splice @{$self->{open_elements}}, $i;
2837
2838 $clear_up_to_marker->();
2839
2840 $self->{insertion_mode} = IN_ROW_IM;
2841
2842 !!!next-token;
2843 next B;
2844 } elsif (($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
2845 !!!cp ('t169');
2846 !!!parse-error (type => 'unmatched end tag',
2847 text => $token->{tag_name}, token => $token);
2848 ## Ignore the token
2849 !!!next-token;
2850 next B;
2851 } else {
2852 !!!cp ('t170');
2853 #
2854 }
2855 } elsif ($token->{tag_name} eq 'caption') {
2856 if (($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
2857 ## have a table element in table scope
2858 my $i;
2859 INSCOPE: {
2860 for (reverse 0..$#{$self->{open_elements}}) {
2861 my $node = $self->{open_elements}->[$_];
2862 if ($node->[1] == CAPTION_EL) {
2863 !!!cp ('t171');
2864 $i = $_;
2865 last INSCOPE;
2866 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2867 !!!cp ('t172');
2868 last;
2869 }
2870 }
2871
2872 !!!cp ('t173');
2873 !!!parse-error (type => 'unmatched end tag',
2874 text => $token->{tag_name}, token => $token);
2875 ## Ignore the token
2876 !!!next-token;
2877 next B;
2878 } # INSCOPE
2879
2880 ## generate implied end tags
2881 while ($self->{open_elements}->[-1]->[1]
2882 & END_TAG_OPTIONAL_EL) {
2883 !!!cp ('t174');
2884 pop @{$self->{open_elements}};
2885 }
2886
2887 unless ($self->{open_elements}->[-1]->[1] == CAPTION_EL) {
2888 !!!cp ('t175');
2889 !!!parse-error (type => 'not closed',
2890 text => $self->{open_elements}->[-1]->[0]
2891 ->manakai_local_name,
2892 token => $token);
2893 } else {
2894 !!!cp ('t176');
2895 }
2896
2897 splice @{$self->{open_elements}}, $i;
2898
2899 $clear_up_to_marker->();
2900
2901 $self->{insertion_mode} = IN_TABLE_IM;
2902
2903 !!!next-token;
2904 next B;
2905 } elsif (($self->{insertion_mode} & IM_MASK) == IN_CELL_IM) {
2906 !!!cp ('t177');
2907 !!!parse-error (type => 'unmatched end tag',
2908 text => $token->{tag_name}, token => $token);
2909 ## Ignore the token
2910 !!!next-token;
2911 next B;
2912 } else {
2913 !!!cp ('t178');
2914 #
2915 }
2916 } elsif ({
2917 table => 1, tbody => 1, tfoot => 1,
2918 thead => 1, tr => 1,
2919 }->{$token->{tag_name}} and
2920 ($self->{insertion_mode} & IM_MASK) == IN_CELL_IM) {
2921 ## have an element in table scope
2922 my $i;
2923 my $tn;
2924 INSCOPE: {
2925 for (reverse 0..$#{$self->{open_elements}}) {
2926 my $node = $self->{open_elements}->[$_];
2927 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
2928 !!!cp ('t179');
2929 $i = $_;
2930
2931 ## Close the cell
2932 !!!back-token; # </x>
2933 $token = {type => END_TAG_TOKEN, tag_name => $tn,
2934 line => $token->{line},
2935 column => $token->{column}};
2936 next B;
2937 } elsif ($node->[1] == TABLE_CELL_EL) {
2938 !!!cp ('t180');
2939 $tn = $node->[0]->manakai_local_name;
2940 ## NOTE: There is exactly one |td| or |th| element
2941 ## in scope in the stack of open elements by definition.
2942 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2943 ## ISSUE: Can this be reached?
2944 !!!cp ('t181');
2945 last;
2946 }
2947 }
2948
2949 !!!cp ('t182');
2950 !!!parse-error (type => 'unmatched end tag',
2951 text => $token->{tag_name}, token => $token);
2952 ## Ignore the token
2953 !!!next-token;
2954 next B;
2955 } # INSCOPE
2956 } elsif ($token->{tag_name} eq 'table' and
2957 ($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
2958 !!!parse-error (type => 'not closed', text => 'caption',
2959 token => $token);
2960
2961 ## As if </caption>
2962 ## have a table element in table scope
2963 my $i;
2964 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2965 my $node = $self->{open_elements}->[$_];
2966 if ($node->[1] == CAPTION_EL) {
2967 !!!cp ('t184');
2968 $i = $_;
2969 last INSCOPE;
2970 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2971 !!!cp ('t185');
2972 last INSCOPE;
2973 }
2974 } # INSCOPE
2975 unless (defined $i) {
2976 !!!cp ('t186');
2977 ## TODO: Wrong error type?
2978 !!!parse-error (type => 'unmatched end tag',
2979 text => 'caption', token => $token);
2980 ## Ignore the token
2981 !!!next-token;
2982 next B;
2983 }
2984
2985 ## generate implied end tags
2986 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
2987 !!!cp ('t187');
2988 pop @{$self->{open_elements}};
2989 }
2990
2991 unless ($self->{open_elements}->[-1]->[1] == CAPTION_EL) {
2992 !!!cp ('t188');
2993 !!!parse-error (type => 'not closed',
2994 text => $self->{open_elements}->[-1]->[0]
2995 ->manakai_local_name,
2996 token => $token);
2997 } else {
2998 !!!cp ('t189');
2999 }
3000
3001 splice @{$self->{open_elements}}, $i;
3002
3003 $clear_up_to_marker->();
3004
3005 $self->{insertion_mode} = IN_TABLE_IM;
3006
3007 ## reprocess
3008 next B;
3009 } elsif ({
3010 body => 1, col => 1, colgroup => 1, html => 1,
3011 }->{$token->{tag_name}}) {
3012 if ($self->{insertion_mode} & BODY_TABLE_IMS) {
3013 !!!cp ('t190');
3014 !!!parse-error (type => 'unmatched end tag',
3015 text => $token->{tag_name}, token => $token);
3016 ## Ignore the token
3017 !!!next-token;
3018 next B;
3019 } else {
3020 !!!cp ('t191');
3021 #
3022 }
3023 } elsif ({
3024 tbody => 1, tfoot => 1,
3025 thead => 1, tr => 1,
3026 }->{$token->{tag_name}} and
3027 ($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
3028 !!!cp ('t192');
3029 !!!parse-error (type => 'unmatched end tag',
3030 text => $token->{tag_name}, token => $token);
3031 ## Ignore the token
3032 !!!next-token;
3033 next B;
3034 } else {
3035 !!!cp ('t193');
3036 #
3037 }
3038 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
3039 for my $entry (@{$self->{open_elements}}) {
3040 unless ($entry->[1] & ALL_END_TAG_OPTIONAL_EL) {
3041 !!!cp ('t75');
3042 !!!parse-error (type => 'in body:#eof', token => $token);
3043 last;
3044 }
3045 }
3046
3047 ## Stop parsing.
3048 last B;
3049 } else {
3050 die "$0: $token->{type}: Unknown token type";
3051 }
3052
3053 $insert = $insert_to_current;
3054 #
3055 } elsif ($self->{insertion_mode} & TABLE_IMS) {
3056 if ($token->{type} == START_TAG_TOKEN) {
3057 if ({
3058 tr => (($self->{insertion_mode} & IM_MASK) != IN_ROW_IM),
3059 th => 1, td => 1,
3060 }->{$token->{tag_name}}) {
3061 if (($self->{insertion_mode} & IM_MASK) == IN_TABLE_IM) {
3062 ## Clear back to table context
3063 while (not ($self->{open_elements}->[-1]->[1]
3064 & TABLE_SCOPING_EL)) {
3065 !!!cp ('t201');
3066 pop @{$self->{open_elements}};
3067 }
3068
3069 !!!insert-element ('tbody',, $token);
3070 $self->{insertion_mode} = IN_TABLE_BODY_IM;
3071 ## reprocess in the "in table body" insertion mode...
3072 }
3073
3074 if (($self->{insertion_mode} & IM_MASK) == IN_TABLE_BODY_IM) {
3075 unless ($token->{tag_name} eq 'tr') {
3076 !!!cp ('t202');
3077 !!!parse-error (type => 'missing start tag:tr', token => $token);
3078 }
3079
3080 ## Clear back to table body context
3081 while (not ($self->{open_elements}->[-1]->[1]
3082 & TABLE_ROWS_SCOPING_EL)) {
3083 !!!cp ('t203');
3084 ## ISSUE: Can this case be reached?
3085 pop @{$self->{open_elements}};
3086 }
3087
3088 $self->{insertion_mode} = IN_ROW_IM;
3089 if ($token->{tag_name} eq 'tr') {
3090 !!!cp ('t204');
3091 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3092 $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3093 !!!nack ('t204');
3094 !!!next-token;
3095 next B;
3096 } else {
3097 !!!cp ('t205');
3098 !!!insert-element ('tr',, $token);
3099 ## reprocess in the "in row" insertion mode
3100 }
3101 } else {
3102 !!!cp ('t206');
3103 }
3104
3105 ## Clear back to table row context
3106 while (not ($self->{open_elements}->[-1]->[1]
3107 & TABLE_ROW_SCOPING_EL)) {
3108 !!!cp ('t207');
3109 pop @{$self->{open_elements}};
3110 }
3111
3112 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3113 $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3114 $self->{insertion_mode} = IN_CELL_IM;
3115
3116 push @$active_formatting_elements, ['#marker', ''];
3117
3118 !!!nack ('t207.1');
3119 !!!next-token;
3120 next B;
3121 } elsif ({
3122 caption => 1, col => 1, colgroup => 1,
3123 tbody => 1, tfoot => 1, thead => 1,
3124 tr => 1, # $self->{insertion_mode} == IN_ROW_IM
3125 }->{$token->{tag_name}}) {
3126 if (($self->{insertion_mode} & IM_MASK) == IN_ROW_IM) {
3127 ## As if </tr>
3128 ## have an element in table scope
3129 my $i;
3130 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3131 my $node = $self->{open_elements}->[$_];
3132 if ($node->[1] == TABLE_ROW_EL) {
3133 !!!cp ('t208');
3134 $i = $_;
3135 last INSCOPE;
3136 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3137 !!!cp ('t209');
3138 last INSCOPE;
3139 }
3140 } # INSCOPE
3141 unless (defined $i) {
3142 !!!cp ('t210');
3143 ## TODO: This type is wrong.
3144 !!!parse-error (type => 'unmacthed end tag',
3145 text => $token->{tag_name}, token => $token);
3146 ## Ignore the token
3147 !!!nack ('t210.1');
3148 !!!next-token;
3149 next B;
3150 }
3151
3152 ## Clear back to table row context
3153 while (not ($self->{open_elements}->[-1]->[1]
3154 & TABLE_ROW_SCOPING_EL)) {
3155 !!!cp ('t211');
3156 ## ISSUE: Can this case be reached?
3157 pop @{$self->{open_elements}};
3158 }
3159
3160 pop @{$self->{open_elements}}; # tr
3161 $self->{insertion_mode} = IN_TABLE_BODY_IM;
3162 if ($token->{tag_name} eq 'tr') {
3163 !!!cp ('t212');
3164 ## reprocess
3165 !!!ack-later;
3166 next B;
3167 } else {
3168 !!!cp ('t213');
3169 ## reprocess in the "in table body" insertion mode...
3170 }
3171 }
3172
3173 if (($self->{insertion_mode} & IM_MASK) == IN_TABLE_BODY_IM) {
3174 ## have an element in table scope
3175 my $i;
3176 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3177 my $node = $self->{open_elements}->[$_];
3178 if ($node->[1] == TABLE_ROW_GROUP_EL) {
3179 !!!cp ('t214');
3180 $i = $_;
3181 last INSCOPE;
3182 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3183 !!!cp ('t215');
3184 last INSCOPE;
3185 }
3186 } # INSCOPE
3187 unless (defined $i) {
3188 !!!cp ('t216');
3189 ## TODO: This error type is wrong.
3190 !!!parse-error (type => 'unmatched end tag',
3191 text => $token->{tag_name}, token => $token);
3192 ## Ignore the token
3193 !!!nack ('t216.1');
3194 !!!next-token;
3195 next B;
3196 }
3197
3198 ## Clear back to table body context
3199 while (not ($self->{open_elements}->[-1]->[1]
3200 & TABLE_ROWS_SCOPING_EL)) {
3201 !!!cp ('t217');
3202 ## ISSUE: Can this state be reached?
3203 pop @{$self->{open_elements}};
3204 }
3205
3206 ## As if <{current node}>
3207 ## have an element in table scope
3208 ## true by definition
3209
3210 ## Clear back to table body context
3211 ## nop by definition
3212
3213 pop @{$self->{open_elements}};
3214 $self->{insertion_mode} = IN_TABLE_IM;
3215 ## reprocess in "in table" insertion mode...
3216 } else {
3217 !!!cp ('t218');
3218 }
3219
3220 if ($token->{tag_name} eq 'col') {
3221 ## Clear back to table context
3222 while (not ($self->{open_elements}->[-1]->[1]
3223 & TABLE_SCOPING_EL)) {
3224 !!!cp ('t219');
3225 ## ISSUE: Can this state be reached?
3226 pop @{$self->{open_elements}};
3227 }
3228
3229 !!!insert-element ('colgroup',, $token);
3230 $self->{insertion_mode} = IN_COLUMN_GROUP_IM;
3231 ## reprocess
3232 $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3233 !!!ack-later;
3234 next B;
3235 } elsif ({
3236 caption => 1,
3237 colgroup => 1,
3238 tbody => 1, tfoot => 1, thead => 1,
3239 }->{$token->{tag_name}}) {
3240 ## Clear back to table context
3241 while (not ($self->{open_elements}->[-1]->[1]
3242 & TABLE_SCOPING_EL)) {
3243 !!!cp ('t220');
3244 ## ISSUE: Can this state be reached?
3245 pop @{$self->{open_elements}};
3246 }
3247
3248 push @$active_formatting_elements, ['#marker', '']
3249 if $token->{tag_name} eq 'caption';
3250
3251 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3252 $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3253 $self->{insertion_mode} = {
3254 caption => IN_CAPTION_IM,
3255 colgroup => IN_COLUMN_GROUP_IM,
3256 tbody => IN_TABLE_BODY_IM,
3257 tfoot => IN_TABLE_BODY_IM,
3258 thead => IN_TABLE_BODY_IM,
3259 }->{$token->{tag_name}};
3260 !!!next-token;
3261 !!!nack ('t220.1');
3262 next B;
3263 } else {
3264 die "$0: in table: <>: $token->{tag_name}";
3265 }
3266 } elsif ($token->{tag_name} eq 'table') {
3267 !!!parse-error (type => 'not closed',
3268 text => $self->{open_elements}->[-1]->[0]
3269 ->manakai_local_name,
3270 token => $token);
3271
3272 ## As if </table>
3273 ## have a table element in table scope
3274 my $i;
3275 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3276 my $node = $self->{open_elements}->[$_];
3277 if ($node->[1] == TABLE_EL) {
3278 !!!cp ('t221');
3279 $i = $_;
3280 last INSCOPE;
3281 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3282 !!!cp ('t222');
3283 last INSCOPE;
3284 }
3285 } # INSCOPE
3286 unless (defined $i) {
3287 !!!cp ('t223');
3288 ## TODO: The following is wrong, maybe.
3289 !!!parse-error (type => 'unmatched end tag', text => 'table',
3290 token => $token);
3291 ## Ignore tokens </table><table>
3292 !!!nack ('t223.1');
3293 !!!next-token;
3294 next B;
3295 }
3296
3297 ## TODO: The following steps have been removed from the latest spec.
3298 ## generate implied end tags
3299 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
3300 !!!cp ('t224');
3301 pop @{$self->{open_elements}};
3302 }
3303
3304 unless ($self->{open_elements}->[-1]->[1] == TABLE_EL) {
3305 !!!cp ('t225');
3306 ## NOTE: |<table><tr><table>|
3307 !!!parse-error (type => 'not closed',
3308 text => $self->{open_elements}->[-1]->[0]
3309 ->manakai_local_name,
3310 token => $token);
3311 } else {
3312 !!!cp ('t226');
3313 }
3314
3315 splice @{$self->{open_elements}}, $i;
3316 pop @{$open_tables};
3317
3318 $self->_reset_insertion_mode;
3319
3320 ## reprocess
3321 !!!ack-later;
3322 next B;
3323 } elsif ($token->{tag_name} eq 'style') {
3324 !!!cp ('t227.8');
3325 ## NOTE: This is an "as if in head" code clone.
3326 $parse_rcdata->(CDATA_CONTENT_MODEL);
3327 $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3328 next B;
3329 } elsif ($token->{tag_name} eq 'script') {
3330 !!!cp ('t227.6');
3331 ## NOTE: This is an "as if in head" code clone.
3332 $script_start_tag->();
3333 $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3334 next B;
3335 } elsif ($token->{tag_name} eq 'input') {
3336 if ($token->{attributes}->{type}) {
3337 my $type = $token->{attributes}->{type}->{value};
3338 $type =~ tr/A-Z/a-z/; ## ASCII case-insensitive.
3339 if ($type eq 'hidden') {
3340 !!!cp ('t227.3');
3341 !!!parse-error (type => 'in table',
3342 text => $token->{tag_name}, token => $token);
3343
3344 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3345 $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3346
3347 ## TODO: form element pointer
3348
3349 pop @{$self->{open_elements}};
3350
3351 !!!next-token;
3352 !!!ack ('t227.2.1');
3353 next B;
3354 } else {
3355 !!!cp ('t227.1');
3356 #
3357 }
3358 } else {
3359 !!!cp ('t227.4');
3360 #
3361 }
3362 } else {
3363 !!!cp ('t227');
3364 #
3365 }
3366
3367 !!!parse-error (type => 'in table', text => $token->{tag_name},
3368 token => $token);
3369
3370 $insert = $insert_to_foster;
3371 #
3372 } elsif ($token->{type} == END_TAG_TOKEN) {
3373 if ($token->{tag_name} eq 'tr' and
3374 ($self->{insertion_mode} & IM_MASK) == IN_ROW_IM) {
3375 ## have an element in table scope
3376 my $i;
3377 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3378 my $node = $self->{open_elements}->[$_];
3379 if ($node->[1] == TABLE_ROW_EL) {
3380 !!!cp ('t228');
3381 $i = $_;
3382 last INSCOPE;
3383 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3384 !!!cp ('t229');
3385 last INSCOPE;
3386 }
3387 } # INSCOPE
3388 unless (defined $i) {
3389 !!!cp ('t230');
3390 !!!parse-error (type => 'unmatched end tag',
3391 text => $token->{tag_name}, token => $token);
3392 ## Ignore the token
3393 !!!nack ('t230.1');
3394 !!!next-token;
3395 next B;
3396 } else {
3397 !!!cp ('t232');
3398 }
3399
3400 ## Clear back to table row context
3401 while (not ($self->{open_elements}->[-1]->[1]
3402 & TABLE_ROW_SCOPING_EL)) {
3403 !!!cp ('t231');
3404 ## ISSUE: Can this state be reached?
3405 pop @{$self->{open_elements}};
3406 }
3407
3408 pop @{$self->{open_elements}}; # tr
3409 $self->{insertion_mode} = IN_TABLE_BODY_IM;
3410 !!!next-token;
3411 !!!nack ('t231.1');
3412 next B;
3413 } elsif ($token->{tag_name} eq 'table') {
3414 if (($self->{insertion_mode} & IM_MASK) == IN_ROW_IM) {
3415 ## As if </tr>
3416 ## have an element in table scope
3417 my $i;
3418 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3419 my $node = $self->{open_elements}->[$_];
3420 if ($node->[1] == TABLE_ROW_EL) {
3421 !!!cp ('t233');
3422 $i = $_;
3423 last INSCOPE;
3424 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3425 !!!cp ('t234');
3426 last INSCOPE;
3427 }
3428 } # INSCOPE
3429 unless (defined $i) {
3430 !!!cp ('t235');
3431 ## TODO: The following is wrong.
3432 !!!parse-error (type => 'unmatched end tag',
3433 text => $token->{type}, token => $token);
3434 ## Ignore the token
3435 !!!nack ('t236.1');
3436 !!!next-token;
3437 next B;
3438 }
3439
3440 ## Clear back to table row context
3441 while (not ($self->{open_elements}->[-1]->[1]
3442 & TABLE_ROW_SCOPING_EL)) {
3443 !!!cp ('t236');
3444 ## ISSUE: Can this state be reached?
3445 pop @{$self->{open_elements}};
3446 }
3447
3448 pop @{$self->{open_elements}}; # tr
3449 $self->{insertion_mode} = IN_TABLE_BODY_IM;
3450 ## reprocess in the "in table body" insertion mode...
3451 }
3452
3453 if (($self->{insertion_mode} & IM_MASK) == IN_TABLE_BODY_IM) {
3454 ## have an element in table scope
3455 my $i;
3456 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3457 my $node = $self->{open_elements}->[$_];
3458 if ($node->[1] == TABLE_ROW_GROUP_EL) {
3459 !!!cp ('t237');
3460 $i = $_;
3461 last INSCOPE;
3462 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3463 !!!cp ('t238');
3464 last INSCOPE;
3465 }
3466 } # INSCOPE
3467 unless (defined $i) {
3468 !!!cp ('t239');
3469 !!!parse-error (type => 'unmatched end tag',
3470 text => $token->{tag_name}, token => $token);
3471 ## Ignore the token
3472 !!!nack ('t239.1');
3473 !!!next-token;
3474 next B;
3475 }
3476
3477 ## Clear back to table body context
3478 while (not ($self->{open_elements}->[-1]->[1]
3479 & TABLE_ROWS_SCOPING_EL)) {
3480 !!!cp ('t240');
3481 pop @{$self->{open_elements}};
3482 }
3483
3484 ## As if <{current node}>
3485 ## have an element in table scope
3486 ## true by definition
3487
3488 ## Clear back to table body context
3489 ## nop by definition
3490
3491 pop @{$self->{open_elements}};
3492 $self->{insertion_mode} = IN_TABLE_IM;
3493 ## reprocess in the "in table" insertion mode...
3494 }
3495
3496 ## NOTE: </table> in the "in table" insertion mode.
3497 ## When you edit the code fragment below, please ensure that
3498 ## the code for <table> in the "in table" insertion mode
3499 ## is synced with it.
3500
3501 ## have a table element in table scope
3502 my $i;
3503 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3504 my $node = $self->{open_elements}->[$_];
3505 if ($node->[1] == TABLE_EL) {
3506 !!!cp ('t241');
3507 $i = $_;
3508 last INSCOPE;
3509 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3510 !!!cp ('t242');
3511 last INSCOPE;
3512 }
3513 } # INSCOPE
3514 unless (defined $i) {
3515 !!!cp ('t243');
3516 !!!parse-error (type => 'unmatched end tag',
3517 text => $token->{tag_name}, token => $token);
3518 ## Ignore the token
3519 !!!nack ('t243.1');
3520 !!!next-token;
3521 next B;
3522 }
3523
3524 splice @{$self->{open_elements}}, $i;
3525 pop @{$open_tables};
3526
3527 $self->_reset_insertion_mode;
3528
3529 !!!next-token;
3530 next B;
3531 } elsif ({
3532 tbody => 1, tfoot => 1, thead => 1,
3533 }->{$token->{tag_name}} and
3534 $self->{insertion_mode} & ROW_IMS) {
3535 if (($self->{insertion_mode} & IM_MASK) == IN_ROW_IM) {
3536 ## have an element in table scope
3537 my $i;
3538 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3539 my $node = $self->{open_elements}->[$_];
3540 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
3541 !!!cp ('t247');
3542 $i = $_;
3543 last INSCOPE;
3544 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3545 !!!cp ('t248');
3546 last INSCOPE;
3547 }
3548 } # INSCOPE
3549 unless (defined $i) {
3550 !!!cp ('t249');
3551 !!!parse-error (type => 'unmatched end tag',
3552 text => $token->{tag_name}, token => $token);
3553 ## Ignore the token
3554 !!!nack ('t249.1');
3555 !!!next-token;
3556 next B;
3557 }
3558
3559 ## As if </tr>
3560 ## have an element in table scope
3561 my $i;
3562 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3563 my $node = $self->{open_elements}->[$_];
3564 if ($node->[1] == TABLE_ROW_EL) {
3565 !!!cp ('t250');
3566 $i = $_;
3567 last INSCOPE;
3568 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3569 !!!cp ('t251');
3570 last INSCOPE;
3571 }
3572 } # INSCOPE
3573 unless (defined $i) {
3574 !!!cp ('t252');
3575 !!!parse-error (type => 'unmatched end tag',
3576 text => 'tr', token => $token);
3577 ## Ignore the token
3578 !!!nack ('t252.1');
3579 !!!next-token;
3580 next B;
3581 }
3582
3583 ## Clear back to table row context
3584 while (not ($self->{open_elements}->[-1]->[1]
3585 & TABLE_ROW_SCOPING_EL)) {
3586 !!!cp ('t253');
3587 ## ISSUE: Can this case be reached?
3588 pop @{$self->{open_elements}};
3589 }
3590
3591 pop @{$self->{open_elements}}; # tr
3592 $self->{insertion_mode} = IN_TABLE_BODY_IM;
3593 ## reprocess in the "in table body" insertion mode...
3594 }
3595
3596 ## have an element in table scope
3597 my $i;
3598 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3599 my $node = $self->{open_elements}->[$_];
3600 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
3601 !!!cp ('t254');
3602 $i = $_;
3603 last INSCOPE;
3604 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3605 !!!cp ('t255');
3606 last INSCOPE;
3607 }
3608 } # INSCOPE
3609 unless (defined $i) {
3610 !!!cp ('t256');
3611 !!!parse-error (type => 'unmatched end tag',
3612 text => $token->{tag_name}, token => $token);
3613 ## Ignore the token
3614 !!!nack ('t256.1');
3615 !!!next-token;
3616 next B;
3617 }
3618
3619 ## Clear back to table body context
3620 while (not ($self->{open_elements}->[-1]->[1]
3621 & TABLE_ROWS_SCOPING_EL)) {
3622 !!!cp ('t257');
3623 ## ISSUE: Can this case be reached?
3624 pop @{$self->{open_elements}};
3625 }
3626
3627 pop @{$self->{open_elements}};
3628 $self->{insertion_mode} = IN_TABLE_IM;
3629 !!!nack ('t257.1');
3630 !!!next-token;
3631 next B;
3632 } elsif ({
3633 body => 1, caption => 1, col => 1, colgroup => 1,
3634 html => 1, td => 1, th => 1,
3635 tr => 1, # $self->{insertion_mode} == IN_ROW_IM
3636 tbody => 1, tfoot => 1, thead => 1, # $self->{insertion_mode} == IN_TABLE_IM
3637 }->{$token->{tag_name}}) {
3638 !!!cp ('t258');
3639 !!!parse-error (type => 'unmatched end tag',
3640 text => $token->{tag_name}, token => $token);
3641 ## Ignore the token
3642 !!!nack ('t258.1');
3643 !!!next-token;
3644 next B;
3645 } else {
3646 !!!cp ('t259');
3647 !!!parse-error (type => 'in table:/',
3648 text => $token->{tag_name}, token => $token);
3649
3650 $insert = $insert_to_foster;
3651 #
3652 }
3653 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
3654 unless ($self->{open_elements}->[-1]->[1] == HTML_EL and
3655 @{$self->{open_elements}} == 1) { # redundant, maybe
3656 !!!parse-error (type => 'in body:#eof', token => $token);
3657 !!!cp ('t259.1');
3658 #
3659 } else {
3660 !!!cp ('t259.2');
3661 #
3662 }
3663
3664 ## Stop parsing
3665 last B;
3666 } else {
3667 die "$0: $token->{type}: Unknown token type";
3668 }
3669 } elsif (($self->{insertion_mode} & IM_MASK) == IN_COLUMN_GROUP_IM) {
3670 if ($token->{type} == CHARACTER_TOKEN) {
3671 if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
3672 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3673 unless (length $token->{data}) {
3674 !!!cp ('t260');
3675 !!!next-token;
3676 next B;
3677 }
3678 }
3679
3680 !!!cp ('t261');
3681 #
3682 } elsif ($token->{type} == START_TAG_TOKEN) {
3683 if ($token->{tag_name} eq 'col') {
3684 !!!cp ('t262');
3685 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3686 pop @{$self->{open_elements}};
3687 !!!ack ('t262.1');
3688 !!!next-token;
3689 next B;
3690 } else {
3691 !!!cp ('t263');
3692 #
3693 }
3694 } elsif ($token->{type} == END_TAG_TOKEN) {
3695 if ($token->{tag_name} eq 'colgroup') {
3696 if ($self->{open_elements}->[-1]->[1] == HTML_EL) {
3697 !!!cp ('t264');
3698 !!!parse-error (type => 'unmatched end tag',
3699 text => 'colgroup', token => $token);
3700 ## Ignore the token
3701 !!!next-token;
3702 next B;
3703 } else {
3704 !!!cp ('t265');
3705 pop @{$self->{open_elements}}; # colgroup
3706 $self->{insertion_mode} = IN_TABLE_IM;
3707 !!!next-token;
3708 next B;
3709 }
3710 } elsif ($token->{tag_name} eq 'col') {
3711 !!!cp ('t266');
3712 !!!parse-error (type => 'unmatched end tag',
3713 text => 'col', token => $token);
3714 ## Ignore the token
3715 !!!next-token;
3716 next B;
3717 } else {
3718 !!!cp ('t267');
3719 #
3720 }
3721 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
3722 if ($self->{open_elements}->[-1]->[1] == HTML_EL and
3723 @{$self->{open_elements}} == 1) { # redundant, maybe
3724 !!!cp ('t270.2');
3725 ## Stop parsing.
3726 last B;
3727 } else {
3728 ## NOTE: As if </colgroup>.
3729 !!!cp ('t270.1');
3730 pop @{$self->{open_elements}}; # colgroup
3731 $self->{insertion_mode} = IN_TABLE_IM;
3732 ## Reprocess.
3733 next B;
3734 }
3735 } else {
3736 die "$0: $token->{type}: Unknown token type";
3737 }
3738
3739 ## As if </colgroup>
3740 if ($self->{open_elements}->[-1]->[1] == HTML_EL) {
3741 !!!cp ('t269');
3742 ## TODO: Wrong error type?
3743 !!!parse-error (type => 'unmatched end tag',
3744 text => 'colgroup', token => $token);
3745 ## Ignore the token
3746 !!!nack ('t269.1');
3747 !!!next-token;
3748 next B;
3749 } else {
3750 !!!cp ('t270');
3751 pop @{$self->{open_elements}}; # colgroup
3752 $self->{insertion_mode} = IN_TABLE_IM;
3753 !!!ack-later;
3754 ## reprocess
3755 next B;
3756 }
3757 } elsif ($self->{insertion_mode} & SELECT_IMS) {
3758 if ($token->{type} == CHARACTER_TOKEN) {
3759 !!!cp ('t271');
3760 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3761 !!!next-token;
3762 next B;
3763 } elsif ($token->{type} == START_TAG_TOKEN) {
3764 if ($token->{tag_name} eq 'option') {
3765 if ($self->{open_elements}->[-1]->[1] == OPTION_EL) {
3766 !!!cp ('t272');
3767 ## As if </option>
3768 pop @{$self->{open_elements}};
3769 } else {
3770 !!!cp ('t273');
3771 }
3772
3773 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3774 !!!nack ('t273.1');
3775 !!!next-token;
3776 next B;
3777 } elsif ($token->{tag_name} eq 'optgroup') {
3778 if ($self->{open_elements}->[-1]->[1] == OPTION_EL) {
3779 !!!cp ('t274');
3780 ## As if </option>
3781 pop @{$self->{open_elements}};
3782 } else {
3783 !!!cp ('t275');
3784 }
3785
3786 if ($self->{open_elements}->[-1]->[1] == OPTGROUP_EL) {
3787 !!!cp ('t276');
3788 ## As if </optgroup>
3789 pop @{$self->{open_elements}};
3790 } else {
3791 !!!cp ('t277');
3792 }
3793
3794 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3795 !!!nack ('t277.1');
3796 !!!next-token;
3797 next B;
3798 } elsif ({
3799 select => 1, input => 1, textarea => 1, keygen => 1,
3800 }->{$token->{tag_name}} or
3801 (($self->{insertion_mode} & IM_MASK)
3802 == IN_SELECT_IN_TABLE_IM and
3803 {
3804 caption => 1, table => 1,
3805 tbody => 1, tfoot => 1, thead => 1,
3806 tr => 1, td => 1, th => 1,
3807 }->{$token->{tag_name}})) {
3808
3809 ## 1. Parse error.
3810 if ($token->{tag_name} eq 'select') {
3811 !!!parse-error (type => 'select in select', ## XXX: documentation
3812 token => $token);
3813 } else {
3814 !!!parse-error (type => 'not closed', text => 'select',
3815 token => $token);
3816 }
3817
3818 ## 2./<select>-1. Unless "have an element in table scope" (select):
3819 my $i;
3820 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3821 my $node = $self->{open_elements}->[$_];
3822 if ($node->[1] == SELECT_EL) {
3823 !!!cp ('t278');
3824 $i = $_;
3825 last INSCOPE;
3826 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3827 !!!cp ('t279');
3828 last INSCOPE;
3829 }
3830 } # INSCOPE
3831 unless (defined $i) {
3832 !!!cp ('t280');
3833 if ($token->{tag_name} eq 'select') {
3834 ## NOTE: This error would be raised when
3835 ## |select.innerHTML = '<select>'| is executed; in this
3836 ## case two errors, "select in select" and "unmatched
3837 ## end tags" are reported to the user, the latter might
3838 ## be confusing but this is what the spec requires.
3839 !!!parse-error (type => 'unmatched end tag',
3840 text => 'select',
3841 token => $token);
3842 }
3843 ## Ignore the token.
3844 !!!nack ('t280.1');
3845 !!!next-token;
3846 next B;
3847 }
3848
3849 ## 3. Otherwise, as if there were <select>:
3850
3851 !!!cp ('t281');
3852 splice @{$self->{open_elements}}, $i;
3853
3854 $self->_reset_insertion_mode;
3855
3856 if ($token->{tag_name} eq 'select') {
3857 !!!nack ('t281.2');
3858 !!!next-token;
3859 next B;
3860 } else {
3861 !!!cp ('t281.1');
3862 !!!ack-later;
3863 ## Reprocess the token.
3864 next B;
3865 }
3866 } elsif ($token->{tag_name} eq 'script') {
3867 !!!cp ('t281.3');
3868 ## NOTE: This is an "as if in head" code clone
3869 $script_start_tag->();
3870 next B;
3871 } else {
3872 !!!cp ('t282');
3873 !!!parse-error (type => 'in select',
3874 text => $token->{tag_name}, token => $token);
3875 ## Ignore the token
3876 !!!nack ('t282.1');
3877 !!!next-token;
3878 next B;
3879 }
3880 } elsif ($token->{type} == END_TAG_TOKEN) {
3881 if ($token->{tag_name} eq 'optgroup') {
3882 if ($self->{open_elements}->[-1]->[1] == OPTION_EL and
3883 $self->{open_elements}->[-2]->[1] == OPTGROUP_EL) {
3884 !!!cp ('t283');
3885 ## As if </option>
3886 splice @{$self->{open_elements}}, -2;
3887 } elsif ($self->{open_elements}->[-1]->[1] == OPTGROUP_EL) {
3888 !!!cp ('t284');
3889 pop @{$self->{open_elements}};
3890 } else {
3891 !!!cp ('t285');
3892 !!!parse-error (type => 'unmatched end tag',
3893 text => $token->{tag_name}, token => $token);
3894 ## Ignore the token
3895 }
3896 !!!nack ('t285.1');
3897 !!!next-token;
3898 next B;
3899 } elsif ($token->{tag_name} eq 'option') {
3900 if ($self->{open_elements}->[-1]->[1] == OPTION_EL) {
3901 !!!cp ('t286');
3902 pop @{$self->{open_elements}};
3903 } else {
3904 !!!cp ('t287');
3905 !!!parse-error (type => 'unmatched end tag',
3906 text => $token->{tag_name}, token => $token);
3907 ## Ignore the token
3908 }
3909 !!!nack ('t287.1');
3910 !!!next-token;
3911 next B;
3912 } elsif ($token->{tag_name} eq 'select') {
3913 ## have an element in table scope
3914 my $i;
3915 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3916 my $node = $self->{open_elements}->[$_];
3917 if ($node->[1] == SELECT_EL) {
3918 !!!cp ('t288');
3919 $i = $_;
3920 last INSCOPE;
3921 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3922 !!!cp ('t289');
3923 last INSCOPE;
3924 }
3925 } # INSCOPE
3926 unless (defined $i) {
3927 !!!cp ('t290');
3928 !!!parse-error (type => 'unmatched end tag',
3929 text => $token->{tag_name}, token => $token);
3930 ## Ignore the token
3931 !!!nack ('t290.1');
3932 !!!next-token;
3933 next B;
3934 }
3935
3936 !!!cp ('t291');
3937 splice @{$self->{open_elements}}, $i;
3938
3939 $self->_reset_insertion_mode;
3940
3941 !!!nack ('t291.1');
3942 !!!next-token;
3943 next B;
3944 } elsif (($self->{insertion_mode} & IM_MASK)
3945 == IN_SELECT_IN_TABLE_IM and
3946 {
3947 caption => 1, table => 1, tbody => 1,
3948 tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
3949 }->{$token->{tag_name}}) {
3950 ## TODO: The following is wrong?
3951 !!!parse-error (type => 'unmatched end tag',
3952 text => $token->{tag_name}, token => $token);
3953
3954 ## have an element in table scope
3955 my $i;
3956 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3957 my $node = $self->{open_elements}->[$_];
3958 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
3959 !!!cp ('t292');
3960 $i = $_;
3961 last INSCOPE;
3962 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3963 !!!cp ('t293');
3964 last INSCOPE;
3965 }
3966 } # INSCOPE
3967 unless (defined $i) {
3968 !!!cp ('t294');
3969 ## Ignore the token
3970 !!!nack ('t294.1');
3971 !!!next-token;
3972 next B;
3973 }
3974
3975 ## As if </select>
3976 ## have an element in table scope
3977 undef $i;
3978 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3979 my $node = $self->{open_elements}->[$_];
3980 if ($node->[1] == SELECT_EL) {
3981 !!!cp ('t295');
3982 $i = $_;
3983 last INSCOPE;
3984 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3985 ## ISSUE: Can this state be reached?
3986 !!!cp ('t296');
3987 last INSCOPE;
3988 }
3989 } # INSCOPE
3990 unless (defined $i) {
3991 !!!cp ('t297');
3992 ## TODO: The following error type is correct?
3993 !!!parse-error (type => 'unmatched end tag',
3994 text => 'select', token => $token);
3995 ## Ignore the </select> token
3996 !!!nack ('t297.1');
3997 !!!next-token; ## TODO: ok?
3998 next B;
3999 }
4000
4001 !!!cp ('t298');
4002 splice @{$self->{open_elements}}, $i;
4003
4004 $self->_reset_insertion_mode;
4005
4006 !!!ack-later;
4007 ## reprocess
4008 next B;
4009 } else {
4010 !!!cp ('t299');
4011 !!!parse-error (type => 'in select:/',
4012 text => $token->{tag_name}, token => $token);
4013 ## Ignore the token
4014 !!!nack ('t299.3');
4015 !!!next-token;
4016 next B;
4017 }
4018 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4019 unless ($self->{open_elements}->[-1]->[1] == HTML_EL and
4020 @{$self->{open_elements}} == 1) { # redundant, maybe
4021 !!!cp ('t299.1');
4022 !!!parse-error (type => 'in body:#eof', token => $token);
4023 } else {
4024 !!!cp ('t299.2');
4025 }
4026
4027 ## Stop parsing.
4028 last B;
4029 } else {
4030 die "$0: $token->{type}: Unknown token type";
4031 }
4032 } elsif ($self->{insertion_mode} & BODY_AFTER_IMS) {
4033 if ($token->{type} == CHARACTER_TOKEN) {
4034 if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
4035 my $data = $1;
4036 ## As if in body
4037 $reconstruct_active_formatting_elements->($insert_to_current);
4038
4039 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4040
4041 unless (length $token->{data}) {
4042 !!!cp ('t300');
4043 !!!next-token;
4044 next B;
4045 }
4046 }
4047
4048 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
4049 !!!cp ('t301');
4050 !!!parse-error (type => 'after html:#text', token => $token);
4051 #
4052 } else {
4053 !!!cp ('t302');
4054 ## "after body" insertion mode
4055 !!!parse-error (type => 'after body:#text', token => $token);
4056 #
4057 }
4058
4059 $self->{insertion_mode} = IN_BODY_IM;
4060 ## reprocess
4061 next B;
4062 } elsif ($token->{type} == START_TAG_TOKEN) {
4063 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
4064 !!!cp ('t303');
4065 !!!parse-error (type => 'after html',
4066 text => $token->{tag_name}, token => $token);
4067 #
4068 } else {
4069 !!!cp ('t304');
4070 ## "after body" insertion mode
4071 !!!parse-error (type => 'after body',
4072 text => $token->{tag_name}, token => $token);
4073 #
4074 }
4075
4076 $self->{insertion_mode} = IN_BODY_IM;
4077 !!!ack-later;
4078 ## reprocess
4079 next B;
4080 } elsif ($token->{type} == END_TAG_TOKEN) {
4081 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
4082 !!!cp ('t305');
4083 !!!parse-error (type => 'after html:/',
4084 text => $token->{tag_name}, token => $token);
4085
4086 $self->{insertion_mode} = IN_BODY_IM;
4087 ## Reprocess.
4088 next B;
4089 } else {
4090 !!!cp ('t306');
4091 }
4092
4093 ## "after body" insertion mode
4094 if ($token->{tag_name} eq 'html') {
4095 if (defined $self->{inner_html_node}) {
4096 !!!cp ('t307');
4097 !!!parse-error (type => 'unmatched end tag',
4098 text => 'html', token => $token);
4099 ## Ignore the token
4100 !!!next-token;
4101 next B;
4102 } else {
4103 !!!cp ('t308');
4104 $self->{insertion_mode} = AFTER_HTML_BODY_IM;
4105 !!!next-token;
4106 next B;
4107 }
4108 } else {
4109 !!!cp ('t309');
4110 !!!parse-error (type => 'after body:/',
4111 text => $token->{tag_name}, token => $token);
4112
4113 $self->{insertion_mode} = IN_BODY_IM;
4114 ## reprocess
4115 next B;
4116 }
4117 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4118 !!!cp ('t309.2');
4119 ## Stop parsing
4120 last B;
4121 } else {
4122 die "$0: $token->{type}: Unknown token type";
4123 }
4124 } elsif ($self->{insertion_mode} & FRAME_IMS) {
4125 if ($token->{type} == CHARACTER_TOKEN) {
4126 if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
4127 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4128
4129 unless (length $token->{data}) {
4130 !!!cp ('t310');
4131 !!!next-token;
4132 next B;
4133 }
4134 }
4135
4136 if ($token->{data} =~ s/^[^\x09\x0A\x0C\x20]+//) {
4137 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
4138 !!!cp ('t311');
4139 !!!parse-error (type => 'in frameset:#text', token => $token);
4140 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
4141 !!!cp ('t312');
4142 !!!parse-error (type => 'after frameset:#text', token => $token);
4143 } else { # "after after frameset"
4144 !!!cp ('t313');
4145 !!!parse-error (type => 'after html:#text', token => $token);
4146 }
4147
4148 ## Ignore the token.
4149 if (length $token->{data}) {
4150 !!!cp ('t314');
4151 ## reprocess the rest of characters
4152 } else {
4153 !!!cp ('t315');
4154 !!!next-token;
4155 }
4156 next B;
4157 }
4158
4159 die qq[$0: Character "$token->{data}"];
4160 } elsif ($token->{type} == START_TAG_TOKEN) {
4161 if ($token->{tag_name} eq 'frameset' and
4162 $self->{insertion_mode} == IN_FRAMESET_IM) {
4163 !!!cp ('t318');
4164 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4165 !!!nack ('t318.1');
4166 !!!next-token;
4167 next B;
4168 } elsif ($token->{tag_name} eq 'frame' and
4169 $self->{insertion_mode} == IN_FRAMESET_IM) {
4170 !!!cp ('t319');
4171 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4172 pop @{$self->{open_elements}};
4173 !!!ack ('t319.1');
4174 !!!next-token;
4175 next B;
4176 } elsif ($token->{tag_name} eq 'noframes') {
4177 !!!cp ('t320');
4178 ## NOTE: As if in head.
4179 $parse_rcdata->(CDATA_CONTENT_MODEL);
4180 next B;
4181
4182 ## NOTE: |<!DOCTYPE HTML><frameset></frameset></html><noframes></noframes>|
4183 ## has no parse error.
4184 } else {
4185 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
4186 !!!cp ('t321');
4187 !!!parse-error (type => 'in frameset',
4188 text => $token->{tag_name}, token => $token);
4189 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
4190 !!!cp ('t322');
4191 !!!parse-error (type => 'after frameset',
4192 text => $token->{tag_name}, token => $token);
4193 } else { # "after after frameset"
4194 !!!cp ('t322.2');
4195 !!!parse-error (type => 'after after frameset',
4196 text => $token->{tag_name}, token => $token);
4197 }
4198 ## Ignore the token
4199 !!!nack ('t322.1');
4200 !!!next-token;
4201 next B;
4202 }
4203 } elsif ($token->{type} == END_TAG_TOKEN) {
4204 if ($token->{tag_name} eq 'frameset' and
4205 $self->{insertion_mode} == IN_FRAMESET_IM) {
4206 if ($self->{open_elements}->[-1]->[1] == HTML_EL and
4207 @{$self->{open_elements}} == 1) {
4208 !!!cp ('t325');
4209 !!!parse-error (type => 'unmatched end tag',
4210 text => $token->{tag_name}, token => $token);
4211 ## Ignore the token
4212 !!!next-token;
4213 } else {
4214 !!!cp ('t326');
4215 pop @{$self->{open_elements}};
4216 !!!next-token;
4217 }
4218
4219 if (not defined $self->{inner_html_node} and
4220 not ($self->{open_elements}->[-1]->[1] == FRAMESET_EL)) {
4221 !!!cp ('t327');
4222 $self->{insertion_mode} = AFTER_FRAMESET_IM;
4223 } else {
4224 !!!cp ('t328');
4225 }
4226 next B;
4227 } elsif ($token->{tag_name} eq 'html' and
4228 $self->{insertion_mode} == AFTER_FRAMESET_IM) {
4229 !!!cp ('t329');
4230 $self->{insertion_mode} = AFTER_HTML_FRAMESET_IM;
4231 !!!next-token;
4232 next B;
4233 } else {
4234 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
4235 !!!cp ('t330');
4236 !!!parse-error (type => 'in frameset:/',
4237 text => $token->{tag_name}, token => $token);
4238 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
4239 !!!cp ('t330.1');
4240 !!!parse-error (type => 'after frameset:/',
4241 text => $token->{tag_name}, token => $token);
4242 } else { # "after after frameset"
4243 !!!cp ('t331');
4244 !!!parse-error (type => 'after after frameset:/',
4245 text => $token->{tag_name}, token => $token);
4246 }
4247 ## Ignore the token
4248 !!!next-token;
4249 next B;
4250 }
4251 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4252 unless ($self->{open_elements}->[-1]->[1] == HTML_EL and
4253 @{$self->{open_elements}} == 1) { # redundant, maybe
4254 !!!cp ('t331.1');
4255 !!!parse-error (type => 'in body:#eof', token => $token);
4256 } else {
4257 !!!cp ('t331.2');
4258 }
4259
4260 ## Stop parsing
4261 last B;
4262 } else {
4263 die "$0: $token->{type}: Unknown token type";
4264 }
4265 } else {
4266 die "$0: $self->{insertion_mode}: Unknown insertion mode";
4267 }
4268
4269 ## "in body" insertion mode
4270 if ($token->{type} == START_TAG_TOKEN) {
4271 if ($token->{tag_name} eq 'script') {
4272 !!!cp ('t332');
4273 ## NOTE: This is an "as if in head" code clone
4274 $script_start_tag->();
4275 next B;
4276 } elsif ($token->{tag_name} eq 'style') {
4277 !!!cp ('t333');
4278 ## NOTE: This is an "as if in head" code clone
4279 $parse_rcdata->(CDATA_CONTENT_MODEL);
4280 next B;
4281 } elsif ({
4282 base => 1, command => 1, link => 1,
4283 }->{$token->{tag_name}}) {
4284 !!!cp ('t334');
4285 ## NOTE: This is an "as if in head" code clone, only "-t" differs
4286 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4287 pop @{$self->{open_elements}};
4288 !!!ack ('t334.1');
4289 !!!next-token;
4290 next B;
4291 } elsif ($token->{tag_name} eq 'meta') {
4292 ## NOTE: This is an "as if in head" code clone, only "-t" differs
4293 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4294 my $meta_el = pop @{$self->{open_elements}};
4295
4296 unless ($self->{confident}) {
4297 if ($token->{attributes}->{charset}) {
4298 !!!cp ('t335');
4299 ## NOTE: Whether the encoding is supported or not is handled
4300 ## in the {change_encoding} callback.
4301 $self->{change_encoding}
4302 ->($self, $token->{attributes}->{charset}->{value}, $token);
4303
4304 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
4305 ->set_user_data (manakai_has_reference =>
4306 $token->{attributes}->{charset}
4307 ->{has_reference});
4308 } elsif ($token->{attributes}->{content}) {
4309 if ($token->{attributes}->{content}->{value}
4310 =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
4311 [\x09\x0A\x0C\x0D\x20]*=
4312 [\x09\x0A\x0C\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
4313 ([^"'\x09\x0A\x0C\x0D\x20][^\x09\x0A\x0C\x0D\x20\x3B]*))
4314 /x) {
4315 !!!cp ('t336');
4316 ## NOTE: Whether the encoding is supported or not is handled
4317 ## in the {change_encoding} callback.
4318 $self->{change_encoding}
4319 ->($self, defined $1 ? $1 : defined $2 ? $2 : $3, $token);
4320 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
4321 ->set_user_data (manakai_has_reference =>
4322 $token->{attributes}->{content}
4323 ->{has_reference});
4324 }
4325 }
4326 } else {
4327 if ($token->{attributes}->{charset}) {
4328 !!!cp ('t337');
4329 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
4330 ->set_user_data (manakai_has_reference =>
4331 $token->{attributes}->{charset}
4332 ->{has_reference});
4333 }
4334 if ($token->{attributes}->{content}) {
4335 !!!cp ('t338');
4336 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
4337 ->set_user_data (manakai_has_reference =>
4338 $token->{attributes}->{content}
4339 ->{has_reference});
4340 }
4341 }
4342
4343 !!!ack ('t338.1');
4344 !!!next-token;
4345 next B;
4346 } elsif ($token->{tag_name} eq 'title') {
4347 !!!cp ('t341');
4348 ## NOTE: This is an "as if in head" code clone
4349 $parse_rcdata->(RCDATA_CONTENT_MODEL);
4350 next B;
4351 } elsif ($token->{tag_name} eq 'body') {
4352 !!!parse-error (type => 'in body', text => 'body', token => $token);
4353
4354 if (@{$self->{open_elements}} == 1 or
4355 not ($self->{open_elements}->[1]->[1] == BODY_EL)) {
4356 !!!cp ('t342');
4357 ## Ignore the token
4358 } else {
4359 my $body_el = $self->{open_elements}->[1]->[0];
4360 for my $attr_name (keys %{$token->{attributes}}) {
4361 unless ($body_el->has_attribute_ns (undef, $attr_name)) {
4362 !!!cp ('t343');
4363 $body_el->set_attribute_ns
4364 (undef, [undef, $attr_name],
4365 $token->{attributes}->{$attr_name}->{value});
4366 }
4367 }
4368 }
4369 !!!nack ('t343.1');
4370 !!!next-token;
4371 next B;
4372 } elsif ($token->{tag_name} eq 'frameset') {
4373 !!!parse-error (type => 'in body', text => $token->{tag_name},
4374 token => $token);
4375
4376 if (@{$self->{open_elements}} == 1 or
4377 not ($self->{open_elements}->[1]->[1] == BODY_EL)) {
4378 !!!cp ('t343.2');
4379 ## Ignore the token.
4380 } elsif (not $self->{frameset_ok}) {
4381 !!!cp ('t343.3');
4382 ## Ignore the token.
4383 } else {
4384 !!!cp ('t343.4');
4385
4386 ## 1. Remove the second element.
4387 my $body = $self->{open_elements}->[1]->[0];
4388 my $body_parent = $body->parent_node;
4389 $body_parent->remove_child ($body) if $body_parent;
4390
4391 ## 2. Pop nodes.
4392 splice @{$self->{open_elements}}, 1;
4393
4394 ## 3. Insert.
4395 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4396
4397 ## 4. Switch.
4398 $self->{insertion_mode} = IN_FRAMESET_IM;
4399 }
4400
4401 !!!nack ('t343.5');
4402 !!!next-token;
4403 next B;
4404 } elsif ({
4405 ## NOTE: Start tags for non-phrasing flow content elements
4406
4407 ## NOTE: The normal one
4408 address => 1, article => 1, aside => 1, blockquote => 1,
4409 center => 1, datagrid => 1, details => 1, dialog => 1,
4410 dir => 1, div => 1, dl => 1, fieldset => 1, figure => 1,
4411 footer => 1, h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1,
4412 h6 => 1, header => 1, hgroup => 1,
4413 menu => 1, nav => 1, ol => 1, p => 1,
4414 section => 1, ul => 1,
4415 ## NOTE: As normal, but drops leading newline
4416 pre => 1, listing => 1,
4417 ## NOTE: As normal, but interacts with the form element pointer
4418 form => 1,
4419
4420 table => 1,
4421 hr => 1,
4422 }->{$token->{tag_name}}) {
4423
4424 ## 1. When there is an opening |form| element:
4425 if ($token->{tag_name} eq 'form' and defined $self->{form_element}) {
4426 !!!cp ('t350');
4427 !!!parse-error (type => 'in form:form', token => $token);
4428 ## Ignore the token
4429 !!!nack ('t350.1');
4430 !!!next-token;
4431 next B;
4432 }
4433
4434 ## 2. Close the |p| element, if any.
4435 if ($token->{tag_name} ne 'table' or # The Hixie Quirk
4436 $self->{document}->manakai_compat_mode ne 'quirks') {
4437 ## has a p element in scope
4438 INSCOPE: for (reverse @{$self->{open_elements}}) {
4439 if ($_->[1] == P_EL) {
4440 !!!cp ('t344');
4441 !!!back-token; # <form>
4442 $token = {type => END_TAG_TOKEN, tag_name => 'p',
4443 line => $token->{line}, column => $token->{column}};
4444 next B;
4445 } elsif ($_->[1] & SCOPING_EL) {
4446 !!!cp ('t345');
4447 last INSCOPE;
4448 }
4449 } # INSCOPE
4450 }
4451
4452 ## 3. Close the opening <hn> element, if any.
4453 if ({h1 => 1, h2 => 1, h3 => 1,
4454 h4 => 1, h5 => 1, h6 => 1}->{$token->{tag_name}}) {
4455 if ($self->{open_elements}->[-1]->[1] == HEADING_EL) {
4456 !!!parse-error (type => 'not closed',
4457 text => $self->{open_elements}->[-1]->[0]->manakai_local_name,
4458 token => $token);
4459 pop @{$self->{open_elements}};
4460 }
4461 }
4462
4463 ## 4. Insertion.
4464 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4465 if ($token->{tag_name} eq 'pre' or $token->{tag_name} eq 'listing') {
4466 !!!nack ('t346.1');
4467 !!!next-token;
4468 if ($token->{type} == CHARACTER_TOKEN) {
4469 $token->{data} =~ s/^\x0A//;
4470 unless (length $token->{data}) {
4471 !!!cp ('t346');
4472 !!!next-token;
4473 } else {
4474 !!!cp ('t349');
4475 }
4476 } else {
4477 !!!cp ('t348');
4478 }
4479
4480 delete $self->{frameset_ok};
4481 } elsif ($token->{tag_name} eq 'form') {
4482 !!!cp ('t347.1');
4483 $self->{form_element} = $self->{open_elements}->[-1]->[0];
4484
4485 !!!nack ('t347.2');
4486 !!!next-token;
4487 } elsif ($token->{tag_name} eq 'table') {
4488 !!!cp ('t382');
4489 push @{$open_tables}, [$self->{open_elements}->[-1]->[0]];
4490
4491 delete $self->{frameset_ok};
4492
4493 $self->{insertion_mode} = IN_TABLE_IM;
4494
4495 !!!nack ('t382.1');
4496 !!!next-token;
4497 } elsif ($token->{tag_name} eq 'hr') {
4498 !!!cp ('t386');
4499 pop @{$self->{open_elements}};
4500
4501 !!!ack ('t386.1');
4502
4503 delete $self->{frameset_ok};
4504
4505 !!!next-token;
4506 } else {
4507 !!!nack ('t347.1');
4508 !!!next-token;
4509 }
4510 next B;
4511 } elsif ($token->{tag_name} eq 'li') {
4512 ## NOTE: As normal, but imply </li> when there's another <li> ...
4513
4514 ## NOTE: Special, Scope (<li><foo><li> == <li><foo><li/></foo></li>)::
4515 ## Interpreted as <li><foo/></li><li/> (non-conforming):
4516 ## blockquote (O9.27), center (O), dd (Fx3, O, S3.1.2, IE7),
4517 ## dt (Fx, O, S, IE), dl (O), fieldset (O, S, IE), form (Fx, O, S),
4518 ## hn (O), pre (O), applet (O, S), button (O, S), marquee (Fx, O, S),
4519 ## object (Fx)
4520 ## Generate non-tree (non-conforming):
4521 ## basefont (IE7 (where basefont is non-void)), center (IE),
4522 ## form (IE), hn (IE)
4523 ## address, div, p (<li><foo><li> == <li><foo/></li><li/>)::
4524 ## Interpreted as <li><foo><li/></foo></li> (non-conforming):
4525 ## div (Fx, S)
4526
4527 ## 1. Frameset-ng
4528 delete $self->{frameset_ok};
4529
4530 my $non_optional;
4531 my $i = -1;
4532
4533 ## 2.
4534 for my $node (reverse @{$self->{open_elements}}) {
4535 if ($node->[1] == LI_EL) {
4536 ## 3. (a) As if </li>
4537 {
4538 ## If no </li> - not applied
4539 #
4540
4541 ## Otherwise
4542
4543 ## 1. generate implied end tags, except for </li>
4544 #
4545
4546 ## 2. If current node != "li", parse error
4547 if ($non_optional) {
4548 !!!parse-error (type => 'not closed',
4549 text => $non_optional->[0]->manakai_local_name,
4550 token => $token);
4551 !!!cp ('t355');
4552 } else {
4553 !!!cp ('t356');
4554 }
4555
4556 ## 3. Pop
4557 splice @{$self->{open_elements}}, $i;
4558 }
4559
4560 last; ## 3. (b) goto 5.
4561 } elsif (
4562 ## NOTE: not "formatting" and not "phrasing"
4563 ($node->[1] & SPECIAL_EL or
4564 $node->[1] & SCOPING_EL) and
4565 ## NOTE: "li", "dt", and "dd" are in |SPECIAL_EL|.
4566 (not $node->[1] & ADDRESS_DIV_P_EL)
4567 ) {
4568 ## 4.
4569 !!!cp ('t357');
4570 last; ## goto 6.
4571 } elsif ($node->[1] & END_TAG_OPTIONAL_EL) {
4572 !!!cp ('t358');
4573 #
4574 } else {
4575 !!!cp ('t359');
4576 $non_optional ||= $node;
4577 #
4578 }
4579 ## 5.
4580 ## goto 3.
4581 $i--;
4582 }
4583
4584 ## 6. (a) has a |p| element in scope
4585 INSCOPE: for (reverse @{$self->{open_elements}}) {
4586 if ($_->[1] == P_EL) {
4587 !!!cp ('t353');
4588
4589 ## NOTE: |<p><li>|, for example.
4590
4591 !!!back-token; # <x>
4592 $token = {type => END_TAG_TOKEN, tag_name => 'p',
4593 line => $token->{line}, column => $token->{column}};
4594 next B;
4595 } elsif ($_->[1] & SCOPING_EL) {
4596 !!!cp ('t354');
4597 last INSCOPE;
4598 }
4599 } # INSCOPE
4600
4601 ## 6. (b) insert
4602 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4603 !!!nack ('t359.1');
4604 !!!next-token;
4605 next B;
4606 } elsif ($token->{tag_name} eq 'dt' or
4607 $token->{tag_name} eq 'dd') {
4608 ## NOTE: As normal, but imply </dt> or </dd> when ...
4609
4610 ## 1. Frameset-ng
4611 delete $self->{frameset_ok};
4612
4613 my $non_optional;
4614 my $i = -1;
4615
4616 ## 2.
4617 for my $node (reverse @{$self->{open_elements}}) {
4618 if ($node->[1] == DTDD_EL) {
4619 ## 3. (a) As if </dt> or </dd>
4620 {
4621 ## If no </dt> or </dd> - not applied
4622 #
4623
4624 ## Otherwise
4625
4626 ## 1. generate implied end tags, except for </dt> or </dd>
4627 #
4628
4629 ## 2. If current node != "dt"|"dd", parse error
4630 if ($non_optional) {
4631 !!!parse-error (type => 'not closed',
4632 text => $non_optional->[0]->manakai_local_name,
4633 token => $token);
4634 !!!cp ('t355.1');
4635 } else {
4636 !!!cp ('t356.1');
4637 }
4638
4639 ## 3. Pop
4640 splice @{$self->{open_elements}}, $i;
4641 }
4642
4643 last; ## 3. (b) goto 5.
4644 } elsif (
4645 ## NOTE: not "formatting" and not "phrasing"
4646 ($node->[1] & SPECIAL_EL or
4647 $node->[1] & SCOPING_EL) and
4648 ## NOTE: "li", "dt", and "dd" are in |SPECIAL_EL|.
4649
4650 (not $node->[1] & ADDRESS_DIV_P_EL)
4651 ) {
4652 ## 4.
4653 !!!cp ('t357.1');
4654 last; ## goto 5.
4655 } elsif ($node->[1] & END_TAG_OPTIONAL_EL) {
4656 !!!cp ('t358.1');
4657 #
4658 } else {
4659 !!!cp ('t359.1');
4660 $non_optional ||= $node;
4661 #
4662 }
4663 ## 5.
4664 ## goto 3.
4665 $i--;
4666 }
4667
4668 ## 6. (a) has a |p| element in scope
4669 INSCOPE: for (reverse @{$self->{open_elements}}) {
4670 if ($_->[1] == P_EL) {
4671 !!!cp ('t353.1');
4672 !!!back-token; # <x>
4673 $token = {type => END_TAG_TOKEN, tag_name => 'p',
4674 line => $token->{line}, column => $token->{column}};
4675 next B;
4676 } elsif ($_->[1] & SCOPING_EL) {
4677 !!!cp ('t354.1');
4678 last INSCOPE;
4679 }
4680 } # INSCOPE
4681
4682 ## 6. (b) insert
4683 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4684 !!!nack ('t359.2');
4685 !!!next-token;
4686 next B;
4687 } elsif ($token->{tag_name} eq 'plaintext') {
4688 ## NOTE: As normal, but effectively ends parsing
4689
4690 ## has a p element in scope
4691 INSCOPE: for (reverse @{$self->{open_elements}}) {
4692 if ($_->[1] == P_EL) {
4693 !!!cp ('t367');
4694 !!!back-token; # <plaintext>
4695 $token = {type => END_TAG_TOKEN, tag_name => 'p',
4696 line => $token->{line}, column => $token->{column}};
4697 next B;
4698 } elsif ($_->[1] & SCOPING_EL) {
4699 !!!cp ('t368');
4700 last INSCOPE;
4701 }
4702 } # INSCOPE
4703
4704 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4705
4706 $self->{content_model} = PLAINTEXT_CONTENT_MODEL;
4707
4708 !!!nack ('t368.1');
4709 !!!next-token;
4710 next B;
4711 } elsif ($token->{tag_name} eq 'a') {
4712 AFE: for my $i (reverse 0..$#$active_formatting_elements) {
4713 my $node = $active_formatting_elements->[$i];
4714 if ($node->[1] == A_EL) {
4715 !!!cp ('t371');
4716 !!!parse-error (type => 'in a:a', token => $token);
4717
4718 !!!back-token; # <a>
4719 $token = {type => END_TAG_TOKEN, tag_name => 'a',
4720 line => $token->{line}, column => $token->{column}};
4721 $formatting_end_tag->($token);
4722
4723 AFE2: for (reverse 0..$#$active_formatting_elements) {
4724 if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
4725 !!!cp ('t372');
4726 splice @$active_formatting_elements, $_, 1;
4727 last AFE2;
4728 }
4729 } # AFE2
4730 OE: for (reverse 0..$#{$self->{open_elements}}) {
4731 if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
4732 !!!cp ('t373');
4733 splice @{$self->{open_elements}}, $_, 1;
4734 last OE;
4735 }
4736 } # OE
4737 last AFE;
4738 } elsif ($node->[0] eq '#marker') {
4739 !!!cp ('t374');
4740 last AFE;
4741 }
4742 } # AFE
4743
4744 $reconstruct_active_formatting_elements->($insert_to_current);
4745
4746 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4747 push @$active_formatting_elements, $self->{open_elements}->[-1];
4748
4749 !!!nack ('t374.1');
4750 !!!next-token;
4751 next B;
4752 } elsif ($token->{tag_name} eq 'nobr') {
4753 $reconstruct_active_formatting_elements->($insert_to_current);
4754
4755 ## has a |nobr| element in scope
4756 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4757 my $node = $self->{open_elements}->[$_];
4758 if ($node->[1] == NOBR_EL) {
4759 !!!cp ('t376');
4760 !!!parse-error (type => 'in nobr:nobr', token => $token);
4761 !!!back-token; # <nobr>
4762 $token = {type => END_TAG_TOKEN, tag_name => 'nobr',
4763 line => $token->{line}, column => $token->{column}};
4764 next B;
4765 } elsif ($node->[1] & SCOPING_EL) {
4766 !!!cp ('t377');
4767 last INSCOPE;
4768 }
4769 } # INSCOPE
4770
4771 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4772 push @$active_formatting_elements, $self->{open_elements}->[-1];
4773
4774 !!!nack ('t377.1');
4775 !!!next-token;
4776 next B;
4777 } elsif ($token->{tag_name} eq 'button') {
4778 ## has a button element in scope
4779 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4780 my $node = $self->{open_elements}->[$_];
4781 if ($node->[1] == BUTTON_EL) {
4782 !!!cp ('t378');
4783 !!!parse-error (type => 'in button:button', token => $token);
4784 !!!back-token; # <button>
4785 $token = {type => END_TAG_TOKEN, tag_name => 'button',
4786 line => $token->{line}, column => $token->{column}};
4787 next B;
4788 } elsif ($node->[1] & SCOPING_EL) {
4789 !!!cp ('t379');
4790 last INSCOPE;
4791 }
4792 } # INSCOPE
4793
4794 $reconstruct_active_formatting_elements->($insert_to_current);
4795
4796 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4797
4798 ## TODO: associate with $self->{form_element} if defined
4799
4800 push @$active_formatting_elements, ['#marker', ''];
4801
4802 delete $self->{frameset_ok};
4803
4804 !!!nack ('t379.1');
4805 !!!next-token;
4806 next B;
4807 } elsif ({
4808 xmp => 1,
4809 iframe => 1,
4810 noembed => 1,
4811 noframes => 1, ## NOTE: This is an "as if in head" code clone.
4812 noscript => 0, ## TODO: 1 if scripting is enabled
4813 }->{$token->{tag_name}}) {
4814 if ($token->{tag_name} eq 'xmp') {
4815 !!!cp ('t381');
4816 $reconstruct_active_formatting_elements->($insert_to_current);
4817
4818 delete $self->{frameset_ok};
4819 } elsif ($token->{tag_name} eq 'iframe') {
4820 !!!cp ('t381.1');
4821 delete $self->{frameset_ok};
4822 } else {
4823 !!!cp ('t399');
4824 }
4825 ## NOTE: There is an "as if in body" code clone.
4826 $parse_rcdata->(CDATA_CONTENT_MODEL);
4827 next B;
4828 } elsif ($token->{tag_name} eq 'isindex') {
4829 !!!parse-error (type => 'isindex', token => $token);
4830
4831 if (defined $self->{form_element}) {
4832 !!!cp ('t389');
4833 ## Ignore the token
4834 !!!nack ('t389'); ## NOTE: Not acknowledged.
4835 !!!next-token;
4836 next B;
4837 } else {
4838 !!!ack ('t391.1');
4839
4840 my $at = $token->{attributes};
4841 my $form_attrs;
4842 $form_attrs->{action} = $at->{action} if $at->{action};
4843 my $prompt_attr = $at->{prompt};
4844 $at->{name} = {name => 'name', value => 'isindex'};
4845 delete $at->{action};
4846 delete $at->{prompt};
4847 my @tokens = (
4848 {type => START_TAG_TOKEN, tag_name => 'form',
4849 attributes => $form_attrs,
4850 line => $token->{line}, column => $token->{column}},
4851 {type => START_TAG_TOKEN, tag_name => 'hr',
4852 line => $token->{line}, column => $token->{column}},
4853 {type => START_TAG_TOKEN, tag_name => 'label',
4854 line => $token->{line}, column => $token->{column}},
4855 );
4856 if ($prompt_attr) {
4857 !!!cp ('t390');
4858 push @tokens, {type => CHARACTER_TOKEN, data => $prompt_attr->{value},
4859 #line => $token->{line}, column => $token->{column},
4860 };
4861 } else {
4862 !!!cp ('t391');
4863 push @tokens, {type => CHARACTER_TOKEN,
4864 data => 'This is a searchable index. Insert your search keywords here: ',
4865 #line => $token->{line}, column => $token->{column},
4866 }; # SHOULD
4867 ## TODO: make this configurable
4868 }
4869 push @tokens,
4870 {type => START_TAG_TOKEN, tag_name => 'input', attributes => $at,
4871 line => $token->{line}, column => $token->{column}},
4872 #{type => CHARACTER_TOKEN, data => ''}, # SHOULD
4873 {type => END_TAG_TOKEN, tag_name => 'label',
4874 line => $token->{line}, column => $token->{column}},
4875 {type => START_TAG_TOKEN, tag_name => 'hr',
4876 line => $token->{line}, column => $token->{column}},
4877 {type => END_TAG_TOKEN, tag_name => 'form',
4878 line => $token->{line}, column => $token->{column}};
4879 !!!back-token (@tokens);
4880 !!!next-token;
4881 next B;
4882 }
4883 } elsif ($token->{tag_name} eq 'textarea') {
4884 ## 1. Insert
4885 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4886
4887 ## Step 2 # XXX
4888 ## TODO: $self->{form_element} if defined
4889
4890 ## 2. Drop U+000A LINE FEED
4891 $self->{ignore_newline} = 1;
4892
4893 ## 3. RCDATA
4894 $self->{content_model} = RCDATA_CONTENT_MODEL;
4895 delete $self->{escape}; # MUST
4896
4897 ## 4., 6. Insertion mode
4898 $self->{insertion_mode} |= IN_CDATA_RCDATA_IM;
4899
4900 ## 5. Frameset-ng.
4901 delete $self->{frameset_ok};
4902
4903 !!!nack ('t392.1');
4904 !!!next-token;
4905 next B;
4906 } elsif ($token->{tag_name} eq 'optgroup' or
4907 $token->{tag_name} eq 'option') {
4908 ## has an |option| element in scope
4909 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4910 my $node = $self->{open_elements}->[$_];
4911 if ($node->[1] == OPTION_EL) {
4912 !!!cp ('t397.1');
4913 ## NOTE: As if </option>
4914 !!!back-token; # <option> or <optgroup>
4915 $token = {type => END_TAG_TOKEN, tag_name => 'option',
4916 line => $token->{line}, column => $token->{column}};
4917 next B;
4918 } elsif ($node->[1] & SCOPING_EL) {
4919 !!!cp ('t397.2');
4920 last INSCOPE;
4921 }
4922 } # INSCOPE
4923
4924 $reconstruct_active_formatting_elements->($insert_to_current);
4925
4926 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4927
4928 !!!nack ('t397.3');
4929 !!!next-token;
4930 redo B;
4931 } elsif ($token->{tag_name} eq 'rt' or
4932 $token->{tag_name} eq 'rp') {
4933 ## has a |ruby| element in scope
4934 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4935 my $node = $self->{open_elements}->[$_];
4936 if ($node->[1] == RUBY_EL) {
4937 !!!cp ('t398.1');
4938 ## generate implied end tags
4939 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
4940 !!!cp ('t398.2');
4941 pop @{$self->{open_elements}};
4942 }
4943 unless ($self->{open_elements}->[-1]->[1] == RUBY_EL) {
4944 !!!cp ('t398.3');
4945 !!!parse-error (type => 'not closed',
4946 text => $self->{open_elements}->[-1]->[0]
4947 ->manakai_local_name,
4948 token => $token);
4949 pop @{$self->{open_elements}}
4950 while not $self->{open_elements}->[-1]->[1] == RUBY_EL;
4951 }
4952 last INSCOPE;
4953 } elsif ($node->[1] & SCOPING_EL) {
4954 !!!cp ('t398.4');
4955 last INSCOPE;
4956 }
4957 } # INSCOPE
4958
4959 ## TODO: <non-ruby><rt> is not allowed.
4960
4961 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4962
4963 !!!nack ('t398.5');
4964 !!!next-token;
4965 redo B;
4966 } elsif ($token->{tag_name} eq 'math' or
4967 $token->{tag_name} eq 'svg') {
4968 $reconstruct_active_formatting_elements->($insert_to_current);
4969
4970 ## "Adjust MathML attributes" ('math' only) - done in insert-element-f
4971
4972 ## "adjust SVG attributes" ('svg' only) - done in insert-element-f
4973
4974 ## "adjust foreign attributes" - done in insert-element-f
4975
4976 !!!insert-element-f ($token->{tag_name} eq 'math' ? $MML_NS : $SVG_NS, $token->{tag_name}, $token->{attributes}, $token);
4977
4978 if ($self->{self_closing}) {
4979 pop @{$self->{open_elements}};
4980 !!!ack ('t398.6');
4981 } else {
4982 !!!cp ('t398.7');
4983 $self->{insertion_mode} |= IN_FOREIGN_CONTENT_IM;
4984 ## NOTE: |<body><math><mi><svg>| -> "in foreign content" insertion
4985 ## mode, "in body" (not "in foreign content") secondary insertion
4986 ## mode, maybe.
4987 }
4988
4989 !!!next-token;
4990 next B;
4991 } elsif ({
4992 caption => 1, col => 1, colgroup => 1, frame => 1,
4993 head => 1,
4994 tbody => 1, td => 1, tfoot => 1, th => 1,
4995 thead => 1, tr => 1,
4996 }->{$token->{tag_name}}) {
4997 !!!cp ('t401');
4998 !!!parse-error (type => 'in body',
4999 text => $token->{tag_name}, token => $token);
5000 ## Ignore the token
5001 !!!nack ('t401.1'); ## NOTE: |<col/>| or |<frame/>| here is an error.
5002 !!!next-token;
5003 next B;
5004 } elsif ($token->{tag_name} eq 'param' or
5005 $token->{tag_name} eq 'source') {
5006 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
5007 pop @{$self->{open_elements}};
5008
5009 !!!ack ('t398.5');
5010 !!!next-token;
5011 redo B;
5012 } else {
5013 if ($token->{tag_name} eq 'image') {
5014 !!!cp ('t384');
5015 !!!parse-error (type => 'image', token => $token);
5016 $token->{tag_name} = 'img';
5017 } else {
5018 !!!cp ('t385');
5019 }
5020
5021 ## NOTE: There is an "as if <br>" code clone.
5022 $reconstruct_active_formatting_elements->($insert_to_current);
5023
5024 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
5025
5026 if ({
5027 applet => 1, marquee => 1, object => 1,
5028 }->{$token->{tag_name}}) {
5029 !!!cp ('t380');
5030
5031 push @$active_formatting_elements, ['#marker', ''];
5032
5033 delete $self->{frameset_ok};
5034
5035 !!!nack ('t380.1');
5036 } elsif ({
5037 b => 1, big => 1, em => 1, font => 1, i => 1,
5038 s => 1, small => 1, strike => 1,
5039 strong => 1, tt => 1, u => 1,
5040 }->{$token->{tag_name}}) {
5041 !!!cp ('t375');
5042 push @$active_formatting_elements, $self->{open_elements}->[-1];
5043 !!!nack ('t375.1');
5044 } elsif ($token->{tag_name} eq 'input') {
5045 !!!cp ('t388');
5046 ## TODO: associate with $self->{form_element} if defined
5047 pop @{$self->{open_elements}};
5048 !!!ack ('t388.2');
5049 } elsif ({
5050 area => 1, basefont => 1, bgsound => 1, br => 1,
5051 embed => 1, img => 1, spacer => 1, wbr => 1,
5052 keygen => 1,
5053 }->{$token->{tag_name}}) {
5054 !!!cp ('t388.1');
5055
5056 pop @{$self->{open_elements}};
5057
5058 delete $self->{frameset_ok};
5059
5060 !!!ack ('t388.3');
5061 } elsif ($token->{tag_name} eq 'select') {
5062 ## TODO: associate with $self->{form_element} if defined
5063
5064 delete $self->{frameset_ok};
5065
5066 if ($self->{insertion_mode} & TABLE_IMS or
5067 $self->{insertion_mode} & BODY_TABLE_IMS or
5068 ($self->{insertion_mode} & IM_MASK) == IN_COLUMN_GROUP_IM) {
5069 !!!cp ('t400.1');
5070 $self->{insertion_mode} = IN_SELECT_IN_TABLE_IM;
5071 } else {
5072 !!!cp ('t400.2');
5073 $self->{insertion_mode} = IN_SELECT_IM;
5074 }
5075 !!!nack ('t400.3');
5076 } else {
5077 !!!nack ('t402');
5078 }
5079
5080 !!!next-token;
5081 next B;
5082 }
5083 } elsif ($token->{type} == END_TAG_TOKEN) {
5084 if ($token->{tag_name} eq 'body' or $token->{tag_name} eq 'html') {
5085
5086 ## 1. If not "have an element in scope":
5087 ## "has a |body| element in scope"
5088 my $i;
5089 INSCOPE: {
5090 for (reverse @{$self->{open_elements}}) {
5091 if ($_->[1] == BODY_EL) {
5092 !!!cp ('t405');
5093 $i = $_;
5094 last INSCOPE;
5095 } elsif ($_->[1] & SCOPING_EL) {
5096 !!!cp ('t405.1');
5097 last;
5098 }
5099 }
5100
5101 ## NOTE: |<marquee></body>|, |<svg><foreignobject></body>|,
5102 ## and fragment cases.
5103
5104 !!!parse-error (type => 'unmatched end tag',
5105 text => $token->{tag_name}, token => $token);
5106 ## Ignore the token. (</body> or </html>)
5107 !!!next-token;
5108 next B;
5109 } # INSCOPE
5110
5111 ## 2. If unclosed elements:
5112 for (@{$self->{open_elements}}) {
5113 unless ($_->[1] & ALL_END_TAG_OPTIONAL_EL ||
5114 $_->[1] == OPTGROUP_EL ||
5115 $_->[1] == OPTION_EL ||
5116 $_->[1] == RUBY_COMPONENT_EL) {
5117 !!!cp ('t403');
5118 !!!parse-error (type => 'not closed',
5119 text => $_->[0]->manakai_local_name,
5120 token => $token);
5121 last;
5122 } else {
5123 !!!cp ('t404');
5124 }
5125 }
5126
5127 ## 3. Switch the insertion mode.
5128 $self->{insertion_mode} = AFTER_BODY_IM;
5129 if ($token->{tag_name} eq 'body') {
5130 !!!next-token;
5131 } else { # html
5132 ## Reprocess.
5133 }
5134 next B;
5135 } elsif ({
5136 ## NOTE: End tags for non-phrasing flow content elements
5137
5138 ## NOTE: The normal ones
5139 address => 1, article => 1, aside => 1, blockquote => 1,
5140 center => 1, datagrid => 1, details => 1, dialog => 1,
5141 dir => 1, div => 1, dl => 1, fieldset => 1, figure => 1,
5142 footer => 1, header => 1, hgroup => 1,
5143 listing => 1, menu => 1, nav => 1,
5144 ol => 1, pre => 1, section => 1, ul => 1,
5145
5146 ## NOTE: As normal, but ... optional tags
5147 dd => 1, dt => 1, li => 1,
5148
5149 applet => 1, button => 1, marquee => 1, object => 1,
5150 }->{$token->{tag_name}}) {
5151 ## NOTE: Code for <li> start tags includes "as if </li>" code.
5152 ## Code for <dt> or <dd> start tags includes "as if </dt> or
5153 ## </dd>" code.
5154
5155 ## has an element in scope
5156 my $i;
5157 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5158 my $node = $self->{open_elements}->[$_];
5159 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
5160 !!!cp ('t410');
5161 $i = $_;
5162 last INSCOPE;
5163 } elsif ($node->[1] & SCOPING_EL) {
5164 !!!cp ('t411');
5165 last INSCOPE;
5166 }
5167 } # INSCOPE
5168
5169 unless (defined $i) { # has an element in scope
5170 !!!cp ('t413');
5171 !!!parse-error (type => 'unmatched end tag',
5172 text => $token->{tag_name}, token => $token);
5173 ## NOTE: Ignore the token.
5174 } else {
5175 ## Step 1. generate implied end tags
5176 while ({
5177 ## END_TAG_OPTIONAL_EL
5178 dd => ($token->{tag_name} ne 'dd'),
5179 dt => ($token->{tag_name} ne 'dt'),
5180 li => ($token->{tag_name} ne 'li'),
5181 option => 1,
5182 optgroup => 1,
5183 p => 1,
5184 rt => 1,
5185 rp => 1,
5186 }->{$self->{open_elements}->[-1]->[0]->manakai_local_name}) {
5187 !!!cp ('t409');
5188 pop @{$self->{open_elements}};
5189 }
5190
5191 ## Step 2.
5192 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
5193 ne $token->{tag_name}) {
5194 !!!cp ('t412');
5195 !!!parse-error (type => 'not closed',
5196 text => $self->{open_elements}->[-1]->[0]
5197 ->manakai_local_name,
5198 token => $token);
5199 } else {
5200 !!!cp ('t414');
5201 }
5202
5203 ## Step 3.
5204 splice @{$self->{open_elements}}, $i;
5205
5206 ## Step 4.
5207 $clear_up_to_marker->()
5208 if {
5209 applet => 1, button => 1, marquee => 1, object => 1,
5210 }->{$token->{tag_name}};
5211 }
5212 !!!next-token;
5213 next B;
5214 } elsif ($token->{tag_name} eq 'form') {
5215 ## NOTE: As normal, but interacts with the form element pointer
5216
5217 undef $self->{form_element};
5218
5219 ## has an element in scope
5220 my $i;
5221 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5222 my $node = $self->{open_elements}->[$_];
5223 if ($node->[1] == FORM_EL) {
5224 !!!cp ('t418');
5225 $i = $_;
5226 last INSCOPE;
5227 } elsif ($node->[1] & SCOPING_EL) {
5228 !!!cp ('t419');
5229 last INSCOPE;
5230 }
5231 } # INSCOPE
5232
5233 unless (defined $i) { # has an element in scope
5234 !!!cp ('t421');
5235 !!!parse-error (type => 'unmatched end tag',
5236 text => $token->{tag_name}, token => $token);
5237 ## NOTE: Ignore the token.
5238 } else {
5239 ## Step 1. generate implied end tags
5240 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
5241 !!!cp ('t417');
5242 pop @{$self->{open_elements}};
5243 }
5244
5245 ## Step 2.
5246 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
5247 ne $token->{tag_name}) {
5248 !!!cp ('t417.1');
5249 !!!parse-error (type => 'not closed',
5250 text => $self->{open_elements}->[-1]->[0]
5251 ->manakai_local_name,
5252 token => $token);
5253 } else {
5254 !!!cp ('t420');
5255 }
5256
5257 ## Step 3.
5258 splice @{$self->{open_elements}}, $i;
5259 }
5260
5261 !!!next-token;
5262 next B;
5263 } elsif ({
5264 ## NOTE: As normal, except acts as a closer for any ...
5265 h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
5266 }->{$token->{tag_name}}) {
5267 ## has an element in scope
5268 my $i;
5269 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5270 my $node = $self->{open_elements}->[$_];
5271 if ($node->[1] == HEADING_EL) {
5272 !!!cp ('t423');
5273 $i = $_;
5274 last INSCOPE;
5275 } elsif ($node->[1] & SCOPING_EL) {
5276 !!!cp ('t424');
5277 last INSCOPE;
5278 }
5279 } # INSCOPE
5280
5281 unless (defined $i) { # has an element in scope
5282 !!!cp ('t425.1');
5283 !!!parse-error (type => 'unmatched end tag',
5284 text => $token->{tag_name}, token => $token);
5285 ## NOTE: Ignore the token.
5286 } else {
5287 ## Step 1. generate implied end tags
5288 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
5289 !!!cp ('t422');
5290 pop @{$self->{open_elements}};
5291 }
5292
5293 ## Step 2.
5294 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
5295 ne $token->{tag_name}) {
5296 !!!cp ('t425');
5297 !!!parse-error (type => 'unmatched end tag',
5298 text => $token->{tag_name}, token => $token);
5299 } else {
5300 !!!cp ('t426');
5301 }
5302
5303 ## Step 3.
5304 splice @{$self->{open_elements}}, $i;
5305 }
5306
5307 !!!next-token;
5308 next B;
5309 } elsif ($token->{tag_name} eq 'p') {
5310 ## NOTE: As normal, except </p> implies <p> and ...
5311
5312 ## has an element in scope
5313 my $non_optional;
5314 my $i;
5315 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5316 my $node = $self->{open_elements}->[$_];
5317 if ($node->[1] == P_EL) {
5318 !!!cp ('t410.1');
5319 $i = $_;
5320 last INSCOPE;
5321 } elsif ($node->[1] & SCOPING_EL) {
5322 !!!cp ('t411.1');
5323 last INSCOPE;
5324 } elsif ($node->[1] & END_TAG_OPTIONAL_EL) {
5325 ## NOTE: |END_TAG_OPTIONAL_EL| includes "p"
5326 !!!cp ('t411.2');
5327 #
5328 } else {
5329 !!!cp ('t411.3');
5330 $non_optional ||= $node;
5331 #
5332 }
5333 } # INSCOPE
5334
5335 if (defined $i) {
5336 ## 1. Generate implied end tags
5337 #
5338
5339 ## 2. If current node != "p", parse error
5340 if ($non_optional) {
5341 !!!cp ('t412.1');
5342 !!!parse-error (type => 'not closed',
5343 text => $non_optional->[0]->manakai_local_name,
5344 token => $token);
5345 } else {
5346 !!!cp ('t414.1');
5347 }
5348
5349 ## 3. Pop
5350 splice @{$self->{open_elements}}, $i;
5351 } else {
5352 !!!cp ('t413.1');
5353 !!!parse-error (type => 'unmatched end tag',
5354 text => $token->{tag_name}, token => $token);
5355
5356 !!!cp ('t415.1');
5357 ## As if <p>, then reprocess the current token
5358 my $el;
5359 !!!create-element ($el, $HTML_NS, 'p',, $token);
5360 $insert->($el);
5361 ## NOTE: Not inserted into |$self->{open_elements}|.
5362 }
5363
5364 !!!next-token;
5365 next B;
5366 } elsif ({
5367 a => 1,
5368 b => 1, big => 1, em => 1, font => 1, i => 1,
5369 nobr => 1, s => 1, small => 1, strike => 1,
5370 strong => 1, tt => 1, u => 1,
5371 }->{$token->{tag_name}}) {
5372 !!!cp ('t427');
5373 $formatting_end_tag->($token);
5374 next B;
5375 } elsif ($token->{tag_name} eq 'br') {
5376 !!!cp ('t428');
5377 !!!parse-error (type => 'unmatched end tag',
5378 text => 'br', token => $token);
5379
5380 ## As if <br>
5381 $reconstruct_active_formatting_elements->($insert_to_current);
5382
5383 my $el;
5384 !!!create-element ($el, $HTML_NS, 'br',, $token);
5385 $insert->($el);
5386
5387 ## Ignore the token.
5388 !!!next-token;
5389 next B;
5390 } else {
5391 if ($token->{tag_name} eq 'sarcasm') {
5392 sleep 0.001; # take a deep breath
5393 }
5394
5395 ## Step 1
5396 my $node_i = -1;
5397 my $node = $self->{open_elements}->[$node_i];
5398
5399 ## Step 2
5400 S2: {
5401 my $node_tag_name = $node->[0]->manakai_local_name;
5402 $node_tag_name =~ tr/A-Z/a-z/; # for SVG camelCase tag names
5403 if ($node_tag_name eq $token->{tag_name}) {
5404 ## Step 1
5405 ## generate implied end tags
5406 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
5407 !!!cp ('t430');
5408 ## NOTE: |<ruby><rt></ruby>|.
5409 ## ISSUE: <ruby><rt></rt> will also take this code path,
5410 ## which seems wrong.
5411 pop @{$self->{open_elements}};
5412 $node_i++;
5413 }
5414
5415 ## Step 2
5416 my $current_tag_name
5417 = $self->{open_elements}->[-1]->[0]->manakai_local_name;
5418 $current_tag_name =~ tr/A-Z/a-z/;
5419 if ($current_tag_name ne $token->{tag_name}) {
5420 !!!cp ('t431');
5421 ## NOTE: <x><y></x>
5422 !!!parse-error (type => 'not closed',
5423 text => $self->{open_elements}->[-1]->[0]
5424 ->manakai_local_name,
5425 token => $token);
5426 } else {
5427 !!!cp ('t432');
5428 }
5429
5430 ## Step 3
5431 splice @{$self->{open_elements}}, $node_i if $node_i < 0;
5432
5433 !!!next-token;
5434 last S2;
5435 } else {
5436 ## Step 3
5437 if (not ($node->[1] & FORMATTING_EL) and
5438 #not $phrasing_category->{$node->[1]} and
5439 ($node->[1] & SPECIAL_EL or
5440 $node->[1] & SCOPING_EL)) {
5441 !!!cp ('t433');
5442 !!!parse-error (type => 'unmatched end tag',
5443 text => $token->{tag_name}, token => $token);
5444 ## Ignore the token
5445 !!!next-token;
5446 last S2;
5447
5448 ## NOTE: |<span><dd></span>a|: In Safari 3.1.2 and Opera
5449 ## 9.27, "a" is a child of <dd> (conforming). In
5450 ## Firefox 3.0.2, "a" is a child of <body>. In WinIE 7,
5451 ## "a" is a child of both <body> and <dd>.
5452 }
5453
5454 !!!cp ('t434');
5455 }
5456
5457 ## Step 4
5458 $node_i--;
5459 $node = $self->{open_elements}->[$node_i];
5460
5461 ## Step 5;
5462 redo S2;
5463 } # S2
5464 next B;
5465 }
5466 }
5467 next B;
5468 } continue { # B
5469 if ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) {
5470 ## NOTE: The code below is executed in cases where it does not have
5471 ## to be, but it is harmless even in those cases.
5472 ## has an element in scope
5473 INSCOPE: {
5474 for (reverse 0..$#{$self->{open_elements}}) {
5475 my $node = $self->{open_elements}->[$_];
5476 if ($node->[1] & FOREIGN_EL) {
5477 last INSCOPE;
5478 } elsif ($node->[1] & SCOPING_EL) {
5479 last;
5480 }
5481 }
5482
5483 ## NOTE: No foreign element in scope.
5484 $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
5485 } # INSCOPE
5486 }
5487 } # B
5488
5489 ## Stop parsing # MUST
5490
5491 ## TODO: script stuffs
5492 } # _tree_construct_main
5493
5494 ## XXX: How this method is organized is somewhat out of date, although
5495 ## it still does what the current spec documents.
## Replace the children of a node with the result of parsing a
## character string as HTML (the |innerHTML| setter).
##
## Invoked as a class method.  Arguments after the class name:
##
##   $node        - /context/.  A Document (node type 9) or an Element
##                  (node type 1).  Any other node type is fatal.
##   $_[0]        - The character string to parse.  It is read through
##                  |@_| (a reference to it is handed to the decode
##                  handle), not copied into a lexical.
##   $onerror     - Optional code reference invoked with key/value
##                  pairs (|line|, |column|, |type|, ...) for each
##                  error.  In the Element case the default handler
##                  |warn|s; in the Document case the value is passed
##                  to |parse_char_string| as is.
##   $get_wrapper - Optional code reference that receives the input
##                  handle and returns the handle to read from.  The
##                  default returns its argument unchanged.
##
## No meaningful value is returned.  Dies if |$node| is neither a
## Document nor an Element.
sub set_inner_html ($$$$;$) {
  my $class = shift;
  my $node = shift; # /context/
  #my $s = \$_[0];
  my $onerror = $_[1];
  my $get_wrapper = $_[2] || sub ($) { return $_[0] };

  my $nt = $node->node_type;
  if ($nt == 9) { # Document (invoke the algorithm with no /context/ element)
    # MUST

    ## Step 1 # MUST
    ## TODO: If the document has an active parser, ...
    ## ISSUE: There is an issue in the spec.

    ## Step 2 # MUST
    ## NOTE: Iterate over a copy, since children are removed while
    ## walking the list.
    my @cn = @{$node->child_nodes};
    for (@cn) {
      $node->remove_child ($_);
    }

    ## Step 3, 4, 5 # MUST
    $class->parse_char_string ($_[0] => $node, $onerror, $get_wrapper);
  } elsif ($nt == 1) { # Element (invoke the algorithm with /context/ element)
    ## TODO: If non-html element

    ## NOTE: Most of this code is copied from |parse_string|

    ## TODO: Support for $get_wrapper
    ## NOTE(review): |$get_wrapper| is already applied to the input
    ## handle in Step 8 below; confirm whether this TODO is stale.

    ## F1. Create an HTML document.
    my $this_doc = $node->owner_document;
    my $doc = $this_doc->implementation->create_document;
    $doc->manakai_is_html (1);

    ## F2. Propagate quirkness flag
    $doc->manakai_compat_mode ($this_doc->manakai_compat_mode);

    ## F3. Create an HTML parser
    my $p = $class->new;
    $p->{document} = $doc;

    ## Step 8 # MUST
    $p->{line_prev} = $p->{line} = 1;
    $p->{column_prev} = $p->{column} = 0;
    require Whatpm::Charset::DecodeHandle;
    my $input = Whatpm::Charset::DecodeHandle::CharString->new (\($_[0]));
    $input = $get_wrapper->($input);

    ## Callback that sets |nc| to the code point of the next input
    ## character (-1 at the end of the input) and updates the
    ## line/column bookkeeping.
    ## NOTE: The variable must be named |$self|, as it is referenced by
    ## the |!!!parse-error| macro below.
    $p->{set_nc} = sub {
      my $self = shift;

      my $char = '';
      if (defined $self->{next_nc}) {
        ## A character stashed by the CR handling below.
        $char = $self->{next_nc};
        delete $self->{next_nc};
        $self->{nc} = ord $char;
      } else {
        $self->{char_buffer} = '';
        $self->{char_buffer_pos} = 0;

        ## Fast path: read a run of characters other than NULL, LF,
        ## and CR into the buffer and return its first character.
        my $count = $input->manakai_read_until
            ($self->{char_buffer}, qr/[^\x00\x0A\x0D]/,
             $self->{char_buffer_pos});
        if ($count) {
          $self->{line_prev} = $self->{line};
          $self->{column_prev} = $self->{column};
          $self->{column}++;
          $self->{nc}
              = ord substr ($self->{char_buffer},
                            $self->{char_buffer_pos}++, 1);
          return;
        }

        if ($input->read ($char, 1)) {
          $self->{nc} = ord $char;
        } else {
          ## End of the input.
          $self->{nc} = -1;
          return;
        }
      }

      ($p->{line_prev}, $p->{column_prev}) = ($p->{line}, $p->{column});
      $p->{column}++;

      if ($self->{nc} == 0x000A) { # LF
        $p->{line}++;
        $p->{column} = 0;
        !!!cp ('i1');
      } elsif ($self->{nc} == 0x000D) { # CR
        ## TODO: support for abort/streaming
        ## CR and CR LF are both reported as a single LF.  A character
        ## following the CR that is not LF is stashed for the next
        ## call.
        my $next = '';
        if ($input->read ($next, 1) and $next ne "\x0A") {
          $self->{next_nc} = $next;
        }
        $self->{nc} = 0x000A; # LF # MUST
        $p->{line}++;
        $p->{column} = 0;
        !!!cp ('i2');
      } elsif ($self->{nc} == 0x0000) { # NULL
        !!!cp ('i4');
        !!!parse-error (type => 'NULL');
        $self->{nc} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
      }
    };

    ## Callback that appends to |$_[0]|, at offset |$_[2]|, a run of
    ## characters that are neither in the character class body |$_[1]|
    ## nor NULL, LF, or CR.  Returns the number of characters read.
    $p->{read_until} = sub {
      #my ($scalar, $specials_range, $offset) = @_;

      ## A stashed character is pending; nothing is read in bulk.
      return 0 if defined $p->{next_nc};

      my $pattern = qr/[^$_[1]\x00\x0A\x0D]/;
      my $offset = $_[2] || 0;

      if ($p->{char_buffer_pos} < length $p->{char_buffer}) {
        ## Serve the request from what is left in the buffer.
        pos ($p->{char_buffer}) = $p->{char_buffer_pos};
        if ($p->{char_buffer} =~ /\G(?>$pattern)+/) {
          substr ($_[0], $offset)
              = substr ($p->{char_buffer}, $-[0], $+[0] - $-[0]);
          my $count = $+[0] - $-[0];
          if ($count) {
            $p->{column} += $count;
            $p->{char_buffer_pos} += $count;
            $p->{line_prev} = $p->{line};
            $p->{column_prev} = $p->{column} - 1;
            $p->{nc} = -1;
          }
          return $count;
        } else {
          return 0;
        }
      } else {
        ## The buffer is exhausted; read from the input handle.
        my $count = $input->manakai_read_until ($_[0], $pattern, $_[2]);
        if ($count) {
          $p->{column} += $count;
          $p->{column_prev} += $count;
          $p->{nc} = -1;
        }
        return $count;
      }
    }; # $p->{read_until}

    ## Error handler: the caller's, or one that |warn|s with the
    ## position of the token if it has one.
    my $ponerror = $onerror || sub {
      my (%opt) = @_;
      my $line = $opt{line};
      my $column = $opt{column};
      if (defined $opt{token} and defined $opt{token}->{line}) {
        $line = $opt{token}->{line};
        $column = $opt{token}->{column};
      }
      warn "Parse error ($opt{type}) at line $line column $column\n";
    };
    ## NOTE: The current position comes first so that |line| and
    ## |column| given by the reporter take precedence.
    $p->{parse_error} = sub {
      $ponerror->(line => $p->{line}, column => $p->{column}, @_);
    };

    my $char_onerror = sub {
      my (undef, $type, %opt) = @_;
      $ponerror->(layer => 'encode',
                  line => $p->{line}, column => $p->{column} + 1,
                  %opt, type => $type);
    }; # $char_onerror
    $input->onerror ($char_onerror);

    $p->_initialize_tokenizer;
    $p->_initialize_tree_constructor;

    ## F4. If /context/ is not undef...

    ## F4.1. content model flag
    my $node_ln = $node->manakai_local_name;
    $p->{content_model} = {
      title => RCDATA_CONTENT_MODEL,
      textarea => RCDATA_CONTENT_MODEL,
      style => CDATA_CONTENT_MODEL,
      script => CDATA_CONTENT_MODEL,
      xmp => CDATA_CONTENT_MODEL,
      iframe => CDATA_CONTENT_MODEL,
      noembed => CDATA_CONTENT_MODEL,
      noframes => CDATA_CONTENT_MODEL,
      noscript => CDATA_CONTENT_MODEL,
      plaintext => PLAINTEXT_CONTENT_MODEL,
    }->{$node_ln};
    $p->{content_model} = PCDATA_CONTENT_MODEL
        unless defined $p->{content_model};

    $p->{inner_html_node} = [$node, $el_category->{$node_ln}];
      ## TODO: Foreign element OK?

    ## F4.2. Root |html| element
    my $root = $doc->create_element_ns
      ('http://www.w3.org/1999/xhtml', [undef, 'html']);

    ## F4.3.
    $doc->append_child ($root);

    ## F4.4.
    push @{$p->{open_elements}}, [$root, $el_category->{html}];

    undef $p->{head_element};
    undef $p->{head_element_inserted};

    ## F4.5.
    $p->_reset_insertion_mode;

    ## F4.6.
    ## Walk from /context/ up through its ancestors; the first HTML
    ## |form| element found becomes the form element pointer.
    my $anode = $node;
    AN: while (defined $anode) {
      if ($anode->node_type == 1) {
        my $nsuri = $anode->namespace_uri;
        if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml') {
          if ($anode->manakai_local_name eq 'form') {
            !!!cp ('i5');
            $p->{form_element} = $anode;
            last AN;
          }
        }
      }
      $anode = $anode->parent_node;
    } # AN

    ## F.5. Set the input stream.
    $p->{confident} = 1; ## Confident: irrelevant.

    ## F.6. Start the parser.
    {
      ## NOTE: The |!!!next-token| macro refers to |$self|.
      my $self = $p;
      !!!next-token;
    }
    $p->_tree_construction_main;

    ## F.7.
    my @cn = @{$node->child_nodes};
    for (@cn) {
      $node->remove_child ($_);
    }
    ## ISSUE: mutation events? read-only?

    ## Step 11 # MUST
    @cn = @{$root->child_nodes};
    for (@cn) {
      $this_doc->adopt_node ($_);
      $node->append_child ($_);
    }
    ## ISSUE: mutation events?

    $p->_terminate_tree_constructor;

    ## Break the reference cycles through |$p|.  The |set_nc| and
    ## |read_until| closures capture |$p| and |$input|, and the error
    ## handler installed on |$input| captures |$p| as well, so deleting
    ## |parse_error| alone would leave the parser and its temporary
    ## document alive forever.
    delete $p->{set_nc};
    delete $p->{read_until};
    delete $p->{parse_error}; # delete loop
  } else {
    die "$0: |set_inner_html| is not defined for node of type $nt";
  }
} # set_inner_html
5749
5750 } # tree construction stage
5751
## Exception class derived from |Error|.
## NOTE(review): Presumably thrown to request that parsing be
## restarted; confirm against the code that throws and catches it.
package Whatpm::HTML::RestartParser;
our @ISA;
push @ISA, 'Error';

1;
5756 # $Date: 2009/09/06 13:52:06 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24