/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Contents of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.123 - (hide annotations) (download) (as text)
Sun Apr 6 10:32:00 2008 UTC (16 years, 7 months ago) by wakaba
Branch: MAIN
Changes since 1.122: +525 -446 lines
File MIME type: application/x-wais-source
++ whatpm/Whatpm/ChangeLog	6 Apr 2008 10:31:33 -0000
	* HTML.pm.src: The ->[1] property of stack entries are now
	replaced by constants representing element category.

2008-04-06  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.2 package Whatpm::HTML;
2 wakaba 1.1 use strict;
3 wakaba 1.123 our $VERSION=do{my @r=(q$Revision: 1.122 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.63 use Error qw(:try);
5 wakaba 1.1
6 wakaba 1.18 ## ISSUE:
7     ## var doc = implementation.createDocument (null, null, null);
8     ## doc.write ('');
9     ## alert (doc.compatMode);
10 wakaba 1.1
11 wakaba 1.70 ## TODO: Control characters and noncharacters are not allowed (HTML5 revision 1263)
12     ## TODO: 1252 parse error (revision 1264)
13     ## TODO: 8859-11 = 874 (revision 1271)
14    
## Element category flags.  Every constant occupies a bit of its own, so
## several categories can be combined into one mask with a bitwise OR.
sub A_EL ()               { 1 << 0 }
sub ADDRESS_EL ()         { 1 << 1 }
sub BODY_EL ()            { 1 << 2 }
sub BUTTON_EL ()          { 1 << 3 }
sub CAPTION_EL ()         { 1 << 4 }
sub DD_EL ()              { 1 << 5 }
sub DIV_EL ()             { 1 << 6 }
sub DT_EL ()              { 1 << 7 }
sub FORM_EL ()            { 1 << 8 }
sub FORMATTING_EL ()      { 1 << 9 }
sub FRAMESET_EL ()        { 1 << 10 }
sub HEADING_EL ()         { 1 << 11 }
sub HTML_EL ()            { 1 << 12 }
sub LI_EL ()              { 1 << 13 }
sub NOBR_EL ()            { 1 << 14 }
sub OPTION_EL ()          { 1 << 15 }
sub OPTGROUP_EL ()        { 1 << 16 }
sub P_EL ()               { 1 << 17 }
sub SELECT_EL ()          { 1 << 18 }
sub TABLE_EL ()           { 1 << 19 }
sub TABLE_CELL_EL ()      { 1 << 20 }
sub TABLE_ROW_EL ()       { 1 << 21 }
sub TABLE_ROW_GROUP_EL () { 1 << 22 }
sub MISC_SCOPING_EL ()    { 1 << 23 }
sub MISC_SPECIAL_EL ()    { 1 << 24 }
40    
## Composite masks built by OR-ing the single-bit element category flags.

## Table, table row and table row group flags.
sub TABLE_ROWS_EL () {
  (TABLE_EL | TABLE_ROW_EL | TABLE_ROW_GROUP_EL)
}

## Flags of |dd|, |dt|, |li| and |p|.
sub END_TAG_OPTIONAL_EL () {
  (DD_EL | DT_EL | LI_EL | P_EL)
}

## |END_TAG_OPTIONAL_EL| extended with the body, html, table cell,
## table row and table row group flags.
sub ALL_END_TAG_OPTIONAL_EL () {
  (END_TAG_OPTIONAL_EL | BODY_EL | HTML_EL |
   TABLE_CELL_EL | TABLE_ROW_EL | TABLE_ROW_GROUP_EL)
}

## Flags collected under the name "scoping".
sub SCOPING_EL () {
  (BUTTON_EL | CAPTION_EL | HTML_EL | TABLE_EL | TABLE_CELL_EL |
   MISC_SCOPING_EL)
}

## Html and table flags.
sub TABLE_SCOPING_EL () {
  (HTML_EL | TABLE_EL)
}

## Html and table row group flags.
sub TABLE_ROWS_SCOPING_EL () {
  (HTML_EL | TABLE_ROW_GROUP_EL)
}

## Html and table row flags.
sub TABLE_ROW_SCOPING_EL () {
  (HTML_EL | TABLE_ROW_EL)
}

## Flags collected under the name "special".
sub SPECIAL_EL () {
  (ADDRESS_EL | BODY_EL | DIV_EL | END_TAG_OPTIONAL_EL | FORM_EL |
   FRAMESET_EL | HEADING_EL | OPTION_EL | OPTGROUP_EL | SELECT_EL |
   TABLE_ROW_EL | TABLE_ROW_GROUP_EL | MISC_SPECIAL_EL)
}
102    
## Maps an HTML element name to its element category flag(s).
my $el_category = {
  ## Elements that have a category flag of their own.
  address => ADDRESS_EL,
  body => BODY_EL,
  button => BUTTON_EL,
  caption => CAPTION_EL,
  dd => DD_EL,
  div => DIV_EL,
  dt => DT_EL,
  form => FORM_EL,
  frameset => FRAMESET_EL,
  html => HTML_EL,
  li => LI_EL,
  optgroup => OPTGROUP_EL,
  option => OPTION_EL,
  p => P_EL,
  select => SELECT_EL,
  table => TABLE_EL,
  tr => TABLE_ROW_EL,

  ## Formatting elements; |a| and |nobr| carry an additional flag.
  a => A_EL | FORMATTING_EL,
  nobr => NOBR_EL | FORMATTING_EL,
  (map { ($_ => FORMATTING_EL) } qw(
    b big em font i s small strike strong tt u
  )),

  ## Elements sharing a flag with their siblings.
  (map { ($_ => HEADING_EL) } qw(h1 h2 h3 h4 h5 h6)),
  (map { ($_ => TABLE_ROW_GROUP_EL) } qw(tbody tfoot thead)),
  (map { ($_ => TABLE_CELL_EL) } qw(td th)),

  ## Remaining scoping elements.
  (map { ($_ => MISC_SCOPING_EL) } qw(applet marquee object)),

  ## Remaining special elements.
  (map { ($_ => MISC_SPECIAL_EL) } qw(
    area base basefont bgsound blockquote br center col colgroup
    dir dl embed fieldset frame head hr iframe img input isindex
    link listing menu meta noembed noframes noscript ol param
    plaintext pre script spacer style textarea title ul wbr
  )),
};
187    
## Set of tag names, each mapped to 1.
## NOTE(review): judging by the variable name these are the tags on which
## a trailing slash is tolerated; the code using it is outside this section.
my $permitted_slash_tag_name = +{
  map { ($_ => 1) } qw(
    base link meta hr br img embed param area col input
  )
};
201    
## Maps each code point in the range 0x80..0x9F to a replacement code
## point.  The values follow the windows-1252 layout; positions that
## windows-1252 leaves unassigned map to U+FFFD.
my $c1_entity_char = do {
  my @replacement = (
    0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, # 0x80..0x87
    0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD, # 0x88..0x8F
    0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, # 0x90..0x97
    0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178, # 0x98..0x9F
  );
  my %char_for;
  @char_for{0x80 .. 0x9F} = @replacement;
  \%char_for;
}; # $c1_entity_char
236 wakaba 1.1
## Set of element names belonging to the "special" category
## (each name maps to 1).
my $special_category = +{
  map { ($_ => 1) } qw(
    address area base basefont bgsound blockquote body br center col
    colgroup dd dir div dl dt embed fieldset form frame frameset
    h1 h2 h3 h4 h5 h6 head hr iframe image img input isindex li link
    listing menu meta noembed noframes noscript ol optgroup option p
    param plaintext pre script select spacer style tbody textarea
    tfoot thead title tr ul wbr
  )
};
## Set of element names belonging to the "scoping" category
## (each name maps to 1).
my $scoping_category = +{
  map { ($_ => 1) } qw(
    applet button caption html marquee object table td th
  )
};
## Set of element names belonging to the "formatting" category
## (each name maps to 1).
my $formatting_category = {
  a => 1, b => 1, big => 1, em => 1, font => 1, i => 1, nobr => 1,
  ## |strike| used to be misspelled here, which left the real element
  ## out of the set.
  s => 1, small => 1, strike => 1, strong => 1, tt => 1, u => 1,
};
# $phrasing_category: all other elements
258    
## Decodes a byte string and parses it as an HTML document.
##
## Arguments (after the invocant, which may be a parser object or a
## class name; for a class name a new parser is created):
##   $charset - encoding label for the bytes.  If undefined, the encoding
##              is guessed from the first 1024 bytes and falls back to
##              windows-1252; the result is then marked as not confident.
##   bytes    - the byte string, or a reference to it.
##   others   - handed unchanged to |parse_char_string| after the decoded
##              string.
##
## If the |change_encoding| hook installed here is invoked with a
## different encoding, it throws |Whatpm::HTML::RestartParser|; the input
## is then decoded again with the new encoding and parsed a second time.
##
## Returns the value returned by |parse_char_string|.
sub parse_byte_string ($$$$;$) {
  my $self = ref $_[0] ? shift : shift->new;
  my $charset = shift;
  my $bytes_s = ref $_[0] ? $_[0] : \($_[0]);
  my $s;

  ## Every path below (explicit charset, sniffed charset, and the restart
  ## handler) calls |Encode::decode|, so the module is loaded up front
  ## instead of only when a charset is supplied.
  require Encode; ## TODO: decode(utf8) don't delete BOM

  if (defined $charset) {
    $s = \ (Encode::decode ($charset, $$bytes_s));
    $self->{input_encoding} = lc $charset; ## TODO: normalize name
    $self->{confident} = 1;
  } else {
    ## TODO: Implement HTML5 detection algorithm
    require Whatpm::Charset::UniversalCharDet;
    $charset = Whatpm::Charset::UniversalCharDet->detect_byte_string
        (substr ($$bytes_s, 0, 1024));
    $charset ||= 'windows-1252';
    $s = \ (Encode::decode ($charset, $$bytes_s));
    $self->{input_encoding} = $charset;
    $self->{confident} = 0;
  }

  $self->{change_encoding} = sub {
    my $self = shift;
    my $charset = lc shift;
    my $token = shift;
    ## TODO: if $charset is supported
    ## TODO: normalize charset name

    ## "Change the encoding" algorithm:

    ## Step 1
    if ($charset eq 'utf-16') { ## ISSUE: UTF-16BE -> UTF-8? UTF-16LE -> UTF-8?
      $charset = 'utf-8';
    }

    ## Step 2
    if (defined $self->{input_encoding} and
        $self->{input_encoding} eq $charset) {
      $self->{confident} = 1;
      return;
    }

    !!!parse-error (type => 'charset label detected:'.$self->{input_encoding}.
        ':'.$charset, level => 'w', token => $token);

    ## Step 3
    # if (can) {
      ## change the encoding on the fly.
      #$self->{confident} = 1;
      #return;
    # }

    ## Step 4
    throw Whatpm::HTML::RestartParser (charset => $charset);
  }; # $self->{change_encoding}

  my @args = @_; shift @args; # $s
  my $return;
  try {
    $return = $self->parse_char_string ($s, @args);
  } catch Whatpm::HTML::RestartParser with {
    ## Decode the original bytes again with the encoding requested by
    ## the document and parse from the start.
    my $charset = shift->{charset};
    $s = \ (Encode::decode ($charset, $$bytes_s));
    $self->{input_encoding} = $charset; ## TODO: normalize
    $self->{confident} = 1;
    $return = $self->parse_char_string ($s, @args);
  };
  return $return;
} # parse_byte_string
329    
330 wakaba 1.71 ## NOTE: HTML5 spec says that the encoding layer MUST NOT strip BOM
331     ## and the HTML layer MUST ignore it. However, we do strip the BOM in
332     ## the encoding layer and the HTML layer does not ignore any U+FEFF,
333     ## because the core part of our HTML parser expects a string of characters,
334     ## not a string of bytes or code units or anything which might contain a BOM.
335     ## Therefore, any parser interface that accepts a string of bytes,
336     ## such as |parse_byte_string| in this module, must ensure that it does
337     ## strip the BOM and never strips any ZWNBSP.
338    
## |parse_char_string| is another name for |parse_string|.
*parse_char_string = \&parse_string;

## Parses a string of characters into a document.
##
## Arguments (after the invocant, which may be a parser object or a
## class name; for a class name a new parser is created):
##   string   - the character string, or a reference to it.
##   document - the document object to fill; its child nodes are
##              removed first.
##   onerror  - optional code reference called with key/value pairs for
##              each parse error.  The default handler emits a warning.
##
## Returns the document.
sub parse_string ($$$;$) {
  my $self = ref $_[0] ? shift : shift->new;
  my $text = ref $_[0] ? $_[0] : \($_[0]);
  $self->{document} = $_[1];
  @{$self->{document}->child_nodes} = ();

  ## NOTE: |set_inner_html| copies most of this method's code

  unless (exists $self->{confident}) {
    $self->{confident} = 1;
  }
  if (defined $self->{input_encoding}) {
    $self->{document}->input_encoding ($self->{input_encoding});
  }

  ## Offset of the next unread character in |$$text|.
  my $offset = 0;
  $self->{line} = 1;
  $self->{line_prev} = 1;
  $self->{column} = 0;
  $self->{column_prev} = 0;

  ## Reads one character into |$self->{next_char}| (-1 at the end of
  ## input) while keeping the previous-character history and the
  ## line/column counters up to date.
  $self->{set_next_char} = sub {
    my $self = shift;

    pop @{$self->{prev_char}};
    unshift @{$self->{prev_char}}, $self->{next_char};

    if ($offset >= length $$text) {
      $self->{next_char} = -1;
      return;
    }
    $self->{next_char} = ord substr $$text, $offset++, 1;

    $self->{line_prev} = $self->{line};
    $self->{column_prev} = $self->{column};
    $self->{column}++;

    my $code = $self->{next_char};
    if ($code == 0x000A or $code == 0x000D) { # LF or CR
      if ($code == 0x000D) { # CR
        ## A CR LF pair is consumed as a whole and reported as one LF.
        $offset++ if substr ($$text, $offset, 1) eq "\x0A";
        $self->{next_char} = 0x000A; # LF # MUST
      }
      $self->{line}++;
      $self->{column} = 0;
    } elsif ($code > 0x10FFFF) {
      $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    } elsif ($code == 0x0000) { # NULL
      !!!parse-error (type => 'NULL');
      $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    }
  };
  $self->{prev_char} = [-1, -1, -1];
  $self->{next_char} = -1;

  ## The default handler prefers the position recorded in the token,
  ## if one is given, over the current position.
  my $onerror = $_[2] || sub {
    my %opt = @_;
    my ($line, $column) = $opt{token}
        ? ($opt{token}->{line}, $opt{token}->{column})
        : ($opt{line}, $opt{column});
    warn "Parse error ($opt{type}) at line $line column $column\n";
  };
  $self->{parse_error} = sub {
    $onerror->(line => $self->{line}, column => $self->{column}, @_);
  };

  $self->_initialize_tokenizer;
  $self->_initialize_tree_constructor;
  $self->_construct_tree;
  $self->_terminate_tree_constructor;

  ## The closure above refers to |$self|; drop it to break the cycle.
  delete $self->{parse_error}; # remove loop

  return $self->{document};
} # parse_string
406    
## Creates a parser object with default hooks installed.
##
## The |set_next_char| default reports end of input by setting
## |next_char| to -1; the other hooks do nothing.
##
## Returns the new object, blessed into |$class|.
sub new ($) {
  my $class = shift;
  my $self = bless {}, $class;
  $self->{set_next_char} = sub {
    ## The parser is taken from the first argument, as the replacement
    ## hook installed by |parse_string| does.  Closing over |$self| here
    ## would create a reference cycle that keeps the object alive.
    $_[0]->{next_char} = -1;
  };
  $self->{parse_error} = sub {
    #
  };
  $self->{change_encoding} = sub {
    # if ($_[0] is a supported encoding) {
    #   run "change the encoding" algorithm;
    #   throw Whatpm::HTML::RestartParser (charset => $new_encoding);
    # }
  };
  $self->{application_cache_selection} = sub {
    #
  };
  return $self;
} # new
427    
## Content model feature bits.
sub CM_ENTITY ()         { 1 << 0 } # & markup in data
sub CM_LIMITED_MARKUP () { 1 << 1 } # < markup in data (limited)
sub CM_FULL_MARKUP ()    { 1 << 2 } # < markup in data (any)

## Content models, expressed as combinations of the feature bits.
sub PLAINTEXT_CONTENT_MODEL () { 0 }
sub CDATA_CONTENT_MODEL ()     { CM_LIMITED_MARKUP }
sub RCDATA_CONTENT_MODEL ()    { (CM_ENTITY | CM_LIMITED_MARKUP) }
sub PCDATA_CONTENT_MODEL ()    { (CM_ENTITY | CM_FULL_MARKUP) }
436    
## Tokenizer states.  The numbers are plain identifiers compared with
## |==|; they carry no bit structure.
use constant {
  DATA_STATE                                    => 0,
  ENTITY_DATA_STATE                             => 1,
  TAG_OPEN_STATE                                => 2,
  CLOSE_TAG_OPEN_STATE                          => 3,
  TAG_NAME_STATE                                => 4,
  BEFORE_ATTRIBUTE_NAME_STATE                   => 5,
  ATTRIBUTE_NAME_STATE                          => 6,
  AFTER_ATTRIBUTE_NAME_STATE                    => 7,
  BEFORE_ATTRIBUTE_VALUE_STATE                  => 8,
  ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE           => 9,
  ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE           => 10,
  ATTRIBUTE_VALUE_UNQUOTED_STATE                => 11,
  ENTITY_IN_ATTRIBUTE_VALUE_STATE               => 12,
  MARKUP_DECLARATION_OPEN_STATE                 => 13,
  COMMENT_START_STATE                           => 14,
  COMMENT_START_DASH_STATE                      => 15,
  COMMENT_STATE                                 => 16,
  COMMENT_END_STATE                             => 17,
  COMMENT_END_DASH_STATE                        => 18,
  BOGUS_COMMENT_STATE                           => 19,
  DOCTYPE_STATE                                 => 20,
  BEFORE_DOCTYPE_NAME_STATE                     => 21,
  DOCTYPE_NAME_STATE                            => 22,
  AFTER_DOCTYPE_NAME_STATE                      => 23,
  BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE        => 24,
  DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE => 25,
  DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE => 26,
  AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE         => 27,
  BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE        => 28,
  DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE => 29,
  DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE => 30,
  AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE         => 31,
  BOGUS_DOCTYPE_STATE                           => 32,
  AFTER_ATTRIBUTE_VALUE_QUOTED_STATE            => 33,
};
471 wakaba 1.57
## Token types, stored in the |type| member of a token.
use constant {
  DOCTYPE_TOKEN     => 1,
  COMMENT_TOKEN     => 2,
  START_TAG_TOKEN   => 3,
  END_TAG_TOKEN     => 4,
  END_OF_FILE_TOKEN => 5,
  CHARACTER_TOKEN   => 6,
};
478    
## Insertion mode group bits.  An insertion mode constant is one or more
## of these bits OR-ed with a small number in the two lowest bits.
sub AFTER_HTML_IMS () { 1 << 2 }
sub HEAD_IMS ()       { 1 << 3 }
sub BODY_IMS ()       { 1 << 4 }
sub BODY_TABLE_IMS () { 1 << 5 }
sub TABLE_IMS ()      { 1 << 6 }
sub ROW_IMS ()        { 1 << 7 }
sub BODY_AFTER_IMS () { 1 << 8 }
sub FRAME_IMS ()      { 1 << 9 }
sub SELECT_IMS ()     { 1 << 10 }

## NOTE: "initial" and "before html" insertion modes have no constants.

## NOTE: "after after body" insertion mode.
sub AFTER_HTML_BODY_IM () { (AFTER_HTML_IMS | BODY_AFTER_IMS) }

## NOTE: "after after frameset" insertion mode.
sub AFTER_HTML_FRAMESET_IM () { (AFTER_HTML_IMS | FRAME_IMS) }

## Head-related insertion modes.
sub IN_HEAD_IM ()          { (HEAD_IMS | 0) }
sub IN_HEAD_NOSCRIPT_IM () { (HEAD_IMS | 1) }
sub AFTER_HEAD_IM ()       { (HEAD_IMS | 2) }
sub BEFORE_HEAD_IM ()      { (HEAD_IMS | 3) }

## Body-related insertion modes.
sub IN_BODY_IM ()    { BODY_IMS }
sub IN_CELL_IM ()    { (BODY_IMS | BODY_TABLE_IMS | 1) }
sub IN_CAPTION_IM () { (BODY_IMS | BODY_TABLE_IMS | 2) }

## Table-related insertion modes.
sub IN_ROW_IM ()        { (TABLE_IMS | ROW_IMS | 1) }
sub IN_TABLE_BODY_IM () { (TABLE_IMS | ROW_IMS | 2) }
sub IN_TABLE_IM ()      { TABLE_IMS }

sub AFTER_BODY_IM () { BODY_AFTER_IMS }

## Frameset-related insertion modes.
sub IN_FRAMESET_IM ()    { (FRAME_IMS | 1) }
sub AFTER_FRAMESET_IM () { (FRAME_IMS | 2) }

## Select-related insertion modes.
sub IN_SELECT_IM ()          { (SELECT_IMS | 1) }
sub IN_SELECT_IN_TABLE_IM () { (SELECT_IMS | 2) }

sub IN_COLUMN_GROUP_IM () { 2 }
513    
514 wakaba 1.1 ## Implementations MUST act as if state machine in the spec
515    
## Resets the tokenizer: data state, PCDATA content model, no token or
## attribute in progress, empty character and token queues.  The first
## input character is read as part of the reset.
sub _initialize_tokenizer ($) {
  my $self = shift;

  $self->{state} = DATA_STATE; # MUST
  $self->{content_model} = PCDATA_CONTENT_MODEL; # be

  ## |current_token| is a start tag, end tag, comment, or DOCTYPE.
  $self->{$_} = undef for qw(
    current_token
    current_attribute
    last_emitted_start_tag_name
    last_attribute_value_state
  );

  $self->{char} = [];
  # $self->{next_char}
  !!!next-input-character;
  $self->{token} = [];
  # $self->{escape}
} # _initialize_tokenizer
530    
531     ## A token has:
532 wakaba 1.55 ## ->{type} == DOCTYPE_TOKEN, START_TAG_TOKEN, END_TAG_TOKEN, COMMENT_TOKEN,
533     ## CHARACTER_TOKEN, or END_OF_FILE_TOKEN
534     ## ->{name} (DOCTYPE_TOKEN)
535     ## ->{tag_name} (START_TAG_TOKEN, END_TAG_TOKEN)
536     ## ->{public_identifier} (DOCTYPE_TOKEN)
537     ## ->{system_identifier} (DOCTYPE_TOKEN)
538 wakaba 1.75 ## ->{quirks} == 1 or 0 (DOCTYPE_TOKEN): "force-quirks" flag
539 wakaba 1.55 ## ->{attributes} isa HASH (START_TAG_TOKEN, END_TAG_TOKEN)
540 wakaba 1.66 ## ->{name}
541     ## ->{value}
542     ## ->{has_reference} == 1 or 0
543 wakaba 1.55 ## ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN)
544 wakaba 1.1
545     ## Emitted token MUST immediately be handled by the tree construction state.
546    
547     ## Before each step, UA MAY check to see if either one of the scripts in
548     ## "list of scripts that will execute as soon as possible" or the first
549     ## script in the "list of scripts that will execute asynchronously",
550     ## has completed loading. If one has, then it MUST be executed
551     ## and removed from the list.
552    
553 wakaba 1.59 ## NOTE: HTML5 "Writing HTML documents" section, applied to
554     ## documents and not to user agents and conformance checkers,
555     ## contains some requirements that are not detected by the
556     ## parsing algorithm:
557     ## - Some requirements on character encoding declarations. ## TODO
558     ## - "Elements MUST NOT contain content that their content model disallows."
559     ## ... Some are parse error, some are not (will be reported by c.c.).
560     ## - Polytheistic slash SHOULD NOT be used. (Applied only to atheists.) ## TODO
561     ## - Text (in elements, attributes, and comments) SHOULD NOT contain
562     ## control characters other than space characters. ## TODO: (what is control character? C0, C1 and DEL? Unicode control character?)
563    
564     ## TODO: HTML5 poses authors two SHOULD-level requirements that cannot
565     ## be detected by the HTML5 parsing algorithm:
566     ## - Text,
567    
568 wakaba 1.1 sub _get_next_token ($) {
569     my $self = shift;
570     if (@{$self->{token}}) {
571     return shift @{$self->{token}};
572     }
573    
574     A: {
575 wakaba 1.57 if ($self->{state} == DATA_STATE) {
576 wakaba 1.76 if ($self->{next_char} == 0x0026) { # &
577 wakaba 1.72 if ($self->{content_model} & CM_ENTITY and # PCDATA | RCDATA
578     not $self->{escape}) {
579 wakaba 1.77 !!!cp (1);
580 wakaba 1.57 $self->{state} = ENTITY_DATA_STATE;
581 wakaba 1.1 !!!next-input-character;
582     redo A;
583     } else {
584 wakaba 1.77 !!!cp (2);
585 wakaba 1.1 #
586     }
587 wakaba 1.76 } elsif ($self->{next_char} == 0x002D) { # -
588 wakaba 1.40 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
589 wakaba 1.13 unless ($self->{escape}) {
590 wakaba 1.76 if ($self->{prev_char}->[0] == 0x002D and # -
591     $self->{prev_char}->[1] == 0x0021 and # !
592     $self->{prev_char}->[2] == 0x003C) { # <
593 wakaba 1.77 !!!cp (3);
594 wakaba 1.13 $self->{escape} = 1;
595 wakaba 1.77 } else {
596     !!!cp (4);
597 wakaba 1.13 }
598 wakaba 1.77 } else {
599     !!!cp (5);
600 wakaba 1.13 }
601     }
602    
603     #
604 wakaba 1.76 } elsif ($self->{next_char} == 0x003C) { # <
605 wakaba 1.40 if ($self->{content_model} & CM_FULL_MARKUP or # PCDATA
606     (($self->{content_model} & CM_LIMITED_MARKUP) and # CDATA | RCDATA
607 wakaba 1.13 not $self->{escape})) {
608 wakaba 1.77 !!!cp (6);
609 wakaba 1.57 $self->{state} = TAG_OPEN_STATE;
610 wakaba 1.1 !!!next-input-character;
611     redo A;
612     } else {
613 wakaba 1.77 !!!cp (7);
614 wakaba 1.1 #
615     }
616 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
617 wakaba 1.13 if ($self->{escape} and
618 wakaba 1.40 ($self->{content_model} & CM_LIMITED_MARKUP)) { # RCDATA | CDATA
619 wakaba 1.76 if ($self->{prev_char}->[0] == 0x002D and # -
620     $self->{prev_char}->[1] == 0x002D) { # -
621 wakaba 1.77 !!!cp (8);
622 wakaba 1.13 delete $self->{escape};
623 wakaba 1.77 } else {
624     !!!cp (9);
625 wakaba 1.13 }
626 wakaba 1.77 } else {
627     !!!cp (10);
628 wakaba 1.13 }
629    
630     #
631 wakaba 1.76 } elsif ($self->{next_char} == -1) {
632 wakaba 1.77 !!!cp (11);
633 wakaba 1.112 !!!emit ({type => END_OF_FILE_TOKEN,
634     line => $self->{line}, column => $self->{column}});
635 wakaba 1.1 last A; ## TODO: ok?
636 wakaba 1.77 } else {
637     !!!cp (12);
638 wakaba 1.1 }
639     # Anything else
640 wakaba 1.55 my $token = {type => CHARACTER_TOKEN,
641 wakaba 1.112 data => chr $self->{next_char},
642 wakaba 1.120 line => $self->{line}, column => $self->{column},
643 wakaba 1.118 };
644 wakaba 1.1 ## Stay in the data state
645     !!!next-input-character;
646    
647     !!!emit ($token);
648    
649     redo A;
650 wakaba 1.57 } elsif ($self->{state} == ENTITY_DATA_STATE) {
651 wakaba 1.1 ## (cannot happen in CDATA state)
652 wakaba 1.112
653 wakaba 1.120 my ($l, $c) = ($self->{line_prev}, $self->{column_prev});
654 wakaba 1.1
655 wakaba 1.72 my $token = $self->_tokenize_attempt_to_consume_an_entity (0, -1);
656 wakaba 1.1
657 wakaba 1.57 $self->{state} = DATA_STATE;
658 wakaba 1.1 # next-input-character is already done
659    
660     unless (defined $token) {
661 wakaba 1.77 !!!cp (13);
662 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '&',
663 wakaba 1.120 line => $l, column => $c,
664 wakaba 1.118 });
665 wakaba 1.1 } else {
666 wakaba 1.77 !!!cp (14);
667 wakaba 1.1 !!!emit ($token);
668     }
669    
670     redo A;
671 wakaba 1.57 } elsif ($self->{state} == TAG_OPEN_STATE) {
672 wakaba 1.40 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
673 wakaba 1.76 if ($self->{next_char} == 0x002F) { # /
674 wakaba 1.77 !!!cp (15);
675 wakaba 1.1 !!!next-input-character;
676 wakaba 1.57 $self->{state} = CLOSE_TAG_OPEN_STATE;
677 wakaba 1.1 redo A;
678     } else {
679 wakaba 1.77 !!!cp (16);
680 wakaba 1.1 ## reconsume
681 wakaba 1.57 $self->{state} = DATA_STATE;
682 wakaba 1.1
683 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '<',
684 wakaba 1.120 line => $self->{line_prev},
685     column => $self->{column_prev},
686 wakaba 1.118 });
687 wakaba 1.1
688     redo A;
689     }
690 wakaba 1.40 } elsif ($self->{content_model} & CM_FULL_MARKUP) { # PCDATA
691 wakaba 1.76 if ($self->{next_char} == 0x0021) { # !
692 wakaba 1.77 !!!cp (17);
693 wakaba 1.57 $self->{state} = MARKUP_DECLARATION_OPEN_STATE;
694 wakaba 1.1 !!!next-input-character;
695     redo A;
696 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
697 wakaba 1.77 !!!cp (18);
698 wakaba 1.57 $self->{state} = CLOSE_TAG_OPEN_STATE;
699 wakaba 1.1 !!!next-input-character;
700     redo A;
701 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
702     $self->{next_char} <= 0x005A) { # A..Z
703 wakaba 1.77 !!!cp (19);
704 wakaba 1.1 $self->{current_token}
705 wakaba 1.55 = {type => START_TAG_TOKEN,
706 wakaba 1.112 tag_name => chr ($self->{next_char} + 0x0020),
707     line => $self->{line_prev},
708     column => $self->{column_prev}};
709 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
710 wakaba 1.1 !!!next-input-character;
711     redo A;
712 wakaba 1.76 } elsif (0x0061 <= $self->{next_char} and
713     $self->{next_char} <= 0x007A) { # a..z
714 wakaba 1.77 !!!cp (20);
715 wakaba 1.55 $self->{current_token} = {type => START_TAG_TOKEN,
716 wakaba 1.112 tag_name => chr ($self->{next_char}),
717     line => $self->{line_prev},
718     column => $self->{column_prev}};
719 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
720 wakaba 1.1 !!!next-input-character;
721     redo A;
722 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
723 wakaba 1.77 !!!cp (21);
724 wakaba 1.115 !!!parse-error (type => 'empty start tag',
725     line => $self->{line_prev},
726     column => $self->{column_prev});
727 wakaba 1.57 $self->{state} = DATA_STATE;
728 wakaba 1.1 !!!next-input-character;
729    
730 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '<>',
731 wakaba 1.120 line => $self->{line_prev},
732     column => $self->{column_prev},
733 wakaba 1.118 });
734 wakaba 1.1
735     redo A;
736 wakaba 1.76 } elsif ($self->{next_char} == 0x003F) { # ?
737 wakaba 1.77 !!!cp (22);
738 wakaba 1.115 !!!parse-error (type => 'pio',
739     line => $self->{line_prev},
740     column => $self->{column_prev});
741 wakaba 1.57 $self->{state} = BOGUS_COMMENT_STATE;
742 wakaba 1.112 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
743 wakaba 1.120 line => $self->{line_prev},
744     column => $self->{column_prev},
745 wakaba 1.118 };
746 wakaba 1.76 ## $self->{next_char} is intentionally left as is
747 wakaba 1.1 redo A;
748     } else {
749 wakaba 1.77 !!!cp (23);
750 wakaba 1.3 !!!parse-error (type => 'bare stago');
751 wakaba 1.57 $self->{state} = DATA_STATE;
752 wakaba 1.1 ## reconsume
753    
754 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '<',
755 wakaba 1.120 line => $self->{line_prev},
756     column => $self->{column_prev},
757 wakaba 1.118 });
758 wakaba 1.1
759     redo A;
760     }
761     } else {
762 wakaba 1.40 die "$0: $self->{content_model} in tag open";
763 wakaba 1.1 }
764 wakaba 1.57 } elsif ($self->{state} == CLOSE_TAG_OPEN_STATE) {
765 wakaba 1.113 my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</"
766 wakaba 1.40 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
767 wakaba 1.23 if (defined $self->{last_emitted_start_tag_name}) {
768 wakaba 1.112
769 wakaba 1.30 ## NOTE: <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>
770 wakaba 1.23 my @next_char;
771     TAGNAME: for (my $i = 0; $i < length $self->{last_emitted_start_tag_name}; $i++) {
772 wakaba 1.76 push @next_char, $self->{next_char};
773 wakaba 1.23 my $c = ord substr ($self->{last_emitted_start_tag_name}, $i, 1);
774     my $C = 0x0061 <= $c && $c <= 0x007A ? $c - 0x0020 : $c;
775 wakaba 1.76 if ($self->{next_char} == $c or $self->{next_char} == $C) {
776 wakaba 1.77 !!!cp (24);
777 wakaba 1.23 !!!next-input-character;
778     next TAGNAME;
779     } else {
780 wakaba 1.77 !!!cp (25);
781 wakaba 1.76 $self->{next_char} = shift @next_char; # reconsume
782 wakaba 1.23 !!!back-next-input-character (@next_char);
783 wakaba 1.57 $self->{state} = DATA_STATE;
784 wakaba 1.23
785 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '</',
786 wakaba 1.120 line => $l, column => $c,
787 wakaba 1.118 });
788 wakaba 1.23
789     redo A;
790     }
791     }
792 wakaba 1.76 push @next_char, $self->{next_char};
793 wakaba 1.23
794 wakaba 1.76 unless ($self->{next_char} == 0x0009 or # HT
795     $self->{next_char} == 0x000A or # LF
796     $self->{next_char} == 0x000B or # VT
797     $self->{next_char} == 0x000C or # FF
798     $self->{next_char} == 0x0020 or # SP
799     $self->{next_char} == 0x003E or # >
800     $self->{next_char} == 0x002F or # /
801     $self->{next_char} == -1) {
802 wakaba 1.77 !!!cp (26);
803 wakaba 1.76 $self->{next_char} = shift @next_char; # reconsume
804 wakaba 1.1 !!!back-next-input-character (@next_char);
805 wakaba 1.57 $self->{state} = DATA_STATE;
806 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '</',
807 wakaba 1.120 line => $l, column => $c,
808 wakaba 1.118 });
809 wakaba 1.1 redo A;
810 wakaba 1.23 } else {
811 wakaba 1.77 !!!cp (27);
812 wakaba 1.76 $self->{next_char} = shift @next_char;
813 wakaba 1.23 !!!back-next-input-character (@next_char);
814     # and consume...
815 wakaba 1.1 }
816 wakaba 1.23 } else {
817     ## No start tag token has ever been emitted
818 wakaba 1.77 !!!cp (28);
819 wakaba 1.23 # next-input-character is already done
820 wakaba 1.57 $self->{state} = DATA_STATE;
821 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '</',
822 wakaba 1.120 line => $l, column => $c,
823 wakaba 1.118 });
824 wakaba 1.1 redo A;
825     }
826     }
827    
828 wakaba 1.76 if (0x0041 <= $self->{next_char} and
829     $self->{next_char} <= 0x005A) { # A..Z
830 wakaba 1.77 !!!cp (29);
831 wakaba 1.112 $self->{current_token}
832     = {type => END_TAG_TOKEN,
833     tag_name => chr ($self->{next_char} + 0x0020),
834     line => $l, column => $c};
835 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
836 wakaba 1.1 !!!next-input-character;
837     redo A;
838 wakaba 1.76 } elsif (0x0061 <= $self->{next_char} and
839     $self->{next_char} <= 0x007A) { # a..z
840 wakaba 1.77 !!!cp (30);
841 wakaba 1.55 $self->{current_token} = {type => END_TAG_TOKEN,
842 wakaba 1.112 tag_name => chr ($self->{next_char}),
843     line => $l, column => $c};
844 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
845 wakaba 1.1 !!!next-input-character;
846     redo A;
847 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
848 wakaba 1.77 !!!cp (31);
849 wakaba 1.115 !!!parse-error (type => 'empty end tag',
850     line => $self->{line_prev}, ## "<" in "</>"
851     column => $self->{column_prev} - 1);
852 wakaba 1.57 $self->{state} = DATA_STATE;
853 wakaba 1.1 !!!next-input-character;
854     redo A;
855 wakaba 1.76 } elsif ($self->{next_char} == -1) {
856 wakaba 1.77 !!!cp (32);
857 wakaba 1.3 !!!parse-error (type => 'bare etago');
858 wakaba 1.57 $self->{state} = DATA_STATE;
859 wakaba 1.1 # reconsume
860    
861 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '</',
862 wakaba 1.120 line => $l, column => $c,
863 wakaba 1.118 });
864 wakaba 1.1
865     redo A;
866     } else {
867 wakaba 1.77 !!!cp (33);
868 wakaba 1.3 !!!parse-error (type => 'bogus end tag');
869 wakaba 1.57 $self->{state} = BOGUS_COMMENT_STATE;
870 wakaba 1.112 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
871 wakaba 1.120 line => $self->{line_prev}, # "<" of "</"
872     column => $self->{column_prev} - 1,
873 wakaba 1.118 };
874 wakaba 1.76 ## $self->{next_char} is intentionally left as is
875 wakaba 1.1 redo A;
876     }
877 wakaba 1.57 } elsif ($self->{state} == TAG_NAME_STATE) {
878 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
879     $self->{next_char} == 0x000A or # LF
880     $self->{next_char} == 0x000B or # VT
881     $self->{next_char} == 0x000C or # FF
882     $self->{next_char} == 0x0020) { # SP
883 wakaba 1.77 !!!cp (34);
884 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
885 wakaba 1.1 !!!next-input-character;
886     redo A;
887 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
888 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
889 wakaba 1.77 !!!cp (35);
890 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
891 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
892 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
893 wakaba 1.78 #if ($self->{current_token}->{attributes}) {
894     # ## NOTE: This should never be reached.
895     # !!! cp (36);
896     # !!! parse-error (type => 'end tag attribute');
897     #} else {
898 wakaba 1.77 !!!cp (37);
899 wakaba 1.78 #}
900 wakaba 1.1 } else {
901     die "$0: $self->{current_token}->{type}: Unknown token type";
902     }
903 wakaba 1.57 $self->{state} = DATA_STATE;
904 wakaba 1.1 !!!next-input-character;
905    
906     !!!emit ($self->{current_token}); # start tag or end tag
907    
908     redo A;
909 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
910     $self->{next_char} <= 0x005A) { # A..Z
911 wakaba 1.77 !!!cp (38);
912 wakaba 1.76 $self->{current_token}->{tag_name} .= chr ($self->{next_char} + 0x0020);
913 wakaba 1.1 # start tag or end tag
914     ## Stay in this state
915     !!!next-input-character;
916     redo A;
917 wakaba 1.76 } elsif ($self->{next_char} == -1) {
918 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
919 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
920 wakaba 1.77 !!!cp (39);
921 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
922 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
923 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
924 wakaba 1.78 #if ($self->{current_token}->{attributes}) {
925     # ## NOTE: This state should never be reached.
926     # !!! cp (40);
927     # !!! parse-error (type => 'end tag attribute');
928     #} else {
929 wakaba 1.77 !!!cp (41);
930 wakaba 1.78 #}
931 wakaba 1.1 } else {
932     die "$0: $self->{current_token}->{type}: Unknown token type";
933     }
934 wakaba 1.57 $self->{state} = DATA_STATE;
935 wakaba 1.1 # reconsume
936    
937     !!!emit ($self->{current_token}); # start tag or end tag
938    
939     redo A;
940 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
941 wakaba 1.1 !!!next-input-character;
942 wakaba 1.76 if ($self->{next_char} == 0x003E and # >
943 wakaba 1.55 $self->{current_token}->{type} == START_TAG_TOKEN and
944 wakaba 1.1 $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
945     # permitted slash
946 wakaba 1.77 !!!cp (42);
947 wakaba 1.1 #
948     } else {
949 wakaba 1.77 !!!cp (43);
950 wakaba 1.3 !!!parse-error (type => 'nestc');
951 wakaba 1.1 }
952 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
953 wakaba 1.1 # next-input-character is already done
954     redo A;
955     } else {
956 wakaba 1.77 !!!cp (44);
957 wakaba 1.76 $self->{current_token}->{tag_name} .= chr $self->{next_char};
958 wakaba 1.1 # start tag or end tag
959     ## Stay in the state
960     !!!next-input-character;
961     redo A;
962     }
963 wakaba 1.57 } elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) {
964 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
965     $self->{next_char} == 0x000A or # LF
966     $self->{next_char} == 0x000B or # VT
967     $self->{next_char} == 0x000C or # FF
968     $self->{next_char} == 0x0020) { # SP
969 wakaba 1.77 !!!cp (45);
970 wakaba 1.1 ## Stay in the state
971     !!!next-input-character;
972     redo A;
973 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
974 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
975 wakaba 1.77 !!!cp (46);
976 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
977 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
978 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
979 wakaba 1.1 if ($self->{current_token}->{attributes}) {
980 wakaba 1.77 !!!cp (47);
981 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
982 wakaba 1.77 } else {
983     !!!cp (48);
984 wakaba 1.1 }
985     } else {
986     die "$0: $self->{current_token}->{type}: Unknown token type";
987     }
988 wakaba 1.57 $self->{state} = DATA_STATE;
989 wakaba 1.1 !!!next-input-character;
990    
991     !!!emit ($self->{current_token}); # start tag or end tag
992    
993     redo A;
994 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
995     $self->{next_char} <= 0x005A) { # A..Z
996 wakaba 1.77 !!!cp (49);
997 wakaba 1.119 $self->{current_attribute}
998     = {name => chr ($self->{next_char} + 0x0020),
999     value => '',
1000     line => $self->{line}, column => $self->{column}};
1001 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
1002 wakaba 1.1 !!!next-input-character;
1003     redo A;
1004 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1005 wakaba 1.1 !!!next-input-character;
1006 wakaba 1.76 if ($self->{next_char} == 0x003E and # >
1007 wakaba 1.55 $self->{current_token}->{type} == START_TAG_TOKEN and
1008 wakaba 1.1 $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
1009     # permitted slash
1010 wakaba 1.77 !!!cp (50);
1011 wakaba 1.1 #
1012     } else {
1013 wakaba 1.77 !!!cp (51);
1014 wakaba 1.3 !!!parse-error (type => 'nestc');
1015 wakaba 1.1 }
1016     ## Stay in the state
1017     # next-input-character is already done
1018     redo A;
1019 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1020 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1021 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1022 wakaba 1.77 !!!cp (52);
1023 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1024 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1025 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1026 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1027 wakaba 1.77 !!!cp (53);
1028 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1029 wakaba 1.77 } else {
1030     !!!cp (54);
1031 wakaba 1.1 }
1032     } else {
1033     die "$0: $self->{current_token}->{type}: Unknown token type";
1034     }
1035 wakaba 1.57 $self->{state} = DATA_STATE;
1036 wakaba 1.1 # reconsume
1037    
1038     !!!emit ($self->{current_token}); # start tag or end tag
1039    
1040     redo A;
1041     } else {
1042 wakaba 1.72 if ({
1043     0x0022 => 1, # "
1044     0x0027 => 1, # '
1045     0x003D => 1, # =
1046 wakaba 1.76 }->{$self->{next_char}}) {
1047 wakaba 1.77 !!!cp (55);
1048 wakaba 1.72 !!!parse-error (type => 'bad attribute name');
1049 wakaba 1.77 } else {
1050     !!!cp (56);
1051 wakaba 1.72 }
1052 wakaba 1.119 $self->{current_attribute}
1053     = {name => chr ($self->{next_char}),
1054     value => '',
1055     line => $self->{line}, column => $self->{column}};
1056 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
1057 wakaba 1.1 !!!next-input-character;
1058     redo A;
1059     }
1060 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_NAME_STATE) {
1061 wakaba 1.1 my $before_leave = sub {
1062     if (exists $self->{current_token}->{attributes} # start tag or end tag
1063     ->{$self->{current_attribute}->{name}}) { # MUST
1064 wakaba 1.77 !!!cp (57);
1065 wakaba 1.120 !!!parse-error (type => 'duplicate attribute:'.$self->{current_attribute}->{name}, line => $self->{current_attribute}->{line}, column => $self->{current_attribute}->{column});
1066 wakaba 1.1 ## Discard $self->{current_attribute} # MUST
1067     } else {
1068 wakaba 1.77 !!!cp (58);
1069 wakaba 1.1 $self->{current_token}->{attributes}->{$self->{current_attribute}->{name}}
1070     = $self->{current_attribute};
1071     }
1072     }; # $before_leave
1073    
1074 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1075     $self->{next_char} == 0x000A or # LF
1076     $self->{next_char} == 0x000B or # VT
1077     $self->{next_char} == 0x000C or # FF
1078     $self->{next_char} == 0x0020) { # SP
1079 wakaba 1.77 !!!cp (59);
1080 wakaba 1.1 $before_leave->();
1081 wakaba 1.57 $self->{state} = AFTER_ATTRIBUTE_NAME_STATE;
1082 wakaba 1.1 !!!next-input-character;
1083     redo A;
1084 wakaba 1.76 } elsif ($self->{next_char} == 0x003D) { # =
1085 wakaba 1.77 !!!cp (60);
1086 wakaba 1.1 $before_leave->();
1087 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_VALUE_STATE;
1088 wakaba 1.1 !!!next-input-character;
1089     redo A;
1090 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1091 wakaba 1.1 $before_leave->();
1092 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1093 wakaba 1.77 !!!cp (61);
1094 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1095 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1096 wakaba 1.77 !!!cp (62);
1097 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1098 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1099 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1100 wakaba 1.1 }
1101     } else {
1102     die "$0: $self->{current_token}->{type}: Unknown token type";
1103     }
1104 wakaba 1.57 $self->{state} = DATA_STATE;
1105 wakaba 1.1 !!!next-input-character;
1106    
1107     !!!emit ($self->{current_token}); # start tag or end tag
1108    
1109     redo A;
1110 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
1111     $self->{next_char} <= 0x005A) { # A..Z
1112 wakaba 1.77 !!!cp (63);
1113 wakaba 1.76 $self->{current_attribute}->{name} .= chr ($self->{next_char} + 0x0020);
1114 wakaba 1.1 ## Stay in the state
1115     !!!next-input-character;
1116     redo A;
1117 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1118 wakaba 1.1 $before_leave->();
1119     !!!next-input-character;
1120 wakaba 1.76 if ($self->{next_char} == 0x003E and # >
1121 wakaba 1.55 $self->{current_token}->{type} == START_TAG_TOKEN and
1122 wakaba 1.1 $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
1123     # permitted slash
1124 wakaba 1.77 !!!cp (64);
1125 wakaba 1.1 #
1126     } else {
1127 wakaba 1.77 !!!cp (65);
1128 wakaba 1.3 !!!parse-error (type => 'nestc');
1129 wakaba 1.1 }
1130 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1131 wakaba 1.1 # next-input-character is already done
1132     redo A;
1133 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1134 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1135 wakaba 1.1 $before_leave->();
1136 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1137 wakaba 1.77 !!!cp (66);
1138 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1139 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1140 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1141 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1142 wakaba 1.77 !!!cp (67);
1143 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1144 wakaba 1.77 } else {
1145 wakaba 1.78 ## NOTE: This state should never be reached.
1146 wakaba 1.77 !!!cp (68);
1147 wakaba 1.1 }
1148     } else {
1149     die "$0: $self->{current_token}->{type}: Unknown token type";
1150     }
1151 wakaba 1.57 $self->{state} = DATA_STATE;
1152 wakaba 1.1 # reconsume
1153    
1154     !!!emit ($self->{current_token}); # start tag or end tag
1155    
1156     redo A;
1157     } else {
1158 wakaba 1.76 if ($self->{next_char} == 0x0022 or # "
1159     $self->{next_char} == 0x0027) { # '
1160 wakaba 1.77 !!!cp (69);
1161 wakaba 1.72 !!!parse-error (type => 'bad attribute name');
1162 wakaba 1.77 } else {
1163     !!!cp (70);
1164 wakaba 1.72 }
1165 wakaba 1.76 $self->{current_attribute}->{name} .= chr ($self->{next_char});
1166 wakaba 1.1 ## Stay in the state
1167     !!!next-input-character;
1168     redo A;
1169     }
1170 wakaba 1.57 } elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) {
1171 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1172     $self->{next_char} == 0x000A or # LF
1173     $self->{next_char} == 0x000B or # VT
1174     $self->{next_char} == 0x000C or # FF
1175     $self->{next_char} == 0x0020) { # SP
1176 wakaba 1.77 !!!cp (71);
1177 wakaba 1.1 ## Stay in the state
1178     !!!next-input-character;
1179     redo A;
1180 wakaba 1.76 } elsif ($self->{next_char} == 0x003D) { # =
1181 wakaba 1.77 !!!cp (72);
1182 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_VALUE_STATE;
1183 wakaba 1.1 !!!next-input-character;
1184     redo A;
1185 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1186 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1187 wakaba 1.77 !!!cp (73);
1188 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1189 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1190 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1191 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1192 wakaba 1.77 !!!cp (74);
1193 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1194 wakaba 1.77 } else {
1195 wakaba 1.78 ## NOTE: This state should never be reached.
1196 wakaba 1.77 !!!cp (75);
1197 wakaba 1.1 }
1198     } else {
1199     die "$0: $self->{current_token}->{type}: Unknown token type";
1200     }
1201 wakaba 1.57 $self->{state} = DATA_STATE;
1202 wakaba 1.1 !!!next-input-character;
1203    
1204     !!!emit ($self->{current_token}); # start tag or end tag
1205    
1206     redo A;
1207 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
1208     $self->{next_char} <= 0x005A) { # A..Z
1209 wakaba 1.77 !!!cp (76);
1210 wakaba 1.119 $self->{current_attribute}
1211     = {name => chr ($self->{next_char} + 0x0020),
1212     value => '',
1213     line => $self->{line}, column => $self->{column}};
1214 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
1215 wakaba 1.1 !!!next-input-character;
1216     redo A;
1217 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1218 wakaba 1.1 !!!next-input-character;
1219 wakaba 1.76 if ($self->{next_char} == 0x003E and # >
1220 wakaba 1.55 $self->{current_token}->{type} == START_TAG_TOKEN and
1221 wakaba 1.1 $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
1222     # permitted slash
1223 wakaba 1.77 !!!cp (77);
1224 wakaba 1.1 #
1225     } else {
1226 wakaba 1.77 !!!cp (78);
1227 wakaba 1.3 !!!parse-error (type => 'nestc');
1228 wakaba 1.33 ## TODO: Different error type for <aa / bb> than <aa/>
1229 wakaba 1.1 }
1230 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1231 wakaba 1.1 # next-input-character is already done
1232     redo A;
1233 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1234 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1235 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1236 wakaba 1.77 !!!cp (79);
1237 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1238 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1239 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1240 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1241 wakaba 1.77 !!!cp (80);
1242 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1243 wakaba 1.77 } else {
1244 wakaba 1.78 ## NOTE: This state should never be reached.
1245 wakaba 1.77 !!!cp (81);
1246 wakaba 1.1 }
1247     } else {
1248     die "$0: $self->{current_token}->{type}: Unknown token type";
1249     }
1250 wakaba 1.57 $self->{state} = DATA_STATE;
1251 wakaba 1.1 # reconsume
1252    
1253     !!!emit ($self->{current_token}); # start tag or end tag
1254    
1255     redo A;
1256     } else {
1257 wakaba 1.77 !!!cp (82);
1258 wakaba 1.119 $self->{current_attribute}
1259     = {name => chr ($self->{next_char}),
1260     value => '',
1261     line => $self->{line}, column => $self->{column}};
1262 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
1263 wakaba 1.1 !!!next-input-character;
1264     redo A;
1265     }
1266 wakaba 1.57 } elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) {
1267 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1268     $self->{next_char} == 0x000A or # LF
1269     $self->{next_char} == 0x000B or # VT
1270     $self->{next_char} == 0x000C or # FF
1271     $self->{next_char} == 0x0020) { # SP
1272 wakaba 1.77 !!!cp (83);
1273 wakaba 1.1 ## Stay in the state
1274     !!!next-input-character;
1275     redo A;
1276 wakaba 1.76 } elsif ($self->{next_char} == 0x0022) { # "
1277 wakaba 1.77 !!!cp (84);
1278 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
1279 wakaba 1.1 !!!next-input-character;
1280     redo A;
1281 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1282 wakaba 1.77 !!!cp (85);
1283 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
1284 wakaba 1.1 ## reconsume
1285     redo A;
1286 wakaba 1.76 } elsif ($self->{next_char} == 0x0027) { # '
1287 wakaba 1.77 !!!cp (86);
1288 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
1289 wakaba 1.1 !!!next-input-character;
1290     redo A;
1291 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1292 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1293 wakaba 1.77 !!!cp (87);
1294 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1295 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1296 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1297 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1298 wakaba 1.77 !!!cp (88);
1299 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1300 wakaba 1.77 } else {
1301 wakaba 1.78 ## NOTE: This state should never be reached.
1302 wakaba 1.77 !!!cp (89);
1303 wakaba 1.1 }
1304     } else {
1305     die "$0: $self->{current_token}->{type}: Unknown token type";
1306     }
1307 wakaba 1.57 $self->{state} = DATA_STATE;
1308 wakaba 1.1 !!!next-input-character;
1309    
1310     !!!emit ($self->{current_token}); # start tag or end tag
1311    
1312     redo A;
1313 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1314 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1315 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1316 wakaba 1.77 !!!cp (90);
1317 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1318 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1319 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1320 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1321 wakaba 1.77 !!!cp (91);
1322 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1323 wakaba 1.77 } else {
1324 wakaba 1.78 ## NOTE: This state should never be reached.
1325 wakaba 1.77 !!!cp (92);
1326 wakaba 1.1 }
1327     } else {
1328     die "$0: $self->{current_token}->{type}: Unknown token type";
1329     }
1330 wakaba 1.57 $self->{state} = DATA_STATE;
1331 wakaba 1.1 ## reconsume
1332    
1333     !!!emit ($self->{current_token}); # start tag or end tag
1334    
1335     redo A;
1336     } else {
1337 wakaba 1.76 if ($self->{next_char} == 0x003D) { # =
1338 wakaba 1.77 !!!cp (93);
1339 wakaba 1.72 !!!parse-error (type => 'bad attribute value');
1340 wakaba 1.77 } else {
1341     !!!cp (94);
1342 wakaba 1.72 }
1343 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1344 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
1345 wakaba 1.1 !!!next-input-character;
1346     redo A;
1347     }
1348 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {
1349 wakaba 1.76 if ($self->{next_char} == 0x0022) { # "
1350 wakaba 1.77 !!!cp (95);
1351 wakaba 1.72 $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1352 wakaba 1.1 !!!next-input-character;
1353     redo A;
1354 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1355 wakaba 1.77 !!!cp (96);
1356 wakaba 1.57 $self->{last_attribute_value_state} = $self->{state};
1357     $self->{state} = ENTITY_IN_ATTRIBUTE_VALUE_STATE;
1358 wakaba 1.1 !!!next-input-character;
1359     redo A;
1360 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1361 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
1362 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1363 wakaba 1.77 !!!cp (97);
1364 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1365 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1366 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1367 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1368 wakaba 1.77 !!!cp (98);
1369 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1370 wakaba 1.77 } else {
1371 wakaba 1.78 ## NOTE: This state should never be reached.
1372 wakaba 1.77 !!!cp (99);
1373 wakaba 1.1 }
1374     } else {
1375     die "$0: $self->{current_token}->{type}: Unknown token type";
1376     }
1377 wakaba 1.57 $self->{state} = DATA_STATE;
1378 wakaba 1.1 ## reconsume
1379    
1380     !!!emit ($self->{current_token}); # start tag or end tag
1381    
1382     redo A;
1383     } else {
1384 wakaba 1.77 !!!cp (100);
1385 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1386 wakaba 1.1 ## Stay in the state
1387     !!!next-input-character;
1388     redo A;
1389     }
1390 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {
1391 wakaba 1.76 if ($self->{next_char} == 0x0027) { # '
1392 wakaba 1.77 !!!cp (101);
1393 wakaba 1.72 $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1394 wakaba 1.1 !!!next-input-character;
1395     redo A;
1396 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1397 wakaba 1.77 !!!cp (102);
1398 wakaba 1.57 $self->{last_attribute_value_state} = $self->{state};
1399     $self->{state} = ENTITY_IN_ATTRIBUTE_VALUE_STATE;
1400 wakaba 1.1 !!!next-input-character;
1401     redo A;
1402 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1403 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
1404 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1405 wakaba 1.77 !!!cp (103);
1406 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1407 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1408 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1409 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1410 wakaba 1.77 !!!cp (104);
1411 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1412 wakaba 1.77 } else {
1413 wakaba 1.78 ## NOTE: This state should never be reached.
1414 wakaba 1.77 !!!cp (105);
1415 wakaba 1.1 }
1416     } else {
1417     die "$0: $self->{current_token}->{type}: Unknown token type";
1418     }
1419 wakaba 1.57 $self->{state} = DATA_STATE;
1420 wakaba 1.1 ## reconsume
1421    
1422     !!!emit ($self->{current_token}); # start tag or end tag
1423    
1424     redo A;
1425     } else {
1426 wakaba 1.77 !!!cp (106);
1427 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1428 wakaba 1.1 ## Stay in the state
1429     !!!next-input-character;
1430     redo A;
1431     }
1432 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) {
1433 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1434     $self->{next_char} == 0x000A or # LF
1435     $self->{next_char} == 0x000B or # VT
1436     $self->{next_char} == 0x000C or # FF
1437     $self->{next_char} == 0x0020) { # SP
1438 wakaba 1.77 !!!cp (107);
1439 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1440 wakaba 1.1 !!!next-input-character;
1441     redo A;
1442 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1443 wakaba 1.77 !!!cp (108);
1444 wakaba 1.57 $self->{last_attribute_value_state} = $self->{state};
1445     $self->{state} = ENTITY_IN_ATTRIBUTE_VALUE_STATE;
1446 wakaba 1.1 !!!next-input-character;
1447     redo A;
1448 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1449 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1450 wakaba 1.77 !!!cp (109);
1451 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1452 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1453 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1454 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1455 wakaba 1.77 !!!cp (110);
1456 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1457 wakaba 1.77 } else {
1458 wakaba 1.78 ## NOTE: This state should never be reached.
1459 wakaba 1.77 !!!cp (111);
1460 wakaba 1.1 }
1461     } else {
1462     die "$0: $self->{current_token}->{type}: Unknown token type";
1463     }
1464 wakaba 1.57 $self->{state} = DATA_STATE;
1465 wakaba 1.1 !!!next-input-character;
1466    
1467     !!!emit ($self->{current_token}); # start tag or end tag
1468    
1469     redo A;
1470 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1471 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1472 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1473 wakaba 1.77 !!!cp (112);
1474 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1475 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1476 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1477 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1478 wakaba 1.77 !!!cp (113);
1479 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1480 wakaba 1.77 } else {
1481 wakaba 1.78 ## NOTE: This state should never be reached.
1482 wakaba 1.77 !!!cp (114);
1483 wakaba 1.1 }
1484     } else {
1485     die "$0: $self->{current_token}->{type}: Unknown token type";
1486     }
1487 wakaba 1.57 $self->{state} = DATA_STATE;
1488 wakaba 1.1 ## reconsume
1489    
1490     !!!emit ($self->{current_token}); # start tag or end tag
1491    
1492     redo A;
1493     } else {
1494 wakaba 1.72 if ({
1495     0x0022 => 1, # "
1496     0x0027 => 1, # '
1497     0x003D => 1, # =
1498 wakaba 1.76 }->{$self->{next_char}}) {
1499 wakaba 1.77 !!!cp (115);
1500 wakaba 1.72 !!!parse-error (type => 'bad attribute value');
1501 wakaba 1.77 } else {
1502     !!!cp (116);
1503 wakaba 1.72 }
1504 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1505 wakaba 1.1 ## Stay in the state
1506     !!!next-input-character;
1507     redo A;
1508     }
1509 wakaba 1.57 } elsif ($self->{state} == ENTITY_IN_ATTRIBUTE_VALUE_STATE) {
1510 wakaba 1.72 my $token = $self->_tokenize_attempt_to_consume_an_entity
1511     (1,
1512     $self->{last_attribute_value_state}
1513     == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE ? 0x0022 : # "
1514     $self->{last_attribute_value_state}
1515     == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE ? 0x0027 : # '
1516     -1);
1517 wakaba 1.1
1518     unless (defined $token) {
1519 wakaba 1.77 !!!cp (117);
1520 wakaba 1.1 $self->{current_attribute}->{value} .= '&';
1521     } else {
1522 wakaba 1.77 !!!cp (118);
1523 wakaba 1.1 $self->{current_attribute}->{value} .= $token->{data};
1524 wakaba 1.66 $self->{current_attribute}->{has_reference} = $token->{has_reference};
1525 wakaba 1.1 ## ISSUE: spec says "append the returned character token to the current attribute's value"
1526     }
1527    
1528     $self->{state} = $self->{last_attribute_value_state};
1529     # next-input-character is already done
1530     redo A;
1531 wakaba 1.72 } elsif ($self->{state} == AFTER_ATTRIBUTE_VALUE_QUOTED_STATE) {
1532 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1533     $self->{next_char} == 0x000A or # LF
1534     $self->{next_char} == 0x000B or # VT
1535     $self->{next_char} == 0x000C or # FF
1536     $self->{next_char} == 0x0020) { # SP
1537 wakaba 1.77 !!!cp (118);
1538 wakaba 1.72 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1539     !!!next-input-character;
1540     redo A;
1541 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1542 wakaba 1.72 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1543 wakaba 1.77 !!!cp (119);
1544 wakaba 1.72 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1545     } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1546     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1547     if ($self->{current_token}->{attributes}) {
1548 wakaba 1.77 !!!cp (120);
1549 wakaba 1.72 !!!parse-error (type => 'end tag attribute');
1550 wakaba 1.77 } else {
1551 wakaba 1.78 ## NOTE: This state should never be reached.
1552 wakaba 1.77 !!!cp (121);
1553 wakaba 1.72 }
1554     } else {
1555     die "$0: $self->{current_token}->{type}: Unknown token type";
1556     }
1557     $self->{state} = DATA_STATE;
1558     !!!next-input-character;
1559    
1560     !!!emit ($self->{current_token}); # start tag or end tag
1561    
1562     redo A;
1563 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1564 wakaba 1.72 !!!next-input-character;
1565 wakaba 1.76 if ($self->{next_char} == 0x003E and # >
1566 wakaba 1.72 $self->{current_token}->{type} == START_TAG_TOKEN and
1567     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
1568     # permitted slash
1569 wakaba 1.77 !!!cp (122);
1570 wakaba 1.72 #
1571     } else {
1572 wakaba 1.77 !!!cp (123);
1573 wakaba 1.72 !!!parse-error (type => 'nestc');
1574     }
1575     $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1576     # next-input-character is already done
1577     redo A;
1578     } else {
1579 wakaba 1.77 !!!cp (124);
1580 wakaba 1.72 !!!parse-error (type => 'no space between attributes');
1581     $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1582     ## reconsume
1583     redo A;
1584     }
1585 wakaba 1.57 } elsif ($self->{state} == BOGUS_COMMENT_STATE) {
1586 wakaba 1.1 ## (only happens in the PCDATA state)
1587    
1588 wakaba 1.112 ## NOTE: Set by the previous state
1589     #my $token = {type => COMMENT_TOKEN, data => ''};
1590 wakaba 1.1
1591     BC: {
1592 wakaba 1.76 if ($self->{next_char} == 0x003E) { # >
1593 wakaba 1.77 !!!cp (124);
1594 wakaba 1.57 $self->{state} = DATA_STATE;
1595 wakaba 1.1 !!!next-input-character;
1596    
1597 wakaba 1.112 !!!emit ($self->{current_token}); # comment
1598 wakaba 1.1
1599     redo A;
1600 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1601 wakaba 1.77 !!!cp (125);
1602 wakaba 1.57 $self->{state} = DATA_STATE;
1603 wakaba 1.1 ## reconsume
1604    
1605 wakaba 1.112 !!!emit ($self->{current_token}); # comment
1606 wakaba 1.1
1607     redo A;
1608     } else {
1609 wakaba 1.77 !!!cp (126);
1610 wakaba 1.112 $self->{current_token}->{data} .= chr ($self->{next_char}); # comment
1611 wakaba 1.1 !!!next-input-character;
1612     redo BC;
1613     }
1614     } # BC
1615 wakaba 1.77
1616     die "$0: _get_next_token: unexpected case [BC]";
1617 wakaba 1.57 } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {
1618 wakaba 1.1 ## (only happen if PCDATA state)
1619    
1620 wakaba 1.120 my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1);
1621 wakaba 1.112
1622 wakaba 1.1 my @next_char;
1623 wakaba 1.76 push @next_char, $self->{next_char};
1624 wakaba 1.1
1625 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
1626 wakaba 1.1 !!!next-input-character;
1627 wakaba 1.76 push @next_char, $self->{next_char};
1628     if ($self->{next_char} == 0x002D) { # -
1629 wakaba 1.77 !!!cp (127);
1630 wakaba 1.112 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
1631 wakaba 1.120 line => $l, column => $c,
1632 wakaba 1.118 };
1633 wakaba 1.57 $self->{state} = COMMENT_START_STATE;
1634 wakaba 1.1 !!!next-input-character;
1635     redo A;
1636 wakaba 1.77 } else {
1637     !!!cp (128);
1638 wakaba 1.1 }
1639 wakaba 1.76 } elsif ($self->{next_char} == 0x0044 or # D
1640     $self->{next_char} == 0x0064) { # d
1641 wakaba 1.1 !!!next-input-character;
1642 wakaba 1.76 push @next_char, $self->{next_char};
1643     if ($self->{next_char} == 0x004F or # O
1644     $self->{next_char} == 0x006F) { # o
1645 wakaba 1.1 !!!next-input-character;
1646 wakaba 1.76 push @next_char, $self->{next_char};
1647     if ($self->{next_char} == 0x0043 or # C
1648     $self->{next_char} == 0x0063) { # c
1649 wakaba 1.1 !!!next-input-character;
1650 wakaba 1.76 push @next_char, $self->{next_char};
1651     if ($self->{next_char} == 0x0054 or # T
1652     $self->{next_char} == 0x0074) { # t
1653 wakaba 1.1 !!!next-input-character;
1654 wakaba 1.76 push @next_char, $self->{next_char};
1655     if ($self->{next_char} == 0x0059 or # Y
1656     $self->{next_char} == 0x0079) { # y
1657 wakaba 1.1 !!!next-input-character;
1658 wakaba 1.76 push @next_char, $self->{next_char};
1659     if ($self->{next_char} == 0x0050 or # P
1660     $self->{next_char} == 0x0070) { # p
1661 wakaba 1.1 !!!next-input-character;
1662 wakaba 1.76 push @next_char, $self->{next_char};
1663     if ($self->{next_char} == 0x0045 or # E
1664     $self->{next_char} == 0x0065) { # e
1665 wakaba 1.77 !!!cp (129);
1666     ## TODO: What a stupid code this is!
1667 wakaba 1.57 $self->{state} = DOCTYPE_STATE;
1668 wakaba 1.112 $self->{current_token} = {type => DOCTYPE_TOKEN,
1669     quirks => 1,
1670 wakaba 1.120 line => $l, column => $c,
1671 wakaba 1.118 };
1672 wakaba 1.1 !!!next-input-character;
1673     redo A;
1674 wakaba 1.77 } else {
1675     !!!cp (130);
1676 wakaba 1.1 }
1677 wakaba 1.77 } else {
1678     !!!cp (131);
1679 wakaba 1.1 }
1680 wakaba 1.77 } else {
1681     !!!cp (132);
1682 wakaba 1.1 }
1683 wakaba 1.77 } else {
1684     !!!cp (133);
1685 wakaba 1.1 }
1686 wakaba 1.77 } else {
1687     !!!cp (134);
1688 wakaba 1.1 }
1689 wakaba 1.77 } else {
1690     !!!cp (135);
1691 wakaba 1.1 }
1692 wakaba 1.77 } else {
1693     !!!cp (136);
1694 wakaba 1.1 }
1695    
1696 wakaba 1.30 !!!parse-error (type => 'bogus comment');
1697 wakaba 1.76 $self->{next_char} = shift @next_char;
1698 wakaba 1.1 !!!back-next-input-character (@next_char);
1699 wakaba 1.57 $self->{state} = BOGUS_COMMENT_STATE;
1700 wakaba 1.112 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
1701 wakaba 1.120 line => $l, column => $c,
1702 wakaba 1.118 };
1703 wakaba 1.1 redo A;
1704    
1705     ## ISSUE: typos in spec: chacacters, is is a parse error
1706     ## ISSUE: spec is somewhat unclear on "is the first character that will be in the comment"; what is "that will be in the comment" is what the algorithm defines, isn't it?
1707 wakaba 1.57 } elsif ($self->{state} == COMMENT_START_STATE) {
1708 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
1709 wakaba 1.77 !!!cp (137);
1710 wakaba 1.57 $self->{state} = COMMENT_START_DASH_STATE;
1711 wakaba 1.23 !!!next-input-character;
1712     redo A;
1713 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1714 wakaba 1.77 !!!cp (138);
1715 wakaba 1.23 !!!parse-error (type => 'bogus comment');
1716 wakaba 1.57 $self->{state} = DATA_STATE;
1717 wakaba 1.23 !!!next-input-character;
1718    
1719     !!!emit ($self->{current_token}); # comment
1720    
1721     redo A;
1722 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1723 wakaba 1.77 !!!cp (139);
1724 wakaba 1.23 !!!parse-error (type => 'unclosed comment');
1725 wakaba 1.57 $self->{state} = DATA_STATE;
1726 wakaba 1.23 ## reconsume
1727    
1728     !!!emit ($self->{current_token}); # comment
1729    
1730     redo A;
1731     } else {
1732 wakaba 1.77 !!!cp (140);
1733 wakaba 1.23 $self->{current_token}->{data} # comment
1734 wakaba 1.76 .= chr ($self->{next_char});
1735 wakaba 1.57 $self->{state} = COMMENT_STATE;
1736 wakaba 1.23 !!!next-input-character;
1737     redo A;
1738     }
1739 wakaba 1.57 } elsif ($self->{state} == COMMENT_START_DASH_STATE) {
1740 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
1741 wakaba 1.77 !!!cp (141);
1742 wakaba 1.57 $self->{state} = COMMENT_END_STATE;
1743 wakaba 1.23 !!!next-input-character;
1744     redo A;
1745 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1746 wakaba 1.77 !!!cp (142);
1747 wakaba 1.23 !!!parse-error (type => 'bogus comment');
1748 wakaba 1.57 $self->{state} = DATA_STATE;
1749 wakaba 1.23 !!!next-input-character;
1750    
1751     !!!emit ($self->{current_token}); # comment
1752    
1753     redo A;
1754 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1755 wakaba 1.77 !!!cp (143);
1756 wakaba 1.23 !!!parse-error (type => 'unclosed comment');
1757 wakaba 1.57 $self->{state} = DATA_STATE;
1758 wakaba 1.23 ## reconsume
1759    
1760     !!!emit ($self->{current_token}); # comment
1761    
1762     redo A;
1763     } else {
1764 wakaba 1.77 !!!cp (144);
1765 wakaba 1.23 $self->{current_token}->{data} # comment
1766 wakaba 1.76 .= '-' . chr ($self->{next_char});
1767 wakaba 1.57 $self->{state} = COMMENT_STATE;
1768 wakaba 1.23 !!!next-input-character;
1769     redo A;
1770     }
1771 wakaba 1.57 } elsif ($self->{state} == COMMENT_STATE) {
1772 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
1773 wakaba 1.77 !!!cp (145);
1774 wakaba 1.57 $self->{state} = COMMENT_END_DASH_STATE;
1775 wakaba 1.1 !!!next-input-character;
1776     redo A;
1777 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1778 wakaba 1.77 !!!cp (146);
1779 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1780 wakaba 1.57 $self->{state} = DATA_STATE;
1781 wakaba 1.1 ## reconsume
1782    
1783     !!!emit ($self->{current_token}); # comment
1784    
1785     redo A;
1786     } else {
1787 wakaba 1.77 !!!cp (147);
1788 wakaba 1.76 $self->{current_token}->{data} .= chr ($self->{next_char}); # comment
1789 wakaba 1.1 ## Stay in the state
1790     !!!next-input-character;
1791     redo A;
1792     }
1793 wakaba 1.57 } elsif ($self->{state} == COMMENT_END_DASH_STATE) {
1794 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
1795 wakaba 1.77 !!!cp (148);
1796 wakaba 1.57 $self->{state} = COMMENT_END_STATE;
1797 wakaba 1.1 !!!next-input-character;
1798     redo A;
1799 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1800 wakaba 1.77 !!!cp (149);
1801 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1802 wakaba 1.57 $self->{state} = DATA_STATE;
1803 wakaba 1.1 ## reconsume
1804    
1805     !!!emit ($self->{current_token}); # comment
1806    
1807     redo A;
1808     } else {
1809 wakaba 1.77 !!!cp (150);
1810 wakaba 1.76 $self->{current_token}->{data} .= '-' . chr ($self->{next_char}); # comment
1811 wakaba 1.57 $self->{state} = COMMENT_STATE;
1812 wakaba 1.1 !!!next-input-character;
1813     redo A;
1814     }
1815 wakaba 1.57 } elsif ($self->{state} == COMMENT_END_STATE) {
1816 wakaba 1.76 if ($self->{next_char} == 0x003E) { # >
1817 wakaba 1.77 !!!cp (151);
1818 wakaba 1.57 $self->{state} = DATA_STATE;
1819 wakaba 1.1 !!!next-input-character;
1820    
1821     !!!emit ($self->{current_token}); # comment
1822    
1823     redo A;
1824 wakaba 1.76 } elsif ($self->{next_char} == 0x002D) { # -
1825 wakaba 1.77 !!!cp (152);
1826 wakaba 1.114 !!!parse-error (type => 'dash in comment',
1827     line => $self->{line_prev},
1828     column => $self->{column_prev});
1829 wakaba 1.1 $self->{current_token}->{data} .= '-'; # comment
1830     ## Stay in the state
1831     !!!next-input-character;
1832     redo A;
1833 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1834 wakaba 1.77 !!!cp (153);
1835 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1836 wakaba 1.57 $self->{state} = DATA_STATE;
1837 wakaba 1.1 ## reconsume
1838    
1839     !!!emit ($self->{current_token}); # comment
1840    
1841     redo A;
1842     } else {
1843 wakaba 1.77 !!!cp (154);
1844 wakaba 1.114 !!!parse-error (type => 'dash in comment',
1845     line => $self->{line_prev},
1846     column => $self->{column_prev});
1847 wakaba 1.76 $self->{current_token}->{data} .= '--' . chr ($self->{next_char}); # comment
1848 wakaba 1.57 $self->{state} = COMMENT_STATE;
1849 wakaba 1.1 !!!next-input-character;
1850     redo A;
1851     }
1852 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_STATE) {
1853 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1854     $self->{next_char} == 0x000A or # LF
1855     $self->{next_char} == 0x000B or # VT
1856     $self->{next_char} == 0x000C or # FF
1857     $self->{next_char} == 0x0020) { # SP
1858 wakaba 1.77 !!!cp (155);
1859 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
1860 wakaba 1.1 !!!next-input-character;
1861     redo A;
1862     } else {
1863 wakaba 1.77 !!!cp (156);
1864 wakaba 1.3 !!!parse-error (type => 'no space before DOCTYPE name');
1865 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
1866 wakaba 1.1 ## reconsume
1867     redo A;
1868     }
1869 wakaba 1.57 } elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) {
1870 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1871     $self->{next_char} == 0x000A or # LF
1872     $self->{next_char} == 0x000B or # VT
1873     $self->{next_char} == 0x000C or # FF
1874     $self->{next_char} == 0x0020) { # SP
1875 wakaba 1.77 !!!cp (157);
1876 wakaba 1.1 ## Stay in the state
1877     !!!next-input-character;
1878     redo A;
1879 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1880 wakaba 1.77 !!!cp (158);
1881 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
1882 wakaba 1.57 $self->{state} = DATA_STATE;
1883 wakaba 1.1 !!!next-input-character;
1884    
1885 wakaba 1.112 !!!emit ($self->{current_token}); # DOCTYPE (quirks)
1886 wakaba 1.1
1887     redo A;
1888 wakaba 1.77 } elsif ($self->{next_char} == -1) {
1889     !!!cp (159);
1890 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
1891 wakaba 1.57 $self->{state} = DATA_STATE;
1892 wakaba 1.1 ## reconsume
1893    
1894 wakaba 1.112 !!!emit ($self->{current_token}); # DOCTYPE (quirks)
1895 wakaba 1.1
1896     redo A;
1897     } else {
1898 wakaba 1.77 !!!cp (160);
1899 wakaba 1.112 $self->{current_token}->{name} = chr $self->{next_char};
1900     delete $self->{current_token}->{quirks};
1901 wakaba 1.4 ## ISSUE: "Set the token's name name to the" in the spec
1902 wakaba 1.57 $self->{state} = DOCTYPE_NAME_STATE;
1903 wakaba 1.1 !!!next-input-character;
1904     redo A;
1905     }
1906 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_NAME_STATE) {
1907 wakaba 1.18 ## ISSUE: Redundant "First," in the spec.
1908 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1909     $self->{next_char} == 0x000A or # LF
1910     $self->{next_char} == 0x000B or # VT
1911     $self->{next_char} == 0x000C or # FF
1912     $self->{next_char} == 0x0020) { # SP
1913 wakaba 1.77 !!!cp (161);
1914 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_NAME_STATE;
1915 wakaba 1.1 !!!next-input-character;
1916     redo A;
1917 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1918 wakaba 1.77 !!!cp (162);
1919 wakaba 1.57 $self->{state} = DATA_STATE;
1920 wakaba 1.1 !!!next-input-character;
1921    
1922     !!!emit ($self->{current_token}); # DOCTYPE
1923    
1924     redo A;
1925 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1926 wakaba 1.77 !!!cp (163);
1927 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1928 wakaba 1.57 $self->{state} = DATA_STATE;
1929 wakaba 1.1 ## reconsume
1930    
1931 wakaba 1.75 $self->{current_token}->{quirks} = 1;
1932 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
1933 wakaba 1.1
1934     redo A;
1935     } else {
1936 wakaba 1.77 !!!cp (164);
1937 wakaba 1.1 $self->{current_token}->{name}
1938 wakaba 1.76 .= chr ($self->{next_char}); # DOCTYPE
1939 wakaba 1.1 ## Stay in the state
1940     !!!next-input-character;
1941     redo A;
1942     }
1943 wakaba 1.57 } elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) {
1944 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1945     $self->{next_char} == 0x000A or # LF
1946     $self->{next_char} == 0x000B or # VT
1947     $self->{next_char} == 0x000C or # FF
1948     $self->{next_char} == 0x0020) { # SP
1949 wakaba 1.77 !!!cp (165);
1950 wakaba 1.1 ## Stay in the state
1951     !!!next-input-character;
1952     redo A;
1953 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1954 wakaba 1.77 !!!cp (166);
1955 wakaba 1.57 $self->{state} = DATA_STATE;
1956 wakaba 1.1 !!!next-input-character;
1957    
1958     !!!emit ($self->{current_token}); # DOCTYPE
1959    
1960     redo A;
1961 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1962 wakaba 1.77 !!!cp (167);
1963 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1964 wakaba 1.57 $self->{state} = DATA_STATE;
1965 wakaba 1.1 ## reconsume
1966    
1967 wakaba 1.75 $self->{current_token}->{quirks} = 1;
1968 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
1969    
1970     redo A;
1971 wakaba 1.76 } elsif ($self->{next_char} == 0x0050 or # P
1972     $self->{next_char} == 0x0070) { # p
1973 wakaba 1.18 !!!next-input-character;
1974 wakaba 1.76 if ($self->{next_char} == 0x0055 or # U
1975     $self->{next_char} == 0x0075) { # u
1976 wakaba 1.18 !!!next-input-character;
1977 wakaba 1.76 if ($self->{next_char} == 0x0042 or # B
1978     $self->{next_char} == 0x0062) { # b
1979 wakaba 1.18 !!!next-input-character;
1980 wakaba 1.76 if ($self->{next_char} == 0x004C or # L
1981     $self->{next_char} == 0x006C) { # l
1982 wakaba 1.18 !!!next-input-character;
1983 wakaba 1.76 if ($self->{next_char} == 0x0049 or # I
1984     $self->{next_char} == 0x0069) { # i
1985 wakaba 1.18 !!!next-input-character;
1986 wakaba 1.76 if ($self->{next_char} == 0x0043 or # C
1987     $self->{next_char} == 0x0063) { # c
1988 wakaba 1.77 !!!cp (168);
1989 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
1990 wakaba 1.18 !!!next-input-character;
1991     redo A;
1992 wakaba 1.77 } else {
1993     !!!cp (169);
1994 wakaba 1.18 }
1995 wakaba 1.77 } else {
1996     !!!cp (170);
1997 wakaba 1.18 }
1998 wakaba 1.77 } else {
1999     !!!cp (171);
2000 wakaba 1.18 }
2001 wakaba 1.77 } else {
2002     !!!cp (172);
2003 wakaba 1.18 }
2004 wakaba 1.77 } else {
2005     !!!cp (173);
2006 wakaba 1.18 }
2007    
2008     #
2009 wakaba 1.76 } elsif ($self->{next_char} == 0x0053 or # S
2010     $self->{next_char} == 0x0073) { # s
2011 wakaba 1.18 !!!next-input-character;
2012 wakaba 1.76 if ($self->{next_char} == 0x0059 or # Y
2013     $self->{next_char} == 0x0079) { # y
2014 wakaba 1.18 !!!next-input-character;
2015 wakaba 1.76 if ($self->{next_char} == 0x0053 or # S
2016     $self->{next_char} == 0x0073) { # s
2017 wakaba 1.18 !!!next-input-character;
2018 wakaba 1.76 if ($self->{next_char} == 0x0054 or # T
2019     $self->{next_char} == 0x0074) { # t
2020 wakaba 1.18 !!!next-input-character;
2021 wakaba 1.76 if ($self->{next_char} == 0x0045 or # E
2022     $self->{next_char} == 0x0065) { # e
2023 wakaba 1.18 !!!next-input-character;
2024 wakaba 1.76 if ($self->{next_char} == 0x004D or # M
2025     $self->{next_char} == 0x006D) { # m
2026 wakaba 1.77 !!!cp (174);
2027 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
2028 wakaba 1.18 !!!next-input-character;
2029     redo A;
2030 wakaba 1.77 } else {
2031     !!!cp (175);
2032 wakaba 1.18 }
2033 wakaba 1.77 } else {
2034     !!!cp (176);
2035 wakaba 1.18 }
2036 wakaba 1.77 } else {
2037     !!!cp (177);
2038 wakaba 1.18 }
2039 wakaba 1.77 } else {
2040     !!!cp (178);
2041 wakaba 1.18 }
2042 wakaba 1.77 } else {
2043     !!!cp (179);
2044 wakaba 1.18 }
2045    
2046     #
2047     } else {
2048 wakaba 1.77 !!!cp (180);
2049 wakaba 1.18 !!!next-input-character;
2050     #
2051     }
2052    
2053     !!!parse-error (type => 'string after DOCTYPE name');
2054 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2055 wakaba 1.73
2056 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2057 wakaba 1.18 # next-input-character is already done
2058     redo A;
2059 wakaba 1.57 } elsif ($self->{state} == BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {
2060 wakaba 1.18 if ({
2061     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2062     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2063 wakaba 1.76 }->{$self->{next_char}}) {
2064 wakaba 1.77 !!!cp (181);
2065 wakaba 1.18 ## Stay in the state
2066     !!!next-input-character;
2067     redo A;
2068 wakaba 1.76 } elsif ($self->{next_char} eq 0x0022) { # "
2069 wakaba 1.77 !!!cp (182);
2070 wakaba 1.18 $self->{current_token}->{public_identifier} = ''; # DOCTYPE
2071 wakaba 1.57 $self->{state} = DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE;
2072 wakaba 1.18 !!!next-input-character;
2073     redo A;
2074 wakaba 1.76 } elsif ($self->{next_char} eq 0x0027) { # '
2075 wakaba 1.77 !!!cp (183);
2076 wakaba 1.18 $self->{current_token}->{public_identifier} = ''; # DOCTYPE
2077 wakaba 1.57 $self->{state} = DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE;
2078 wakaba 1.18 !!!next-input-character;
2079     redo A;
2080 wakaba 1.76 } elsif ($self->{next_char} eq 0x003E) { # >
2081 wakaba 1.77 !!!cp (184);
2082 wakaba 1.18 !!!parse-error (type => 'no PUBLIC literal');
2083    
2084 wakaba 1.57 $self->{state} = DATA_STATE;
2085 wakaba 1.18 !!!next-input-character;
2086    
2087 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2088 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2089    
2090     redo A;
2091 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2092 wakaba 1.77 !!!cp (185);
2093 wakaba 1.18 !!!parse-error (type => 'unclosed DOCTYPE');
2094    
2095 wakaba 1.57 $self->{state} = DATA_STATE;
2096 wakaba 1.18 ## reconsume
2097    
2098 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2099 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2100    
2101     redo A;
2102     } else {
2103 wakaba 1.77 !!!cp (186);
2104 wakaba 1.18 !!!parse-error (type => 'string after PUBLIC');
2105 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2106 wakaba 1.73
2107 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2108 wakaba 1.18 !!!next-input-character;
2109     redo A;
2110     }
2111 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE) {
2112 wakaba 1.76 if ($self->{next_char} == 0x0022) { # "
2113 wakaba 1.77 !!!cp (187);
2114 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
2115 wakaba 1.18 !!!next-input-character;
2116     redo A;
2117 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2118 wakaba 1.77 !!!cp (188);
2119 wakaba 1.69 !!!parse-error (type => 'unclosed PUBLIC literal');
2120    
2121     $self->{state} = DATA_STATE;
2122     !!!next-input-character;
2123    
2124 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2125 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
2126    
2127     redo A;
2128 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2129 wakaba 1.77 !!!cp (189);
2130 wakaba 1.18 !!!parse-error (type => 'unclosed PUBLIC literal');
2131    
2132 wakaba 1.57 $self->{state} = DATA_STATE;
2133 wakaba 1.18 ## reconsume
2134    
2135 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2136 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2137    
2138     redo A;
2139     } else {
2140 wakaba 1.77 !!!cp (190);
2141 wakaba 1.18 $self->{current_token}->{public_identifier} # DOCTYPE
2142 wakaba 1.76 .= chr $self->{next_char};
2143 wakaba 1.18 ## Stay in the state
2144     !!!next-input-character;
2145     redo A;
2146     }
2147 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE) {
2148 wakaba 1.76 if ($self->{next_char} == 0x0027) { # '
2149 wakaba 1.77 !!!cp (191);
2150 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
2151 wakaba 1.18 !!!next-input-character;
2152     redo A;
2153 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2154 wakaba 1.77 !!!cp (192);
2155 wakaba 1.69 !!!parse-error (type => 'unclosed PUBLIC literal');
2156    
2157     $self->{state} = DATA_STATE;
2158     !!!next-input-character;
2159    
2160 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2161 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
2162    
2163     redo A;
2164 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2165 wakaba 1.77 !!!cp (193);
2166 wakaba 1.18 !!!parse-error (type => 'unclosed PUBLIC literal');
2167    
2168 wakaba 1.57 $self->{state} = DATA_STATE;
2169 wakaba 1.18 ## reconsume
2170    
2171 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2172 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2173    
2174     redo A;
2175     } else {
2176 wakaba 1.77 !!!cp (194);
2177 wakaba 1.18 $self->{current_token}->{public_identifier} # DOCTYPE
2178 wakaba 1.76 .= chr $self->{next_char};
2179 wakaba 1.18 ## Stay in the state
2180     !!!next-input-character;
2181     redo A;
2182     }
2183 wakaba 1.57 } elsif ($self->{state} == AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {
2184 wakaba 1.18 if ({
2185     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2186     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2187 wakaba 1.76 }->{$self->{next_char}}) {
2188 wakaba 1.77 !!!cp (195);
2189 wakaba 1.18 ## Stay in the state
2190     !!!next-input-character;
2191     redo A;
2192 wakaba 1.76 } elsif ($self->{next_char} == 0x0022) { # "
2193 wakaba 1.77 !!!cp (196);
2194 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2195 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
2196 wakaba 1.18 !!!next-input-character;
2197     redo A;
2198 wakaba 1.76 } elsif ($self->{next_char} == 0x0027) { # '
2199 wakaba 1.77 !!!cp (197);
2200 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2201 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
2202 wakaba 1.18 !!!next-input-character;
2203     redo A;
2204 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2205 wakaba 1.77 !!!cp (198);
2206 wakaba 1.57 $self->{state} = DATA_STATE;
2207 wakaba 1.18 !!!next-input-character;
2208    
2209     !!!emit ($self->{current_token}); # DOCTYPE
2210    
2211     redo A;
2212 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2213 wakaba 1.77 !!!cp (199);
2214 wakaba 1.18 !!!parse-error (type => 'unclosed DOCTYPE');
2215    
2216 wakaba 1.57 $self->{state} = DATA_STATE;
2217 wakaba 1.26 ## reconsume
2218 wakaba 1.18
2219 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2220 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2221    
2222     redo A;
2223     } else {
2224 wakaba 1.77 !!!cp (200);
2225 wakaba 1.18 !!!parse-error (type => 'string after PUBLIC literal');
2226 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2227 wakaba 1.73
2228 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2229 wakaba 1.18 !!!next-input-character;
2230     redo A;
2231     }
2232 wakaba 1.57 } elsif ($self->{state} == BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
2233 wakaba 1.18 if ({
2234     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2235     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2236 wakaba 1.76 }->{$self->{next_char}}) {
2237 wakaba 1.77 !!!cp (201);
2238 wakaba 1.18 ## Stay in the state
2239     !!!next-input-character;
2240     redo A;
2241 wakaba 1.76 } elsif ($self->{next_char} == 0x0022) { # "
2242 wakaba 1.77 !!!cp (202);
2243 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2244 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
2245 wakaba 1.18 !!!next-input-character;
2246     redo A;
2247 wakaba 1.76 } elsif ($self->{next_char} == 0x0027) { # '
2248 wakaba 1.77 !!!cp (203);
2249 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2250 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
2251 wakaba 1.18 !!!next-input-character;
2252     redo A;
2253 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2254 wakaba 1.77 !!!cp (204);
2255 wakaba 1.18 !!!parse-error (type => 'no SYSTEM literal');
2256 wakaba 1.57 $self->{state} = DATA_STATE;
2257 wakaba 1.18 !!!next-input-character;
2258    
2259 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2260 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2261    
2262     redo A;
2263 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2264 wakaba 1.77 !!!cp (205);
2265 wakaba 1.18 !!!parse-error (type => 'unclosed DOCTYPE');
2266    
2267 wakaba 1.57 $self->{state} = DATA_STATE;
2268 wakaba 1.26 ## reconsume
2269 wakaba 1.18
2270 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2271 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2272    
2273     redo A;
2274     } else {
2275 wakaba 1.77 !!!cp (206);
2276 wakaba 1.30 !!!parse-error (type => 'string after SYSTEM');
2277 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2278 wakaba 1.73
2279 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2280 wakaba 1.18 !!!next-input-character;
2281     redo A;
2282     }
2283 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE) {
2284 wakaba 1.76 if ($self->{next_char} == 0x0022) { # "
2285 wakaba 1.77 !!!cp (207);
2286 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
2287 wakaba 1.18 !!!next-input-character;
2288     redo A;
2289 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2290 wakaba 1.77 !!!cp (208);
2291 wakaba 1.69 !!!parse-error (type => 'unclosed PUBLIC literal');
2292    
2293     $self->{state} = DATA_STATE;
2294     !!!next-input-character;
2295    
2296 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2297 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
2298    
2299     redo A;
2300 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2301 wakaba 1.77 !!!cp (209);
2302 wakaba 1.18 !!!parse-error (type => 'unclosed SYSTEM literal');
2303    
2304 wakaba 1.57 $self->{state} = DATA_STATE;
2305 wakaba 1.18 ## reconsume
2306    
2307 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2308 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2309    
2310     redo A;
2311     } else {
2312 wakaba 1.77 !!!cp (210);
2313 wakaba 1.18 $self->{current_token}->{system_identifier} # DOCTYPE
2314 wakaba 1.76 .= chr $self->{next_char};
2315 wakaba 1.18 ## Stay in the state
2316     !!!next-input-character;
2317     redo A;
2318     }
2319 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE) {
2320 wakaba 1.76 if ($self->{next_char} == 0x0027) { # '
2321 wakaba 1.77 !!!cp (211);
2322 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
2323 wakaba 1.18 !!!next-input-character;
2324     redo A;
2325 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2326 wakaba 1.77 !!!cp (212);
2327 wakaba 1.69 !!!parse-error (type => 'unclosed PUBLIC literal');
2328    
2329     $self->{state} = DATA_STATE;
2330     !!!next-input-character;
2331    
2332 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2333 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
2334    
2335     redo A;
2336 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2337 wakaba 1.77 !!!cp (213);
2338 wakaba 1.18 !!!parse-error (type => 'unclosed SYSTEM literal');
2339    
2340 wakaba 1.57 $self->{state} = DATA_STATE;
2341 wakaba 1.18 ## reconsume
2342    
2343 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2344 wakaba 1.1 !!!emit ($self->{current_token}); # DOCTYPE
2345    
2346     redo A;
2347     } else {
2348 wakaba 1.77 !!!cp (214);
2349 wakaba 1.18 $self->{current_token}->{system_identifier} # DOCTYPE
2350 wakaba 1.76 .= chr $self->{next_char};
2351 wakaba 1.18 ## Stay in the state
2352     !!!next-input-character;
2353     redo A;
2354     }
2355 wakaba 1.57 } elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
2356 wakaba 1.18 if ({
2357     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2358     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2359 wakaba 1.76 }->{$self->{next_char}}) {
2360 wakaba 1.77 !!!cp (215);
2361 wakaba 1.18 ## Stay in the state
2362     !!!next-input-character;
2363     redo A;
2364 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2365 wakaba 1.77 !!!cp (216);
2366 wakaba 1.57 $self->{state} = DATA_STATE;
2367 wakaba 1.18 !!!next-input-character;
2368    
2369     !!!emit ($self->{current_token}); # DOCTYPE
2370    
2371     redo A;
2372 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2373 wakaba 1.77 !!!cp (217);
2374 wakaba 1.18 !!!parse-error (type => 'unclosed DOCTYPE');
2375    
2376 wakaba 1.57 $self->{state} = DATA_STATE;
2377 wakaba 1.26 ## reconsume
2378 wakaba 1.18
2379 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2380 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2381    
2382     redo A;
2383     } else {
2384 wakaba 1.77 !!!cp (218);
2385 wakaba 1.18 !!!parse-error (type => 'string after SYSTEM literal');
2386 wakaba 1.75 #$self->{current_token}->{quirks} = 1;
2387 wakaba 1.73
2388 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2389 wakaba 1.1 !!!next-input-character;
2390     redo A;
2391     }
2392 wakaba 1.57 } elsif ($self->{state} == BOGUS_DOCTYPE_STATE) {
2393 wakaba 1.76 if ($self->{next_char} == 0x003E) { # >
2394 wakaba 1.77 !!!cp (219);
2395 wakaba 1.57 $self->{state} = DATA_STATE;
2396 wakaba 1.1 !!!next-input-character;
2397    
2398     !!!emit ($self->{current_token}); # DOCTYPE
2399    
2400     redo A;
2401 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2402 wakaba 1.77 !!!cp (220);
2403 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
2404 wakaba 1.57 $self->{state} = DATA_STATE;
2405 wakaba 1.1 ## reconsume
2406    
2407     !!!emit ($self->{current_token}); # DOCTYPE
2408    
2409     redo A;
2410     } else {
2411 wakaba 1.77 !!!cp (221);
2412 wakaba 1.1 ## Stay in the state
2413     !!!next-input-character;
2414     redo A;
2415     }
2416     } else {
2417     die "$0: $self->{state}: Unknown state";
2418     }
2419     } # A
2420    
2421     die "$0: _get_next_token: unexpected case";
2422     } # _get_next_token
2423    
## Tries to read a character reference from the input, starting at
## |$self->{next_char}| (presumably the character right after an "&" --
## the literal fallbacks below prepend '&' to the returned data).
##
## Arguments:
##   $in_attr    - when true, a named reference that lacks ";" and is
##                 followed by further name characters is returned
##                 literally ('&' . name) instead of being expanded.
##   $additional - one extra code point which, like whitespace, "<", "&"
##                 and EOF (-1), means "this is not a reference".
##
## Returns a CHARACTER_TOKEN hash reference (|has_reference| is set only
## when a reference was actually expanded), or |undef| when no token is
## produced.  Reported errors and returned tokens carry the line/column
## remembered in |$l| / |$c| on entry.
2424 wakaba 1.72 sub _tokenize_attempt_to_consume_an_entity ($$$) {
2425     my ($self, $in_attr, $additional) = @_;
2426 wakaba 1.20
2427 wakaba 1.112 my ($l, $c) = ($self->{line_prev}, $self->{column_prev});
2428    
2429 wakaba 1.20 if ({
2430     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, # HT, LF, VT, FF,
2431     0x0020 => 1, 0x003C => 1, 0x0026 => 1, -1 => 1, # SP, <, & # 0x000D # CR
2432 wakaba 1.72 $additional => 1,
2433 wakaba 1.76 }->{$self->{next_char}}) {
2434 wakaba 1.78 !!!cp (1001);
2435 wakaba 1.20 ## Don't consume
2436     ## No error
2437     return undef;
2438 wakaba 1.76 } elsif ($self->{next_char} == 0x0023) { # #
2439 wakaba 1.1 !!!next-input-character;
2440 wakaba 1.76 if ($self->{next_char} == 0x0078 or # x
2441     $self->{next_char} == 0x0058) { # X
## Hexadecimal numeric reference.  |$code| stays undefined until the
## first hexadecimal digit has been seen.
2442 wakaba 1.26 my $code;
2443 wakaba 1.1 X: {
2444 wakaba 1.76 my $x_char = $self->{next_char};
2445 wakaba 1.1 !!!next-input-character;
2446 wakaba 1.76 if (0x0030 <= $self->{next_char} and
2447     $self->{next_char} <= 0x0039) { # 0..9
2448 wakaba 1.78 !!!cp (1002);
2449 wakaba 1.26 $code ||= 0;
2450     $code *= 0x10;
2451 wakaba 1.76 $code += $self->{next_char} - 0x0030;
2452 wakaba 1.1 redo X;
2453 wakaba 1.76 } elsif (0x0061 <= $self->{next_char} and
2454     $self->{next_char} <= 0x0066) { # a..f
2455 wakaba 1.78 !!!cp (1003);
2456 wakaba 1.26 $code ||= 0;
2457     $code *= 0x10;
## "a" (0x61) - 0x60 + 9 == 10, ..., "f" == 15.
2458 wakaba 1.76 $code += $self->{next_char} - 0x0060 + 9;
2459 wakaba 1.1 redo X;
2460 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
2461     $self->{next_char} <= 0x0046) { # A..F
2462 wakaba 1.78 !!!cp (1004);
2463 wakaba 1.26 $code ||= 0;
2464     $code *= 0x10;
## "A" (0x41) - 0x40 + 9 == 10, ..., "F" == 15.
2465 wakaba 1.76 $code += $self->{next_char} - 0x0040 + 9;
2466 wakaba 1.1 redo X;
2467 wakaba 1.26 } elsif (not defined $code) { # no hexadecimal digit
2468 wakaba 1.78 !!!cp (1005);
2469 wakaba 1.112 !!!parse-error (type => 'bare hcro', line => $l, column => $c);
## Push back the "x"/"X" and the current character, and make "#" the
## next character again.
2470 wakaba 1.76 !!!back-next-input-character ($x_char, $self->{next_char});
2471     $self->{next_char} = 0x0023; # #
2472 wakaba 1.1 return undef;
2473 wakaba 1.76 } elsif ($self->{next_char} == 0x003B) { # ;
2474 wakaba 1.78 !!!cp (1006);
2475 wakaba 1.1 !!!next-input-character;
2476     } else {
2477 wakaba 1.78 !!!cp (1007);
2478 wakaba 1.112 !!!parse-error (type => 'no refc', line => $l, column => $c);
2479 wakaba 1.1 }
2480    
## Replace code points that must not be produced by a reference.
2481 wakaba 1.26 if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) {
2482 wakaba 1.78 !!!cp (1008);
2483 wakaba 1.112 !!!parse-error (type => (sprintf 'invalid character reference:U+%04X', $code), line => $l, column => $c);
2484 wakaba 1.26 $code = 0xFFFD;
2485     } elsif ($code > 0x10FFFF) {
2486 wakaba 1.78 !!!cp (1009);
2487 wakaba 1.112 !!!parse-error (type => (sprintf 'invalid character reference:U-%08X', $code), line => $l, column => $c);
2488 wakaba 1.26 $code = 0xFFFD;
2489     } elsif ($code == 0x000D) {
2490 wakaba 1.78 !!!cp (1010);
2491 wakaba 1.112 !!!parse-error (type => 'CR character reference', line => $l, column => $c);
2492 wakaba 1.26 $code = 0x000A;
2493     } elsif (0x80 <= $code and $code <= 0x9F) {
2494 wakaba 1.78 !!!cp (1011);
2495 wakaba 1.112 !!!parse-error (type => (sprintf 'C1 character reference:U+%04X', $code), line => $l, column => $c);
2496 wakaba 1.26 $code = $c1_entity_char->{$code};
2497 wakaba 1.1 }
2498    
2499 wakaba 1.66 return {type => CHARACTER_TOKEN, data => chr $code,
2500 wakaba 1.118 has_reference => 1,
2501 wakaba 1.120 line => $l, column => $c,
2502 wakaba 1.118 };
2503 wakaba 1.1 } # X
2504 wakaba 1.76 } elsif (0x0030 <= $self->{next_char} and
2505     $self->{next_char} <= 0x0039) { # 0..9
## Decimal numeric reference.
2506     my $code = $self->{next_char} - 0x0030;
2507 wakaba 1.1 !!!next-input-character;
2508    
2509 wakaba 1.76 while (0x0030 <= $self->{next_char} and
2510     $self->{next_char} <= 0x0039) { # 0..9
2511 wakaba 1.78 !!!cp (1012);
2512 wakaba 1.1 $code *= 10;
2513 wakaba 1.76 $code += $self->{next_char} - 0x0030;
2514 wakaba 1.1
2515     !!!next-input-character;
2516     }
2517    
2518 wakaba 1.76 if ($self->{next_char} == 0x003B) { # ;
2519 wakaba 1.78 !!!cp (1013);
2520 wakaba 1.1 !!!next-input-character;
2521     } else {
2522 wakaba 1.78 !!!cp (1014);
2523 wakaba 1.112 !!!parse-error (type => 'no refc', line => $l, column => $c);
2524 wakaba 1.1 }
2525    
## Same replacement rules as in the hexadecimal branch above.
2526 wakaba 1.26 if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) {
2527 wakaba 1.78 !!!cp (1015);
2528 wakaba 1.112 !!!parse-error (type => (sprintf 'invalid character reference:U+%04X', $code), line => $l, column => $c);
2529 wakaba 1.26 $code = 0xFFFD;
2530     } elsif ($code > 0x10FFFF) {
2531 wakaba 1.78 !!!cp (1016);
2532 wakaba 1.112 !!!parse-error (type => (sprintf 'invalid character reference:U-%08X', $code), line => $l, column => $c);
2533 wakaba 1.26 $code = 0xFFFD;
2534     } elsif ($code == 0x000D) {
2535 wakaba 1.78 !!!cp (1017);
2536 wakaba 1.112 !!!parse-error (type => 'CR character reference', line => $l, column => $c);
2537 wakaba 1.26 $code = 0x000A;
2538 wakaba 1.4 } elsif (0x80 <= $code and $code <= 0x9F) {
2539 wakaba 1.78 !!!cp (1018);
2540 wakaba 1.112 !!!parse-error (type => (sprintf 'C1 character reference:U+%04X', $code), line => $l, column => $c);
2541 wakaba 1.4 $code = $c1_entity_char->{$code};
2542 wakaba 1.1 }
2543    
2544 wakaba 1.112 return {type => CHARACTER_TOKEN, data => chr $code, has_reference => 1,
2545 wakaba 1.120 line => $l, column => $c,
2546 wakaba 1.118 };
2547 wakaba 1.1 } else {
2548 wakaba 1.78 !!!cp (1019);
2549 wakaba 1.112 !!!parse-error (type => 'bare nero', line => $l, column => $c);
2550 wakaba 1.76 !!!back-next-input-character ($self->{next_char});
2551     $self->{next_char} = 0x0023; # #
2552 wakaba 1.1 return undef;
2553     }
2554 wakaba 1.76 } elsif ((0x0041 <= $self->{next_char} and
2555     $self->{next_char} <= 0x005A) or
2556     (0x0061 <= $self->{next_char} and
2557     $self->{next_char} <= 0x007A)) {
## Named reference.  |$entity_name| accumulates the characters read so
## far; |$value| is the replacement text of the longest name matched so
## far, followed by any characters read after that match.
2558     my $entity_name = chr $self->{next_char};
2559 wakaba 1.1 !!!next-input-character;
2560    
2561     my $value = $entity_name;
## |$match|:  0 = no name matched yet; 1 = matched a name ending in ";";
## -1 = matched a name without ";"; below -1 = such a match followed by
## further non-matching name characters (doubled for each of them).
2562 wakaba 1.37 my $match = 0;
2563 wakaba 1.16 require Whatpm::_NamedEntityList;
2564     our $EntityChar;
2565 wakaba 1.1
2566     while (length $entity_name < 10 and
2567     ## NOTE: Some number greater than the maximum length of entity name
2568 wakaba 1.76 ((0x0041 <= $self->{next_char} and # A
2569     $self->{next_char} <= 0x005A) or # Z
2570     (0x0061 <= $self->{next_char} and # a
2571     $self->{next_char} <= 0x007A) or # z
2572     (0x0030 <= $self->{next_char} and # 0
2573     $self->{next_char} <= 0x0039) or # 9
2574     $self->{next_char} == 0x003B)) { # ;
2575     $entity_name .= chr $self->{next_char};
2576 wakaba 1.16 if (defined $EntityChar->{$entity_name}) {
2577 wakaba 1.76 if ($self->{next_char} == 0x003B) { # ;
2578 wakaba 1.78 !!!cp (1020);
2579 wakaba 1.26 $value = $EntityChar->{$entity_name};
2580 wakaba 1.16 $match = 1;
2581     !!!next-input-character;
2582     last;
2583 wakaba 1.37 } else {
2584 wakaba 1.78 !!!cp (1021);
2585 wakaba 1.26 $value = $EntityChar->{$entity_name};
2586     $match = -1;
2587 wakaba 1.37 !!!next-input-character;
2588 wakaba 1.16 }
2589 wakaba 1.1 } else {
2590 wakaba 1.78 !!!cp (1022);
2591 wakaba 1.76 $value .= chr $self->{next_char};
2592 wakaba 1.37 $match *= 2;
2593     !!!next-input-character;
2594 wakaba 1.1 }
2595     }
2596    
2597 wakaba 1.16 if ($match > 0) {
2598 wakaba 1.78 !!!cp (1023);
2599 wakaba 1.112 return {type => CHARACTER_TOKEN, data => $value, has_reference => 1,
2600 wakaba 1.120 line => $l, column => $c,
2601 wakaba 1.118 };
2602 wakaba 1.16 } elsif ($match < 0) {
2603 wakaba 1.112 !!!parse-error (type => 'no refc', line => $l, column => $c);
2604 wakaba 1.37 if ($in_attr and $match < -1) {
2605 wakaba 1.78 !!!cp (1024);
2606 wakaba 1.112 return {type => CHARACTER_TOKEN, data => '&'.$entity_name,
2607 wakaba 1.120 line => $l, column => $c,
2608 wakaba 1.118 };
2609 wakaba 1.37 } else {
2610 wakaba 1.78 !!!cp (1025);
2611 wakaba 1.112 return {type => CHARACTER_TOKEN, data => $value, has_reference => 1,
2612 wakaba 1.120 line => $l, column => $c,
2613 wakaba 1.118 };
2614 wakaba 1.37 }
2615 wakaba 1.1 } else {
2616 wakaba 1.78 !!!cp (1026);
2617 wakaba 1.112 !!!parse-error (type => 'bare ero', line => $l, column => $c);
2618 wakaba 1.66 ## NOTE: "No characters are consumed" in the spec.
2619 wakaba 1.112 return {type => CHARACTER_TOKEN, data => '&'.$value,
2620 wakaba 1.120 line => $l, column => $c,
2621 wakaba 1.118 };
2622 wakaba 1.1 }
2623     } else {
2624 wakaba 1.78 !!!cp (1027);
2625 wakaba 1.1 ## no characters are consumed
2626 wakaba 1.112 !!!parse-error (type => 'bare ero', line => $l, column => $c);
2627 wakaba 1.1 return undef;
2628     }
2629     } # _tokenize_attempt_to_consume_an_entity
2630    
## Prepares the target document for tree construction: strict error
## checking is switched off and the document is flagged as an HTML
## document.  |$self->{document}| MUST have been set by the caller.
## The value of the final |manakai_is_html| call is returned.
sub _initialize_tree_constructor ($) {
  my ($parser) = @_;
  my $doc = $parser->{document};

  $doc->strict_error_checking (0);
  ## TODO: Turn mutation events off # MUST
  ## TODO: Turn loose Document option (manakai extension) on

  return $doc->manakai_is_html (1); # MUST
} # _initialize_tree_constructor
2639    
## Counterpart of |_initialize_tree_constructor|: switches strict error
## checking on the document back on once tree construction has finished.
## The value of the |strict_error_checking| call is returned.
sub _terminate_tree_constructor ($) {
  my ($parser) = @_;
  my $doc = $parser->{document};
  ## TODO: Turn mutation events on
  return $doc->strict_error_checking (1);
} # _terminate_tree_constructor
2645    
2646     ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
2647    
2648 wakaba 1.3 { # tree construction stage
2649     my $token;
2650    
## Entry point of the tree construction stage.  Fetches the first token,
## resets the per-document parser state and then runs the insertion
## modes in order: "initial", "before html", and finally the main loop,
## which is entered in the "before head" insertion mode.
sub _construct_tree ($) {
  my $self = shift;

  ## NOTE: It is not defined when an interactive UA renders the
  ## $self->{document} available to the user, or when it begins
  ## accepting user input.

  ## NOTE: Appending a character means: collect it and all subsequent
  ## consecutive characters and insert one Text node whose data is the
  ## concatenation of all those characters. # MUST

  !!!next-token;

  ## Forget any state left over from a previous run.
  $self->{$_} = undef for qw(form_element head_element inner_html_node);
  $self->{open_elements} = [];

  ## NOTE: The "initial" insertion mode.
  $self->_tree_construction_initial; # MUST

  ## NOTE: The "before html" insertion mode.
  $self->_tree_construction_root_element;

  ## NOTE: The "before head" insertion mode and so on.
  $self->{insertion_mode} = BEFORE_HEAD_IM;
  $self->_tree_construction_main;
} # _construct_tree
2679    
## Implements the "initial" insertion mode.
##
## Consumes tokens (via the shared |$token| variable) until the document
## mode has been decided:
##   - A DOCTYPE token is checked for HTML5 conformance, appended to the
##     document as a document type definition node, and used to choose
##     the compatibility mode ('quirks' / 'limited quirks' / default).
##   - Leading white space and comments are consumed and the mode stays.
##   - Any other token means there is no DOCTYPE: a parse error is
##     reported, the document is put into quirks mode and the token is
##     left in |$token| to be reprocessed by the next insertion mode.
## Returns nothing meaningful; dies on an unknown token type.
sub _tree_construction_initial ($) {
  my $self = shift;

  ## NOTE: "initial" insertion mode

  INITIAL: {
    if ($token->{type} == DOCTYPE_TOKEN) {
      ## NOTE: Conformance checkers MAY, instead of reporting "not HTML5"
      ## error, switch to a conformance checking mode for another
      ## language.
      my $doctype_name = $token->{name};
      $doctype_name = '' unless defined $doctype_name;
      $doctype_name =~ tr/a-z/A-Z/;
      if (not defined $token->{name} or # <!DOCTYPE>
          defined $token->{public_identifier} or
          defined $token->{system_identifier}) {
        !!!cp ('t1');
        !!!parse-error (type => 'not HTML5', token => $token);
      } elsif ($doctype_name ne 'HTML') {
        !!!cp ('t2');
        ## ISSUE: ASCII case-insensitive? (in fact it does not matter)
        !!!parse-error (type => 'not HTML5', token => $token);
      } else {
        !!!cp ('t3');
      }

      my $doctype = $self->{document}->create_document_type_definition
          ($token->{name}); ## ISSUE: If name is missing (e.g. <!DOCTYPE>)?
      ## NOTE: Default value for both |public_id| and |system_id| attributes
      ## are empty strings, so that we don't set any value in missing cases.
      $doctype->public_id ($token->{public_identifier})
          if defined $token->{public_identifier};
      $doctype->system_id ($token->{system_identifier})
          if defined $token->{system_identifier};
      ## NOTE: Other DocumentType attributes are null or empty lists.
      ## ISSUE: internalSubset = null??
      $self->{document}->append_child ($doctype);

      if ($token->{quirks} or $doctype_name ne 'HTML') {
        !!!cp ('t4');
        $self->{document}->manakai_compat_mode ('quirks');
      } elsif (defined $token->{public_identifier}) {
        ## The public identifier is compared case-insensitively by
        ## uppercasing it first; all keys below are in uppercase.
        my $pubid = $token->{public_identifier};
        $pubid =~ tr/a-z/A-Z/;
        if ({
          "+//SILMARIL//DTD HTML PRO V0R11 19970101//EN" => 1,
          "-//ADVASOFT LTD//DTD HTML 3.0 ASWEDIT + EXTENSIONS//EN" => 1,
          "-//AS//DTD HTML 3.0 ASWEDIT + EXTENSIONS//EN" => 1,
          "-//IETF//DTD HTML 2.0 LEVEL 1//EN" => 1,
          "-//IETF//DTD HTML 2.0 LEVEL 2//EN" => 1,
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 1//EN" => 1,
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 2//EN" => 1,
          "-//IETF//DTD HTML 2.0 STRICT//EN" => 1,
          "-//IETF//DTD HTML 2.0//EN" => 1,
          "-//IETF//DTD HTML 2.1E//EN" => 1,
          "-//IETF//DTD HTML 3.0//EN" => 1,
          "-//IETF//DTD HTML 3.0//EN//" => 1,
          "-//IETF//DTD HTML 3.2 FINAL//EN" => 1,
          "-//IETF//DTD HTML 3.2//EN" => 1,
          "-//IETF//DTD HTML 3//EN" => 1,
          "-//IETF//DTD HTML LEVEL 0//EN" => 1,
          "-//IETF//DTD HTML LEVEL 0//EN//2.0" => 1,
          "-//IETF//DTD HTML LEVEL 1//EN" => 1,
          "-//IETF//DTD HTML LEVEL 1//EN//2.0" => 1,
          "-//IETF//DTD HTML LEVEL 2//EN" => 1,
          "-//IETF//DTD HTML LEVEL 2//EN//2.0" => 1,
          "-//IETF//DTD HTML LEVEL 3//EN" => 1,
          "-//IETF//DTD HTML LEVEL 3//EN//3.0" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 0//EN" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 0//EN//2.0" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 1//EN" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 1//EN//2.0" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 2//EN" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 2//EN//2.0" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 3//EN" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 3//EN//3.0" => 1,
          "-//IETF//DTD HTML STRICT//EN" => 1,
          "-//IETF//DTD HTML STRICT//EN//2.0" => 1,
          "-//IETF//DTD HTML STRICT//EN//3.0" => 1,
          "-//IETF//DTD HTML//EN" => 1,
          "-//IETF//DTD HTML//EN//2.0" => 1,
          "-//IETF//DTD HTML//EN//3.0" => 1,
          "-//METRIUS//DTD METRIUS PRESENTATIONAL//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML STRICT//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 TABLES//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML STRICT//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 TABLES//EN" => 1,
          "-//NETSCAPE COMM. CORP.//DTD HTML//EN" => 1,
          "-//NETSCAPE COMM. CORP.//DTD STRICT HTML//EN" => 1,
          "-//O'REILLY AND ASSOCIATES//DTD HTML 2.0//EN" => 1,
          "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED 1.0//EN" => 1,
          "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED RELAXED 1.0//EN" => 1,
          "-//SOFTQUAD SOFTWARE//DTD HOTMETAL PRO 6.0::19990601::EXTENSIONS TO HTML 4.0//EN" => 1,
          "-//SOFTQUAD//DTD HOTMETAL PRO 4.0::19971010::EXTENSIONS TO HTML 4.0//EN" => 1,
          "-//SPYGLASS//DTD HTML 2.0 EXTENDED//EN" => 1,
          "-//SQ//DTD HTML 2.0 HOTMETAL + EXTENSIONS//EN" => 1,
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA HTML//EN" => 1,
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA STRICT HTML//EN" => 1,
          "-//W3C//DTD HTML 3 1995-03-24//EN" => 1,
          "-//W3C//DTD HTML 3.2 DRAFT//EN" => 1,
          "-//W3C//DTD HTML 3.2 FINAL//EN" => 1,
          "-//W3C//DTD HTML 3.2//EN" => 1,
          "-//W3C//DTD HTML 3.2S DRAFT//EN" => 1,
          "-//W3C//DTD HTML 4.0 FRAMESET//EN" => 1,
          "-//W3C//DTD HTML 4.0 TRANSITIONAL//EN" => 1,
          ## NOTE(review): "EXPERIMETNAL" is kept exactly as found;
          ## confirm against the specification's list before changing it.
          "-//W3C//DTD HTML EXPERIMETNAL 19960712//EN" => 1,
          "-//W3C//DTD HTML EXPERIMENTAL 970421//EN" => 1,
          "-//W3C//DTD W3 HTML//EN" => 1,
          "-//W3O//DTD W3 HTML 3.0//EN" => 1,
          "-//W3O//DTD W3 HTML 3.0//EN//" => 1,
          "-//W3O//DTD W3 HTML STRICT 3.0//EN//" => 1,
          "-//WEBTECHS//DTD MOZILLA HTML 2.0//EN" => 1,
          "-//WEBTECHS//DTD MOZILLA HTML//EN" => 1,
          "-/W3C/DTD HTML 4.0 TRANSITIONAL/EN" => 1,
          "HTML" => 1,
        }->{$pubid}) {
          !!!cp ('t5');
          $self->{document}->manakai_compat_mode ('quirks');
        } elsif ($pubid eq "-//W3C//DTD HTML 4.01 FRAMESET//EN" or
                 $pubid eq "-//W3C//DTD HTML 4.01 TRANSITIONAL//EN") {
          if (defined $token->{system_identifier}) {
            !!!cp ('t6');
            $self->{document}->manakai_compat_mode ('quirks');
          } else {
            !!!cp ('t7');
            $self->{document}->manakai_compat_mode ('limited quirks');
          }
        } elsif ($pubid eq "-//W3C//DTD XHTML 1.0 FRAMESET//EN" or
                 $pubid eq "-//W3C//DTD XHTML 1.0 TRANSITIONAL//EN") {
          !!!cp ('t8');
          $self->{document}->manakai_compat_mode ('limited quirks');
        } else {
          !!!cp ('t9');
        }
      } else {
        !!!cp ('t10');
      }
      if (defined $token->{system_identifier}) {
        ## The system identifier is compared case-insensitively by
        ## lowercasing it first.  A match overrides whatever mode was
        ## selected from the public identifier above.
        my $sysid = $token->{system_identifier};
        $sysid =~ tr/A-Z/a-z/;
        if ($sysid eq "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
          ## TODO: Check the spec: PUBLIC "(limited quirks)" "(quirks)"
          $self->{document}->manakai_compat_mode ('quirks');
          !!!cp ('t11');
        } else {
          !!!cp ('t12');
        }
      } else {
        !!!cp ('t13');
      }

      ## Go to the "before html" insertion mode.
      !!!next-token;
      return;
    } elsif ({
              START_TAG_TOKEN, 1,
              END_TAG_TOKEN, 1,
              END_OF_FILE_TOKEN, 1,
             }->{$token->{type}}) {
      !!!cp ('t14');
      !!!parse-error (type => 'no DOCTYPE', token => $token);
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the "before html" insertion mode.
      ## reprocess
      return;
    } elsif ($token->{type} == CHARACTER_TOKEN) {
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D
        ## Ignore the token

        unless (length $token->{data}) {
          !!!cp ('t15');
          ## Stay in the insertion mode.
          !!!next-token;
          redo INITIAL;
        } else {
          !!!cp ('t16');
        }
      } else {
        !!!cp ('t17');
      }

      !!!parse-error (type => 'no DOCTYPE', token => $token);
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the "before html" insertion mode.
      ## reprocess
      return;
    } elsif ($token->{type} == COMMENT_TOKEN) {
      !!!cp ('t18');
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);

      ## Stay in the insertion mode.
      !!!next-token;
      redo INITIAL;
    } else {
      die "$0: $token->{type}: Unknown token type";
    }
  } # INITIAL

  die "$0: _tree_construction_initial: This should be never reached";
} # _tree_construction_initial
2883    
## Implements the "before html" insertion mode.
##
## Skips stray DOCTYPE tokens (with a parse error), appends comments to
## the document and ignores leading white space.  Then the root |html|
## element is created -- from an |html| start tag token if there is one,
## otherwise implicitly -- appended to the document and pushed onto the
## stack of open elements.
##
## |$self->{application_cache_selection}| is invoked exactly once per
## call: with the value of the |manifest| attribute if the |html| start
## tag has one, with |undef| otherwise.
##
## When the root element is created implicitly, the current token is
## left in |$token| so that the next insertion mode reprocesses it.
sub _tree_construction_root_element ($) {
  my $self = shift;

  ## NOTE: "before html" insertion mode.

  B: {
    if ($token->{type} == DOCTYPE_TOKEN) {
      !!!cp ('t19');
      !!!parse-error (type => 'in html:#DOCTYPE', token => $token);
      ## Ignore the token
      ## Stay in the insertion mode.
      !!!next-token;
      redo B;
    } elsif ($token->{type} == COMMENT_TOKEN) {
      !!!cp ('t20');
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);
      ## Stay in the insertion mode.
      !!!next-token;
      redo B;
    } elsif ($token->{type} == CHARACTER_TOKEN) {
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D
        ## Ignore the token.

        unless (length $token->{data}) {
          !!!cp ('t21');
          ## Stay in the insertion mode.
          !!!next-token;
          redo B;
        } else {
          !!!cp ('t22');
        }
      } else {
        !!!cp ('t23');
      }

      ## NOTE: The application cache selection callback is invoked by
      ## the common code below, after the root element has been
      ## created; it must not be invoked here as well.

      #
    } elsif ($token->{type} == START_TAG_TOKEN) {
      if ($token->{tag_name} eq 'html') {
        my $root_element;
        !!!create-element ($root_element, $token->{tag_name}, $token->{attributes}, $token);
        $self->{document}->append_child ($root_element);
        push @{$self->{open_elements}},
            [$root_element, $el_category->{html}];

        if ($token->{attributes}->{manifest}) {
          !!!cp ('t24');
          $self->{application_cache_selection}
              ->($token->{attributes}->{manifest}->{value});
          ## ISSUE: Spec is unclear on relative references.
          ## According to Hixie (#whatwg 2008-03-19), it should be
          ## resolved against the base URI of the document in HTML
          ## or xml:base of the element in XHTML.
        } else {
          !!!cp ('t25');
          $self->{application_cache_selection}->(undef);
        }

        !!!next-token;
        return; ## Go to the "before head" insertion mode.
      } else {
        !!!cp ('t25.1');
        #
      }
    } elsif ({
              END_TAG_TOKEN, 1,
              END_OF_FILE_TOKEN, 1,
             }->{$token->{type}}) {
      !!!cp ('t26');
      #
    } else {
      die "$0: $token->{type}: Unknown token type";
    }

    ## Every branch falling through to here creates the root element
    ## implicitly.
    my $root_element; !!!create-element ($root_element, 'html',, $token);
    $self->{document}->append_child ($root_element);
    push @{$self->{open_elements}}, [$root_element, $el_category->{html}];

    $self->{application_cache_selection}->(undef);

    ## NOTE: Reprocess the token.
    return; ## Go to the "before head" insertion mode.

    ## ISSUE: There is an issue in the spec
  } # B

  die "$0: _tree_construction_root_element: This should never be reached";
} # _tree_construction_root_element
2974    
## Resets |$self->{insertion_mode}| according to the current stack of
## open elements ("reset the insertion mode appropriately").
##
## The stack is walked from the current node towards the root.  The
## first node whose local name has an entry in the table below decides
## the mode.  On reaching the root |html| element the mode becomes
## "before head" or "after head" depending on whether a |head| element
## has been seen.  When the bottom of the stack is reached without a
## decision the mode becomes "in body".
##
## When |$self->{inner_html_node}| is defined, the bottom entry of the
## stack is replaced by that node for the purpose of the lookup, unless
## that node is a table cell.
sub _reset_insertion_mode ($) {
  my $self = shift;

  ## Local name -> insertion mode (used by steps 4..13).  The table does
  ## not depend on the node, so it is built once per call.
  my $mode_of = {
    select => IN_SELECT_IM,
    ## NOTE: |option| and |optgroup| do not set
    ## insertion mode to "in select" by themselves.
    td => IN_CELL_IM,
    th => IN_CELL_IM,
    tr => IN_ROW_IM,
    tbody => IN_TABLE_BODY_IM,
    thead => IN_TABLE_BODY_IM,
    tfoot => IN_TABLE_BODY_IM,
    caption => IN_CAPTION_IM,
    colgroup => IN_COLUMN_GROUP_IM,
    table => IN_TABLE_IM,
    head => IN_BODY_IM, # not in head!
    body => IN_BODY_IM,
    frameset => IN_FRAMESET_IM,
  };

  ## Step 1
  my $last;

  ## Step 2
  my $i = -1;
  my $node = $self->{open_elements}->[$i];

  ## Step 3
  S3: {
    if ($self->{open_elements}->[0]->[0] eq $node->[0]) {
      $last = 1;
      if (defined $self->{inner_html_node}) {
        if ($self->{inner_html_node}->[1] & TABLE_CELL_EL) {
          !!!cp ('t27');
          #
        } else {
          !!!cp ('t28');
          $node = $self->{inner_html_node};
        }
      }
    }

    ## Step 4..13
    my $new_mode = $mode_of->{$node->[0]->manakai_local_name};
    ## TODO: Foreign namespace case OK?
    if (defined $new_mode) {
      ## NOTE: Assignment and |return| are separate statements so that
      ## the return does not depend on the numeric value of the mode.
      $self->{insertion_mode} = $new_mode;
      return;
    }

    ## Step 14
    if ($node->[1] & HTML_EL) {
      unless (defined $self->{head_element}) {
        !!!cp ('t29');
        $self->{insertion_mode} = BEFORE_HEAD_IM;
      } else {
        ## ISSUE: Can this state be reached?
        !!!cp ('t30');
        $self->{insertion_mode} = AFTER_HEAD_IM;
      }
      return;
    } else {
      !!!cp ('t31');
    }

    ## Step 15
    if ($last) {
      $self->{insertion_mode} = IN_BODY_IM;
      return;
    }

    ## Step 16
    $i--;
    $node = $self->{open_elements}->[$i];

    ## Step 17
    redo S3;
  } # S3

  die "$0: _reset_insertion_mode: This line should never be reached";
} # _reset_insertion_mode
3049    
3050     sub _tree_construction_main ($) {
3051     my $self = shift;
3052    
3053 wakaba 1.1 my $active_formatting_elements = [];
3054    
## Reconstructs the active formatting elements.
##
## Argument: a code reference (|$insert|) that is called with each newly
## cloned element node.
##
## Nothing is done if the list is empty, or if its last entry is a
## marker or is already in the stack of open elements.  Otherwise the
## list is walked backwards (S4) to the entry after the nearest marker /
## still-open entry (or to the first entry), and from there forwards
## (S7): each entry's node is shallow-cloned, handed to |$insert|,
## pushed onto the stack of open elements, and the list entry is
## replaced by the new stack entry.
3055     my $reconstruct_active_formatting_elements = sub { # MUST
3056     my $insert = shift;
3057    
3058     ## Step 1
3059     return unless @$active_formatting_elements;
3060    
3061     ## Step 3
3062     my $i = -1;
3063     my $entry = $active_formatting_elements->[$i];
3064    
3065     ## Step 2
3066     return if $entry->[0] eq '#marker';
3067 wakaba 1.3 for (@{$self->{open_elements}}) {
3068 wakaba 1.1 if ($entry->[0] eq $_->[0]) {
3069 wakaba 1.79 !!!cp ('t32');
3070 wakaba 1.1 return;
3071     }
3072     }
3073    
3074     S4: {
3075     ## Step 4
## Stop rewinding once the first entry of the list has been reached.
3076     last S4 if $active_formatting_elements->[0]->[0] eq $entry->[0];
3077    
3078     ## Step 5
3079     $i--;
3080     $entry = $active_formatting_elements->[$i];
3081    
3082     ## Step 6
3083     if ($entry->[0] eq '#marker') {
3084 wakaba 1.81 !!!cp ('t33_1');
3085 wakaba 1.1 #
3086     } else {
3087     my $in_open_elements;
3088 wakaba 1.3 OE: for (@{$self->{open_elements}}) {
3089 wakaba 1.1 if ($entry->[0] eq $_->[0]) {
3090 wakaba 1.79 !!!cp ('t33');
3091 wakaba 1.1 $in_open_elements = 1;
3092     last OE;
3093     }
3094     }
3095     if ($in_open_elements) {
3096 wakaba 1.79 !!!cp ('t34');
3097 wakaba 1.1 #
3098     } else {
3099 wakaba 1.81 ## NOTE: <!DOCTYPE HTML><p><b><i><u></p> <p>X
3100 wakaba 1.79 !!!cp ('t35');
3101 wakaba 1.1 redo S4;
3102     }
3103     }
3104    
3105     ## Step 7
## The entry found is a marker or still open; start with the next one.
3106     $i++;
3107     $entry = $active_formatting_elements->[$i];
3108     } # S4
3109    
3110     S7: {
3111     ## Step 8
## |clone_node (0)|: the node is cloned without its children.
3112     my $clone = [$entry->[0]->clone_node (0), $entry->[1]];
3113    
3114     ## Step 9
3115     $insert->($clone->[0]);
3116 wakaba 1.3 push @{$self->{open_elements}}, $clone;
3117 wakaba 1.1
3118     ## Step 10
3119 wakaba 1.3 $active_formatting_elements->[$i] = $self->{open_elements}->[-1];
3120 wakaba 1.1
3121     ## Step 11
## Continue until the clone just stored is the last entry of the list.
3122     unless ($clone->[0] eq $active_formatting_elements->[-1]->[0]) {
3123 wakaba 1.79 !!!cp ('t36');
3124 wakaba 1.1 ## Step 7'
3125     $i++;
3126     $entry = $active_formatting_elements->[$i];
3127    
3128     redo S7;
3129     }
3130 wakaba 1.79
3131     !!!cp ('t37');
3132 wakaba 1.1 } # S7
3133     }; # $reconstruct_active_formatting_elements
3134    
## Clears the list of active formatting elements up to the last marker:
## the list is scanned from its end, and the nearest '#marker' entry is
## removed together with every entry after it.  The list is left
## untouched when it contains no marker.
my $clear_up_to_marker = sub {
  my $index = $#$active_formatting_elements;
  while ($index >= 0) {
    if ($active_formatting_elements->[$index]->[0] eq '#marker') {
      !!!cp ('t38');
      ## Truncate the list just before the marker.
      splice @$active_formatting_elements, $index;
      return;
    }
    $index--;
  }

  !!!cp ('t39');
}; # $clear_up_to_marker
3146    
3147 wakaba 1.96 my $insert;
3148    
## Generic CDATA / RCDATA element parsing for the current start tag
## token.
##
## Argument: the content model flag to tokenize the element content
## with (CDATA_CONTENT_MODEL or RCDATA_CONTENT_MODEL; anything else
## dies if the end tag turns out to be missing).
##
## The element is created from |$token| and handed to |$insert|; all
## following character tokens are concatenated into a single text node
## child; the content model is then switched back to PCDATA.  If the
## next token is not the matching end tag, a parse error is reported.
## In both cases that token is consumed before returning.
3149     my $parse_rcdata = sub ($) {
3150     my ($content_model_flag) = @_;
3151 wakaba 1.25
3152     ## Step 1
3153     my $start_tag_name = $token->{tag_name};
3154     my $el;
3155 wakaba 1.116 !!!create-element ($el, $start_tag_name, $token->{attributes}, $token);
3156 wakaba 1.25
3157     ## Step 2
3158 wakaba 1.96 $insert->($el);
3159 wakaba 1.25
3160     ## Step 3
3161 wakaba 1.40 $self->{content_model} = $content_model_flag; # CDATA or RCDATA
3162 wakaba 1.13 delete $self->{escape}; # MUST
3163 wakaba 1.25
3164     ## Step 4
3165 wakaba 1.1 my $text = '';
3166     !!!next-token;
3167 wakaba 1.55 while ($token->{type} == CHARACTER_TOKEN) { # or until stop tokenizing
3168 wakaba 1.79 !!!cp ('t40');
3169 wakaba 1.1 $text .= $token->{data};
3170     !!!next-token;
3171 wakaba 1.25 }
3172    
3173     ## Step 5
3174 wakaba 1.1 if (length $text) {
3175 wakaba 1.79 !!!cp ('t41');
## NOTE: This inner |$text| (the Text node) shadows the string above.
3176 wakaba 1.25 my $text = $self->{document}->create_text_node ($text);
3177     $el->append_child ($text);
3178 wakaba 1.1 }
3179 wakaba 1.25
3180     ## Step 6
3181 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL;
3182 wakaba 1.25
3183     ## Step 7
3184 wakaba 1.79 if ($token->{type} == END_TAG_TOKEN and
3185     $token->{tag_name} eq $start_tag_name) {
3186     !!!cp ('t42');
3187 wakaba 1.1 ## Ignore the token
3188     } else {
3189 wakaba 1.96 ## NOTE: An end-of-file token.
3190     if ($content_model_flag == CDATA_CONTENT_MODEL) {
3191     !!!cp ('t43');
3192 wakaba 1.113 !!!parse-error (type => 'in CDATA:#'.$token->{type}, token => $token);
3193 wakaba 1.96 } elsif ($content_model_flag == RCDATA_CONTENT_MODEL) {
3194     !!!cp ('t44');
3195 wakaba 1.113 !!!parse-error (type => 'in RCDATA:#'.$token->{type}, token => $token);
3196 wakaba 1.96 } else {
3197     die "$0: $content_model_flag in parse_rcdata";
3198     }
3199 wakaba 1.1 }
3200     !!!next-token;
3201 wakaba 1.25 }; # $parse_rcdata
3202 wakaba 1.1
## Handles a |script| start tag token.
##
## A |script| element is created from the attributes of |$token|; the
## following character tokens (tokenized in the CDATA content model)
## are appended to it as text; the content model is then switched back
## to PCDATA.  A token other than the |script| end tag at that point is
## reported as a parse error.  The element is handed to |$insert| only
## when |$self->{inner_html_node}| is not defined; in the visible code
## nothing is done with it otherwise.  The terminating token is
## consumed before returning.
3203 wakaba 1.96 my $script_start_tag = sub () {
3204 wakaba 1.1 my $script_el;
3205 wakaba 1.116 !!!create-element ($script_el, 'script', $token->{attributes}, $token);
3206 wakaba 1.1 ## TODO: mark as "parser-inserted"
3207    
3208 wakaba 1.40 $self->{content_model} = CDATA_CONTENT_MODEL;
3209 wakaba 1.13 delete $self->{escape}; # MUST
3210 wakaba 1.1
3211     my $text = '';
3212     !!!next-token;
3213 wakaba 1.55 while ($token->{type} == CHARACTER_TOKEN) {
3214 wakaba 1.79 !!!cp ('t45');
3215 wakaba 1.1 $text .= $token->{data};
3216     !!!next-token;
3217     } # stop if non-character token or tokenizer stops tokenising
3218     if (length $text) {
3219 wakaba 1.79 !!!cp ('t46');
3220 wakaba 1.1 $script_el->manakai_append_text ($text);
3221     }
3222    
3223 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL;
3224 wakaba 1.1
3225 wakaba 1.55 if ($token->{type} == END_TAG_TOKEN and
3226 wakaba 1.1 $token->{tag_name} eq 'script') {
3227 wakaba 1.79 !!!cp ('t47');
3228 wakaba 1.1 ## Ignore the token
3229     } else {
3230 wakaba 1.79 !!!cp ('t48');
3231 wakaba 1.113 !!!parse-error (type => 'in CDATA:#'.$token->{type}, token => $token);
3232 wakaba 1.1 ## ISSUE: And ignore?
3233     ## TODO: mark as "already executed"
3234     }
3235    
3236 wakaba 1.3 if (defined $self->{inner_html_node}) {
3237 wakaba 1.79 !!!cp ('t49');
3238 wakaba 1.3 ## TODO: mark as "already executed"
3239     } else {
3240 wakaba 1.79 !!!cp ('t50');
3241 wakaba 1.1 ## TODO: $old_insertion_point = current insertion point
3242     ## TODO: insertion point = just before the next input character
3243 wakaba 1.25
3244     $insert->($script_el);
3245 wakaba 1.1
3246     ## TODO: insertion point = $old_insertion_point (might be "undefined")
3247    
3248     ## TODO: if there is a script that will execute as soon as the parser resume, then...
3249     }
3250    
3251     !!!next-token;
3252     }; # $script_start_tag
3253    
3254 wakaba 1.102 ## NOTE: $open_tables->[-1]->[0] is the "current table" element node.
3255     ## NOTE: $open_tables->[-1]->[1] is the "tainted" flag.
## The list starts with a single entry whose node is the node of the first
## entry of the stack of open elements; its "tainted" flag is left unset.
3256     my $open_tables = [[$self->{open_elements}->[0]->[0]]];
3257    
## $formatting_end_tag->($end_tag_token)
##
## Handles the end tag of a formatting element by running the adoption
## agency algorithm (AAA).  $end_tag_token is the end tag token being
## processed; its |tag_name| selects which entry of
## $active_formatting_elements is closed, and the token is attached to
## every parse error reported here.
##
## The closure works on the closed-over $self->{open_elements} (stack of
## open elements), $active_formatting_elements and $open_tables.  Entries
## of both lists are |[node, category]| pairs.  Every return path first
## advances to the next token with |!!!next-token|.  There is no
## meaningful return value.
3258 wakaba 1.1 my $formatting_end_tag = sub {
3259 wakaba 1.113 my $end_tag_token = shift;
3260     my $tag_name = $end_tag_token->{tag_name};
3261 wakaba 1.1
3262 wakaba 1.103 ## NOTE: The adoption agency algorithm (AAA).
3263 wakaba 1.102
## The whole algorithm is repeated (|redo FET| in step 14) until one of
## the |return| statements in steps 1 to 3 is reached.
3264 wakaba 1.1 FET: {
3265     ## Step 1
## Search the list of active formatting elements from the end, stopping at
## the last marker, for an element whose local name is the tag name.
3266     my $formatting_element;
3267     my $formatting_element_i_in_active;
3268     AFE: for (reverse 0..$#$active_formatting_elements) {
3269 wakaba 1.123 if ($active_formatting_elements->[$_]->[0] eq '#marker') {
3270     !!!cp ('t52');
3271     last AFE;
3272     } elsif ($active_formatting_elements->[$_]->[0]->manakai_local_name
3273     eq $tag_name) {
3274 wakaba 1.79 !!!cp ('t51');
3275 wakaba 1.1 $formatting_element = $active_formatting_elements->[$_];
3276     $formatting_element_i_in_active = $_;
3277     last AFE;
3278     }
3279     } # AFE
3280     unless (defined $formatting_element) {
3281 wakaba 1.79 !!!cp ('t53');
3282 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$tag_name, token => $end_tag_token);
3283 wakaba 1.1 ## Ignore the token
3284     !!!next-token;
3285     return;
3286     }
3287     ## has an element in scope
## Walk the stack of open elements from the current node upwards.  Once a
## scoping element has been passed, a match no longer counts as in scope.
3288     my $in_scope = 1;
3289     my $formatting_element_i_in_open;
3290 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3291     my $node = $self->{open_elements}->[$_];
3292 wakaba 1.1 if ($node->[0] eq $formatting_element->[0]) {
3293     if ($in_scope) {
3294 wakaba 1.79 !!!cp ('t54');
3295 wakaba 1.1 $formatting_element_i_in_open = $_;
3296     last INSCOPE;
3297     } else { # in open elements but not in scope
3298 wakaba 1.79 !!!cp ('t55');
## The error is named after the end tag passed to this closure, as in
## step 1, rather than after the enclosing $token.
3299 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$tag_name,
3300     token => $end_tag_token);
3301 wakaba 1.1 ## Ignore the token
3302     !!!next-token;
3303     return;
3304     }
3305 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
3306 wakaba 1.79 !!!cp ('t56');
3307 wakaba 1.1 $in_scope = 0;
3308     }
3309     } # INSCOPE
3310     unless (defined $formatting_element_i_in_open) {
3311 wakaba 1.79 !!!cp ('t57');
3312 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$tag_name,
3313     token => $end_tag_token);
## The formatting element is in the active list but not in the stack of
## open elements: drop exactly that entry.  It is not necessarily the
## last entry of the list, so it is removed by index rather than popped.
3314 wakaba 1.1 splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
3315     !!!next-token; ## TODO: ok?
3316     return;
3317     }
## A formatting element that is not the current node is reported, but
## processing continues.
3318 wakaba 1.3 if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
3319 wakaba 1.79 !!!cp ('t58');
3320 wakaba 1.122 !!!parse-error (type => 'not closed',
3321     value => $self->{open_elements}->[-1]->[0]
3322     ->manakai_local_name,
3323 wakaba 1.113 token => $end_tag_token);
3324 wakaba 1.1 }
3325    
3326     ## Step 2
## Find the furthest block.  The loop runs from the current node up to the
## formatting element and keeps overwriting the candidate, so the result
## is the matching entry closest to the formatting element.
3327     my $furthest_block;
3328     my $furthest_block_i_in_open;
3329 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3330     my $node = $self->{open_elements}->[$_];
3331 wakaba 1.123 if (not ($node->[1] & FORMATTING_EL) and
3332 wakaba 1.1 #not $phrasing_category->{$node->[1]} and
3333 wakaba 1.123 ($node->[1] & SPECIAL_EL or
3334     $node->[1] & SCOPING_EL)) { ## Scoping is redundant, maybe
3335 wakaba 1.79 !!!cp ('t59');
3336 wakaba 1.1 $furthest_block = $node;
3337     $furthest_block_i_in_open = $_;
3338     } elsif ($node->[0] eq $formatting_element->[0]) {
3339 wakaba 1.79 !!!cp ('t60');
3340 wakaba 1.1 last OE;
3341     }
3342     } # OE
3343    
3344     ## Step 3
## Without a furthest block, pop the stack down to and including the
## formatting element and remove it from the active list.
3345     unless (defined $furthest_block) { # MUST
3346 wakaba 1.79 !!!cp ('t61');
3347 wakaba 1.3 splice @{$self->{open_elements}}, $formatting_element_i_in_open;
3348 wakaba 1.1 splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
3349     !!!next-token;
3350     return;
3351     }
3352    
3353     ## Step 4
3354 wakaba 1.3 my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1];
3355 wakaba 1.1
3356     ## Step 5
3357     my $furthest_block_parent = $furthest_block->[0]->parent_node;
3358     if (defined $furthest_block_parent) {
3359 wakaba 1.79 !!!cp ('t62');
3360 wakaba 1.1 $furthest_block_parent->remove_child ($furthest_block->[0]);
3361     }
3362    
3363     ## Step 6
## The bookmark is remembered as the node of the entry that precedes the
## formatting element in the active list.
## NOTE(review): when the formatting element is the first entry the index
## is -1, which selects the last entry of the list -- confirm intent.
3364     my $bookmark_prev_el
3365     = $active_formatting_elements->[$formatting_element_i_in_active - 1]
3366     ->[0];
3367    
3368     ## Step 7
## Walk up the stack from the furthest block towards the formatting
## element, reparenting each visited node chain under its predecessor.
3369     my $node = $furthest_block;
3370     my $node_i_in_open = $furthest_block_i_in_open;
3371     my $last_node = $furthest_block;
3372     S7: {
3373     ## Step 1
3374     $node_i_in_open--;
3375 wakaba 1.3 $node = $self->{open_elements}->[$node_i_in_open];
3376 wakaba 1.1
3377     ## Step 2
## A node that is not in the active list is removed from the stack and
## the walk restarts with the entry above it.
3378     my $node_i_in_active;
3379     S7S2: {
3380     for (reverse 0..$#$active_formatting_elements) {
3381     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
3382 wakaba 1.79 !!!cp ('t63');
3383 wakaba 1.1 $node_i_in_active = $_;
3384     last S7S2;
3385     }
3386     }
3387 wakaba 1.3 splice @{$self->{open_elements}}, $node_i_in_open, 1;
3388 wakaba 1.1 redo S7;
3389     } # S7S2
3390    
3391     ## Step 3
3392     last S7 if $node->[0] eq $formatting_element->[0];
3393    
3394     ## Step 4
3395     if ($last_node->[0] eq $furthest_block->[0]) {
3396 wakaba 1.79 !!!cp ('t64');
3397 wakaba 1.1 $bookmark_prev_el = $node->[0];
3398     }
3399    
3400     ## Step 5
## A node that already has children is replaced, in both lists, by a
## shallow clone that keeps the same category value.
3401     if ($node->[0]->has_child_nodes ()) {
3402 wakaba 1.79 !!!cp ('t65');
3403 wakaba 1.1 my $clone = [$node->[0]->clone_node (0), $node->[1]];
3404     $active_formatting_elements->[$node_i_in_active] = $clone;
3405 wakaba 1.3 $self->{open_elements}->[$node_i_in_open] = $clone;
3406 wakaba 1.1 $node = $clone;
3407     }
3408    
3409     ## Step 6
3410     $node->[0]->append_child ($last_node->[0]);
3411    
3412     ## Step 7
3413     $last_node = $node;
3414    
3415     ## Step 8
3416     redo S7;
3417     } # S7
3418    
3419     ## Step 8
## Attach the rebuilt chain to the common ancestor.  If the common
## ancestor has the TABLE_ROWS_EL category bit, the chain is foster
## parented instead: it goes before the nearest open table element in
## that table's parent, or, if that parent is missing or is not an
## element node, into the entry just above the table in the stack.
3420 wakaba 1.123 if ($common_ancestor_node->[1] & TABLE_ROWS_EL) {
3421 wakaba 1.102 my $foster_parent_element;
3422     my $next_sibling;
3423 wakaba 1.123 OE: for (reverse 0..$#{$self->{open_elements}}) {
3424     if ($self->{open_elements}->[$_]->[1] & TABLE_EL) {
3425 wakaba 1.102 my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
3426     if (defined $parent and $parent->node_type == 1) {
3427     !!!cp ('t65.1');
3428     $foster_parent_element = $parent;
3429     $next_sibling = $self->{open_elements}->[$_]->[0];
3430     } else {
3431     !!!cp ('t65.2');
3432     $foster_parent_element
3433     = $self->{open_elements}->[$_ - 1]->[0];
3434     }
3435     last OE;
3436     }
3437     } # OE
3438     $foster_parent_element = $self->{open_elements}->[0]->[0]
3439     unless defined $foster_parent_element;
3440     $foster_parent_element->insert_before ($last_node->[0], $next_sibling);
3441     $open_tables->[-1]->[1] = 1; # tainted
3442     } else {
3443     !!!cp ('t65.3');
3444     $common_ancestor_node->[0]->append_child ($last_node->[0]);
3445     }
3446 wakaba 1.1
3447     ## Step 9
3448     my $clone = [$formatting_element->[0]->clone_node (0),
3449     $formatting_element->[1]];
3450    
3451     ## Step 10
## The child list is copied first because appending each child to the
## clone removes it from the furthest block.
3452     my @cn = @{$furthest_block->[0]->child_nodes};
3453     $clone->[0]->append_child ($_) for @cn;
3454    
3455     ## Step 11
3456     $furthest_block->[0]->append_child ($clone->[0]);
3457    
3458     ## Step 12
## Remove the formatting element from the active list and insert the
## clone right after the bookmark entry.  $i tracks the bookmark index
## and is decremented when the removal happens above it.
3459     my $i;
3460     AFE: for (reverse 0..$#$active_formatting_elements) {
3461     if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
3462 wakaba 1.79 !!!cp ('t66');
3463 wakaba 1.1 splice @$active_formatting_elements, $_, 1;
3464     $i-- and last AFE if defined $i;
3465     } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
3466 wakaba 1.79 !!!cp ('t67');
3467 wakaba 1.1 $i = $_;
3468     }
3469     } # AFE
3470     splice @$active_formatting_elements, $i + 1, 0, $clone;
3471    
3472     ## Step 13
## Remove the formatting element from the stack of open elements and
## insert the clone immediately below the furthest block.  The splice
## length is 0, as in step 12, so that any element that is still open
## below the furthest block is kept rather than overwritten.
3473     undef $i;
3474 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3475     if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
3476 wakaba 1.79 !!!cp ('t68');
3477 wakaba 1.3 splice @{$self->{open_elements}}, $_, 1;
3478 wakaba 1.1 $i-- and last OE if defined $i;
3479 wakaba 1.3 } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
3480 wakaba 1.79 !!!cp ('t69');
3481 wakaba 1.1 $i = $_;
3482     }
3483     } # OE
3484 wakaba 1.3 splice @{$self->{open_elements}}, $i + 1, 0, $clone;
3485 wakaba 1.1
3486     ## Step 14
3487     redo FET;
3488     } # FET
3489     }; # $formatting_end_tag
3490    
## $insert_to_current->($node)
##
## Appends the given node as the last child of the current node, i.e. the
## node of the last entry of $self->{open_elements}.  The same closure is
## also assigned to $insert here.
3491 wakaba 1.96 $insert = my $insert_to_current = sub {
3492 wakaba 1.25 $self->{open_elements}->[-1]->[0]->append_child ($_[0]);
3493 wakaba 1.1 }; # $insert_to_current
3494    
## $insert_to_foster->($child)
##
## Inserts $child with foster parenting.  If the current node has the
## TABLE_ROWS_EL category bit, $child is placed relative to the nearest
## open table element instead of inside the current node, and the current
## table is flagged as tainted.  Otherwise $child is simply appended to
## the current node.
3495     my $insert_to_foster = sub {
3496 wakaba 1.95 my $child = shift;
3497 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & TABLE_ROWS_EL) {
3498 wakaba 1.95 # MUST
3499     my $foster_parent_element;
3500     my $next_sibling;
## Search the stack of open elements from the current node upwards for
## the nearest entry with the TABLE_EL category bit.
3501 wakaba 1.123 OE: for (reverse 0..$#{$self->{open_elements}}) {
3502     if ($self->{open_elements}->[$_]->[1] & TABLE_EL) {
3503 wakaba 1.3 my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
## If the table has an element parent (node_type 1), insert before the
## table in that parent.  Otherwise use the entry just above the table in
## the stack and leave $next_sibling undefined.
3504 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
3505 wakaba 1.79 !!!cp ('t70');
3506 wakaba 1.1 $foster_parent_element = $parent;
3507 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
3508 wakaba 1.1 } else {
3509 wakaba 1.79 !!!cp ('t71');
3510 wakaba 1.1 $foster_parent_element
3511 wakaba 1.3 = $self->{open_elements}->[$_ - 1]->[0];
3512 wakaba 1.1 }
3513     last OE;
3514     }
3515     } # OE
## With no open table element, fall back to the first entry of the stack.
3516 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0]
3517 wakaba 1.1 unless defined $foster_parent_element;
## NOTE(review): with $next_sibling undefined, insert_before presumably
## appends at the end of the foster parent -- confirm against the DOM
## implementation.
3518     $foster_parent_element->insert_before
3519     ($child, $next_sibling);
3520 wakaba 1.95 $open_tables->[-1]->[1] = 1; # tainted
3521     } else {
3522     !!!cp ('t72');
3523     $self->{open_elements}->[-1]->[0]->append_child ($child);
3524     }
3525 wakaba 1.1 }; # $insert_to_foster
3526    
3527 wakaba 1.52 B: {
3528 wakaba 1.55 if ($token->{type} == DOCTYPE_TOKEN) {
3529 wakaba 1.79 !!!cp ('t73');
3530 wakaba 1.113 !!!parse-error (type => 'DOCTYPE in the middle', token => $token);
3531 wakaba 1.52 ## Ignore the token
3532     ## Stay in the phase
3533     !!!next-token;
3534     redo B;
3535 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN and
3536 wakaba 1.52 $token->{tag_name} eq 'html') {
3537 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
3538 wakaba 1.79 !!!cp ('t79');
3539 wakaba 1.113 !!!parse-error (type => 'after html:html', token => $token);
3540 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
3541     } elsif ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
3542 wakaba 1.79 !!!cp ('t80');
3543 wakaba 1.113 !!!parse-error (type => 'after html:html', token => $token);
3544 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
3545 wakaba 1.79 } else {
3546     !!!cp ('t81');
3547 wakaba 1.52 }
3548    
3549 wakaba 1.84 !!!cp ('t82');
3550 wakaba 1.113 !!!parse-error (type => 'not first start tag', token => $token);
3551 wakaba 1.52 my $top_el = $self->{open_elements}->[0]->[0];
3552     for my $attr_name (keys %{$token->{attributes}}) {
3553     unless ($top_el->has_attribute_ns (undef, $attr_name)) {
3554 wakaba 1.79 !!!cp ('t84');
3555 wakaba 1.52 $top_el->set_attribute_ns
3556     (undef, [undef, $attr_name],
3557     $token->{attributes}->{$attr_name}->{value});
3558     }
3559     }
3560     !!!next-token;
3561     redo B;
3562 wakaba 1.55 } elsif ($token->{type} == COMMENT_TOKEN) {
3563 wakaba 1.52 my $comment = $self->{document}->create_comment ($token->{data});
3564 wakaba 1.56 if ($self->{insertion_mode} & AFTER_HTML_IMS) {
3565 wakaba 1.79 !!!cp ('t85');
3566 wakaba 1.52 $self->{document}->append_child ($comment);
3567 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_BODY_IM) {
3568 wakaba 1.79 !!!cp ('t86');
3569 wakaba 1.52 $self->{open_elements}->[0]->[0]->append_child ($comment);
3570     } else {
3571 wakaba 1.79 !!!cp ('t87');
3572 wakaba 1.52 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3573     }
3574     !!!next-token;
3575     redo B;
3576 wakaba 1.56 } elsif ($self->{insertion_mode} & HEAD_IMS) {
3577 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
3578 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3579 wakaba 1.99 unless ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3580     !!!cp ('t88.2');
3581     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3582     } else {
3583     !!!cp ('t88.1');
3584     ## Ignore the token.
3585     !!!next-token;
3586     redo B;
3587     }
3588 wakaba 1.52 unless (length $token->{data}) {
3589 wakaba 1.79 !!!cp ('t88');
3590 wakaba 1.52 !!!next-token;
3591     redo B;
3592 wakaba 1.1 }
3593     }
3594 wakaba 1.52
3595 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3596 wakaba 1.79 !!!cp ('t89');
3597 wakaba 1.52 ## As if <head>
3598 wakaba 1.116 !!!create-element ($self->{head_element}, 'head',, $token);
3599 wakaba 1.52 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3600 wakaba 1.123 push @{$self->{open_elements}},
3601     [$self->{head_element}, $el_category->{head}];
3602 wakaba 1.52
3603     ## Reprocess in the "in head" insertion mode...
3604     pop @{$self->{open_elements}};
3605    
3606     ## Reprocess in the "after head" insertion mode...
3607 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3608 wakaba 1.79 !!!cp ('t90');
3609 wakaba 1.52 ## As if </noscript>
3610     pop @{$self->{open_elements}};
3611 wakaba 1.113 !!!parse-error (type => 'in noscript:#character', token => $token);
3612 wakaba 1.1
3613 wakaba 1.52 ## Reprocess in the "in head" insertion mode...
3614     ## As if </head>
3615     pop @{$self->{open_elements}};
3616    
3617     ## Reprocess in the "after head" insertion mode...
3618 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
3619 wakaba 1.79 !!!cp ('t91');
3620 wakaba 1.52 pop @{$self->{open_elements}};
3621    
3622     ## Reprocess in the "after head" insertion mode...
3623 wakaba 1.79 } else {
3624     !!!cp ('t92');
3625 wakaba 1.1 }
3626 wakaba 1.52
3627 wakaba 1.123 ## "after head" insertion mode
3628     ## As if <body>
3629     !!!insert-element ('body',, $token);
3630     $self->{insertion_mode} = IN_BODY_IM;
3631     ## reprocess
3632     redo B;
3633     } elsif ($token->{type} == START_TAG_TOKEN) {
3634     if ($token->{tag_name} eq 'head') {
3635     if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3636     !!!cp ('t93');
3637     !!!create-element ($self->{head_element}, $token->{tag_name}, $token->{attributes}, $token);
3638     $self->{open_elements}->[-1]->[0]->append_child
3639     ($self->{head_element});
3640     push @{$self->{open_elements}},
3641     [$self->{head_element}, $el_category->{head}];
3642     $self->{insertion_mode} = IN_HEAD_IM;
3643     !!!next-token;
3644 wakaba 1.52 redo B;
3645 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
3646 wakaba 1.79 !!!cp ('t94');
3647 wakaba 1.54 #
3648     } else {
3649 wakaba 1.79 !!!cp ('t95');
3650 wakaba 1.113 !!!parse-error (type => 'in head:head', token => $token); # or in head noscript
3651 wakaba 1.52 ## Ignore the token
3652     !!!next-token;
3653     redo B;
3654     }
3655 wakaba 1.54 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3656 wakaba 1.79 !!!cp ('t96');
3657 wakaba 1.52 ## As if <head>
3658 wakaba 1.116 !!!create-element ($self->{head_element}, 'head',, $token);
3659 wakaba 1.52 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3660 wakaba 1.123 push @{$self->{open_elements}},
3661     [$self->{head_element}, $el_category->{head}];
3662 wakaba 1.52
3663 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
3664 wakaba 1.52 ## Reprocess in the "in head" insertion mode...
3665 wakaba 1.79 } else {
3666     !!!cp ('t97');
3667 wakaba 1.1 }
3668 wakaba 1.52
3669 wakaba 1.49 if ($token->{tag_name} eq 'base') {
3670 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3671 wakaba 1.79 !!!cp ('t98');
3672 wakaba 1.49 ## As if </noscript>
3673     pop @{$self->{open_elements}};
3674 wakaba 1.113 !!!parse-error (type => 'in noscript:base', token => $token);
3675 wakaba 1.49
3676 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
3677 wakaba 1.49 ## Reprocess in the "in head" insertion mode...
3678 wakaba 1.79 } else {
3679     !!!cp ('t99');
3680 wakaba 1.49 }
3681    
3682     ## NOTE: There is a "as if in head" code clone.
3683 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
3684 wakaba 1.79 !!!cp ('t100');
3685 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
3686 wakaba 1.123 push @{$self->{open_elements}},
3687     [$self->{head_element}, $el_category->{head}];
3688 wakaba 1.79 } else {
3689     !!!cp ('t101');
3690 wakaba 1.49 }
3691 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3692 wakaba 1.49 pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
3693 wakaba 1.100 pop @{$self->{open_elements}} # <head>
3694 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
3695 wakaba 1.49 !!!next-token;
3696     redo B;
3697     } elsif ($token->{tag_name} eq 'link') {
3698 wakaba 1.25 ## NOTE: There is a "as if in head" code clone.
3699 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
3700 wakaba 1.79 !!!cp ('t102');
3701 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
3702 wakaba 1.123 push @{$self->{open_elements}},
3703     [$self->{head_element}, $el_category->{head}];
3704 wakaba 1.79 } else {
3705     !!!cp ('t103');
3706 wakaba 1.25 }
3707 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3708 wakaba 1.25 pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
3709 wakaba 1.100 pop @{$self->{open_elements}} # <head>
3710 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
3711 wakaba 1.1 !!!next-token;
3712 wakaba 1.25 redo B;
3713 wakaba 1.34 } elsif ($token->{tag_name} eq 'meta') {
3714     ## NOTE: There is a "as if in head" code clone.
3715 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
3716 wakaba 1.79 !!!cp ('t104');
3717 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
3718 wakaba 1.123 push @{$self->{open_elements}},
3719     [$self->{head_element}, $el_category->{head}];
3720 wakaba 1.79 } else {
3721     !!!cp ('t105');
3722 wakaba 1.34 }
3723 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3724 wakaba 1.66 my $meta_el = pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
3725 wakaba 1.34
3726     unless ($self->{confident}) {
3727     if ($token->{attributes}->{charset}) { ## TODO: And if supported
3728 wakaba 1.79 !!!cp ('t106');
3729 wakaba 1.63 $self->{change_encoding}
3730 wakaba 1.114 ->($self, $token->{attributes}->{charset}->{value},
3731     $token);
3732 wakaba 1.66
3733     $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
3734     ->set_user_data (manakai_has_reference =>
3735     $token->{attributes}->{charset}
3736     ->{has_reference});
3737 wakaba 1.63 } elsif ($token->{attributes}->{content}) {
3738 wakaba 1.35 ## ISSUE: Algorithm name in the spec was incorrect so that not linked to the definition.
3739 wakaba 1.63 if ($token->{attributes}->{content}->{value}
3740 wakaba 1.70 =~ /\A[^;]*;[\x09-\x0D\x20]*[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
3741     [\x09-\x0D\x20]*=
3742 wakaba 1.34 [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
3743     ([^"'\x09-\x0D\x20][^\x09-\x0D\x20]*))/x) {
3744 wakaba 1.79 !!!cp ('t107');
3745 wakaba 1.63 $self->{change_encoding}
3746 wakaba 1.114 ->($self, defined $1 ? $1 : defined $2 ? $2 : $3,
3747     $token);
3748 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
3749     ->set_user_data (manakai_has_reference =>
3750     $token->{attributes}->{content}
3751     ->{has_reference});
3752 wakaba 1.79 } else {
3753     !!!cp ('t108');
3754 wakaba 1.63 }
3755 wakaba 1.34 }
3756 wakaba 1.66 } else {
3757     if ($token->{attributes}->{charset}) {
3758 wakaba 1.79 !!!cp ('t109');
3759 wakaba 1.66 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
3760     ->set_user_data (manakai_has_reference =>
3761     $token->{attributes}->{charset}
3762     ->{has_reference});
3763     }
3764 wakaba 1.68 if ($token->{attributes}->{content}) {
3765 wakaba 1.79 !!!cp ('t110');
3766 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
3767     ->set_user_data (manakai_has_reference =>
3768     $token->{attributes}->{content}
3769     ->{has_reference});
3770     }
3771 wakaba 1.34 }
3772    
3773 wakaba 1.100 pop @{$self->{open_elements}} # <head>
3774 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
3775 wakaba 1.34 !!!next-token;
3776     redo B;
3777 wakaba 1.49 } elsif ($token->{tag_name} eq 'title') {
3778 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3779 wakaba 1.79 !!!cp ('t111');
3780 wakaba 1.49 ## As if </noscript>
3781     pop @{$self->{open_elements}};
3782 wakaba 1.113 !!!parse-error (type => 'in noscript:title', token => $token);
3783 wakaba 1.49
3784 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
3785 wakaba 1.49 ## Reprocess in the "in head" insertion mode...
3786 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
3787 wakaba 1.79 !!!cp ('t112');
3788 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
3789 wakaba 1.123 push @{$self->{open_elements}},
3790     [$self->{head_element}, $el_category->{head}];
3791 wakaba 1.79 } else {
3792     !!!cp ('t113');
3793 wakaba 1.25 }
3794 wakaba 1.49
3795     ## NOTE: There is a "as if in head" code clone.
3796 wakaba 1.31 my $parent = defined $self->{head_element} ? $self->{head_element}
3797     : $self->{open_elements}->[-1]->[0];
3798 wakaba 1.96 $parse_rcdata->(RCDATA_CONTENT_MODEL);
3799 wakaba 1.100 pop @{$self->{open_elements}} # <head>
3800 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
3801 wakaba 1.25 redo B;
3802     } elsif ($token->{tag_name} eq 'style') {
3803     ## NOTE: Or (scripting is enabled and tag_name eq 'noscript' and
3804 wakaba 1.54 ## insertion mode IN_HEAD_IM)
3805 wakaba 1.25 ## NOTE: There is a "as if in head" code clone.
3806 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
3807 wakaba 1.79 !!!cp ('t114');
3808 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
3809 wakaba 1.123 push @{$self->{open_elements}},
3810     [$self->{head_element}, $el_category->{head}];
3811 wakaba 1.79 } else {
3812     !!!cp ('t115');
3813 wakaba 1.25 }
3814 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
3815 wakaba 1.100 pop @{$self->{open_elements}} # <head>
3816 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
3817 wakaba 1.25 redo B;
3818     } elsif ($token->{tag_name} eq 'noscript') {
3819 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_IM) {
3820 wakaba 1.79 !!!cp ('t116');
3821 wakaba 1.25 ## NOTE: and scripting is disabled
3822 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3823 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_NOSCRIPT_IM;
3824 wakaba 1.1 !!!next-token;
3825 wakaba 1.25 redo B;
3826 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3827 wakaba 1.79 !!!cp ('t117');
3828 wakaba 1.113 !!!parse-error (type => 'in noscript:noscript', token => $token);
3829 wakaba 1.1 ## Ignore the token
3830 wakaba 1.41 !!!next-token;
3831 wakaba 1.25 redo B;
3832 wakaba 1.1 } else {
3833 wakaba 1.79 !!!cp ('t118');
3834 wakaba 1.25 #
3835 wakaba 1.1 }
3836 wakaba 1.49 } elsif ($token->{tag_name} eq 'script') {
3837 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3838 wakaba 1.79 !!!cp ('t119');
3839 wakaba 1.49 ## As if </noscript>
3840     pop @{$self->{open_elements}};
3841 wakaba 1.113 !!!parse-error (type => 'in noscript:script', token => $token);
3842 wakaba 1.49
3843 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
3844 wakaba 1.49 ## Reprocess in the "in head" insertion mode...
3845 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
3846 wakaba 1.79 !!!cp ('t120');
3847 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
3848 wakaba 1.123 push @{$self->{open_elements}},
3849     [$self->{head_element}, $el_category->{head}];
3850 wakaba 1.79 } else {
3851     !!!cp ('t121');
3852 wakaba 1.25 }
3853 wakaba 1.49
3854 wakaba 1.25 ## NOTE: There is a "as if in head" code clone.
3855 wakaba 1.100 $script_start_tag->();
3856     pop @{$self->{open_elements}} # <head>
3857 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
3858 wakaba 1.1 redo B;
3859 wakaba 1.49 } elsif ($token->{tag_name} eq 'body' or
3860 wakaba 1.25 $token->{tag_name} eq 'frameset') {
3861 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3862 wakaba 1.79 !!!cp ('t122');
3863 wakaba 1.49 ## As if </noscript>
3864     pop @{$self->{open_elements}};
3865 wakaba 1.113 !!!parse-error (type => 'in noscript:'.$token->{tag_name}, token => $token);
3866 wakaba 1.49
3867     ## Reprocess in the "in head" insertion mode...
3868     ## As if </head>
3869     pop @{$self->{open_elements}};
3870    
3871     ## Reprocess in the "after head" insertion mode...
3872 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
3873 wakaba 1.79 !!!cp ('t124');
3874 wakaba 1.49 pop @{$self->{open_elements}};
3875    
3876     ## Reprocess in the "after head" insertion mode...
3877 wakaba 1.79 } else {
3878     !!!cp ('t125');
3879 wakaba 1.49 }
3880    
3881     ## "after head" insertion mode
3882 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3883 wakaba 1.54 if ($token->{tag_name} eq 'body') {
3884 wakaba 1.79 !!!cp ('t126');
3885 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
3886     } elsif ($token->{tag_name} eq 'frameset') {
3887 wakaba 1.79 !!!cp ('t127');
3888 wakaba 1.54 $self->{insertion_mode} = IN_FRAMESET_IM;
3889     } else {
3890     die "$0: tag name: $self->{tag_name}";
3891     }
3892 wakaba 1.1 !!!next-token;
3893     redo B;
3894     } else {
3895 wakaba 1.79 !!!cp ('t128');
3896 wakaba 1.1 #
3897     }
3898 wakaba 1.49
3899 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3900 wakaba 1.79 !!!cp ('t129');
3901 wakaba 1.49 ## As if </noscript>
3902     pop @{$self->{open_elements}};
3903 wakaba 1.113 !!!parse-error (type => 'in noscript:/'.$token->{tag_name}, token => $token);
3904 wakaba 1.49
3905     ## Reprocess in the "in head" insertion mode...
3906     ## As if </head>
3907 wakaba 1.25 pop @{$self->{open_elements}};
3908 wakaba 1.49
3909     ## Reprocess in the "after head" insertion mode...
3910 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
3911 wakaba 1.79 !!!cp ('t130');
3912 wakaba 1.49 ## As if </head>
3913 wakaba 1.25 pop @{$self->{open_elements}};
3914 wakaba 1.49
3915     ## Reprocess in the "after head" insertion mode...
3916 wakaba 1.79 } else {
3917     !!!cp ('t131');
3918 wakaba 1.49 }
3919    
3920     ## "after head" insertion mode
3921     ## As if <body>
3922 wakaba 1.116 !!!insert-element ('body',, $token);
3923 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
3924 wakaba 1.49 ## reprocess
3925     redo B;
3926 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
3927 wakaba 1.49 if ($token->{tag_name} eq 'head') {
3928 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3929 wakaba 1.79 !!!cp ('t132');
3930 wakaba 1.50 ## As if <head>
3931 wakaba 1.116 !!!create-element ($self->{head_element}, 'head',, $token);
3932 wakaba 1.50 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3933 wakaba 1.123 push @{$self->{open_elements}},
3934     [$self->{head_element}, $el_category->{head}];
3935 wakaba 1.50
3936     ## Reprocess in the "in head" insertion mode...
3937     pop @{$self->{open_elements}};
3938 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
3939 wakaba 1.50 !!!next-token;
3940     redo B;
3941 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3942 wakaba 1.79 !!!cp ('t133');
3943 wakaba 1.49 ## As if </noscript>
3944     pop @{$self->{open_elements}};
3945 wakaba 1.113 !!!parse-error (type => 'in noscript:/head', token => $token);
3946 wakaba 1.49
3947     ## Reprocess in the "in head" insertion mode...
3948 wakaba 1.50 pop @{$self->{open_elements}};
3949 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
3950 wakaba 1.50 !!!next-token;
3951     redo B;
3952 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
3953 wakaba 1.79 !!!cp ('t134');
3954 wakaba 1.49 pop @{$self->{open_elements}};
3955 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
3956 wakaba 1.49 !!!next-token;
3957     redo B;
3958     } else {
3959 wakaba 1.79 !!!cp ('t135');
3960 wakaba 1.49 #
3961     }
3962     } elsif ($token->{tag_name} eq 'noscript') {
3963 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3964 wakaba 1.79 !!!cp ('t136');
3965 wakaba 1.49 pop @{$self->{open_elements}};
3966 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
3967 wakaba 1.49 !!!next-token;
3968     redo B;
3969 wakaba 1.54 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3970 wakaba 1.79 !!!cp ('t137');
3971 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:noscript', token => $token);
3972 wakaba 1.50 ## Ignore the token ## ISSUE: An issue in the spec.
3973     !!!next-token;
3974     redo B;
3975 wakaba 1.49 } else {
3976 wakaba 1.79 !!!cp ('t138');
3977 wakaba 1.49 #
3978     }
3979     } elsif ({
3980 wakaba 1.31 body => 1, html => 1,
3981     }->{$token->{tag_name}}) {
3982 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3983 wakaba 1.79 !!!cp ('t139');
3984 wakaba 1.50 ## As if <head>
3985 wakaba 1.116 !!!create-element ($self->{head_element}, 'head',, $token);
3986 wakaba 1.50 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3987 wakaba 1.123 push @{$self->{open_elements}},
3988     [$self->{head_element}, $el_category->{head}];
3989 wakaba 1.50
3990 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
3991 wakaba 1.50 ## Reprocess in the "in head" insertion mode...
3992 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3993 wakaba 1.79 !!!cp ('t140');
3994 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
3995 wakaba 1.49 ## Ignore the token
3996     !!!next-token;
3997     redo B;
3998 wakaba 1.79 } else {
3999     !!!cp ('t141');
4000 wakaba 1.49 }
4001 wakaba 1.50
4002     #
4003 wakaba 1.49 } elsif ({
4004 wakaba 1.31 p => 1, br => 1,
4005     }->{$token->{tag_name}}) {
4006 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4007 wakaba 1.79 !!!cp ('t142');
4008 wakaba 1.50 ## As if <head>
4009 wakaba 1.116 !!!create-element ($self->{head_element}, 'head',, $token);
4010 wakaba 1.50 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4011 wakaba 1.123 push @{$self->{open_elements}},
4012     [$self->{head_element}, $el_category->{head}];
4013 wakaba 1.50
4014 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
4015 wakaba 1.50 ## Reprocess in the "in head" insertion mode...
4016 wakaba 1.79 } else {
4017     !!!cp ('t143');
4018 wakaba 1.50 }
4019    
4020 wakaba 1.1 #
4021 wakaba 1.25 } else {
4022 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
4023 wakaba 1.79 !!!cp ('t144');
4024 wakaba 1.54 #
4025     } else {
4026 wakaba 1.79 !!!cp ('t145');
4027 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4028 wakaba 1.49 ## Ignore the token
4029     !!!next-token;
4030     redo B;
4031     }
4032     }
4033    
4034 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4035 wakaba 1.79 !!!cp ('t146');
4036 wakaba 1.49 ## As if </noscript>
4037     pop @{$self->{open_elements}};
4038 wakaba 1.113 !!!parse-error (type => 'in noscript:/'.$token->{tag_name}, token => $token);
4039 wakaba 1.49
4040     ## Reprocess in the "in head" insertion mode...
4041     ## As if </head>
4042     pop @{$self->{open_elements}};
4043    
4044     ## Reprocess in the "after head" insertion mode...
4045 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4046 wakaba 1.79 !!!cp ('t147');
4047 wakaba 1.49 ## As if </head>
4048     pop @{$self->{open_elements}};
4049    
4050     ## Reprocess in the "after head" insertion mode...
4051 wakaba 1.54 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4052 wakaba 1.82 ## ISSUE: This case cannot be reached?
4053 wakaba 1.79 !!!cp ('t148');
4054 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4055 wakaba 1.50 ## Ignore the token ## ISSUE: An issue in the spec.
4056     !!!next-token;
4057     redo B;
4058 wakaba 1.79 } else {
4059     !!!cp ('t149');
4060 wakaba 1.1 }
4061    
4062 wakaba 1.49 ## "after head" insertion mode
4063     ## As if <body>
4064 wakaba 1.116 !!!insert-element ('body',, $token);
4065 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
4066 wakaba 1.52 ## reprocess
4067     redo B;
4068 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4069     if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4070     !!!cp ('t149.1');
4071    
4072     ## NOTE: As if <head>
4073 wakaba 1.116 !!!create-element ($self->{head_element}, 'head',, $token);
4074 wakaba 1.104 $self->{open_elements}->[-1]->[0]->append_child
4075     ($self->{head_element});
4076 wakaba 1.123 #push @{$self->{open_elements}},
4077     # [$self->{head_element}, $el_category->{head}];
4078 wakaba 1.104 #$self->{insertion_mode} = IN_HEAD_IM;
4079     ## NOTE: Reprocess.
4080    
4081     ## NOTE: As if </head>
4082     #pop @{$self->{open_elements}};
4083     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
4084     ## NOTE: Reprocess.
4085    
4086     #
4087     } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4088     !!!cp ('t149.2');
4089    
4090     ## NOTE: As if </head>
4091     pop @{$self->{open_elements}};
4092     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
4093     ## NOTE: Reprocess.
4094    
4095     #
4096     } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4097     !!!cp ('t149.3');
4098    
4099 wakaba 1.113 !!!parse-error (type => 'in noscript:#eof', token => $token);
4100 wakaba 1.104
4101     ## As if </noscript>
4102     pop @{$self->{open_elements}};
4103     #$self->{insertion_mode} = IN_HEAD_IM;
4104     ## NOTE: Reprocess.
4105    
4106     ## NOTE: As if </head>
4107     pop @{$self->{open_elements}};
4108     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
4109     ## NOTE: Reprocess.
4110    
4111     #
4112     } else {
4113     !!!cp ('t149.4');
4114     #
4115     }
4116    
4117     ## NOTE: As if <body>
4118 wakaba 1.116 !!!insert-element ('body',, $token);
4119 wakaba 1.104 $self->{insertion_mode} = IN_BODY_IM;
4120     ## NOTE: Reprocess.
4121     redo B;
4122     } else {
4123     die "$0: $token->{type}: Unknown token type";
4124     }
4125 wakaba 1.52
4126     ## ISSUE: An issue in the spec.
4127 wakaba 1.56 } elsif ($self->{insertion_mode} & BODY_IMS) {
4128 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
4129 wakaba 1.79 !!!cp ('t150');
4130 wakaba 1.52 ## NOTE: There is a code clone of "character in body".
4131     $reconstruct_active_formatting_elements->($insert_to_current);
4132    
4133     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4134    
4135     !!!next-token;
4136     redo B;
4137 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
4138 wakaba 1.52 if ({
4139     caption => 1, col => 1, colgroup => 1, tbody => 1,
4140     td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
4141     }->{$token->{tag_name}}) {
4142 wakaba 1.54 if ($self->{insertion_mode} == IN_CELL_IM) {
4143 wakaba 1.52 ## have an element in table scope
4144 wakaba 1.108 for (reverse 0..$#{$self->{open_elements}}) {
4145 wakaba 1.52 my $node = $self->{open_elements}->[$_];
4146 wakaba 1.123 if ($node->[1] & TABLE_CELL_EL) {
4147 wakaba 1.79 !!!cp ('t151');
4148 wakaba 1.108
4149     ## Close the cell
4150     !!!back-token; # <?>
4151 wakaba 1.122 $token = {type => END_TAG_TOKEN,
4152     tag_name => $node->[0]->manakai_local_name,
4153 wakaba 1.114 line => $token->{line},
4154     column => $token->{column}};
4155 wakaba 1.108 redo B;
4156 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4157 wakaba 1.79 !!!cp ('t152');
4158 wakaba 1.108 ## ISSUE: This case can never be reached, maybe.
4159     last;
4160 wakaba 1.52 }
4161 wakaba 1.108 }
4162    
4163     !!!cp ('t153');
4164     !!!parse-error (type => 'start tag not allowed',
4165 wakaba 1.113 value => $token->{tag_name}, token => $token);
4166 wakaba 1.108 ## Ignore the token
4167     !!!next-token;
4168 wakaba 1.52 redo B;
4169 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_CAPTION_IM) {
4170 wakaba 1.113 !!!parse-error (type => 'not closed:caption', token => $token);
4171 wakaba 1.52
4172 wakaba 1.108 ## NOTE: As if </caption>.
4173 wakaba 1.52 ## have a table element in table scope
4174     my $i;
4175 wakaba 1.108 INSCOPE: {
4176     for (reverse 0..$#{$self->{open_elements}}) {
4177     my $node = $self->{open_elements}->[$_];
4178 wakaba 1.123 if ($node->[1] & CAPTION_EL) {
4179 wakaba 1.108 !!!cp ('t155');
4180     $i = $_;
4181     last INSCOPE;
4182 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4183 wakaba 1.108 !!!cp ('t156');
4184     last;
4185     }
4186 wakaba 1.52 }
4187 wakaba 1.108
4188     !!!cp ('t157');
4189     !!!parse-error (type => 'start tag not allowed',
4190 wakaba 1.113 value => $token->{tag_name}, token => $token);
4191 wakaba 1.108 ## Ignore the token
4192     !!!next-token;
4193     redo B;
4194 wakaba 1.52 } # INSCOPE
4195    
4196     ## generate implied end tags
4197 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
4198     & END_TAG_OPTIONAL_EL) {
4199 wakaba 1.79 !!!cp ('t158');
4200 wakaba 1.86 pop @{$self->{open_elements}};
4201 wakaba 1.52 }
4202    
4203 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & CAPTION_EL) {
4204 wakaba 1.79 !!!cp ('t159');
4205 wakaba 1.122 !!!parse-error (type => 'not closed',
4206     value => $self->{open_elements}->[-1]->[0]
4207     ->manakai_local_name,
4208     token => $token);
4209 wakaba 1.79 } else {
4210     !!!cp ('t160');
4211 wakaba 1.52 }
4212    
4213     splice @{$self->{open_elements}}, $i;
4214    
4215     $clear_up_to_marker->();
4216    
4217 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
4218 wakaba 1.52
4219     ## reprocess
4220     redo B;
4221     } else {
4222 wakaba 1.79 !!!cp ('t161');
4223 wakaba 1.52 #
4224     }
4225     } else {
4226 wakaba 1.79 !!!cp ('t162');
4227 wakaba 1.52 #
4228     }
4229 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
4230 wakaba 1.52 if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
4231 wakaba 1.54 if ($self->{insertion_mode} == IN_CELL_IM) {
4232 wakaba 1.43 ## have an element in table scope
4233 wakaba 1.52 my $i;
4234 wakaba 1.43 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4235     my $node = $self->{open_elements}->[$_];
4236 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
4237 wakaba 1.79 !!!cp ('t163');
4238 wakaba 1.52 $i = $_;
4239 wakaba 1.43 last INSCOPE;
4240 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4241 wakaba 1.79 !!!cp ('t164');
4242 wakaba 1.43 last INSCOPE;
4243     }
4244     } # INSCOPE
4245 wakaba 1.52 unless (defined $i) {
4246 wakaba 1.79 !!!cp ('t165');
4247 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4248 wakaba 1.43 ## Ignore the token
4249     !!!next-token;
4250     redo B;
4251     }
4252    
4253 wakaba 1.52 ## generate implied end tags
4254 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
4255     & END_TAG_OPTIONAL_EL) {
4256 wakaba 1.79 !!!cp ('t166');
4257 wakaba 1.86 pop @{$self->{open_elements}};
4258 wakaba 1.52 }
4259 wakaba 1.86
4260 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
4261     ne $token->{tag_name}) {
4262 wakaba 1.79 !!!cp ('t167');
4263 wakaba 1.122 !!!parse-error (type => 'not closed',
4264     value => $self->{open_elements}->[-1]->[0]
4265     ->manakai_local_name,
4266     token => $token);
4267 wakaba 1.79 } else {
4268     !!!cp ('t168');
4269 wakaba 1.52 }
4270    
4271     splice @{$self->{open_elements}}, $i;
4272    
4273     $clear_up_to_marker->();
4274    
4275 wakaba 1.54 $self->{insertion_mode} = IN_ROW_IM;
4276 wakaba 1.52
4277     !!!next-token;
4278 wakaba 1.43 redo B;
4279 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_CAPTION_IM) {
4280 wakaba 1.79 !!!cp ('t169');
4281 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4282 wakaba 1.52 ## Ignore the token
4283     !!!next-token;
4284     redo B;
4285     } else {
4286 wakaba 1.79 !!!cp ('t170');
4287 wakaba 1.52 #
4288     }
4289     } elsif ($token->{tag_name} eq 'caption') {
4290 wakaba 1.54 if ($self->{insertion_mode} == IN_CAPTION_IM) {
4291 wakaba 1.43 ## have a table element in table scope
4292     my $i;
4293 wakaba 1.108 INSCOPE: {
4294     for (reverse 0..$#{$self->{open_elements}}) {
4295     my $node = $self->{open_elements}->[$_];
4296 wakaba 1.123 if ($node->[1] & CAPTION_EL) {
4297 wakaba 1.108 !!!cp ('t171');
4298     $i = $_;
4299     last INSCOPE;
4300 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4301 wakaba 1.108 !!!cp ('t172');
4302     last;
4303     }
4304 wakaba 1.43 }
4305 wakaba 1.108
4306     !!!cp ('t173');
4307     !!!parse-error (type => 'unmatched end tag',
4308 wakaba 1.113 value => $token->{tag_name}, token => $token);
4309 wakaba 1.108 ## Ignore the token
4310     !!!next-token;
4311     redo B;
4312 wakaba 1.43 } # INSCOPE
4313    
4314     ## generate implied end tags
4315 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
4316     & END_TAG_OPTIONAL_EL) {
4317 wakaba 1.79 !!!cp ('t174');
4318 wakaba 1.86 pop @{$self->{open_elements}};
4319 wakaba 1.43 }
4320 wakaba 1.52
4321 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & CAPTION_EL) {
4322 wakaba 1.79 !!!cp ('t175');
4323 wakaba 1.122 !!!parse-error (type => 'not closed',
4324     value => $self->{open_elements}->[-1]->[0]
4325     ->manakai_local_name,
4326     token => $token);
4327 wakaba 1.79 } else {
4328     !!!cp ('t176');
4329 wakaba 1.52 }
4330    
4331     splice @{$self->{open_elements}}, $i;
4332    
4333     $clear_up_to_marker->();
4334    
4335 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
4336 wakaba 1.52
4337     !!!next-token;
4338     redo B;
4339 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_CELL_IM) {
4340 wakaba 1.79 !!!cp ('t177');
4341 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4342 wakaba 1.52 ## Ignore the token
4343     !!!next-token;
4344     redo B;
4345     } else {
4346 wakaba 1.79 !!!cp ('t178');
4347 wakaba 1.52 #
4348     }
4349     } elsif ({
4350     table => 1, tbody => 1, tfoot => 1,
4351     thead => 1, tr => 1,
4352     }->{$token->{tag_name}} and
4353 wakaba 1.54 $self->{insertion_mode} == IN_CELL_IM) {
4354 wakaba 1.52 ## have an element in table scope
4355     my $i;
4356     my $tn;
4357 wakaba 1.108 INSCOPE: {
4358     for (reverse 0..$#{$self->{open_elements}}) {
4359     my $node = $self->{open_elements}->[$_];
4360 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
4361 wakaba 1.108 !!!cp ('t179');
4362     $i = $_;
4363    
4364     ## Close the cell
4365     !!!back-token; # </?>
4366 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => $tn,
4367     line => $token->{line},
4368     column => $token->{column}};
4369 wakaba 1.108 redo B;
4370 wakaba 1.123 } elsif ($node->[1] & TABLE_CELL_EL) {
4371 wakaba 1.108 !!!cp ('t180');
4372 wakaba 1.123 $tn = $node->[0]->manakai_local_name;
4373 wakaba 1.108 ## NOTE: There is exactly one |td| or |th| element
4374     ## in scope in the stack of open elements by definition.
4375 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4376 wakaba 1.108 ## ISSUE: Can this be reached?
4377     !!!cp ('t181');
4378     last;
4379     }
4380 wakaba 1.52 }
4381 wakaba 1.108
4382 wakaba 1.79 !!!cp ('t182');
4383 wakaba 1.108 !!!parse-error (type => 'unmatched end tag',
4384 wakaba 1.113 value => $token->{tag_name}, token => $token);
4385 wakaba 1.52 ## Ignore the token
4386     !!!next-token;
4387     redo B;
4388 wakaba 1.108 } # INSCOPE
4389 wakaba 1.52 } elsif ($token->{tag_name} eq 'table' and
4390 wakaba 1.54 $self->{insertion_mode} == IN_CAPTION_IM) {
4391 wakaba 1.113 !!!parse-error (type => 'not closed:caption', token => $token);
4392 wakaba 1.52
4393     ## As if </caption>
4394     ## have a table element in table scope
4395     my $i;
4396     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4397     my $node = $self->{open_elements}->[$_];
4398 wakaba 1.123 if ($node->[1] & CAPTION_EL) {
4399 wakaba 1.79 !!!cp ('t184');
4400 wakaba 1.52 $i = $_;
4401     last INSCOPE;
4402 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4403 wakaba 1.79 !!!cp ('t185');
4404 wakaba 1.52 last INSCOPE;
4405     }
4406     } # INSCOPE
4407     unless (defined $i) {
4408 wakaba 1.79 !!!cp ('t186');
4409 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:caption', token => $token);
4410 wakaba 1.52 ## Ignore the token
4411     !!!next-token;
4412     redo B;
4413     }
4414    
4415     ## generate implied end tags
4416 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
4417 wakaba 1.79 !!!cp ('t187');
4418 wakaba 1.86 pop @{$self->{open_elements}};
4419 wakaba 1.52 }
4420    
4421 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & CAPTION_EL) {
4422 wakaba 1.79 !!!cp ('t188');
4423 wakaba 1.122 !!!parse-error (type => 'not closed',
4424     value => $self->{open_elements}->[-1]->[0]
4425     ->manakai_local_name,
4426     token => $token);
4427 wakaba 1.79 } else {
4428     !!!cp ('t189');
4429 wakaba 1.52 }
4430    
4431     splice @{$self->{open_elements}}, $i;
4432    
4433     $clear_up_to_marker->();
4434    
4435 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
4436 wakaba 1.52
4437     ## reprocess
4438     redo B;
4439     } elsif ({
4440     body => 1, col => 1, colgroup => 1, html => 1,
4441     }->{$token->{tag_name}}) {
4442 wakaba 1.56 if ($self->{insertion_mode} & BODY_TABLE_IMS) {
4443 wakaba 1.79 !!!cp ('t190');
4444 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4445 wakaba 1.52 ## Ignore the token
4446     !!!next-token;
4447     redo B;
4448     } else {
4449 wakaba 1.79 !!!cp ('t191');
4450 wakaba 1.52 #
4451     }
4452     } elsif ({
4453     tbody => 1, tfoot => 1,
4454     thead => 1, tr => 1,
4455     }->{$token->{tag_name}} and
4456 wakaba 1.54 $self->{insertion_mode} == IN_CAPTION_IM) {
4457 wakaba 1.79 !!!cp ('t192');
4458 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4459 wakaba 1.52 ## Ignore the token
4460     !!!next-token;
4461     redo B;
4462     } else {
4463 wakaba 1.79 !!!cp ('t193');
4464 wakaba 1.52 #
4465     }
4466 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4467     for my $entry (@{$self->{open_elements}}) {
4468 wakaba 1.123 unless ($entry->[1] & ALL_END_TAG_OPTIONAL_EL) {
4469 wakaba 1.104 !!!cp ('t75');
4470 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
4471 wakaba 1.104 last;
4472     }
4473     }
4474    
4475     ## Stop parsing.
4476     last B;
4477 wakaba 1.52 } else {
4478     die "$0: $token->{type}: Unknown token type";
4479     }
4480    
4481     $insert = $insert_to_current;
4482     #
4483 wakaba 1.56 } elsif ($self->{insertion_mode} & TABLE_IMS) {
4484 wakaba 1.58 if ($token->{type} == CHARACTER_TOKEN) {
4485 wakaba 1.95 if (not $open_tables->[-1]->[1] and # tainted
4486     $token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4487     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4488 wakaba 1.52
4489 wakaba 1.95 unless (length $token->{data}) {
4490     !!!cp ('t194');
4491     !!!next-token;
4492     redo B;
4493     } else {
4494     !!!cp ('t195');
4495     }
4496     }
4497 wakaba 1.52
4498 wakaba 1.113 !!!parse-error (type => 'in table:#character', token => $token);
4499 wakaba 1.52
4500     ## As if in body, but insert into foster parent element
4501     ## ISSUE: Spec says that "whenever a node would be inserted
4502     ## into the current node" while characters might not
4503     ## result in a new Text node.
4504     $reconstruct_active_formatting_elements->($insert_to_foster);
4505    
4506 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & TABLE_ROWS_EL) {
4507 wakaba 1.52 # MUST
4508     my $foster_parent_element;
4509     my $next_sibling;
4510     my $prev_sibling;
4511     OE: for (reverse 0..$#{$self->{open_elements}}) {
4512 wakaba 1.123 if ($self->{open_elements}->[$_]->[1] & TABLE_EL) {
4513 wakaba 1.52 my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
4514     if (defined $parent and $parent->node_type == 1) {
4515 wakaba 1.79 !!!cp ('t196');
4516 wakaba 1.52 $foster_parent_element = $parent;
4517     $next_sibling = $self->{open_elements}->[$_]->[0];
4518     $prev_sibling = $next_sibling->previous_sibling;
4519     } else {
4520 wakaba 1.79 !!!cp ('t197');
4521 wakaba 1.52 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
4522     $prev_sibling = $foster_parent_element->last_child;
4523     }
4524     last OE;
4525     }
4526     } # OE
4527     $foster_parent_element = $self->{open_elements}->[0]->[0] and
4528     $prev_sibling = $foster_parent_element->last_child
4529     unless defined $foster_parent_element;
4530     if (defined $prev_sibling and
4531     $prev_sibling->node_type == 3) {
4532 wakaba 1.79 !!!cp ('t198');
4533 wakaba 1.52 $prev_sibling->manakai_append_text ($token->{data});
4534     } else {
4535 wakaba 1.79 !!!cp ('t199');
4536 wakaba 1.52 $foster_parent_element->insert_before
4537     ($self->{document}->create_text_node ($token->{data}),
4538     $next_sibling);
4539     }
4540 wakaba 1.95 $open_tables->[-1]->[1] = 1; # tainted
4541     } else {
4542     !!!cp ('t200');
4543     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4544     }
4545 wakaba 1.52
4546 wakaba 1.95 !!!next-token;
4547     redo B;
4548 wakaba 1.58 } elsif ($token->{type} == START_TAG_TOKEN) {
4549 wakaba 1.52 if ({
4550 wakaba 1.54 tr => ($self->{insertion_mode} != IN_ROW_IM),
4551 wakaba 1.52 th => 1, td => 1,
4552     }->{$token->{tag_name}}) {
4553 wakaba 1.54 if ($self->{insertion_mode} == IN_TABLE_IM) {
4554 wakaba 1.52 ## Clear back to table context
4555 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
4556     & TABLE_SCOPING_EL)) {
4557 wakaba 1.79 !!!cp ('t201');
4558 wakaba 1.52 pop @{$self->{open_elements}};
4559 wakaba 1.43 }
4560    
4561 wakaba 1.116 !!!insert-element ('tbody',, $token);
4562 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
4563 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
4564     }
4565    
4566 wakaba 1.54 if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
4567 wakaba 1.52 unless ($token->{tag_name} eq 'tr') {
4568 wakaba 1.79 !!!cp ('t202');
4569 wakaba 1.113 !!!parse-error (type => 'missing start tag:tr', token => $token);
4570 wakaba 1.52 }
4571 wakaba 1.43
4572 wakaba 1.52 ## Clear back to table body context
4573 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
4574     & TABLE_ROWS_SCOPING_EL)) {
4575 wakaba 1.79 !!!cp ('t203');
4576 wakaba 1.83 ## ISSUE: Can this case be reached?
4577 wakaba 1.52 pop @{$self->{open_elements}};
4578     }
4579 wakaba 1.43
4580 wakaba 1.54 $self->{insertion_mode} = IN_ROW_IM;
4581 wakaba 1.52 if ($token->{tag_name} eq 'tr') {
4582 wakaba 1.79 !!!cp ('t204');
4583 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4584 wakaba 1.52 !!!next-token;
4585     redo B;
4586     } else {
4587 wakaba 1.79 !!!cp ('t205');
4588 wakaba 1.116 !!!insert-element ('tr',, $token);
4589 wakaba 1.52 ## reprocess in the "in row" insertion mode
4590     }
4591 wakaba 1.79 } else {
4592     !!!cp ('t206');
4593 wakaba 1.52 }
4594    
4595     ## Clear back to table row context
4596 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
4597     & TABLE_ROW_SCOPING_EL)) {
4598 wakaba 1.79 !!!cp ('t207');
4599 wakaba 1.52 pop @{$self->{open_elements}};
4600 wakaba 1.43 }
4601 wakaba 1.52
4602 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4603 wakaba 1.54 $self->{insertion_mode} = IN_CELL_IM;
4604 wakaba 1.52
4605     push @$active_formatting_elements, ['#marker', ''];
4606    
4607     !!!next-token;
4608     redo B;
4609     } elsif ({
4610     caption => 1, col => 1, colgroup => 1,
4611     tbody => 1, tfoot => 1, thead => 1,
4612 wakaba 1.54 tr => 1, # $self->{insertion_mode} == IN_ROW_IM
4613 wakaba 1.52 }->{$token->{tag_name}}) {
4614 wakaba 1.54 if ($self->{insertion_mode} == IN_ROW_IM) {
4615 wakaba 1.52 ## As if </tr>
4616 wakaba 1.43 ## have an element in table scope
4617     my $i;
4618     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4619     my $node = $self->{open_elements}->[$_];
4620 wakaba 1.123 if ($node->[1] & TABLE_ROW_EL) {
4621 wakaba 1.79 !!!cp ('t208');
4622 wakaba 1.43 $i = $_;
4623     last INSCOPE;
4624 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4625 wakaba 1.79 !!!cp ('t209');
4626 wakaba 1.43 last INSCOPE;
4627     }
4628     } # INSCOPE
4629 wakaba 1.79 unless (defined $i) {
4630     !!!cp ('t210');
4631 wakaba 1.83 ## TODO: This type is wrong.
4632 wakaba 1.113 !!!parse-error (type => 'unmacthed end tag:'.$token->{tag_name}, token => $token);
4633 wakaba 1.52 ## Ignore the token
4634     !!!next-token;
4635 wakaba 1.43 redo B;
4636     }
4637    
4638 wakaba 1.52 ## Clear back to table row context
4639 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
4640     & TABLE_ROW_SCOPING_EL)) {
4641 wakaba 1.79 !!!cp ('t211');
4642 wakaba 1.83 ## ISSUE: Can this case be reached?
4643 wakaba 1.52 pop @{$self->{open_elements}};
4644 wakaba 1.1 }
4645 wakaba 1.43
4646 wakaba 1.52 pop @{$self->{open_elements}}; # tr
4647 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
4648 wakaba 1.52 if ($token->{tag_name} eq 'tr') {
4649 wakaba 1.79 !!!cp ('t212');
4650 wakaba 1.52 ## reprocess
4651     redo B;
4652     } else {
4653 wakaba 1.79 !!!cp ('t213');
4654 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
4655     }
4656 wakaba 1.1 }
4657 wakaba 1.52
4658 wakaba 1.54 if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
4659 wakaba 1.52 ## have an element in table scope
4660 wakaba 1.43 my $i;
4661     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4662     my $node = $self->{open_elements}->[$_];
4663 wakaba 1.123 if ($node->[1] & TABLE_ROW_GROUP_EL) {
4664 wakaba 1.79 !!!cp ('t214');
4665 wakaba 1.43 $i = $_;
4666     last INSCOPE;
4667 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4668 wakaba 1.79 !!!cp ('t215');
4669 wakaba 1.43 last INSCOPE;
4670     }
4671     } # INSCOPE
4672 wakaba 1.52 unless (defined $i) {
4673 wakaba 1.79 !!!cp ('t216');
4674 wakaba 1.82 ## TODO: This error type is wrong.
4675 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4676 wakaba 1.52 ## Ignore the token
4677     !!!next-token;
4678 wakaba 1.43 redo B;
4679     }
4680 wakaba 1.52
4681     ## Clear back to table body context
4682 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
4683     & TABLE_ROWS_SCOPING_EL)) {
4684 wakaba 1.79 !!!cp ('t217');
4685 wakaba 1.83 ## ISSUE: Can this state be reached?
4686 wakaba 1.52 pop @{$self->{open_elements}};
4687 wakaba 1.43 }
4688    
4689 wakaba 1.52 ## As if <{current node}>
4690     ## have an element in table scope
4691     ## true by definition
4692 wakaba 1.43
4693 wakaba 1.52 ## Clear back to table body context
4694     ## nop by definition
4695 wakaba 1.43
4696 wakaba 1.52 pop @{$self->{open_elements}};
4697 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
4698 wakaba 1.52 ## reprocess in "in table" insertion mode...
4699 wakaba 1.79 } else {
4700     !!!cp ('t218');
4701 wakaba 1.52 }
4702    
4703     if ($token->{tag_name} eq 'col') {
4704     ## Clear back to table context
4705 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
4706     & TABLE_SCOPING_EL)) {
4707 wakaba 1.79 !!!cp ('t219');
4708 wakaba 1.83 ## ISSUE: Can this state be reached?
4709 wakaba 1.52 pop @{$self->{open_elements}};
4710     }
4711 wakaba 1.43
4712 wakaba 1.116 !!!insert-element ('colgroup',, $token);
4713 wakaba 1.54 $self->{insertion_mode} = IN_COLUMN_GROUP_IM;
4714 wakaba 1.52 ## reprocess
4715 wakaba 1.43 redo B;
4716 wakaba 1.52 } elsif ({
4717     caption => 1,
4718     colgroup => 1,
4719     tbody => 1, tfoot => 1, thead => 1,
4720     }->{$token->{tag_name}}) {
4721     ## Clear back to table context
4722 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
4723     & TABLE_SCOPING_EL)) {
4724 wakaba 1.79 !!!cp ('t220');
4725 wakaba 1.83 ## ISSUE: Can this state be reached?
4726 wakaba 1.52 pop @{$self->{open_elements}};
4727 wakaba 1.1 }
4728 wakaba 1.52
4729     push @$active_formatting_elements, ['#marker', '']
4730     if $token->{tag_name} eq 'caption';
4731    
4732 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4733 wakaba 1.52 $self->{insertion_mode} = {
4734 wakaba 1.54 caption => IN_CAPTION_IM,
4735     colgroup => IN_COLUMN_GROUP_IM,
4736     tbody => IN_TABLE_BODY_IM,
4737     tfoot => IN_TABLE_BODY_IM,
4738     thead => IN_TABLE_BODY_IM,
4739 wakaba 1.52 }->{$token->{tag_name}};
4740 wakaba 1.1 !!!next-token;
4741     redo B;
4742 wakaba 1.52 } else {
4743     die "$0: in table: <>: $token->{tag_name}";
4744 wakaba 1.1 }
4745 wakaba 1.52 } elsif ($token->{tag_name} eq 'table') {
4746 wakaba 1.122 !!!parse-error (type => 'not closed',
4747     value => $self->{open_elements}->[-1]->[0]
4748     ->manakai_local_name,
4749     token => $token);
4750 wakaba 1.1
4751 wakaba 1.52 ## As if </table>
4752 wakaba 1.1 ## have a table element in table scope
4753     my $i;
4754 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4755     my $node = $self->{open_elements}->[$_];
4756 wakaba 1.123 if ($node->[1] & TABLE_EL) {
4757 wakaba 1.79 !!!cp ('t221');
4758 wakaba 1.1 $i = $_;
4759     last INSCOPE;
4760 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4761 wakaba 1.79 !!!cp ('t222');
4762 wakaba 1.1 last INSCOPE;
4763     }
4764     } # INSCOPE
4765     unless (defined $i) {
4766 wakaba 1.79 !!!cp ('t223');
4767 wakaba 1.83 ## TODO: The following is wrong, maybe.
4768 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:table', token => $token);
4769 wakaba 1.52 ## Ignore tokens </table><table>
4770 wakaba 1.1 !!!next-token;
4771     redo B;
4772     }
4773    
4774 wakaba 1.106 ## TODO: The following steps have been removed from the latest spec.
4775 wakaba 1.1 ## generate implied end tags
4776 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
4777 wakaba 1.79 !!!cp ('t224');
4778 wakaba 1.86 pop @{$self->{open_elements}};
4779 wakaba 1.1 }
4780    
4781 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & TABLE_EL) {
4782 wakaba 1.79 !!!cp ('t225');
4783 wakaba 1.122 ## NOTE: |<table><tr><table>|
4784     !!!parse-error (type => 'not closed',
4785     value => $self->{open_elements}->[-1]->[0]
4786     ->manakai_local_name,
4787     token => $token);
4788 wakaba 1.79 } else {
4789     !!!cp ('t226');
4790 wakaba 1.1 }
4791    
4792 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4793 wakaba 1.95 pop @{$open_tables};
4794 wakaba 1.1
4795 wakaba 1.52 $self->_reset_insertion_mode;
4796 wakaba 1.1
4797     ## reprocess
4798     redo B;
4799 wakaba 1.100 } elsif ($token->{tag_name} eq 'style') {
4800     if (not $open_tables->[-1]->[1]) { # tainted
4801     !!!cp ('t227.8');
4802     ## NOTE: This is a "as if in head" code clone.
4803     $parse_rcdata->(CDATA_CONTENT_MODEL);
4804     redo B;
4805     } else {
4806     !!!cp ('t227.7');
4807     #
4808     }
4809     } elsif ($token->{tag_name} eq 'script') {
4810     if (not $open_tables->[-1]->[1]) { # tainted
4811     !!!cp ('t227.6');
4812     ## NOTE: This is a "as if in head" code clone.
4813     $script_start_tag->();
4814     redo B;
4815     } else {
4816     !!!cp ('t227.5');
4817     #
4818     }
4819 wakaba 1.98 } elsif ($token->{tag_name} eq 'input') {
4820     if (not $open_tables->[-1]->[1]) { # tainted
4821     if ($token->{attributes}->{type}) { ## TODO: case
4822     my $type = lc $token->{attributes}->{type}->{value};
4823     if ($type eq 'hidden') {
4824     !!!cp ('t227.3');
4825 wakaba 1.113 !!!parse-error (type => 'in table:'.$token->{tag_name}, token => $token);
4826 wakaba 1.98
4827 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4828 wakaba 1.98
4829     ## TODO: form element pointer
4830    
4831     pop @{$self->{open_elements}};
4832    
4833     !!!next-token;
4834     redo B;
4835     } else {
4836     !!!cp ('t227.2');
4837     #
4838     }
4839     } else {
4840     !!!cp ('t227.1');
4841     #
4842     }
4843     } else {
4844     !!!cp ('t227.4');
4845     #
4846     }
4847 wakaba 1.58 } else {
4848 wakaba 1.79 !!!cp ('t227');
4849 wakaba 1.58 #
4850     }
4851 wakaba 1.98
4852 wakaba 1.113 !!!parse-error (type => 'in table:'.$token->{tag_name}, token => $token);
4853 wakaba 1.98
4854     $insert = $insert_to_foster;
4855     #
4856 wakaba 1.58 } elsif ($token->{type} == END_TAG_TOKEN) {
4857 wakaba 1.52 if ($token->{tag_name} eq 'tr' and
4858 wakaba 1.54 $self->{insertion_mode} == IN_ROW_IM) {
4859 wakaba 1.52 ## have an element in table scope
4860     my $i;
4861     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4862     my $node = $self->{open_elements}->[$_];
4863 wakaba 1.123 if ($node->[1] & TABLE_ROW_EL) {
4864 wakaba 1.79 !!!cp ('t228');
4865 wakaba 1.52 $i = $_;
4866     last INSCOPE;
4867 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4868 wakaba 1.79 !!!cp ('t229');
4869 wakaba 1.52 last INSCOPE;
4870     }
4871     } # INSCOPE
4872     unless (defined $i) {
4873 wakaba 1.79 !!!cp ('t230');
4874 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4875 wakaba 1.52 ## Ignore the token
4876 wakaba 1.42 !!!next-token;
4877     redo B;
4878 wakaba 1.79 } else {
4879     !!!cp ('t232');
4880 wakaba 1.42 }
4881    
4882 wakaba 1.52 ## Clear back to table row context
4883 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
4884     & TABLE_ROW_SCOPING_EL)) {
4885 wakaba 1.79 !!!cp ('t231');
4886 wakaba 1.83 ## ISSUE: Can this state be reached?
4887 wakaba 1.52 pop @{$self->{open_elements}};
4888     }
4889 wakaba 1.42
4890 wakaba 1.52 pop @{$self->{open_elements}}; # tr
4891 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
4892 wakaba 1.52 !!!next-token;
4893     redo B;
4894     } elsif ($token->{tag_name} eq 'table') {
4895 wakaba 1.54 if ($self->{insertion_mode} == IN_ROW_IM) {
4896 wakaba 1.52 ## As if </tr>
4897     ## have an element in table scope
4898     my $i;
4899     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4900     my $node = $self->{open_elements}->[$_];
4901 wakaba 1.123 if ($node->[1] & TABLE_ROW_EL) {
4902 wakaba 1.79 !!!cp ('t233');
4903 wakaba 1.52 $i = $_;
4904     last INSCOPE;
4905 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4906 wakaba 1.79 !!!cp ('t234');
4907 wakaba 1.52 last INSCOPE;
4908 wakaba 1.42 }
4909 wakaba 1.52 } # INSCOPE
4910     unless (defined $i) {
4911 wakaba 1.79 !!!cp ('t235');
4912 wakaba 1.83 ## TODO: The following is wrong.
4913 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{type}, token => $token);
4914 wakaba 1.52 ## Ignore the token
4915     !!!next-token;
4916     redo B;
4917 wakaba 1.42 }
4918 wakaba 1.52
4919     ## Clear back to table row context
4920 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
4921     & TABLE_ROW_SCOPING_EL)) {
4922 wakaba 1.79 !!!cp ('t236');
4923 wakaba 1.83 ## ISSUE: Can this state be reached?
4924 wakaba 1.46 pop @{$self->{open_elements}};
4925 wakaba 1.1 }
4926 wakaba 1.46
4927 wakaba 1.52 pop @{$self->{open_elements}}; # tr
4928 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
4929 wakaba 1.46 ## reprocess in the "in table body" insertion mode...
4930 wakaba 1.1 }
4931    
4932 wakaba 1.54 if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
4933 wakaba 1.52 ## have an element in table scope
4934     my $i;
4935     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4936     my $node = $self->{open_elements}->[$_];
4937 wakaba 1.123 if ($node->[1] & TABLE_ROW_GROUP_EL) {
4938 wakaba 1.79 !!!cp ('t237');
4939 wakaba 1.52 $i = $_;
4940     last INSCOPE;
4941 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4942 wakaba 1.79 !!!cp ('t238');
4943 wakaba 1.52 last INSCOPE;
4944     }
4945     } # INSCOPE
4946     unless (defined $i) {
4947 wakaba 1.79 !!!cp ('t239');
4948 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4949 wakaba 1.52 ## Ignore the token
4950     !!!next-token;
4951     redo B;
4952 wakaba 1.47 }
4953    
4954     ## Clear back to table body context
4955 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
4956     & TABLE_ROWS_SCOPING_EL)) {
4957 wakaba 1.79 !!!cp ('t240');
4958 wakaba 1.47 pop @{$self->{open_elements}};
4959     }
4960    
4961 wakaba 1.52 ## As if <{current node}>
4962     ## have an element in table scope
4963     ## true by definition
4964    
4965     ## Clear back to table body context
4966     ## nop by definition
4967    
4968     pop @{$self->{open_elements}};
4969 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
4970 wakaba 1.52 ## reprocess in the "in table" insertion mode...
4971     }
4972    
4973 wakaba 1.94 ## NOTE: </table> in the "in table" insertion mode.
4974     ## When you edit the code fragment below, please ensure that
4975     ## the code for <table> in the "in table" insertion mode
4976     ## is synced with it.
4977    
4978 wakaba 1.52 ## have a table element in table scope
4979     my $i;
4980     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4981     my $node = $self->{open_elements}->[$_];
4982 wakaba 1.123 if ($node->[1] & TABLE_EL) {
4983 wakaba 1.79 !!!cp ('t241');
4984 wakaba 1.52 $i = $_;
4985     last INSCOPE;
4986 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4987 wakaba 1.79 !!!cp ('t242');
4988 wakaba 1.52 last INSCOPE;
4989 wakaba 1.47 }
4990 wakaba 1.52 } # INSCOPE
4991     unless (defined $i) {
4992 wakaba 1.79 !!!cp ('t243');
4993 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4994 wakaba 1.52 ## Ignore the token
4995     !!!next-token;
4996     redo B;
4997 wakaba 1.3 }
4998 wakaba 1.52
4999     splice @{$self->{open_elements}}, $i;
5000 wakaba 1.95 pop @{$open_tables};
5001 wakaba 1.1
5002 wakaba 1.52 $self->_reset_insertion_mode;
5003 wakaba 1.47
5004     !!!next-token;
5005     redo B;
5006     } elsif ({
5007 wakaba 1.48 tbody => 1, tfoot => 1, thead => 1,
5008 wakaba 1.52 }->{$token->{tag_name}} and
5009 wakaba 1.56 $self->{insertion_mode} & ROW_IMS) {
5010 wakaba 1.54 if ($self->{insertion_mode} == IN_ROW_IM) {
5011 wakaba 1.52 ## have an element in table scope
5012     my $i;
5013     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5014     my $node = $self->{open_elements}->[$_];
5015 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
5016 wakaba 1.79 !!!cp ('t247');
5017 wakaba 1.52 $i = $_;
5018     last INSCOPE;
5019 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5020 wakaba 1.79 !!!cp ('t248');
5021 wakaba 1.52 last INSCOPE;
5022     }
5023     } # INSCOPE
5024     unless (defined $i) {
5025 wakaba 1.79 !!!cp ('t249');
5026 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5027 wakaba 1.52 ## Ignore the token
5028     !!!next-token;
5029     redo B;
5030     }
5031    
5032 wakaba 1.48 ## As if </tr>
5033     ## have an element in table scope
5034     my $i;
5035     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5036     my $node = $self->{open_elements}->[$_];
5037 wakaba 1.123 if ($node->[1] & TABLE_ROW_EL) {
5038 wakaba 1.79 !!!cp ('t250');
5039 wakaba 1.48 $i = $_;
5040     last INSCOPE;
5041 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5042 wakaba 1.79 !!!cp ('t251');
5043 wakaba 1.48 last INSCOPE;
5044     }
5045     } # INSCOPE
5046 wakaba 1.52 unless (defined $i) {
5047 wakaba 1.79 !!!cp ('t252');
5048 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:tr', token => $token);
5049 wakaba 1.52 ## Ignore the token
5050     !!!next-token;
5051     redo B;
5052     }
5053 wakaba 1.48
5054     ## Clear back to table row context
5055 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5056     & TABLE_ROW_SCOPING_EL)) {
5057 wakaba 1.79 !!!cp ('t253');
5058 wakaba 1.83 ## ISSUE: Can this case be reached?
5059 wakaba 1.48 pop @{$self->{open_elements}};
5060     }
5061    
5062     pop @{$self->{open_elements}}; # tr
5063 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
5064 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
5065     }
5066    
5067     ## have an element in table scope
5068     my $i;
5069     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5070     my $node = $self->{open_elements}->[$_];
5071 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
5072 wakaba 1.79 !!!cp ('t254');
5073 wakaba 1.52 $i = $_;
5074     last INSCOPE;
5075 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5076 wakaba 1.79 !!!cp ('t255');
5077 wakaba 1.52 last INSCOPE;
5078     }
5079     } # INSCOPE
5080     unless (defined $i) {
5081 wakaba 1.79 !!!cp ('t256');
5082 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5083 wakaba 1.52 ## Ignore the token
5084     !!!next-token;
5085     redo B;
5086     }
5087    
5088     ## Clear back to table body context
5089 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5090     & TABLE_ROWS_SCOPING_EL)) {
5091 wakaba 1.79 !!!cp ('t257');
5092 wakaba 1.83 ## ISSUE: Can this case be reached?
5093 wakaba 1.52 pop @{$self->{open_elements}};
5094     }
5095    
5096     pop @{$self->{open_elements}};
5097 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5098 wakaba 1.52 !!!next-token;
5099     redo B;
5100     } elsif ({
5101     body => 1, caption => 1, col => 1, colgroup => 1,
5102     html => 1, td => 1, th => 1,
5103 wakaba 1.54 tr => 1, # $self->{insertion_mode} == IN_ROW_IM
5104     tbody => 1, tfoot => 1, thead => 1, # $self->{insertion_mode} == IN_TABLE_IM
5105 wakaba 1.52 }->{$token->{tag_name}}) {
5106 wakaba 1.79 !!!cp ('t258');
5107 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5108 wakaba 1.52 ## Ignore the token
5109     !!!next-token;
5110     redo B;
5111 wakaba 1.58 } else {
5112 wakaba 1.79 !!!cp ('t259');
5113 wakaba 1.113 !!!parse-error (type => 'in table:/'.$token->{tag_name}, token => $token);
5114 wakaba 1.52
5115 wakaba 1.58 $insert = $insert_to_foster;
5116     #
5117     }
5118 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
5119 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & HTML_EL and
5120 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
5121 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
5122 wakaba 1.104 !!!cp ('t259.1');
5123 wakaba 1.105 #
5124 wakaba 1.104 } else {
5125     !!!cp ('t259.2');
5126 wakaba 1.105 #
5127 wakaba 1.104 }
5128    
5129     ## Stop parsing
5130     last B;
5131 wakaba 1.58 } else {
5132     die "$0: $token->{type}: Unknown token type";
5133     }
5134 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_COLUMN_GROUP_IM) {
5135 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
5136 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5137     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5138     unless (length $token->{data}) {
5139 wakaba 1.79 !!!cp ('t260');
5140 wakaba 1.52 !!!next-token;
5141     redo B;
5142     }
5143     }
5144    
5145 wakaba 1.79 !!!cp ('t261');
5146 wakaba 1.52 #
5147 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
5148 wakaba 1.52 if ($token->{tag_name} eq 'col') {
5149 wakaba 1.79 !!!cp ('t262');
5150 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5151 wakaba 1.52 pop @{$self->{open_elements}};
5152     !!!next-token;
5153     redo B;
5154     } else {
5155 wakaba 1.79 !!!cp ('t263');
5156 wakaba 1.52 #
5157     }
5158 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
5159 wakaba 1.52 if ($token->{tag_name} eq 'colgroup') {
5160 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & HTML_EL) {
5161 wakaba 1.79 !!!cp ('t264');
5162 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:colgroup', token => $token);
5163 wakaba 1.52 ## Ignore the token
5164     !!!next-token;
5165     redo B;
5166     } else {
5167 wakaba 1.79 !!!cp ('t265');
5168 wakaba 1.52 pop @{$self->{open_elements}}; # colgroup
5169 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5170 wakaba 1.52 !!!next-token;
5171     redo B;
5172     }
5173     } elsif ($token->{tag_name} eq 'col') {
5174 wakaba 1.79 !!!cp ('t266');
5175 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:col', token => $token);
5176 wakaba 1.52 ## Ignore the token
5177     !!!next-token;
5178     redo B;
5179     } else {
5180 wakaba 1.79 !!!cp ('t267');
5181 wakaba 1.52 #
5182     }
5183 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
5184 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & HTML_EL and
5185 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
5186     !!!cp ('t270.2');
5187     ## Stop parsing.
5188     last B;
5189     } else {
5190     ## NOTE: As if </colgroup>.
5191     !!!cp ('t270.1');
5192     pop @{$self->{open_elements}}; # colgroup
5193     $self->{insertion_mode} = IN_TABLE_IM;
5194     ## Reprocess.
5195     redo B;
5196     }
5197     } else {
5198     die "$0: $token->{type}: Unknown token type";
5199     }
5200 wakaba 1.52
5201     ## As if </colgroup>
5202 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & HTML_EL) {
5203 wakaba 1.79 !!!cp ('t269');
5204 wakaba 1.104 ## TODO: Wrong error type?
5205 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:colgroup', token => $token);
5206 wakaba 1.52 ## Ignore the token
5207     !!!next-token;
5208     redo B;
5209     } else {
5210 wakaba 1.79 !!!cp ('t270');
5211 wakaba 1.52 pop @{$self->{open_elements}}; # colgroup
5212 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5213 wakaba 1.52 ## reprocess
5214     redo B;
5215     }
5216 wakaba 1.101 } elsif ($self->{insertion_mode} & SELECT_IMS) {
5217 wakaba 1.58 if ($token->{type} == CHARACTER_TOKEN) {
5218 wakaba 1.79 !!!cp ('t271');
5219 wakaba 1.58 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
5220     !!!next-token;
5221     redo B;
5222     } elsif ($token->{type} == START_TAG_TOKEN) {
5223 wakaba 1.123 if ($token->{tag_name} eq 'option') {
5224     if ($self->{open_elements}->[-1]->[1] & OPTION_EL) {
5225     !!!cp ('t272');
5226     ## As if </option>
5227     pop @{$self->{open_elements}};
5228     } else {
5229     !!!cp ('t273');
5230     }
5231 wakaba 1.52
5232 wakaba 1.123 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5233     !!!next-token;
5234     redo B;
5235     } elsif ($token->{tag_name} eq 'optgroup') {
5236     if ($self->{open_elements}->[-1]->[1] & OPTION_EL) {
5237     !!!cp ('t274');
5238     ## As if </option>
5239     pop @{$self->{open_elements}};
5240     } else {
5241     !!!cp ('t275');
5242     }
5243 wakaba 1.52
5244 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & OPTGROUP_EL) {
5245     !!!cp ('t276');
5246     ## As if </optgroup>
5247     pop @{$self->{open_elements}};
5248     } else {
5249     !!!cp ('t277');
5250     }
5251 wakaba 1.52
5252 wakaba 1.123 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5253     !!!next-token;
5254     redo B;
5255 wakaba 1.101 } elsif ($token->{tag_name} eq 'select' or
5256     $token->{tag_name} eq 'input' or
5257     ($self->{insertion_mode} == IN_SELECT_IN_TABLE_IM and
5258     {
5259     caption => 1, table => 1,
5260     tbody => 1, tfoot => 1, thead => 1,
5261     tr => 1, td => 1, th => 1,
5262     }->{$token->{tag_name}})) {
5263     ## TODO: The type below is not good - <select> is replaced by </select>
5264 wakaba 1.113 !!!parse-error (type => 'not closed:select', token => $token);
5265 wakaba 1.101 ## NOTE: As if the token were </select> (<select> case) or
5266     ## as if there were </select> (otherwise).
5267 wakaba 1.123 ## have an element in table scope
5268     my $i;
5269     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5270     my $node = $self->{open_elements}->[$_];
5271     if ($node->[1] & SELECT_EL) {
5272     !!!cp ('t278');
5273     $i = $_;
5274     last INSCOPE;
5275     } elsif ($node->[1] & TABLE_SCOPING_EL) {
5276     !!!cp ('t279');
5277     last INSCOPE;
5278     }
5279     } # INSCOPE
5280     unless (defined $i) {
5281     !!!cp ('t280');
5282     !!!parse-error (type => 'unmatched end tag:select', token => $token);
5283     ## Ignore the token
5284     !!!next-token;
5285     redo B;
5286     }
5287 wakaba 1.52
5288 wakaba 1.123 !!!cp ('t281');
5289     splice @{$self->{open_elements}}, $i;
5290 wakaba 1.52
5291 wakaba 1.123 $self->_reset_insertion_mode;
5292 wakaba 1.47
5293 wakaba 1.101 if ($token->{tag_name} eq 'select') {
5294     !!!cp ('t281.2');
5295     !!!next-token;
5296     redo B;
5297     } else {
5298     !!!cp ('t281.1');
5299     ## Reprocess the token.
5300     redo B;
5301     }
5302 wakaba 1.58 } else {
5303 wakaba 1.79 !!!cp ('t282');
5304 wakaba 1.113 !!!parse-error (type => 'in select:'.$token->{tag_name}, token => $token);
5305 wakaba 1.58 ## Ignore the token
5306     !!!next-token;
5307     redo B;
5308     }
5309     } elsif ($token->{type} == END_TAG_TOKEN) {
5310 wakaba 1.123 if ($token->{tag_name} eq 'optgroup') {
5311     if ($self->{open_elements}->[-1]->[1] & OPTION_EL and
5312     $self->{open_elements}->[-2]->[1] & OPTGROUP_EL) {
5313     !!!cp ('t283');
5314     ## As if </option>
5315     splice @{$self->{open_elements}}, -2;
5316     } elsif ($self->{open_elements}->[-1]->[1] & OPTGROUP_EL) {
5317     !!!cp ('t284');
5318     pop @{$self->{open_elements}};
5319     } else {
5320     !!!cp ('t285');
5321     !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5322     ## Ignore the token
5323     }
5324     !!!next-token;
5325     redo B;
5326     } elsif ($token->{tag_name} eq 'option') {
5327     if ($self->{open_elements}->[-1]->[1] & OPTION_EL) {
5328     !!!cp ('t286');
5329     pop @{$self->{open_elements}};
5330     } else {
5331     !!!cp ('t287');
5332     !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5333     ## Ignore the token
5334     }
5335     !!!next-token;
5336     redo B;
5337     } elsif ($token->{tag_name} eq 'select') {
5338     ## have an element in table scope
5339     my $i;
5340     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5341     my $node = $self->{open_elements}->[$_];
5342     if ($node->[1] & SELECT_EL) {
5343     !!!cp ('t288');
5344     $i = $_;
5345     last INSCOPE;
5346     } elsif ($node->[1] & TABLE_SCOPING_EL) {
5347     !!!cp ('t289');
5348     last INSCOPE;
5349     }
5350     } # INSCOPE
5351     unless (defined $i) {
5352     !!!cp ('t290');
5353     !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5354     ## Ignore the token
5355     !!!next-token;
5356     redo B;
5357     }
5358 wakaba 1.52
5359 wakaba 1.123 !!!cp ('t291');
5360     splice @{$self->{open_elements}}, $i;
5361 wakaba 1.52
5362 wakaba 1.123 $self->_reset_insertion_mode;
5363 wakaba 1.52
5364 wakaba 1.123 !!!next-token;
5365     redo B;
5366 wakaba 1.101 } elsif ($self->{insertion_mode} == IN_SELECT_IN_TABLE_IM and
5367     {
5368     caption => 1, table => 1, tbody => 1,
5369     tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
5370     }->{$token->{tag_name}}) {
5371 wakaba 1.83 ## TODO: The following is wrong?
5372 wakaba 1.123 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5373 wakaba 1.52
5374 wakaba 1.123 ## have an element in table scope
5375     my $i;
5376     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5377     my $node = $self->{open_elements}->[$_];
5378     if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
5379     !!!cp ('t292');
5380     $i = $_;
5381     last INSCOPE;
5382     } elsif ($node->[1] & TABLE_SCOPING_EL) {
5383     !!!cp ('t293');
5384     last INSCOPE;
5385     }
5386     } # INSCOPE
5387     unless (defined $i) {
5388     !!!cp ('t294');
5389     ## Ignore the token
5390     !!!next-token;
5391     redo B;
5392     }
5393 wakaba 1.52
5394 wakaba 1.123 ## As if </select>
5395     ## have an element in table scope
5396     undef $i;
5397     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5398     my $node = $self->{open_elements}->[$_];
5399     if ($node->[1] & SELECT_EL) {
5400     !!!cp ('t295');
5401     $i = $_;
5402     last INSCOPE;
5403     } elsif ($node->[1] & TABLE_SCOPING_EL) {
5404 wakaba 1.83 ## ISSUE: Can this state be reached?
5405 wakaba 1.123 !!!cp ('t296');
5406     last INSCOPE;
5407     }
5408     } # INSCOPE
5409     unless (defined $i) {
5410     !!!cp ('t297');
5411 wakaba 1.83 ## TODO: The following error type is correct?
5412 wakaba 1.123 !!!parse-error (type => 'unmatched end tag:select', token => $token);
5413     ## Ignore the </select> token
5414     !!!next-token; ## TODO: ok?
5415     redo B;
5416     }
5417 wakaba 1.52
5418 wakaba 1.123 !!!cp ('t298');
5419     splice @{$self->{open_elements}}, $i;
5420 wakaba 1.52
5421 wakaba 1.123 $self->_reset_insertion_mode;
5422 wakaba 1.52
5423 wakaba 1.123 ## reprocess
5424     redo B;
5425 wakaba 1.58 } else {
5426 wakaba 1.79 !!!cp ('t299');
5427 wakaba 1.113 !!!parse-error (type => 'in select:/'.$token->{tag_name}, token => $token);
5428 wakaba 1.52 ## Ignore the token
5429     !!!next-token;
5430     redo B;
5431 wakaba 1.58 }
5432 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
5433 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & HTML_EL and
5434 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
5435     !!!cp ('t299.1');
5436 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
5437 wakaba 1.104 } else {
5438     !!!cp ('t299.2');
5439     }
5440    
5441     ## Stop parsing.
5442     last B;
5443 wakaba 1.58 } else {
5444     die "$0: $token->{type}: Unknown token type";
5445     }
5446 wakaba 1.56 } elsif ($self->{insertion_mode} & BODY_AFTER_IMS) {
5447 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
5448 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5449     my $data = $1;
5450     ## As if in body
5451     $reconstruct_active_formatting_elements->($insert_to_current);
5452    
5453     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5454    
5455     unless (length $token->{data}) {
5456 wakaba 1.79 !!!cp ('t300');
5457 wakaba 1.52 !!!next-token;
5458     redo B;
5459     }
5460     }
5461    
5462 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
5463 wakaba 1.79 !!!cp ('t301');
5464 wakaba 1.113 !!!parse-error (type => 'after html:#character', token => $token);
5465 wakaba 1.52
5466 wakaba 1.84 ## Reprocess in the "after body" insertion mode.
5467 wakaba 1.79 } else {
5468     !!!cp ('t302');
5469 wakaba 1.52 }
5470    
5471     ## "after body" insertion mode
5472 wakaba 1.113 !!!parse-error (type => 'after body:#character', token => $token);
5473 wakaba 1.52
5474 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
5475 wakaba 1.52 ## reprocess
5476     redo B;
5477 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
5478 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
5479 wakaba 1.79 !!!cp ('t303');
5480 wakaba 1.113 !!!parse-error (type => 'after html:'.$token->{tag_name}, token => $token);
5481 wakaba 1.52
5482 wakaba 1.84 ## Reprocess in the "after body" insertion mode.
5483 wakaba 1.79 } else {
5484     !!!cp ('t304');
5485 wakaba 1.52 }
5486    
5487     ## "after body" insertion mode
5488 wakaba 1.113 !!!parse-error (type => 'after body:'.$token->{tag_name}, token => $token);
5489 wakaba 1.52
5490 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
5491 wakaba 1.52 ## reprocess
5492     redo B;
5493 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
5494 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
5495 wakaba 1.79 !!!cp ('t305');
5496 wakaba 1.113 !!!parse-error (type => 'after html:/'.$token->{tag_name}, token => $token);
5497 wakaba 1.52
5498 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
5499 wakaba 1.84 ## Reprocess in the "after body" insertion mode.
5500 wakaba 1.79 } else {
5501     !!!cp ('t306');
5502 wakaba 1.52 }
5503    
5504     ## "after body" insertion mode
5505     if ($token->{tag_name} eq 'html') {
5506     if (defined $self->{inner_html_node}) {
5507 wakaba 1.79 !!!cp ('t307');
5508 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:html', token => $token);
5509 wakaba 1.52 ## Ignore the token
5510     !!!next-token;
5511     redo B;
5512     } else {
5513 wakaba 1.79 !!!cp ('t308');
5514 wakaba 1.54 $self->{insertion_mode} = AFTER_HTML_BODY_IM;
5515 wakaba 1.52 !!!next-token;
5516     redo B;
5517     }
5518     } else {
5519 wakaba 1.79 !!!cp ('t309');
5520 wakaba 1.113 !!!parse-error (type => 'after body:/'.$token->{tag_name}, token => $token);
5521 wakaba 1.52
5522 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
5523 wakaba 1.52 ## reprocess
5524     redo B;
5525     }
5526 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
5527     !!!cp ('t309.2');
5528     ## Stop parsing
5529     last B;
5530 wakaba 1.52 } else {
5531     die "$0: $token->{type}: Unknown token type";
5532     }
5533 wakaba 1.56 } elsif ($self->{insertion_mode} & FRAME_IMS) {
5534 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
5535 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5536     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5537    
5538     unless (length $token->{data}) {
5539 wakaba 1.79 !!!cp ('t310');
5540 wakaba 1.52 !!!next-token;
5541     redo B;
5542     }
5543     }
5544    
5545     if ($token->{data} =~ s/^[^\x09\x0A\x0B\x0C\x20]+//) {
5546 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
5547 wakaba 1.79 !!!cp ('t311');
5548 wakaba 1.113 !!!parse-error (type => 'in frameset:#character', token => $token);
5549 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
5550 wakaba 1.79 !!!cp ('t312');
5551 wakaba 1.113 !!!parse-error (type => 'after frameset:#character', token => $token);
5552 wakaba 1.52 } else { # "after html frameset"
5553 wakaba 1.79 !!!cp ('t313');
5554 wakaba 1.113 !!!parse-error (type => 'after html:#character', token => $token);
5555 wakaba 1.52
5556 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
5557 wakaba 1.84 ## Reprocess in the "after frameset" insertion mode.
5558 wakaba 1.113 !!!parse-error (type => 'after frameset:#character', token => $token);
5559 wakaba 1.52 }
5560    
5561     ## Ignore the token.
5562     if (length $token->{data}) {
5563 wakaba 1.79 !!!cp ('t314');
5564 wakaba 1.52 ## reprocess the rest of characters
5565     } else {
5566 wakaba 1.79 !!!cp ('t315');
5567 wakaba 1.52 !!!next-token;
5568     }
5569     redo B;
5570     }
5571    
5572     die qq[$0: Character "$token->{data}"];
5573 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
5574 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
5575 wakaba 1.79 !!!cp ('t316');
5576 wakaba 1.113 !!!parse-error (type => 'after html:'.$token->{tag_name}, token => $token);
5577 wakaba 1.1
5578 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
5579 wakaba 1.84 ## Process in the "after frameset" insertion mode.
5580 wakaba 1.79 } else {
5581     !!!cp ('t317');
5582     }
5583 wakaba 1.1
5584 wakaba 1.52 if ($token->{tag_name} eq 'frameset' and
5585 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
5586 wakaba 1.79 !!!cp ('t318');
5587 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5588 wakaba 1.52 !!!next-token;
5589     redo B;
5590     } elsif ($token->{tag_name} eq 'frame' and
5591 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
5592 wakaba 1.79 !!!cp ('t319');
5593 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5594 wakaba 1.52 pop @{$self->{open_elements}};
5595     !!!next-token;
5596     redo B;
5597     } elsif ($token->{tag_name} eq 'noframes') {
5598 wakaba 1.79 !!!cp ('t320');
5599 wakaba 1.52 ## NOTE: As if in body.
5600 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
5601 wakaba 1.52 redo B;
5602     } else {
5603 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
5604 wakaba 1.79 !!!cp ('t321');
5605 wakaba 1.113 !!!parse-error (type => 'in frameset:'.$token->{tag_name}, token => $token);
5606 wakaba 1.52 } else {
5607 wakaba 1.79 !!!cp ('t322');
5608 wakaba 1.113 !!!parse-error (type => 'after frameset:'.$token->{tag_name}, token => $token);
5609 wakaba 1.52 }
5610     ## Ignore the token
5611     !!!next-token;
5612     redo B;
5613     }
5614 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
5615 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
5616 wakaba 1.79 !!!cp ('t323');
5617 wakaba 1.113 !!!parse-error (type => 'after html:/'.$token->{tag_name}, token => $token);
5618 wakaba 1.1
5619 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
5620 wakaba 1.84 ## Process in the "after frameset" insertion mode.
5621 wakaba 1.79 } else {
5622     !!!cp ('t324');
5623 wakaba 1.52 }
5624 wakaba 1.1
5625 wakaba 1.52 if ($token->{tag_name} eq 'frameset' and
5626 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
5627 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & HTML_EL and
5628 wakaba 1.52 @{$self->{open_elements}} == 1) {
5629 wakaba 1.79 !!!cp ('t325');
5630 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5631 wakaba 1.52 ## Ignore the token
5632     !!!next-token;
5633     } else {
5634 wakaba 1.79 !!!cp ('t326');
5635 wakaba 1.52 pop @{$self->{open_elements}};
5636     !!!next-token;
5637     }
5638 wakaba 1.47
5639 wakaba 1.52 if (not defined $self->{inner_html_node} and
5640 wakaba 1.123 not ($self->{open_elements}->[-1]->[1] & FRAMESET_EL)) {
5641 wakaba 1.79 !!!cp ('t327');
5642 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
5643 wakaba 1.79 } else {
5644     !!!cp ('t328');
5645 wakaba 1.52 }
5646     redo B;
5647     } elsif ($token->{tag_name} eq 'html' and
5648 wakaba 1.54 $self->{insertion_mode} == AFTER_FRAMESET_IM) {
5649 wakaba 1.79 !!!cp ('t329');
5650 wakaba 1.54 $self->{insertion_mode} = AFTER_HTML_FRAMESET_IM;
5651 wakaba 1.52 !!!next-token;
5652     redo B;
5653     } else {
5654 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
5655 wakaba 1.79 !!!cp ('t330');
5656 wakaba 1.113 !!!parse-error (type => 'in frameset:/'.$token->{tag_name}, token => $token);
5657 wakaba 1.52 } else {
5658 wakaba 1.79 !!!cp ('t331');
5659 wakaba 1.113 !!!parse-error (type => 'after frameset:/'.$token->{tag_name}, token => $token);
5660 wakaba 1.52 }
5661     ## Ignore the token
5662     !!!next-token;
5663     redo B;
5664     }
5665 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
5666 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & HTML_EL and
5667 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
5668     !!!cp ('t331.1');
5669 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
5670 wakaba 1.104 } else {
5671     !!!cp ('t331.2');
5672     }
5673    
5674     ## Stop parsing
5675     last B;
5676 wakaba 1.52 } else {
5677     die "$0: $token->{type}: Unknown token type";
5678     }
5679 wakaba 1.47
5680 wakaba 1.52 ## ISSUE: An issue in spec here
5681     } else {
5682     die "$0: $self->{insertion_mode}: Unknown insertion mode";
5683     }
5684 wakaba 1.47
5685 wakaba 1.52 ## "in body" insertion mode
5686 wakaba 1.55 if ($token->{type} == START_TAG_TOKEN) {
5687 wakaba 1.52 if ($token->{tag_name} eq 'script') {
5688 wakaba 1.79 !!!cp ('t332');
5689 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
5690 wakaba 1.100 $script_start_tag->();
5691 wakaba 1.53 redo B;
5692 wakaba 1.52 } elsif ($token->{tag_name} eq 'style') {
5693 wakaba 1.79 !!!cp ('t333');
5694 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
5695 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
5696 wakaba 1.53 redo B;
5697 wakaba 1.52 } elsif ({
5698     base => 1, link => 1,
5699     }->{$token->{tag_name}}) {
5700 wakaba 1.79 !!!cp ('t334');
5701 wakaba 1.52 ## NOTE: This is an "as if in head" code clone, only "-t" differs
5702 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
5703 wakaba 1.52 pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
5704     !!!next-token;
5705 wakaba 1.53 redo B;
5706 wakaba 1.52 } elsif ($token->{tag_name} eq 'meta') {
5707     ## NOTE: This is an "as if in head" code clone, only "-t" differs
5708 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
5709 wakaba 1.66 my $meta_el = pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
5710 wakaba 1.46
5711 wakaba 1.52 unless ($self->{confident}) {
5712     if ($token->{attributes}->{charset}) { ## TODO: And if supported
5713 wakaba 1.79 !!!cp ('t335');
5714 wakaba 1.63 $self->{change_encoding}
5715 wakaba 1.114 ->($self, $token->{attributes}->{charset}->{value}, $token);
5716 wakaba 1.66
5717     $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
5718     ->set_user_data (manakai_has_reference =>
5719     $token->{attributes}->{charset}
5720     ->{has_reference});
5721 wakaba 1.63 } elsif ($token->{attributes}->{content}) {
5722 wakaba 1.52 ## ISSUE: Algorithm name in the spec was incorrect so that not linked to the definition.
5723 wakaba 1.63 if ($token->{attributes}->{content}->{value}
5724 wakaba 1.70 =~ /\A[^;]*;[\x09-\x0D\x20]*[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
5725     [\x09-\x0D\x20]*=
5726 wakaba 1.52 [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
5727     ([^"'\x09-\x0D\x20][^\x09-\x0D\x20]*))/x) {
5728 wakaba 1.79 !!!cp ('t336');
5729 wakaba 1.63 $self->{change_encoding}
5730 wakaba 1.114 ->($self, defined $1 ? $1 : defined $2 ? $2 : $3, $token);
5731 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
5732     ->set_user_data (manakai_has_reference =>
5733     $token->{attributes}->{content}
5734     ->{has_reference});
5735 wakaba 1.63 }
5736 wakaba 1.52 }
5737 wakaba 1.66 } else {
5738     if ($token->{attributes}->{charset}) {
5739 wakaba 1.79 !!!cp ('t337');
5740 wakaba 1.66 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
5741     ->set_user_data (manakai_has_reference =>
5742     $token->{attributes}->{charset}
5743     ->{has_reference});
5744     }
5745 wakaba 1.68 if ($token->{attributes}->{content}) {
5746 wakaba 1.79 !!!cp ('t338');
5747 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
5748     ->set_user_data (manakai_has_reference =>
5749     $token->{attributes}->{content}
5750     ->{has_reference});
5751     }
5752 wakaba 1.52 }
5753 wakaba 1.1
5754 wakaba 1.52 !!!next-token;
5755 wakaba 1.53 redo B;
5756 wakaba 1.52 } elsif ($token->{tag_name} eq 'title') {
5757 wakaba 1.79 !!!cp ('t341');
5758 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
5759 wakaba 1.96 $parse_rcdata->(RCDATA_CONTENT_MODEL);
5760 wakaba 1.53 redo B;
5761 wakaba 1.52 } elsif ($token->{tag_name} eq 'body') {
5762 wakaba 1.113 !!!parse-error (type => 'in body:body', token => $token);
5763 wakaba 1.46
5764 wakaba 1.52 if (@{$self->{open_elements}} == 1 or
5765 wakaba 1.123 not ($self->{open_elements}->[1]->[1] & BODY_EL)) {
5766 wakaba 1.79 !!!cp ('t342');
5767 wakaba 1.52 ## Ignore the token
5768     } else {
5769     my $body_el = $self->{open_elements}->[1]->[0];
5770     for my $attr_name (keys %{$token->{attributes}}) {
5771     unless ($body_el->has_attribute_ns (undef, $attr_name)) {
5772 wakaba 1.79 !!!cp ('t343');
5773 wakaba 1.52 $body_el->set_attribute_ns
5774     (undef, [undef, $attr_name],
5775     $token->{attributes}->{$attr_name}->{value});
5776     }
5777     }
5778     }
5779     !!!next-token;
5780 wakaba 1.53 redo B;
5781 wakaba 1.52 } elsif ({
5782     address => 1, blockquote => 1, center => 1, dir => 1,
5783 wakaba 1.85 div => 1, dl => 1, fieldset => 1,
5784     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
5785 wakaba 1.97 menu => 1, ol => 1, p => 1, ul => 1,
5786     pre => 1, listing => 1,
5787 wakaba 1.109 form => 1,
5788     table => 1,
5789     hr => 1,
5790 wakaba 1.52 }->{$token->{tag_name}}) {
5791 wakaba 1.109 if ($token->{tag_name} eq 'form' and defined $self->{form_element}) {
5792     !!!cp ('t350');
5793 wakaba 1.113 !!!parse-error (type => 'in form:form', token => $token);
5794 wakaba 1.109 ## Ignore the token
5795     !!!next-token;
5796     redo B;
5797     }
5798    
5799 wakaba 1.52 ## has a p element in scope
5800     INSCOPE: for (reverse @{$self->{open_elements}}) {
5801 wakaba 1.123 if ($_->[1] & P_EL) {
5802 wakaba 1.79 !!!cp ('t344');
5803 wakaba 1.52 !!!back-token;
5804 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'p',
5805     line => $token->{line}, column => $token->{column}};
5806 wakaba 1.53 redo B;
5807 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
5808 wakaba 1.79 !!!cp ('t345');
5809 wakaba 1.52 last INSCOPE;
5810     }
5811     } # INSCOPE
5812    
5813 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
5814 wakaba 1.97 if ($token->{tag_name} eq 'pre' or $token->{tag_name} eq 'listing') {
5815 wakaba 1.52 !!!next-token;
5816 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
5817 wakaba 1.52 $token->{data} =~ s/^\x0A//;
5818     unless (length $token->{data}) {
5819 wakaba 1.79 !!!cp ('t346');
5820 wakaba 1.1 !!!next-token;
5821 wakaba 1.79 } else {
5822     !!!cp ('t349');
5823 wakaba 1.52 }
5824 wakaba 1.79 } else {
5825     !!!cp ('t348');
5826 wakaba 1.52 }
5827 wakaba 1.109 } elsif ($token->{tag_name} eq 'form') {
5828     !!!cp ('t347.1');
5829     $self->{form_element} = $self->{open_elements}->[-1]->[0];
5830    
5831     !!!next-token;
5832     } elsif ($token->{tag_name} eq 'table') {
5833     !!!cp ('t382');
5834     push @{$open_tables}, [$self->{open_elements}->[-1]->[0]];
5835    
5836     $self->{insertion_mode} = IN_TABLE_IM;
5837    
5838     !!!next-token;
5839     } elsif ($token->{tag_name} eq 'hr') {
5840     !!!cp ('t386');
5841     pop @{$self->{open_elements}};
5842    
5843     !!!next-token;
5844 wakaba 1.52 } else {
5845 wakaba 1.79 !!!cp ('t347');
5846 wakaba 1.52 !!!next-token;
5847     }
5848 wakaba 1.53 redo B;
5849 wakaba 1.109 } elsif ({li => 1, dt => 1, dd => 1}->{$token->{tag_name}}) {
5850 wakaba 1.52 ## has a p element in scope
5851     INSCOPE: for (reverse @{$self->{open_elements}}) {
5852 wakaba 1.123 if ($_->[1] & P_EL) {
5853 wakaba 1.79 !!!cp ('t353');
5854 wakaba 1.52 !!!back-token;
5855 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'p',
5856     line => $token->{line}, column => $token->{column}};
5857 wakaba 1.53 redo B;
5858 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
5859 wakaba 1.79 !!!cp ('t354');
5860 wakaba 1.52 last INSCOPE;
5861     }
5862     } # INSCOPE
5863    
5864     ## Step 1
5865     my $i = -1;
5866     my $node = $self->{open_elements}->[$i];
5867 wakaba 1.109 my $li_or_dtdd = {li => {li => 1},
5868     dt => {dt => 1, dd => 1},
5869     dd => {dt => 1, dd => 1}}->{$token->{tag_name}};
5870 wakaba 1.52 LI: {
5871     ## Step 2
5872 wakaba 1.123 if ($li_or_dtdd->{$node->[0]->manakai_local_name}) {
5873 wakaba 1.52 if ($i != -1) {
5874 wakaba 1.79 !!!cp ('t355');
5875 wakaba 1.122 !!!parse-error (type => 'not closed',
5876     value => $self->{open_elements}->[-1]->[0]
5877     ->manakai_local_name,
5878     token => $token);
5879 wakaba 1.79 } else {
5880     !!!cp ('t356');
5881 wakaba 1.52 }
5882     splice @{$self->{open_elements}}, $i;
5883     last LI;
5884 wakaba 1.79 } else {
5885     !!!cp ('t357');
5886 wakaba 1.52 }
5887    
5888     ## Step 3
5889 wakaba 1.123 if (not ($node->[1] & FORMATTING_EL) and
5890 wakaba 1.52 #not $phrasing_category->{$node->[1]} and
5891 wakaba 1.123 ($node->[1] & SPECIAL_EL or
5892     $node->[1] & SCOPING_EL) and
5893     not ($node->[1] & ADDRESS_EL) and
5894     not ($node->[1] & DIV_EL)) {
5895 wakaba 1.79 !!!cp ('t358');
5896 wakaba 1.52 last LI;
5897     }
5898    
5899 wakaba 1.79 !!!cp ('t359');
5900 wakaba 1.52 ## Step 4
5901     $i--;
5902     $node = $self->{open_elements}->[$i];
5903     redo LI;
5904     } # LI
5905    
5906 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
5907 wakaba 1.52 !!!next-token;
5908 wakaba 1.53 redo B;
5909 wakaba 1.52 } elsif ($token->{tag_name} eq 'plaintext') {
5910     ## has a p element in scope
5911     INSCOPE: for (reverse @{$self->{open_elements}}) {
5912 wakaba 1.123 if ($_->[1] & P_EL) {
5913 wakaba 1.79 !!!cp ('t367');
5914 wakaba 1.52 !!!back-token;
5915 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'p',
5916     line => $token->{line}, column => $token->{column}};
5917 wakaba 1.53 redo B;
5918 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
5919 wakaba 1.79 !!!cp ('t368');
5920 wakaba 1.52 last INSCOPE;
5921 wakaba 1.46 }
5922 wakaba 1.52 } # INSCOPE
5923    
5924 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
5925 wakaba 1.52
5926     $self->{content_model} = PLAINTEXT_CONTENT_MODEL;
5927    
5928     !!!next-token;
5929 wakaba 1.53 redo B;
5930 wakaba 1.52 } elsif ($token->{tag_name} eq 'a') {
5931     AFE: for my $i (reverse 0..$#$active_formatting_elements) {
5932     my $node = $active_formatting_elements->[$i];
5933 wakaba 1.123 if ($node->[1] & A_EL) {
5934 wakaba 1.79 !!!cp ('t371');
5935 wakaba 1.113 !!!parse-error (type => 'in a:a', token => $token);
5936 wakaba 1.52
5937     !!!back-token;
5938 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'a',
5939     line => $token->{line}, column => $token->{column}};
5940 wakaba 1.113 $formatting_end_tag->($token);
5941 wakaba 1.52
5942     AFE2: for (reverse 0..$#$active_formatting_elements) {
5943     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
5944 wakaba 1.79 !!!cp ('t372');
5945 wakaba 1.52 splice @$active_formatting_elements, $_, 1;
5946     last AFE2;
5947 wakaba 1.1 }
5948 wakaba 1.52 } # AFE2
5949     OE: for (reverse 0..$#{$self->{open_elements}}) {
5950     if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
5951 wakaba 1.79 !!!cp ('t373');
5952 wakaba 1.52 splice @{$self->{open_elements}}, $_, 1;
5953     last OE;
5954 wakaba 1.1 }
5955 wakaba 1.52 } # OE
5956     last AFE;
5957     } elsif ($node->[0] eq '#marker') {
5958 wakaba 1.79 !!!cp ('t374');
5959 wakaba 1.52 last AFE;
5960     }
5961     } # AFE
5962    
5963     $reconstruct_active_formatting_elements->($insert_to_current);
5964 wakaba 1.1
5965 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
5966 wakaba 1.52 push @$active_formatting_elements, $self->{open_elements}->[-1];
5967 wakaba 1.1
5968 wakaba 1.52 !!!next-token;
5969 wakaba 1.53 redo B;
5970 wakaba 1.52 } elsif ($token->{tag_name} eq 'nobr') {
5971     $reconstruct_active_formatting_elements->($insert_to_current);
5972 wakaba 1.1
5973 wakaba 1.52 ## has a |nobr| element in scope
5974     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5975     my $node = $self->{open_elements}->[$_];
5976 wakaba 1.123 if ($node->[1] & NOBR_EL) {
5977 wakaba 1.79 !!!cp ('t376');
5978 wakaba 1.113 !!!parse-error (type => 'in nobr:nobr', token => $token);
5979 wakaba 1.52 !!!back-token;
5980 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'nobr',
5981     line => $token->{line}, column => $token->{column}};
5982 wakaba 1.53 redo B;
5983 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
5984 wakaba 1.79 !!!cp ('t377');
5985 wakaba 1.52 last INSCOPE;
5986     }
5987     } # INSCOPE
5988    
5989 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
5990 wakaba 1.52 push @$active_formatting_elements, $self->{open_elements}->[-1];
5991    
5992     !!!next-token;
5993 wakaba 1.53 redo B;
5994 wakaba 1.52 } elsif ($token->{tag_name} eq 'button') {
5995     ## has a button element in scope
5996     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5997     my $node = $self->{open_elements}->[$_];
5998 wakaba 1.123 if ($node->[1] & BUTTON_EL) {
5999 wakaba 1.79 !!!cp ('t378');
6000 wakaba 1.113 !!!parse-error (type => 'in button:button', token => $token);
6001 wakaba 1.52 !!!back-token;
6002 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'button',
6003     line => $token->{line}, column => $token->{column}};
6004 wakaba 1.53 redo B;
6005 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
6006 wakaba 1.79 !!!cp ('t379');
6007 wakaba 1.52 last INSCOPE;
6008     }
6009     } # INSCOPE
6010    
6011     $reconstruct_active_formatting_elements->($insert_to_current);
6012    
6013 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6014 wakaba 1.85
6015     ## TODO: associate with $self->{form_element} if defined
6016    
6017 wakaba 1.52 push @$active_formatting_elements, ['#marker', ''];
6018 wakaba 1.1
6019 wakaba 1.52 !!!next-token;
6020 wakaba 1.53 redo B;
6021 wakaba 1.103 } elsif ({
6022 wakaba 1.109 xmp => 1,
6023     iframe => 1,
6024     noembed => 1,
6025     noframes => 1,
6026     noscript => 0, ## TODO: 1 if scripting is enabled
6027 wakaba 1.103 }->{$token->{tag_name}}) {
6028 wakaba 1.109 if ($token->{tag_name} eq 'xmp') {
6029     !!!cp ('t381');
6030     $reconstruct_active_formatting_elements->($insert_to_current);
6031     } else {
6032     !!!cp ('t399');
6033     }
6034     ## NOTE: There is an "as if in body" code clone.
6035 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
6036 wakaba 1.53 redo B;
6037 wakaba 1.52 } elsif ($token->{tag_name} eq 'isindex') {
6038 wakaba 1.113 !!!parse-error (type => 'isindex', token => $token);
6039 wakaba 1.52
6040     if (defined $self->{form_element}) {
6041 wakaba 1.79 !!!cp ('t389');
6042 wakaba 1.52 ## Ignore the token
6043     !!!next-token;
6044 wakaba 1.53 redo B;
6045 wakaba 1.52 } else {
6046     my $at = $token->{attributes};
6047     my $form_attrs;
6048     $form_attrs->{action} = $at->{action} if $at->{action};
6049     my $prompt_attr = $at->{prompt};
6050     $at->{name} = {name => 'name', value => 'isindex'};
6051     delete $at->{action};
6052     delete $at->{prompt};
6053     my @tokens = (
6054 wakaba 1.55 {type => START_TAG_TOKEN, tag_name => 'form',
6055 wakaba 1.114 attributes => $form_attrs,
6056     line => $token->{line}, column => $token->{column}},
6057     {type => START_TAG_TOKEN, tag_name => 'hr',
6058     line => $token->{line}, column => $token->{column}},
6059     {type => START_TAG_TOKEN, tag_name => 'p',
6060     line => $token->{line}, column => $token->{column}},
6061     {type => START_TAG_TOKEN, tag_name => 'label',
6062     line => $token->{line}, column => $token->{column}},
6063 wakaba 1.52 );
6064     if ($prompt_attr) {
6065 wakaba 1.79 !!!cp ('t390');
6066 wakaba 1.114 push @tokens, {type => CHARACTER_TOKEN, data => $prompt_attr->{value},
6067 wakaba 1.118 #line => $token->{line}, column => $token->{column},
6068     };
6069 wakaba 1.1 } else {
6070 wakaba 1.79 !!!cp ('t391');
6071 wakaba 1.55 push @tokens, {type => CHARACTER_TOKEN,
6072 wakaba 1.114 data => 'This is a searchable index. Insert your search keywords here: ',
6073 wakaba 1.118 #line => $token->{line}, column => $token->{column},
6074     }; # SHOULD
6075 wakaba 1.52 ## TODO: make this configurable
6076 wakaba 1.1 }
6077 wakaba 1.52 push @tokens,
6078 wakaba 1.114 {type => START_TAG_TOKEN, tag_name => 'input', attributes => $at,
6079     line => $token->{line}, column => $token->{column}},
6080 wakaba 1.55 #{type => CHARACTER_TOKEN, data => ''}, # SHOULD
6081 wakaba 1.114 {type => END_TAG_TOKEN, tag_name => 'label',
6082     line => $token->{line}, column => $token->{column}},
6083     {type => END_TAG_TOKEN, tag_name => 'p',
6084     line => $token->{line}, column => $token->{column}},
6085     {type => START_TAG_TOKEN, tag_name => 'hr',
6086     line => $token->{line}, column => $token->{column}},
6087     {type => END_TAG_TOKEN, tag_name => 'form',
6088     line => $token->{line}, column => $token->{column}};
6089 wakaba 1.52 $token = shift @tokens;
6090     !!!back-token (@tokens);
6091 wakaba 1.53 redo B;
6092 wakaba 1.52 }
6093     } elsif ($token->{tag_name} eq 'textarea') {
6094     my $tag_name = $token->{tag_name};
6095     my $el;
6096 wakaba 1.116 !!!create-element ($el, $token->{tag_name}, $token->{attributes}, $token);
6097 wakaba 1.52
6098     ## TODO: $self->{form_element} if defined
6099     $self->{content_model} = RCDATA_CONTENT_MODEL;
6100     delete $self->{escape}; # MUST
6101    
6102     $insert->($el);
6103    
6104     my $text = '';
6105     !!!next-token;
6106 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
6107 wakaba 1.52 $token->{data} =~ s/^\x0A//;
6108 wakaba 1.51 unless (length $token->{data}) {
6109 wakaba 1.79 !!!cp ('t392');
6110 wakaba 1.51 !!!next-token;
6111 wakaba 1.79 } else {
6112     !!!cp ('t393');
6113 wakaba 1.51 }
6114 wakaba 1.79 } else {
6115     !!!cp ('t394');
6116 wakaba 1.51 }
6117 wakaba 1.55 while ($token->{type} == CHARACTER_TOKEN) {
6118 wakaba 1.79 !!!cp ('t395');
6119 wakaba 1.52 $text .= $token->{data};
6120     !!!next-token;
6121     }
6122     if (length $text) {
6123 wakaba 1.79 !!!cp ('t396');
6124 wakaba 1.52 $el->manakai_append_text ($text);
6125     }
6126    
6127     $self->{content_model} = PCDATA_CONTENT_MODEL;
6128 wakaba 1.51
6129 wakaba 1.55 if ($token->{type} == END_TAG_TOKEN and
6130 wakaba 1.52 $token->{tag_name} eq $tag_name) {
6131 wakaba 1.79 !!!cp ('t397');
6132 wakaba 1.52 ## Ignore the token
6133     } else {
6134 wakaba 1.79 !!!cp ('t398');
6135 wakaba 1.113 !!!parse-error (type => 'in RCDATA:#'.$token->{type}, token => $token);
6136 wakaba 1.51 }
6137 wakaba 1.52 !!!next-token;
6138 wakaba 1.53 redo B;
6139 wakaba 1.52 } elsif ({
6140     caption => 1, col => 1, colgroup => 1, frame => 1,
6141     frameset => 1, head => 1, option => 1, optgroup => 1,
6142     tbody => 1, td => 1, tfoot => 1, th => 1,
6143     thead => 1, tr => 1,
6144     }->{$token->{tag_name}}) {
6145 wakaba 1.79 !!!cp ('t401');
6146 wakaba 1.113 !!!parse-error (type => 'in body:'.$token->{tag_name}, token => $token);
6147 wakaba 1.52 ## Ignore the token
6148     !!!next-token;
6149 wakaba 1.53 redo B;
6150 wakaba 1.52
6151     ## ISSUE: An issue on HTML5 new elements in the spec.
6152     } else {
6153 wakaba 1.110 if ($token->{tag_name} eq 'image') {
6154     !!!cp ('t384');
6155 wakaba 1.113 !!!parse-error (type => 'image', token => $token);
6156 wakaba 1.110 $token->{tag_name} = 'img';
6157     } else {
6158     !!!cp ('t385');
6159     }
6160    
6161     ## NOTE: There is an "as if <br>" code clone.
6162 wakaba 1.52 $reconstruct_active_formatting_elements->($insert_to_current);
6163    
6164 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6165 wakaba 1.109
6166 wakaba 1.110 if ({
6167     applet => 1, marquee => 1, object => 1,
6168     }->{$token->{tag_name}}) {
6169     !!!cp ('t380');
6170     push @$active_formatting_elements, ['#marker', ''];
6171     } elsif ({
6172     b => 1, big => 1, em => 1, font => 1, i => 1,
6173     s => 1, small => 1, strile => 1,
6174     strong => 1, tt => 1, u => 1,
6175     }->{$token->{tag_name}}) {
6176     !!!cp ('t375');
6177     push @$active_formatting_elements, $self->{open_elements}->[-1];
6178     } elsif ($token->{tag_name} eq 'input') {
6179     !!!cp ('t388');
6180     ## TODO: associate with $self->{form_element} if defined
6181     pop @{$self->{open_elements}};
6182     } elsif ({
6183     area => 1, basefont => 1, bgsound => 1, br => 1,
6184     embed => 1, img => 1, param => 1, spacer => 1, wbr => 1,
6185     #image => 1,
6186     }->{$token->{tag_name}}) {
6187     !!!cp ('t388.1');
6188     pop @{$self->{open_elements}};
6189     } elsif ($token->{tag_name} eq 'select') {
6190 wakaba 1.109 ## TODO: associate with $self->{form_element} if defined
6191    
6192     if ($self->{insertion_mode} & TABLE_IMS or
6193     $self->{insertion_mode} & BODY_TABLE_IMS or
6194     $self->{insertion_mode} == IN_COLUMN_GROUP_IM) {
6195     !!!cp ('t400.1');
6196     $self->{insertion_mode} = IN_SELECT_IN_TABLE_IM;
6197     } else {
6198     !!!cp ('t400.2');
6199     $self->{insertion_mode} = IN_SELECT_IM;
6200     }
6201 wakaba 1.110 } else {
6202     !!!cp ('t402');
6203 wakaba 1.109 }
6204 wakaba 1.51
6205 wakaba 1.52 !!!next-token;
6206 wakaba 1.53 redo B;
6207 wakaba 1.52 }
6208 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
6209 wakaba 1.52 if ($token->{tag_name} eq 'body') {
6210 wakaba 1.107 ## has a |body| element in scope
6211     my $i;
6212 wakaba 1.111 INSCOPE: {
6213     for (reverse @{$self->{open_elements}}) {
6214 wakaba 1.123 if ($_->[1] & BODY_EL) {
6215 wakaba 1.111 !!!cp ('t405');
6216     $i = $_;
6217     last INSCOPE;
6218 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
6219 wakaba 1.111 !!!cp ('t405.1');
6220     last;
6221     }
6222 wakaba 1.52 }
6223 wakaba 1.111
6224     !!!parse-error (type => 'start tag not allowed',
6225 wakaba 1.113 value => $token->{tag_name}, token => $token);
6226 wakaba 1.107 ## NOTE: Ignore the token.
6227 wakaba 1.52 !!!next-token;
6228 wakaba 1.53 redo B;
6229 wakaba 1.111 } # INSCOPE
6230 wakaba 1.107
6231     for (@{$self->{open_elements}}) {
6232 wakaba 1.123 unless ($_->[1] & ALL_END_TAG_OPTIONAL_EL) {
6233 wakaba 1.107 !!!cp ('t403');
6234 wakaba 1.122 !!!parse-error (type => 'not closed',
6235     value => $_->[0]->manakai_local_name,
6236     token => $token);
6237 wakaba 1.107 last;
6238     } else {
6239     !!!cp ('t404');
6240     }
6241     }
6242    
6243     $self->{insertion_mode} = AFTER_BODY_IM;
6244     !!!next-token;
6245     redo B;
6246 wakaba 1.52 } elsif ($token->{tag_name} eq 'html') {
6247 wakaba 1.122 ## TODO: Update this code. It seems that the code below is not
6248     ## up-to-date, though it has the same effect as specified.
6249 wakaba 1.123 if (@{$self->{open_elements}} > 1 and
6250     $self->{open_elements}->[1]->[1] & BODY_EL) {
6251 wakaba 1.52 ## ISSUE: There is an issue in the spec.
6252 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & BODY_EL) {
6253 wakaba 1.79 !!!cp ('t406');
6254 wakaba 1.122 !!!parse-error (type => 'not closed',
6255     value => $self->{open_elements}->[1]->[0]
6256     ->manakai_local_name,
6257     token => $token);
6258 wakaba 1.79 } else {
6259     !!!cp ('t407');
6260 wakaba 1.1 }
6261 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
6262 wakaba 1.52 ## reprocess
6263 wakaba 1.53 redo B;
6264 wakaba 1.51 } else {
6265 wakaba 1.79 !!!cp ('t408');
6266 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6267 wakaba 1.52 ## Ignore the token
6268     !!!next-token;
6269 wakaba 1.53 redo B;
6270 wakaba 1.51 }
6271 wakaba 1.52 } elsif ({
6272     address => 1, blockquote => 1, center => 1, dir => 1,
6273     div => 1, dl => 1, fieldset => 1, listing => 1,
6274     menu => 1, ol => 1, pre => 1, ul => 1,
6275     dd => 1, dt => 1, li => 1,
6276 wakaba 1.103 applet => 1, button => 1, marquee => 1, object => 1,
6277 wakaba 1.52 }->{$token->{tag_name}}) {
6278     ## has an element in scope
6279     my $i;
6280     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6281     my $node = $self->{open_elements}->[$_];
6282 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
6283 wakaba 1.79 !!!cp ('t410');
6284 wakaba 1.52 $i = $_;
6285 wakaba 1.87 last INSCOPE;
6286 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
6287 wakaba 1.79 !!!cp ('t411');
6288 wakaba 1.52 last INSCOPE;
6289 wakaba 1.51 }
6290 wakaba 1.52 } # INSCOPE
6291 wakaba 1.89
6292     unless (defined $i) { # has an element in scope
6293     !!!cp ('t413');
6294 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6295 wakaba 1.89 } else {
6296     ## Step 1. generate implied end tags
6297     while ({
6298     dd => ($token->{tag_name} ne 'dd'),
6299     dt => ($token->{tag_name} ne 'dt'),
6300     li => ($token->{tag_name} ne 'li'),
6301     p => 1,
6302 wakaba 1.123 }->{$self->{open_elements}->[-1]->[0]->manakai_local_name}) {
6303 wakaba 1.89 !!!cp ('t409');
6304     pop @{$self->{open_elements}};
6305     }
6306    
6307     ## Step 2.
6308 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
6309     ne $token->{tag_name}) {
6310 wakaba 1.79 !!!cp ('t412');
6311 wakaba 1.122 !!!parse-error (type => 'not closed',
6312     value => $self->{open_elements}->[-1]->[0]
6313     ->manakai_local_name,
6314     token => $token);
6315 wakaba 1.51 } else {
6316 wakaba 1.89 !!!cp ('t414');
6317 wakaba 1.51 }
6318 wakaba 1.89
6319     ## Step 3.
6320 wakaba 1.52 splice @{$self->{open_elements}}, $i;
6321 wakaba 1.89
6322     ## Step 4.
6323     $clear_up_to_marker->()
6324     if {
6325 wakaba 1.103 applet => 1, button => 1, marquee => 1, object => 1,
6326 wakaba 1.89 }->{$token->{tag_name}};
6327 wakaba 1.51 }
6328 wakaba 1.52 !!!next-token;
6329 wakaba 1.53 redo B;
6330 wakaba 1.52 } elsif ($token->{tag_name} eq 'form') {
6331 wakaba 1.92 undef $self->{form_element};
6332    
6333 wakaba 1.52 ## has an element in scope
6334 wakaba 1.92 my $i;
6335 wakaba 1.52 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6336     my $node = $self->{open_elements}->[$_];
6337 wakaba 1.123 if ($node->[1] & FORM_EL) {
6338 wakaba 1.79 !!!cp ('t418');
6339 wakaba 1.92 $i = $_;
6340 wakaba 1.52 last INSCOPE;
6341 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
6342 wakaba 1.79 !!!cp ('t419');
6343 wakaba 1.52 last INSCOPE;
6344     }
6345     } # INSCOPE
6346 wakaba 1.92
6347     unless (defined $i) { # has an element in scope
6348 wakaba 1.79 !!!cp ('t421');
6349 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6350 wakaba 1.92 } else {
6351     ## Step 1. generate implied end tags
6352 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
6353 wakaba 1.92 !!!cp ('t417');
6354     pop @{$self->{open_elements}};
6355     }
6356    
6357     ## Step 2.
6358 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
6359     ne $token->{tag_name}) {
6360 wakaba 1.92 !!!cp ('t417.1');
6361 wakaba 1.122 !!!parse-error (type => 'not closed',
6362     value => $self->{open_elements}->[-1]->[0]
6363     ->manakai_local_name,
6364     token => $token);
6365 wakaba 1.92 } else {
6366     !!!cp ('t420');
6367     }
6368    
6369     ## Step 3.
6370     splice @{$self->{open_elements}}, $i;
6371 wakaba 1.52 }
6372    
6373     !!!next-token;
6374 wakaba 1.53 redo B;
6375 wakaba 1.52 } elsif ({
6376     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
6377     }->{$token->{tag_name}}) {
6378     ## has an element in scope
6379     my $i;
6380     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6381     my $node = $self->{open_elements}->[$_];
6382 wakaba 1.123 if ($node->[1] & HEADING_EL) {
6383 wakaba 1.79 !!!cp ('t423');
6384 wakaba 1.52 $i = $_;
6385     last INSCOPE;
6386 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
6387 wakaba 1.79 !!!cp ('t424');
6388 wakaba 1.52 last INSCOPE;
6389 wakaba 1.51 }
6390 wakaba 1.52 } # INSCOPE
6391 wakaba 1.93
6392     unless (defined $i) { # has an element in scope
6393     !!!cp ('t425.1');
6394 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6395 wakaba 1.79 } else {
6396 wakaba 1.93 ## Step 1. generate implied end tags
6397 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
6398 wakaba 1.93 !!!cp ('t422');
6399     pop @{$self->{open_elements}};
6400     }
6401    
6402     ## Step 2.
6403 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
6404     ne $token->{tag_name}) {
6405 wakaba 1.93 !!!cp ('t425');
6406 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6407 wakaba 1.93 } else {
6408     !!!cp ('t426');
6409     }
6410    
6411     ## Step 3.
6412     splice @{$self->{open_elements}}, $i;
6413 wakaba 1.36 }
6414 wakaba 1.52
6415     !!!next-token;
6416 wakaba 1.53 redo B;
6417 wakaba 1.87 } elsif ($token->{tag_name} eq 'p') {
6418     ## has an element in scope
6419     my $i;
6420     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6421     my $node = $self->{open_elements}->[$_];
6422 wakaba 1.123 if ($node->[1] & P_EL) {
6423 wakaba 1.87 !!!cp ('t410.1');
6424     $i = $_;
6425 wakaba 1.88 last INSCOPE;
6426 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
6427 wakaba 1.87 !!!cp ('t411.1');
6428     last INSCOPE;
6429     }
6430     } # INSCOPE
6431 wakaba 1.91
6432     if (defined $i) {
6433 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
6434     ne $token->{tag_name}) {
6435 wakaba 1.87 !!!cp ('t412.1');
6436 wakaba 1.122 !!!parse-error (type => 'not closed',
6437     value => $self->{open_elements}->[-1]->[0]
6438     ->manakai_local_name,
6439     token => $token);
6440 wakaba 1.87 } else {
6441 wakaba 1.91 !!!cp ('t414.1');
6442 wakaba 1.87 }
6443 wakaba 1.91
6444 wakaba 1.87 splice @{$self->{open_elements}}, $i;
6445     } else {
6446 wakaba 1.91 !!!cp ('t413.1');
6447 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6448 wakaba 1.91
6449 wakaba 1.87 !!!cp ('t415.1');
6450     ## As if <p>, then reprocess the current token
6451     my $el;
6452 wakaba 1.116 !!!create-element ($el, 'p',, $token);
6453 wakaba 1.87 $insert->($el);
6454 wakaba 1.91 ## NOTE: Not inserted into |$self->{open_elements}|.
6455 wakaba 1.87 }
6456 wakaba 1.91
6457 wakaba 1.87 !!!next-token;
6458     redo B;
6459 wakaba 1.52 } elsif ({
6460     a => 1,
6461     b => 1, big => 1, em => 1, font => 1, i => 1,
6462     nobr => 1, s => 1, small => 1, strile => 1,
6463     strong => 1, tt => 1, u => 1,
6464     }->{$token->{tag_name}}) {
6465 wakaba 1.79 !!!cp ('t427');
6466 wakaba 1.113 $formatting_end_tag->($token);
6467 wakaba 1.53 redo B;
6468 wakaba 1.52 } elsif ($token->{tag_name} eq 'br') {
6469 wakaba 1.79 !!!cp ('t428');
6470 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:br', token => $token);
6471 wakaba 1.52
6472     ## As if <br>
6473     $reconstruct_active_formatting_elements->($insert_to_current);
6474    
6475     my $el;
6476 wakaba 1.116 !!!create-element ($el, 'br',, $token);
6477 wakaba 1.52 $insert->($el);
6478    
6479     ## Ignore the token.
6480     !!!next-token;
6481 wakaba 1.53 redo B;
6482 wakaba 1.52 } elsif ({
6483     caption => 1, col => 1, colgroup => 1, frame => 1,
6484     frameset => 1, head => 1, option => 1, optgroup => 1,
6485     tbody => 1, td => 1, tfoot => 1, th => 1,
6486     thead => 1, tr => 1,
6487     area => 1, basefont => 1, bgsound => 1,
6488     embed => 1, hr => 1, iframe => 1, image => 1,
6489     img => 1, input => 1, isindex => 1, noembed => 1,
6490     noframes => 1, param => 1, select => 1, spacer => 1,
6491     table => 1, textarea => 1, wbr => 1,
6492     noscript => 0, ## TODO: if scripting is enabled
6493     }->{$token->{tag_name}}) {
6494 wakaba 1.79 !!!cp ('t429');
6495 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6496 wakaba 1.52 ## Ignore the token
6497     !!!next-token;
6498 wakaba 1.53 redo B;
6499 wakaba 1.52
6500     ## ISSUE: Issue on HTML5 new elements in spec
6501    
6502     } else {
6503     ## Step 1
6504     my $node_i = -1;
6505     my $node = $self->{open_elements}->[$node_i];
6506 wakaba 1.51
6507 wakaba 1.52 ## Step 2
6508     S2: {
6509 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
6510 wakaba 1.52 ## Step 1
6511     ## generate implied end tags
6512 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
6513 wakaba 1.79 !!!cp ('t430');
6514 wakaba 1.83 ## ISSUE: Can this case be reached?
6515 wakaba 1.86 pop @{$self->{open_elements}};
6516 wakaba 1.52 }
6517    
6518     ## Step 2
6519 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
6520     ne $token->{tag_name}) {
6521 wakaba 1.79 !!!cp ('t431');
6522 wakaba 1.58 ## NOTE: <x><y></x>
6523 wakaba 1.122 !!!parse-error (type => 'not closed',
6524     value => $self->{open_elements}->[-1]->[0]
6525     ->manakai_local_name,
6526     token => $token);
6527 wakaba 1.79 } else {
6528     !!!cp ('t432');
6529 wakaba 1.52 }
6530    
6531     ## Step 3
6532     splice @{$self->{open_elements}}, $node_i;
6533 wakaba 1.51
6534 wakaba 1.1 !!!next-token;
6535 wakaba 1.52 last S2;
6536 wakaba 1.1 } else {
6537 wakaba 1.52 ## Step 3
6538 wakaba 1.123 if (not ($node->[1] & FORMATTING_EL) and
6539 wakaba 1.52 #not $phrasing_category->{$node->[1]} and
6540 wakaba 1.123 ($node->[1] & SPECIAL_EL or
6541     $node->[1] & SCOPING_EL)) {
6542 wakaba 1.79 !!!cp ('t433');
6543 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6544 wakaba 1.52 ## Ignore the token
6545     !!!next-token;
6546     last S2;
6547     }
6548 wakaba 1.79
6549     !!!cp ('t434');
6550 wakaba 1.1 }
6551 wakaba 1.52
6552     ## Step 4
6553     $node_i--;
6554     $node = $self->{open_elements}->[$node_i];
6555    
6556     ## Step 5;
6557     redo S2;
6558     } # S2
6559 wakaba 1.53 redo B;
6560 wakaba 1.1 }
6561     }
6562 wakaba 1.52 redo B;
6563 wakaba 1.1 } # B
6564    
6565     ## Stop parsing # MUST
6566    
6567     ## TODO: script stuffs
6568 wakaba 1.3 } # _tree_construct_main
6569    
## Implements the |innerHTML| setter (the HTML fragment parsing
## algorithm) for a document or an element node.
##
## Usage: Whatpm::HTML->set_inner_html ($node, $string, $onerror);
##
##   $node    - A node whose |node_type| is 9 (document) or 1 (element).
##              Its children are replaced by the result of parsing.
##   $string  - The markup to parse.  It is read through a reference to
##              the caller's argument, so it is not copied here.
##   $onerror - Optional code reference called for each parse error with
##              a key/value list that starts with |line| and |column|,
##              followed by the pairs supplied by the parser (the
##              default handler below reads |type| and |token|).  If
##              false, parse errors are reported by |warn|.
##
## Dies if |$node| is neither a document nor an element.
##
## NOTE: The return value is simply that of the last evaluated
## statement; presumably callers are not expected to use it.
sub set_inner_html ($$$) {
  my $class = shift;
  my $node = shift;
  my $s = \$_[0];
  my $onerror = $_[1];

  ## ISSUE: Should {confident} be true?

  my $nt = $node->node_type;
  if ($nt == 9) {
    # MUST

    ## Step 1 # MUST
    ## TODO: If the document has an active parser, ...
    ## ISSUE: There is an issue in the spec.

    ## Step 2 # MUST
    ## NOTE: The child list is copied first so that removing nodes does
    ## not disturb the iteration.
    my @cn = @{$node->child_nodes};
    for (@cn) {
      $node->remove_child ($_);
    }

    ## Step 3, 4, 5 # MUST
    $class->parse_string ($$s => $node, $onerror);
  } elsif ($nt == 1) {
    ## TODO: If non-html element

    ## NOTE: Most of this code is copied from |parse_string|

    ## Step 1 # MUST
    ## A temporary HTML document and a fresh parser are created; the
    ## parsed nodes are moved into |$node| at the end.
    my $this_doc = $node->owner_document;
    my $doc = $this_doc->implementation->create_document;
    $doc->manakai_is_html (1);
    my $p = $class->new;
    $p->{document} = $doc;

    ## Step 8 # MUST
    my $i = 0; # index of the next character of |$$s| to be read
    $p->{line_prev} = $p->{line} = 1;
    $p->{column_prev} = $p->{column} = 0;
    $p->{set_next_char} = sub {
      ## NOTE: Only the invocant |$self| is used in this closure, never
      ## the outer |$p|.  The closure is stored in the parser itself, so
      ## capturing |$p| would create a reference cycle (parser ->
      ## closure -> parser) and the parser would never be freed.
      my $self = shift;

      ## Shift the history of previously read characters.
      pop @{$self->{prev_char}};
      unshift @{$self->{prev_char}}, $self->{next_char};

      ## End of input is represented by -1.
      if ($i >= length $$s) {
        $self->{next_char} = -1;
        return;
      }
      $self->{next_char} = ord substr $$s, $i++, 1;

      ## Remember the position before advancing to the new character.
      ($self->{line_prev}, $self->{column_prev})
          = ($self->{line}, $self->{column});
      $self->{column}++;

      if ($self->{next_char} == 0x000A) { # LF
        $self->{line}++;
        $self->{column} = 0;
        !!!cp ('i1');
      } elsif ($self->{next_char} == 0x000D) { # CR
        ## CR and CR LF are both reported as a single LF.
        $i++ if substr ($$s, $i, 1) eq "\x0A";
        $self->{next_char} = 0x000A; # LF # MUST
        $self->{line}++;
        $self->{column} = 0;
        !!!cp ('i2');
      } elsif ($self->{next_char} > 0x10FFFF) {
        $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
        !!!cp ('i3');
      } elsif ($self->{next_char} == 0x0000) { # NULL
        !!!cp ('i4');
        !!!parse-error (type => 'NULL');
        $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
      }
    };
    $p->{prev_char} = [-1, -1, -1];
    $p->{next_char} = -1;

    ## Default error handler: prefer the position recorded in the token,
    ## if any, over the current position of the parser.
    my $ponerror = $onerror || sub {
      my (%opt) = @_;
      my $line = $opt{line};
      my $column = $opt{column};
      if (defined $opt{token} and defined $opt{token}->{line}) {
        $line = $opt{token}->{line};
        $column = $opt{token}->{column};
      }
      warn "Parse error ($opt{type}) at line $line column $column\n";
    };
    ## NOTE: This closure does capture |$p| (it is not passed the
    ## parser as an argument here), so it is deleted at the end of this
    ## branch to break the reference cycle.
    $p->{parse_error} = sub {
      $ponerror->(line => $p->{line}, column => $p->{column}, @_);
    };

    $p->_initialize_tokenizer;
    $p->_initialize_tree_constructor;

    ## Step 2
    ## The initial content model of the tokenizer depends on the local
    ## name of the context element; PCDATA is used for any other name.
    my $node_ln = $node->manakai_local_name;
    $p->{content_model} = {
      title => RCDATA_CONTENT_MODEL,
      textarea => RCDATA_CONTENT_MODEL,
      style => CDATA_CONTENT_MODEL,
      script => CDATA_CONTENT_MODEL,
      xmp => CDATA_CONTENT_MODEL,
      iframe => CDATA_CONTENT_MODEL,
      noembed => CDATA_CONTENT_MODEL,
      noframes => CDATA_CONTENT_MODEL,
      noscript => CDATA_CONTENT_MODEL,
      plaintext => PLAINTEXT_CONTENT_MODEL,
    }->{$node_ln};
    $p->{content_model} = PCDATA_CONTENT_MODEL
        unless defined $p->{content_model};
        ## ISSUE: What is "the name of the element"? local name?

    $p->{inner_html_node} = [$node, $el_category->{$node_ln}];
      ## TODO: Foreign element OK?

    ## Step 3
    my $root = $doc->create_element_ns
      ('http://www.w3.org/1999/xhtml', [undef, 'html']);

    ## Step 4 # MUST
    $doc->append_child ($root);

    ## Step 5 # MUST
    push @{$p->{open_elements}}, [$root, $el_category->{html}];

    undef $p->{head_element};

    ## Step 6 # MUST
    $p->_reset_insertion_mode;

    ## Step 7 # MUST
    ## The form element pointer is set to the nearest XHTML |form|
    ## element among the context node and its ancestors, if any.
    my $anode = $node;
    AN: while (defined $anode) {
      if ($anode->node_type == 1) {
        my $nsuri = $anode->namespace_uri;
        if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml') {
          if ($anode->manakai_local_name eq 'form') {
            !!!cp ('i5');
            $p->{form_element} = $anode;
            last AN;
          }
        }
      }
      $anode = $anode->parent_node;
    } # AN

    ## Step 9 # MUST
    {
      ## The |!!!next-token| macro operates on a variable named |$self|.
      my $self = $p;
      !!!next-token;
    }
    $p->_tree_construction_main;

    ## Step 10 # MUST
    my @cn = @{$node->child_nodes};
    for (@cn) {
      $node->remove_child ($_);
    }
    ## ISSUE: mutation events? read-only?

    ## Step 11 # MUST
    ## Move the parsed children from the temporary root into |$node|.
    @cn = @{$root->child_nodes};
    for (@cn) {
      $this_doc->adopt_node ($_);
      $node->append_child ($_);
    }
    ## ISSUE: mutation events?

    $p->_terminate_tree_constructor;

    delete $p->{parse_error}; # delete loop
  } else {
    die "$0: |set_inner_html| is not defined for node of type $nt";
  }
} # set_inner_html
6742    
6743     } # tree construction stage
6744 wakaba 1.1
package Whatpm::HTML::RestartParser;
## A subclass of |Error|.  NOTE(review): judging by its name, it is
## presumably thrown to request that parsing be restarted; confirm
## where it is thrown.
our @ISA;
push @ISA, 'Error';

1;
6749 wakaba 1.123 # $Date: 2008/04/06 06:34:11 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24