/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Contents of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.125 - (hide annotations) (download) (as text)
Sat Apr 12 10:41:31 2008 UTC (16 years, 6 months ago) by wakaba
Branch: MAIN
Changes since 1.124: +178 -110 lines
File MIME type: application/x-wais-source
++ whatpm/t/ChangeLog	12 Apr 2008 10:41:08 -0000
	* HTML-tokenizer.t: Remove "self-closing flag" if the start
	tag token is that of a slash permitted element (This is necessary
	to maintain compatibility with current test data, since in the
	new algorithm whether slash is permitted or not is decided in
	tree construction stage).

2008-04-12  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	12 Apr 2008 10:38:11 -0000
2008-04-12  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm.src, mkhtmlparser.pl: The way permitted slash errors
	are raised is changed (HTML5 revision 1404).

1 wakaba 1.2 package Whatpm::HTML;
2 wakaba 1.1 use strict;
3 wakaba 1.125 our $VERSION=do{my @r=(q$Revision: 1.124 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.63 use Error qw(:try);
5 wakaba 1.1
6 wakaba 1.18 ## ISSUE:
7     ## var doc = implementation.createDocument (null, null, null);
8     ## doc.write ('');
9     ## alert (doc.compatMode);
10 wakaba 1.1
11 wakaba 1.70 ## TODO: Control characters and noncharacters are not allowed (HTML5 revision 1263)
12     ## TODO: 1252 parse error (revision 1264)
13     ## TODO: 8859-11 = 874 (revision 1271)
14    
## Element category flags.
##
## Each primitive flag below occupies its own bit, so flags can be OR-ed
## together into the composite categories defined afterwards and stored
## per element name in |$el_category|.
sub A_EL               () { 1 <<  0 }
sub ADDRESS_EL         () { 1 <<  1 }
sub BODY_EL            () { 1 <<  2 }
sub BUTTON_EL          () { 1 <<  3 }
sub CAPTION_EL         () { 1 <<  4 }
sub DD_EL              () { 1 <<  5 }
sub DIV_EL             () { 1 <<  6 }
sub DT_EL              () { 1 <<  7 }
sub FORM_EL            () { 1 <<  8 }
sub FORMATTING_EL      () { 1 <<  9 }
sub FRAMESET_EL        () { 1 << 10 }
sub HEADING_EL         () { 1 << 11 }
sub HTML_EL            () { 1 << 12 }
sub LI_EL              () { 1 << 13 }
sub NOBR_EL            () { 1 << 14 }
sub OPTION_EL          () { 1 << 15 }
sub OPTGROUP_EL        () { 1 << 16 }
sub P_EL               () { 1 << 17 }
sub SELECT_EL          () { 1 << 18 }
sub TABLE_EL           () { 1 << 19 }
sub TABLE_CELL_EL      () { 1 << 20 }
sub TABLE_ROW_EL       () { 1 << 21 }
sub TABLE_ROW_GROUP_EL () { 1 << 22 }
sub MISC_SCOPING_EL    () { 1 << 23 }
sub MISC_SPECIAL_EL    () { 1 << 24 }

## Composite categories: unions of the primitive flags above.

## |table|, row groups, and rows.
sub TABLE_ROWS_EL () { TABLE_EL | TABLE_ROW_EL | TABLE_ROW_GROUP_EL }

## |dd|, |dt|, |li|, and |p|.
sub END_TAG_OPTIONAL_EL () { DD_EL | DT_EL | LI_EL | P_EL }

## |END_TAG_OPTIONAL_EL| plus |body|, |html|, and the table cell, row,
## and row group categories.
sub ALL_END_TAG_OPTIONAL_EL () {
  END_TAG_OPTIONAL_EL
      | BODY_EL | HTML_EL
      | TABLE_CELL_EL | TABLE_ROW_EL | TABLE_ROW_GROUP_EL
}

sub SCOPING_EL () {
  BUTTON_EL | CAPTION_EL | HTML_EL | TABLE_EL | TABLE_CELL_EL
      | MISC_SCOPING_EL
}

sub TABLE_SCOPING_EL      () { HTML_EL | TABLE_EL }
sub TABLE_ROWS_SCOPING_EL () { HTML_EL | TABLE_ROW_GROUP_EL }
sub TABLE_ROW_SCOPING_EL  () { HTML_EL | TABLE_ROW_EL }

sub SPECIAL_EL () {
  ADDRESS_EL | BODY_EL | DIV_EL
      | END_TAG_OPTIONAL_EL
      | FORM_EL | FRAMESET_EL | HEADING_EL
      | OPTION_EL | OPTGROUP_EL | SELECT_EL
      | TABLE_ROW_EL | TABLE_ROW_GROUP_EL
      | MISC_SPECIAL_EL
}
102    
## Maps a lowercase HTML element name to its category flags (the |*_EL|
## constants).  Element names that are absent from the table have no entry.
## The table is assembled from one name list per shared category; elements
## with a category of their own are listed individually at the end.
my $el_category = {
  ## Formatting elements without any additional flag.
  (map { ($_ => FORMATTING_EL) }
       qw(b big em font i s small strike strong tt u)),

  ## Miscellaneous scoping elements.
  (map { ($_ => MISC_SCOPING_EL) } qw(applet marquee object)),

  ## Miscellaneous special elements.
  (map { ($_ => MISC_SPECIAL_EL) } qw(
    area base basefont bgsound blockquote br center col colgroup
    dir dl embed fieldset frame head hr iframe img input isindex
    link listing menu meta noembed noframes noscript ol param
    plaintext pre script spacer style textarea title ul wbr
  )),

  ## Headings, table row groups, and table cells.
  (map { ($_ => HEADING_EL) } qw(h1 h2 h3 h4 h5 h6)),
  (map { ($_ => TABLE_ROW_GROUP_EL) } qw(tbody tfoot thead)),
  (map { ($_ => TABLE_CELL_EL) } qw(td th)),

  ## Formatting elements that also carry a flag of their own.
  a => A_EL | FORMATTING_EL,
  nobr => NOBR_EL | FORMATTING_EL,

  ## Elements with a dedicated flag.
  address => ADDRESS_EL,
  body => BODY_EL,
  button => BUTTON_EL,
  caption => CAPTION_EL,
  dd => DD_EL,
  div => DIV_EL,
  dt => DT_EL,
  form => FORM_EL,
  frameset => FRAMESET_EL,
  html => HTML_EL,
  li => LI_EL,
  optgroup => OPTGROUP_EL,
  option => OPTION_EL,
  p => P_EL,
  select => SELECT_EL,
  table => TABLE_EL,
  tr => TABLE_ROW_EL,
};
187    
## Maps each code point in the range 0x80..0x9F to a replacement code
## point (U+FFFD where there is no specific replacement).
## NOTE(review): presumably applied to numeric character references in
## the C1 range; confirm against the entity-consuming code.
my $c1_entity_char = do {
  ## Replacement code points, in order, for 0x80, 0x81, ..., 0x9F.
  my @replacement = (
    0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, # 0x80-0x87
    0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD, # 0x88-0x8F
    0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, # 0x90-0x97
    0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178, # 0x98-0x9F
  );
  +{ map { (0x80 + $_ => $replacement[$_]) } 0 .. $#replacement };
}; # $c1_entity_char
222 wakaba 1.1
## Parses a string of bytes as an HTML document.
##
## Arguments:
##   invocant - a parser object, or a class name (a parser is then
##              created with |new|)
##   $charset - the character encoding label to decode with, or |undef|
##              to have the encoding guessed from the first 1024 bytes
##              (falling back to windows-1252)
##   bytes    - the byte string, or a reference to it
##   the remaining arguments are passed unchanged to |parse_char_string|
##
## Returns the value returned by |parse_char_string|.
##
## If |Whatpm::HTML::RestartParser| is thrown during the parse (see the
## |change_encoding| hook installed below), the bytes are decoded again
## with the charset carried by the exception and parsed a second time.
sub parse_byte_string ($$$$;$) {
  my $self = ref $_[0] ? shift : shift->new;
  my $charset = shift;
  my $bytes_s = ref $_[0] ? $_[0] : \($_[0]);
  my $s;

  ## |Encode::decode| is called on every path below (explicit charset,
  ## guessed charset, and the restart handler), so |Encode| has to be
  ## loaded regardless of which branch is taken.
  require Encode; ## TODO: decode(utf8) don't delete BOM

  if (defined $charset) {
    $s = \ (Encode::decode ($charset, $$bytes_s));
    $self->{input_encoding} = lc $charset; ## TODO: normalize name
    $self->{confident} = 1;
  } else {
    ## TODO: Implement HTML5 detection algorithm
    require Whatpm::Charset::UniversalCharDet;
    $charset = Whatpm::Charset::UniversalCharDet->detect_byte_string
        (substr ($$bytes_s, 0, 1024));
    $charset ||= 'windows-1252';
    $s = \ (Encode::decode ($charset, $$bytes_s));
    $self->{input_encoding} = $charset;
    $self->{confident} = 0;
  }

  ## Hook called as |->($self, $charset, $token)|.  It receives the
  ## parser as an argument rather than closing over it.
  $self->{change_encoding} = sub {
    my $self = shift;
    my $charset = lc shift;
    my $token = shift;
    ## TODO: if $charset is supported
    ## TODO: normalize charset name

    ## "Change the encoding" algorithm:

    ## Step 1
    if ($charset eq 'utf-16') { ## ISSUE: UTF-16BE -> UTF-8? UTF-16LE -> UTF-8?
      $charset = 'utf-8';
    }

    ## Step 2
    if (defined $self->{input_encoding} and
        $self->{input_encoding} eq $charset) {
      $self->{confident} = 1;
      return;
    }

    !!!parse-error (type => 'charset label detected:'.$self->{input_encoding}.
        ':'.$charset, level => 'w', token => $token);

    ## Step 3
    # if (can) {
      ## change the encoding on the fly.
      #$self->{confident} = 1;
      #return;
    # }

    ## Step 4
    throw Whatpm::HTML::RestartParser (charset => $charset);
  }; # $self->{change_encoding}

  ## Everything after the byte string is handed on to the character
  ## string parser, with the decoded string in place of the bytes.
  my @args = @_; shift @args; # $s
  my $return;
  try {
    $return = $self->parse_char_string ($s, @args);
  } catch Whatpm::HTML::RestartParser with {
    ## Decode the original bytes again with the newly requested charset
    ## and parse from the start, this time with a confident encoding.
    my $charset = shift->{charset};
    $s = \ (Encode::decode ($charset, $$bytes_s));
    $self->{input_encoding} = $charset; ## TODO: normalize
    $self->{confident} = 1;
    $return = $self->parse_char_string ($s, @args);
  };
  return $return;
} # parse_byte_string
293    
294 wakaba 1.71 ## NOTE: HTML5 spec says that the encoding layer MUST NOT strip BOM
295     ## and the HTML layer MUST ignore it. However, we do strip the BOM in
296     ## the encoding layer and the HTML layer does not ignore any U+FEFF,
297     ## because the core part of our HTML parser expects a string of characters,
298     ## not a string of bytes or code units or anything which might contain a BOM.
299     ## Therefore, any parser interface that accepts a string of bytes,
300     ## such as |parse_byte_string| in this module, must ensure that it does
301     ## strip the BOM and never strip any ZWNBSP.
302    
303 wakaba 1.63 *parse_char_string = \&parse_string;
304    
## Parses a string of characters as an HTML document.
##
## Arguments:
##   invocant - a parser object, or a class name (a parser is then
##              created with |new|)
##   string   - the character string, or a reference to it
##   document - the document object to populate; its existing child
##              nodes are removed first
##   onerror  - optional code reference that receives parse errors as
##              name/value pairs; by default errors are reported by |warn|
##
## Returns the document object.
sub parse_string ($$$;$) {
  my $self = ref $_[0] ? shift : shift->new;
  my $s = ref $_[0] ? $_[0] : \($_[0]);
  $self->{document} = $_[1];
  @{$self->{document}->child_nodes} = ();

  ## NOTE: |set_inner_html| copies most of this method's code

  $self->{confident} = 1 unless exists $self->{confident};
  if (defined $self->{input_encoding}) {
    $self->{document}->input_encoding ($self->{input_encoding});
  }

  ## Offset of the next unread character in |$$s|.
  my $pos = 0;
  $self->{line} = 1;
  $self->{line_prev} = 1;
  $self->{column} = 0;
  $self->{column_prev} = 0;

  ## Input reader: called with the parser as its argument, it stores the
  ## code point of the next input character (or -1 at the end of the
  ## input) in |$self->{next_char}|.
  $self->{set_next_char} = sub {
    my $self = shift;

    ## Keep the three most recently read characters, newest first.
    pop @{$self->{prev_char}};
    unshift @{$self->{prev_char}}, $self->{next_char};

    if ($pos >= length $$s) {
      $self->{next_char} = -1;
      return;
    }
    $self->{next_char} = ord substr $$s, $pos++, 1;

    $self->{line_prev} = $self->{line};
    $self->{column_prev} = $self->{column};
    $self->{column}++;

    my $char = $self->{next_char};
    if ($char == 0x000A) { # LF
      $self->{line}++;
      $self->{column} = 0;
    } elsif ($char == 0x000D) { # CR
      ## CR and CR LF are both reported as a single LF.
      $pos++ if substr ($$s, $pos, 1) eq "\x0A";
      $self->{next_char} = 0x000A; # LF # MUST
      $self->{line}++;
      $self->{column} = 0;
    } elsif ($char > 0x10FFFF) {
      $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    } elsif ($char == 0x0000) { # NULL
      !!!parse-error (type => 'NULL');
      $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    }
  };
  $self->{prev_char} = [-1, -1, -1];
  $self->{next_char} = -1;

  ## Default error handler: prefer the location recorded in the token, if
  ## one is given, over the explicitly passed line and column.
  my $onerror = $_[2] || sub {
    my %opt = @_;
    my ($line, $column) = $opt{token}
        ? @{$opt{token}}{qw(line column)}
        : @opt{qw(line column)};
    warn "Parse error ($opt{type}) at line $line column $column\n";
  };
  ## The current position is passed first so that arguments supplied by
  ## the caller of the hook can override it.
  $self->{parse_error} = sub {
    $onerror->(line => $self->{line}, column => $self->{column}, @_);
  };

  $self->_initialize_tokenizer;
  $self->_initialize_tree_constructor;
  $self->_construct_tree;
  $self->_terminate_tree_constructor;

  ## The |parse_error| closure refers to |$self|, which in turn holds the
  ## closure; drop it to break that reference loop.
  delete $self->{parse_error}; # remove loop

  return $self->{document};
} # parse_string
370    
## Creates a new parser object.
##
## The object is a blessed hash populated with default hooks:
##   set_next_char                - reports end of input (next_char = -1)
##   parse_error                  - ignores the error
##   change_encoding              - does nothing
##   application_cache_selection  - does nothing
## The parse methods of this class replace |set_next_char| and
## |parse_error| (and, for byte input, |change_encoding|) with real
## implementations.
sub new ($) {
  my $class = shift;
  my $self = bless {}, $class;
  ## The hook is invoked with the parser as its first argument, just
  ## like the reader installed by |parse_string|.  It must not close
  ## over |$self|: the object would then hold a closure that holds the
  ## object, and a parser that is never used would never be freed.
  $self->{set_next_char} = sub {
    my $self = shift;
    $self->{next_char} = -1;
  };
  $self->{parse_error} = sub {
    #
  };
  $self->{change_encoding} = sub {
    # if ($_[0] is a supported encoding) {
    #   run "change the encoding" algorithm;
    #   throw Whatpm::HTML::RestartParser (charset => $new_encoding);
    # }
  };
  $self->{application_cache_selection} = sub {
    #
  };
  return $self;
} # new
391    
## Content model feature flags: which kinds of markup are recognized in
## character data.
sub CM_ENTITY         () { 1 << 0 } # & markup in data
sub CM_LIMITED_MARKUP () { 1 << 1 } # < markup in data (limited)
sub CM_FULL_MARKUP    () { 1 << 2 } # < markup in data (any)

## Content models, expressed as combinations of the feature flags.
sub PLAINTEXT_CONTENT_MODEL () { 0 }
sub CDATA_CONTENT_MODEL     () { CM_LIMITED_MARKUP }
sub RCDATA_CONTENT_MODEL    () { CM_ENTITY | CM_LIMITED_MARKUP }
sub PCDATA_CONTENT_MODEL    () { CM_ENTITY | CM_FULL_MARKUP }
400    
## Tokenizer states, stored in |$self->{state}|.  The values are
## arbitrary but distinct integers.

## Data and tags
sub DATA_STATE           () { 0 }
sub ENTITY_DATA_STATE    () { 1 }
sub TAG_OPEN_STATE       () { 2 }
sub CLOSE_TAG_OPEN_STATE () { 3 }
sub TAG_NAME_STATE       () { 4 }

## Attributes
sub BEFORE_ATTRIBUTE_NAME_STATE         () { 5 }
sub ATTRIBUTE_NAME_STATE                () { 6 }
sub AFTER_ATTRIBUTE_NAME_STATE          () { 7 }
sub BEFORE_ATTRIBUTE_VALUE_STATE        () { 8 }
sub ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE () { 9 }
sub ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE () { 10 }
sub ATTRIBUTE_VALUE_UNQUOTED_STATE      () { 11 }
sub ENTITY_IN_ATTRIBUTE_VALUE_STATE     () { 12 }

## Markup declarations and comments
sub MARKUP_DECLARATION_OPEN_STATE () { 13 }
sub COMMENT_START_STATE           () { 14 }
sub COMMENT_START_DASH_STATE      () { 15 }
sub COMMENT_STATE                 () { 16 }
sub COMMENT_END_STATE             () { 17 }
sub COMMENT_END_DASH_STATE        () { 18 }
sub BOGUS_COMMENT_STATE           () { 19 }

## DOCTYPE
sub DOCTYPE_STATE                                 () { 20 }
sub BEFORE_DOCTYPE_NAME_STATE                     () { 21 }
sub DOCTYPE_NAME_STATE                            () { 22 }
sub AFTER_DOCTYPE_NAME_STATE                      () { 23 }
sub BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE        () { 24 }
sub DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE () { 25 }
sub DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE () { 26 }
sub AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE         () { 27 }
sub BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE        () { 28 }
sub DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE () { 29 }
sub DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE () { 30 }
sub AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE         () { 31 }
sub BOGUS_DOCTYPE_STATE                           () { 32 }

## States added later
sub AFTER_ATTRIBUTE_VALUE_QUOTED_STATE () { 33 }
sub SELF_CLOSING_START_TAG_STATE       () { 34 }

## Token types, stored in a token's |->{type}|.
sub DOCTYPE_TOKEN     () { 1 }
sub COMMENT_TOKEN     () { 2 }
sub START_TAG_TOKEN   () { 3 }
sub END_TAG_TOKEN     () { 4 }
sub END_OF_FILE_TOKEN () { 5 }
sub CHARACTER_TOKEN   () { 6 }
443    
## Insertion mode set flags (|*_IMS|).  Each one is a single bit from bit
## 2 upwards; the two lowest bits are left free so that they can tell
## apart the insertion modes (|*_IM|) that belong to the same set.
sub AFTER_HTML_IMS () { 1 <<  2 }
sub HEAD_IMS       () { 1 <<  3 }
sub BODY_IMS       () { 1 <<  4 }
sub BODY_TABLE_IMS () { 1 <<  5 }
sub TABLE_IMS      () { 1 <<  6 }
sub ROW_IMS        () { 1 <<  7 }
sub BODY_AFTER_IMS () { 1 <<  8 }
sub FRAME_IMS      () { 1 <<  9 }
sub SELECT_IMS     () { 1 << 10 }

## NOTE: "initial" and "before html" insertion modes have no constants.

## NOTE: "after after body" insertion mode.
sub AFTER_HTML_BODY_IM () { AFTER_HTML_IMS | BODY_AFTER_IMS }

## NOTE: "after after frameset" insertion mode.
sub AFTER_HTML_FRAMESET_IM () { AFTER_HTML_IMS | FRAME_IMS }

## Insertion modes: a set flag (or several), optionally combined with a
## number in the two lowest bits.
sub IN_HEAD_IM            () { HEAD_IMS | 0 }
sub IN_HEAD_NOSCRIPT_IM   () { HEAD_IMS | 1 }
sub AFTER_HEAD_IM         () { HEAD_IMS | 2 }
sub BEFORE_HEAD_IM        () { HEAD_IMS | 3 }
sub IN_BODY_IM            () { BODY_IMS }
sub IN_CELL_IM            () { BODY_IMS | BODY_TABLE_IMS | 1 }
sub IN_CAPTION_IM         () { BODY_IMS | BODY_TABLE_IMS | 2 }
sub IN_ROW_IM             () { TABLE_IMS | ROW_IMS | 1 }
sub IN_TABLE_BODY_IM      () { TABLE_IMS | ROW_IMS | 2 }
sub IN_TABLE_IM           () { TABLE_IMS }
sub AFTER_BODY_IM         () { BODY_AFTER_IMS }
sub IN_FRAMESET_IM        () { FRAME_IMS | 1 }
sub AFTER_FRAMESET_IM     () { FRAME_IMS | 2 }
sub IN_SELECT_IM          () { SELECT_IMS | 1 }
sub IN_SELECT_IN_TABLE_IM () { SELECT_IMS | 2 }
sub IN_COLUMN_GROUP_IM    () { 2 }
478    
479 wakaba 1.1 ## Implementations MUST act as if they used the state machine described in the spec
480    
## Resets the tokenizer state of the parser and reads the first input
## character.  The |set_next_char| hook must already be installed.
sub _initialize_tokenizer ($) {
  my $self = shift;

  ## The tokenizer MUST start in the data state, with the PCDATA content
  ## model.
  $self->{state} = DATA_STATE;
  $self->{content_model} = PCDATA_CONTENT_MODEL;

  ## Set to |undef| rather than deleted, so the keys continue to exist.
  ## |current_token| is a start tag, end tag, comment, or DOCTYPE.
  $self->{$_} = undef
      for qw(current_token current_attribute
             last_emitted_start_tag_name last_attribute_value_state);
  delete $self->{self_closing};

  ## |$self->{char}| has to be reset before the first character is read.
  ## The line below is a preprocessor macro (see mkhtmlparser.pl);
  ## presumably it fills |$self->{next_char}| - confirm in that script.
  $self->{char} = [];
  # $self->{next_char}
  !!!next-input-character;
  $self->{token} = [];
  # $self->{escape}
} # _initialize_tokenizer
496    
497     ## A token has:
498 wakaba 1.55 ## ->{type} == DOCTYPE_TOKEN, START_TAG_TOKEN, END_TAG_TOKEN, COMMENT_TOKEN,
499     ## CHARACTER_TOKEN, or END_OF_FILE_TOKEN
500     ## ->{name} (DOCTYPE_TOKEN)
501     ## ->{tag_name} (START_TAG_TOKEN, END_TAG_TOKEN)
502     ## ->{public_identifier} (DOCTYPE_TOKEN)
503     ## ->{system_identifier} (DOCTYPE_TOKEN)
504 wakaba 1.75 ## ->{quirks} == 1 or 0 (DOCTYPE_TOKEN): "force-quirks" flag
505 wakaba 1.55 ## ->{attributes} isa HASH (START_TAG_TOKEN, END_TAG_TOKEN)
506 wakaba 1.66 ## ->{name}
507     ## ->{value}
508     ## ->{has_reference} == 1 or 0
509 wakaba 1.55 ## ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN)
510 wakaba 1.125 ## NOTE: The "self-closing flag" is held as |$self->{self_closing}|.
511     ## A token's own |->{self_closing}| member is used to save the value of
512     ## |$self->{self_closing}| while the token is pushed back onto the stack.
513    
514     ## ISSUE: "When a DOCTYPE token is created, its
515     ## <i>self-closing flag</i> must be unset (its other state is that it
516     ## be set), and its attributes list must be empty.": Wrong subject?
517 wakaba 1.1
518     ## Emitted token MUST immediately be handled by the tree construction state.
519    
520     ## Before each step, UA MAY check to see if either one of the scripts in
521     ## "list of scripts that will execute as soon as possible" or the first
522     ## script in the "list of scripts that will execute asynchronously",
523     ## has completed loading. If one has, then it MUST be executed
524     ## and removed from the list.
525    
526 wakaba 1.59 ## NOTE: HTML5 "Writing HTML documents" section, applied to
527     ## documents and not to user agents and conformance checkers,
528     ## contains some requirements that are not detected by the
529     ## parsing algorithm:
530     ## - Some requirements on character encoding declarations. ## TODO
531     ## - "Elements MUST NOT contain content that their content model disallows."
532     ## ... Some are parse error, some are not (will be reported by c.c.).
533     ## - Polytheistic slash SHOULD NOT be used. (Applied only to atheists.) ## TODO
534     ## - Text (in elements, attributes, and comments) SHOULD NOT contain
535     ## control characters other than space characters. ## TODO: (what is control character? C0, C1 and DEL? Unicode control character?)
536    
537     ## TODO: HTML5 poses authors two SHOULD-level requirements that cannot
538     ## be detected by the HTML5 parsing algorithm:
539     ## - Text,
540    
541 wakaba 1.1 sub _get_next_token ($) {
542     my $self = shift;
543 wakaba 1.125
544     if ($self->{self_closing}) {
545     !!!parse-error (type => 'nestc', token => $self->{current_token});
546     ## NOTE: The |self_closing| flag is only set by start tag token.
547     ## In addition, when a start tag token is emitted, it is always set to
548     ## |current_token|.
549     delete $self->{self_closing};
550     }
551    
552 wakaba 1.1 if (@{$self->{token}}) {
553 wakaba 1.125 $self->{self_closing} = $self->{token}->[0]->{self_closing};
554 wakaba 1.1 return shift @{$self->{token}};
555     }
556    
557     A: {
558 wakaba 1.57 if ($self->{state} == DATA_STATE) {
559 wakaba 1.76 if ($self->{next_char} == 0x0026) { # &
560 wakaba 1.72 if ($self->{content_model} & CM_ENTITY and # PCDATA | RCDATA
561     not $self->{escape}) {
562 wakaba 1.77 !!!cp (1);
563 wakaba 1.57 $self->{state} = ENTITY_DATA_STATE;
564 wakaba 1.1 !!!next-input-character;
565     redo A;
566     } else {
567 wakaba 1.77 !!!cp (2);
568 wakaba 1.1 #
569     }
570 wakaba 1.76 } elsif ($self->{next_char} == 0x002D) { # -
571 wakaba 1.40 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
572 wakaba 1.13 unless ($self->{escape}) {
573 wakaba 1.76 if ($self->{prev_char}->[0] == 0x002D and # -
574     $self->{prev_char}->[1] == 0x0021 and # !
575     $self->{prev_char}->[2] == 0x003C) { # <
576 wakaba 1.77 !!!cp (3);
577 wakaba 1.13 $self->{escape} = 1;
578 wakaba 1.77 } else {
579     !!!cp (4);
580 wakaba 1.13 }
581 wakaba 1.77 } else {
582     !!!cp (5);
583 wakaba 1.13 }
584     }
585    
586     #
587 wakaba 1.76 } elsif ($self->{next_char} == 0x003C) { # <
588 wakaba 1.40 if ($self->{content_model} & CM_FULL_MARKUP or # PCDATA
589     (($self->{content_model} & CM_LIMITED_MARKUP) and # CDATA | RCDATA
590 wakaba 1.13 not $self->{escape})) {
591 wakaba 1.77 !!!cp (6);
592 wakaba 1.57 $self->{state} = TAG_OPEN_STATE;
593 wakaba 1.1 !!!next-input-character;
594     redo A;
595     } else {
596 wakaba 1.77 !!!cp (7);
597 wakaba 1.1 #
598     }
599 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
600 wakaba 1.13 if ($self->{escape} and
601 wakaba 1.40 ($self->{content_model} & CM_LIMITED_MARKUP)) { # RCDATA | CDATA
602 wakaba 1.76 if ($self->{prev_char}->[0] == 0x002D and # -
603     $self->{prev_char}->[1] == 0x002D) { # -
604 wakaba 1.77 !!!cp (8);
605 wakaba 1.13 delete $self->{escape};
606 wakaba 1.77 } else {
607     !!!cp (9);
608 wakaba 1.13 }
609 wakaba 1.77 } else {
610     !!!cp (10);
611 wakaba 1.13 }
612    
613     #
614 wakaba 1.76 } elsif ($self->{next_char} == -1) {
615 wakaba 1.77 !!!cp (11);
616 wakaba 1.112 !!!emit ({type => END_OF_FILE_TOKEN,
617     line => $self->{line}, column => $self->{column}});
618 wakaba 1.1 last A; ## TODO: ok?
619 wakaba 1.77 } else {
620     !!!cp (12);
621 wakaba 1.1 }
622     # Anything else
623 wakaba 1.55 my $token = {type => CHARACTER_TOKEN,
624 wakaba 1.112 data => chr $self->{next_char},
625 wakaba 1.120 line => $self->{line}, column => $self->{column},
626 wakaba 1.118 };
627 wakaba 1.1 ## Stay in the data state
628     !!!next-input-character;
629    
630     !!!emit ($token);
631    
632     redo A;
633 wakaba 1.57 } elsif ($self->{state} == ENTITY_DATA_STATE) {
634 wakaba 1.1 ## (cannot happen in CDATA state)
635 wakaba 1.112
636 wakaba 1.120 my ($l, $c) = ($self->{line_prev}, $self->{column_prev});
637 wakaba 1.1
638 wakaba 1.72 my $token = $self->_tokenize_attempt_to_consume_an_entity (0, -1);
639 wakaba 1.1
640 wakaba 1.57 $self->{state} = DATA_STATE;
641 wakaba 1.1 # next-input-character is already done
642    
643     unless (defined $token) {
644 wakaba 1.77 !!!cp (13);
645 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '&',
646 wakaba 1.120 line => $l, column => $c,
647 wakaba 1.118 });
648 wakaba 1.1 } else {
649 wakaba 1.77 !!!cp (14);
650 wakaba 1.1 !!!emit ($token);
651     }
652    
653     redo A;
654 wakaba 1.57 } elsif ($self->{state} == TAG_OPEN_STATE) {
655 wakaba 1.40 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
656 wakaba 1.76 if ($self->{next_char} == 0x002F) { # /
657 wakaba 1.77 !!!cp (15);
658 wakaba 1.1 !!!next-input-character;
659 wakaba 1.57 $self->{state} = CLOSE_TAG_OPEN_STATE;
660 wakaba 1.1 redo A;
661     } else {
662 wakaba 1.77 !!!cp (16);
663 wakaba 1.1 ## reconsume
664 wakaba 1.57 $self->{state} = DATA_STATE;
665 wakaba 1.1
666 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '<',
667 wakaba 1.120 line => $self->{line_prev},
668     column => $self->{column_prev},
669 wakaba 1.118 });
670 wakaba 1.1
671     redo A;
672     }
673 wakaba 1.40 } elsif ($self->{content_model} & CM_FULL_MARKUP) { # PCDATA
674 wakaba 1.76 if ($self->{next_char} == 0x0021) { # !
675 wakaba 1.77 !!!cp (17);
676 wakaba 1.57 $self->{state} = MARKUP_DECLARATION_OPEN_STATE;
677 wakaba 1.1 !!!next-input-character;
678     redo A;
679 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
680 wakaba 1.77 !!!cp (18);
681 wakaba 1.57 $self->{state} = CLOSE_TAG_OPEN_STATE;
682 wakaba 1.1 !!!next-input-character;
683     redo A;
684 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
685     $self->{next_char} <= 0x005A) { # A..Z
686 wakaba 1.77 !!!cp (19);
687 wakaba 1.1 $self->{current_token}
688 wakaba 1.55 = {type => START_TAG_TOKEN,
689 wakaba 1.112 tag_name => chr ($self->{next_char} + 0x0020),
690     line => $self->{line_prev},
691     column => $self->{column_prev}};
692 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
693 wakaba 1.1 !!!next-input-character;
694     redo A;
695 wakaba 1.76 } elsif (0x0061 <= $self->{next_char} and
696     $self->{next_char} <= 0x007A) { # a..z
697 wakaba 1.77 !!!cp (20);
698 wakaba 1.55 $self->{current_token} = {type => START_TAG_TOKEN,
699 wakaba 1.112 tag_name => chr ($self->{next_char}),
700     line => $self->{line_prev},
701     column => $self->{column_prev}};
702 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
703 wakaba 1.1 !!!next-input-character;
704     redo A;
705 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
706 wakaba 1.77 !!!cp (21);
707 wakaba 1.115 !!!parse-error (type => 'empty start tag',
708     line => $self->{line_prev},
709     column => $self->{column_prev});
710 wakaba 1.57 $self->{state} = DATA_STATE;
711 wakaba 1.1 !!!next-input-character;
712    
713 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '<>',
714 wakaba 1.120 line => $self->{line_prev},
715     column => $self->{column_prev},
716 wakaba 1.118 });
717 wakaba 1.1
718     redo A;
719 wakaba 1.76 } elsif ($self->{next_char} == 0x003F) { # ?
720 wakaba 1.77 !!!cp (22);
721 wakaba 1.115 !!!parse-error (type => 'pio',
722     line => $self->{line_prev},
723     column => $self->{column_prev});
724 wakaba 1.57 $self->{state} = BOGUS_COMMENT_STATE;
725 wakaba 1.112 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
726 wakaba 1.120 line => $self->{line_prev},
727     column => $self->{column_prev},
728 wakaba 1.118 };
729 wakaba 1.76 ## $self->{next_char} is intentionally left as is
730 wakaba 1.1 redo A;
731     } else {
732 wakaba 1.77 !!!cp (23);
733 wakaba 1.3 !!!parse-error (type => 'bare stago');
734 wakaba 1.57 $self->{state} = DATA_STATE;
735 wakaba 1.1 ## reconsume
736    
737 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '<',
738 wakaba 1.120 line => $self->{line_prev},
739     column => $self->{column_prev},
740 wakaba 1.118 });
741 wakaba 1.1
742     redo A;
743     }
744     } else {
745 wakaba 1.40 die "$0: $self->{content_model} in tag open";
746 wakaba 1.1 }
747 wakaba 1.57 } elsif ($self->{state} == CLOSE_TAG_OPEN_STATE) {
748 wakaba 1.113 my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</"
749 wakaba 1.40 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
750 wakaba 1.23 if (defined $self->{last_emitted_start_tag_name}) {
751 wakaba 1.112
752 wakaba 1.30 ## NOTE: <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>
753 wakaba 1.23 my @next_char;
754     TAGNAME: for (my $i = 0; $i < length $self->{last_emitted_start_tag_name}; $i++) {
755 wakaba 1.76 push @next_char, $self->{next_char};
756 wakaba 1.23 my $c = ord substr ($self->{last_emitted_start_tag_name}, $i, 1);
757     my $C = 0x0061 <= $c && $c <= 0x007A ? $c - 0x0020 : $c;
758 wakaba 1.76 if ($self->{next_char} == $c or $self->{next_char} == $C) {
759 wakaba 1.77 !!!cp (24);
760 wakaba 1.23 !!!next-input-character;
761     next TAGNAME;
762     } else {
763 wakaba 1.77 !!!cp (25);
764 wakaba 1.76 $self->{next_char} = shift @next_char; # reconsume
765 wakaba 1.23 !!!back-next-input-character (@next_char);
766 wakaba 1.57 $self->{state} = DATA_STATE;
767 wakaba 1.23
768 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '</',
769 wakaba 1.120 line => $l, column => $c,
770 wakaba 1.118 });
771 wakaba 1.23
772     redo A;
773     }
774     }
775 wakaba 1.76 push @next_char, $self->{next_char};
776 wakaba 1.23
777 wakaba 1.76 unless ($self->{next_char} == 0x0009 or # HT
778     $self->{next_char} == 0x000A or # LF
779     $self->{next_char} == 0x000B or # VT
780     $self->{next_char} == 0x000C or # FF
781     $self->{next_char} == 0x0020 or # SP
782     $self->{next_char} == 0x003E or # >
783     $self->{next_char} == 0x002F or # /
784     $self->{next_char} == -1) {
785 wakaba 1.77 !!!cp (26);
786 wakaba 1.76 $self->{next_char} = shift @next_char; # reconsume
787 wakaba 1.1 !!!back-next-input-character (@next_char);
788 wakaba 1.57 $self->{state} = DATA_STATE;
789 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '</',
790 wakaba 1.120 line => $l, column => $c,
791 wakaba 1.118 });
792 wakaba 1.1 redo A;
793 wakaba 1.23 } else {
794 wakaba 1.77 !!!cp (27);
795 wakaba 1.76 $self->{next_char} = shift @next_char;
796 wakaba 1.23 !!!back-next-input-character (@next_char);
797     # and consume...
798 wakaba 1.1 }
799 wakaba 1.23 } else {
800     ## No start tag token has ever been emitted
801 wakaba 1.77 !!!cp (28);
802 wakaba 1.23 # next-input-character is already done
803 wakaba 1.57 $self->{state} = DATA_STATE;
804 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '</',
805 wakaba 1.120 line => $l, column => $c,
806 wakaba 1.118 });
807 wakaba 1.1 redo A;
808     }
809     }
810    
811 wakaba 1.76 if (0x0041 <= $self->{next_char} and
812     $self->{next_char} <= 0x005A) { # A..Z
813 wakaba 1.77 !!!cp (29);
814 wakaba 1.112 $self->{current_token}
815     = {type => END_TAG_TOKEN,
816     tag_name => chr ($self->{next_char} + 0x0020),
817     line => $l, column => $c};
818 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
819 wakaba 1.1 !!!next-input-character;
820     redo A;
821 wakaba 1.76 } elsif (0x0061 <= $self->{next_char} and
822     $self->{next_char} <= 0x007A) { # a..z
823 wakaba 1.77 !!!cp (30);
824 wakaba 1.55 $self->{current_token} = {type => END_TAG_TOKEN,
825 wakaba 1.112 tag_name => chr ($self->{next_char}),
826     line => $l, column => $c};
827 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
828 wakaba 1.1 !!!next-input-character;
829     redo A;
830 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
831 wakaba 1.77 !!!cp (31);
832 wakaba 1.115 !!!parse-error (type => 'empty end tag',
833     line => $self->{line_prev}, ## "<" in "</>"
834     column => $self->{column_prev} - 1);
835 wakaba 1.57 $self->{state} = DATA_STATE;
836 wakaba 1.1 !!!next-input-character;
837     redo A;
838 wakaba 1.76 } elsif ($self->{next_char} == -1) {
839 wakaba 1.77 !!!cp (32);
840 wakaba 1.3 !!!parse-error (type => 'bare etago');
841 wakaba 1.57 $self->{state} = DATA_STATE;
842 wakaba 1.1 # reconsume
843    
844 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '</',
845 wakaba 1.120 line => $l, column => $c,
846 wakaba 1.118 });
847 wakaba 1.1
848     redo A;
849     } else {
850 wakaba 1.77 !!!cp (33);
851 wakaba 1.3 !!!parse-error (type => 'bogus end tag');
852 wakaba 1.57 $self->{state} = BOGUS_COMMENT_STATE;
853 wakaba 1.112 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
854 wakaba 1.120 line => $self->{line_prev}, # "<" of "</"
855     column => $self->{column_prev} - 1,
856 wakaba 1.118 };
857 wakaba 1.76 ## $self->{next_char} is intentionally left as is
858 wakaba 1.1 redo A;
859     }
860 wakaba 1.57 } elsif ($self->{state} == TAG_NAME_STATE) {
861 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
862     $self->{next_char} == 0x000A or # LF
863     $self->{next_char} == 0x000B or # VT
864     $self->{next_char} == 0x000C or # FF
865     $self->{next_char} == 0x0020) { # SP
866 wakaba 1.77 !!!cp (34);
867 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
868 wakaba 1.1 !!!next-input-character;
869     redo A;
870 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
871 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
872 wakaba 1.77 !!!cp (35);
873 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
874 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
875 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
876 wakaba 1.78 #if ($self->{current_token}->{attributes}) {
877     # ## NOTE: This should never be reached.
878     # !!! cp (36);
879     # !!! parse-error (type => 'end tag attribute');
880     #} else {
881 wakaba 1.77 !!!cp (37);
882 wakaba 1.78 #}
883 wakaba 1.1 } else {
884     die "$0: $self->{current_token}->{type}: Unknown token type";
885     }
886 wakaba 1.57 $self->{state} = DATA_STATE;
887 wakaba 1.1 !!!next-input-character;
888    
889     !!!emit ($self->{current_token}); # start tag or end tag
890    
891     redo A;
892 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
893     $self->{next_char} <= 0x005A) { # A..Z
894 wakaba 1.77 !!!cp (38);
895 wakaba 1.76 $self->{current_token}->{tag_name} .= chr ($self->{next_char} + 0x0020);
896 wakaba 1.1 # start tag or end tag
897     ## Stay in this state
898     !!!next-input-character;
899     redo A;
900 wakaba 1.76 } elsif ($self->{next_char} == -1) {
901 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
902 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
903 wakaba 1.77 !!!cp (39);
904 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
905 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
906 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
907 wakaba 1.78 #if ($self->{current_token}->{attributes}) {
908     # ## NOTE: This state should never be reached.
909     # !!! cp (40);
910     # !!! parse-error (type => 'end tag attribute');
911     #} else {
912 wakaba 1.77 !!!cp (41);
913 wakaba 1.78 #}
914 wakaba 1.1 } else {
915     die "$0: $self->{current_token}->{type}: Unknown token type";
916     }
917 wakaba 1.57 $self->{state} = DATA_STATE;
918 wakaba 1.1 # reconsume
919    
920     !!!emit ($self->{current_token}); # start tag or end tag
921    
922     redo A;
923 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
924 wakaba 1.125 !!!cp (42);
925     $self->{state} = SELF_CLOSING_START_TAG_STATE;
926 wakaba 1.1 !!!next-input-character;
927     redo A;
928     } else {
929 wakaba 1.77 !!!cp (44);
930 wakaba 1.76 $self->{current_token}->{tag_name} .= chr $self->{next_char};
931 wakaba 1.1 # start tag or end tag
932     ## Stay in the state
933     !!!next-input-character;
934     redo A;
935     }
936 wakaba 1.57 } elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) {
937 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
938     $self->{next_char} == 0x000A or # LF
939     $self->{next_char} == 0x000B or # VT
940     $self->{next_char} == 0x000C or # FF
941     $self->{next_char} == 0x0020) { # SP
942 wakaba 1.77 !!!cp (45);
943 wakaba 1.1 ## Stay in the state
944     !!!next-input-character;
945     redo A;
946 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
947 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
948 wakaba 1.77 !!!cp (46);
949 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
950 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
951 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
952 wakaba 1.1 if ($self->{current_token}->{attributes}) {
953 wakaba 1.77 !!!cp (47);
954 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
955 wakaba 1.77 } else {
956     !!!cp (48);
957 wakaba 1.1 }
958     } else {
959     die "$0: $self->{current_token}->{type}: Unknown token type";
960     }
961 wakaba 1.57 $self->{state} = DATA_STATE;
962 wakaba 1.1 !!!next-input-character;
963    
964     !!!emit ($self->{current_token}); # start tag or end tag
965    
966     redo A;
967 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
968     $self->{next_char} <= 0x005A) { # A..Z
969 wakaba 1.77 !!!cp (49);
970 wakaba 1.119 $self->{current_attribute}
971     = {name => chr ($self->{next_char} + 0x0020),
972     value => '',
973     line => $self->{line}, column => $self->{column}};
974 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
975 wakaba 1.1 !!!next-input-character;
976     redo A;
977 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
978 wakaba 1.125 !!!cp (50);
979     $self->{state} = SELF_CLOSING_START_TAG_STATE;
980 wakaba 1.1 !!!next-input-character;
981     redo A;
982 wakaba 1.76 } elsif ($self->{next_char} == -1) {
983 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
984 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
985 wakaba 1.77 !!!cp (52);
986 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
987 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
988 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
989 wakaba 1.1 if ($self->{current_token}->{attributes}) {
990 wakaba 1.77 !!!cp (53);
991 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
992 wakaba 1.77 } else {
993     !!!cp (54);
994 wakaba 1.1 }
995     } else {
996     die "$0: $self->{current_token}->{type}: Unknown token type";
997     }
998 wakaba 1.57 $self->{state} = DATA_STATE;
999 wakaba 1.1 # reconsume
1000    
1001     !!!emit ($self->{current_token}); # start tag or end tag
1002    
1003     redo A;
1004     } else {
1005 wakaba 1.72 if ({
1006     0x0022 => 1, # "
1007     0x0027 => 1, # '
1008     0x003D => 1, # =
1009 wakaba 1.76 }->{$self->{next_char}}) {
1010 wakaba 1.77 !!!cp (55);
1011 wakaba 1.72 !!!parse-error (type => 'bad attribute name');
1012 wakaba 1.77 } else {
1013     !!!cp (56);
1014 wakaba 1.72 }
1015 wakaba 1.119 $self->{current_attribute}
1016     = {name => chr ($self->{next_char}),
1017     value => '',
1018     line => $self->{line}, column => $self->{column}};
1019 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
1020 wakaba 1.1 !!!next-input-character;
1021     redo A;
1022     }
1023 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_NAME_STATE) {
1024 wakaba 1.1 my $before_leave = sub { # Commit or discard $self->{current_attribute}; invoked by the branches below that leave the attribute name state
1025     if (exists $self->{current_token}->{attributes} # current_token is a start tag or end tag token
1026     ->{$self->{current_attribute}->{name}}) { # MUST: an attribute with this name is already recorded on the token
1027 wakaba 1.77 !!!cp (57);
1028 wakaba 1.120 !!!parse-error (type => 'duplicate attribute:'.$self->{current_attribute}->{name}, line => $self->{current_attribute}->{line}, column => $self->{current_attribute}->{column});
1029 wakaba 1.1 ## Discard $self->{current_attribute}: nothing is stored here, so the earlier attribute of the same name is kept # MUST
1030     } else {
1031 wakaba 1.77 !!!cp (58);
1032 wakaba 1.1 $self->{current_token}->{attributes}->{$self->{current_attribute}->{name}}
1033     = $self->{current_attribute};
1034     }
1035     }; # $before_leave
1036    
1037 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1038     $self->{next_char} == 0x000A or # LF
1039     $self->{next_char} == 0x000B or # VT
1040     $self->{next_char} == 0x000C or # FF
1041     $self->{next_char} == 0x0020) { # SP
1042 wakaba 1.77 !!!cp (59);
1043 wakaba 1.1 $before_leave->();
1044 wakaba 1.57 $self->{state} = AFTER_ATTRIBUTE_NAME_STATE;
1045 wakaba 1.1 !!!next-input-character;
1046     redo A;
1047 wakaba 1.76 } elsif ($self->{next_char} == 0x003D) { # =
1048 wakaba 1.77 !!!cp (60);
1049 wakaba 1.1 $before_leave->();
1050 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_VALUE_STATE;
1051 wakaba 1.1 !!!next-input-character;
1052     redo A;
1053 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1054 wakaba 1.1 $before_leave->();
1055 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1056 wakaba 1.77 !!!cp (61);
1057 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1058 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1059 wakaba 1.77 !!!cp (62);
1060 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1061 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1062 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1063 wakaba 1.1 }
1064     } else {
1065     die "$0: $self->{current_token}->{type}: Unknown token type";
1066     }
1067 wakaba 1.57 $self->{state} = DATA_STATE;
1068 wakaba 1.1 !!!next-input-character;
1069    
1070     !!!emit ($self->{current_token}); # start tag or end tag
1071    
1072     redo A;
1073 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
1074     $self->{next_char} <= 0x005A) { # A..Z
1075 wakaba 1.77 !!!cp (63);
1076 wakaba 1.76 $self->{current_attribute}->{name} .= chr ($self->{next_char} + 0x0020);
1077 wakaba 1.1 ## Stay in the state
1078     !!!next-input-character;
1079     redo A;
1080 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1081 wakaba 1.125 !!!cp (64);
1082 wakaba 1.1 $before_leave->();
1083 wakaba 1.125 $self->{state} = SELF_CLOSING_START_TAG_STATE;
1084 wakaba 1.1 !!!next-input-character;
1085     redo A;
1086 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1087 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1088 wakaba 1.1 $before_leave->();
1089 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1090 wakaba 1.77 !!!cp (66);
1091 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1092 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1093 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1094 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1095 wakaba 1.77 !!!cp (67);
1096 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1097 wakaba 1.77 } else {
1098 wakaba 1.78 ## NOTE: This state should never be reached.
1099 wakaba 1.77 !!!cp (68);
1100 wakaba 1.1 }
1101     } else {
1102     die "$0: $self->{current_token}->{type}: Unknown token type";
1103     }
1104 wakaba 1.57 $self->{state} = DATA_STATE;
1105 wakaba 1.1 # reconsume
1106    
1107     !!!emit ($self->{current_token}); # start tag or end tag
1108    
1109     redo A;
1110     } else {
1111 wakaba 1.76 if ($self->{next_char} == 0x0022 or # "
1112     $self->{next_char} == 0x0027) { # '
1113 wakaba 1.77 !!!cp (69);
1114 wakaba 1.72 !!!parse-error (type => 'bad attribute name');
1115 wakaba 1.77 } else {
1116     !!!cp (70);
1117 wakaba 1.72 }
1118 wakaba 1.76 $self->{current_attribute}->{name} .= chr ($self->{next_char});
1119 wakaba 1.1 ## Stay in the state
1120     !!!next-input-character;
1121     redo A;
1122     }
1123 wakaba 1.57 } elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) {
1124 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1125     $self->{next_char} == 0x000A or # LF
1126     $self->{next_char} == 0x000B or # VT
1127     $self->{next_char} == 0x000C or # FF
1128     $self->{next_char} == 0x0020) { # SP
1129 wakaba 1.77 !!!cp (71);
1130 wakaba 1.1 ## Stay in the state
1131     !!!next-input-character;
1132     redo A;
1133 wakaba 1.76 } elsif ($self->{next_char} == 0x003D) { # =
1134 wakaba 1.77 !!!cp (72);
1135 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_VALUE_STATE;
1136 wakaba 1.1 !!!next-input-character;
1137     redo A;
1138 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1139 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1140 wakaba 1.77 !!!cp (73);
1141 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1142 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1143 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1144 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1145 wakaba 1.77 !!!cp (74);
1146 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1147 wakaba 1.77 } else {
1148 wakaba 1.78 ## NOTE: This state should never be reached.
1149 wakaba 1.77 !!!cp (75);
1150 wakaba 1.1 }
1151     } else {
1152     die "$0: $self->{current_token}->{type}: Unknown token type";
1153     }
1154 wakaba 1.57 $self->{state} = DATA_STATE;
1155 wakaba 1.1 !!!next-input-character;
1156    
1157     !!!emit ($self->{current_token}); # start tag or end tag
1158    
1159     redo A;
1160 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
1161     $self->{next_char} <= 0x005A) { # A..Z
1162 wakaba 1.77 !!!cp (76);
1163 wakaba 1.119 $self->{current_attribute}
1164     = {name => chr ($self->{next_char} + 0x0020),
1165     value => '',
1166     line => $self->{line}, column => $self->{column}};
1167 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
1168 wakaba 1.1 !!!next-input-character;
1169     redo A;
1170 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1171 wakaba 1.125 !!!cp (77);
1172     $self->{state} = SELF_CLOSING_START_TAG_STATE;
1173 wakaba 1.1 !!!next-input-character;
1174     redo A;
1175 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1176 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1177 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1178 wakaba 1.77 !!!cp (79);
1179 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1180 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1181 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1182 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1183 wakaba 1.77 !!!cp (80);
1184 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1185 wakaba 1.77 } else {
1186 wakaba 1.78 ## NOTE: This state should never be reached.
1187 wakaba 1.77 !!!cp (81);
1188 wakaba 1.1 }
1189     } else {
1190     die "$0: $self->{current_token}->{type}: Unknown token type";
1191     }
1192 wakaba 1.57 $self->{state} = DATA_STATE;
1193 wakaba 1.1 # reconsume
1194    
1195     !!!emit ($self->{current_token}); # start tag or end tag
1196    
1197     redo A;
1198     } else {
1199 wakaba 1.77 !!!cp (82);
1200 wakaba 1.119 $self->{current_attribute}
1201     = {name => chr ($self->{next_char}),
1202     value => '',
1203     line => $self->{line}, column => $self->{column}};
1204 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
1205 wakaba 1.1 !!!next-input-character;
1206     redo A;
1207     }
1208 wakaba 1.57 } elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) {
1209 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1210     $self->{next_char} == 0x000A or # LF
1211     $self->{next_char} == 0x000B or # VT
1212     $self->{next_char} == 0x000C or # FF
1213     $self->{next_char} == 0x0020) { # SP
1214 wakaba 1.77 !!!cp (83);
1215 wakaba 1.1 ## Stay in the state
1216     !!!next-input-character;
1217     redo A;
1218 wakaba 1.76 } elsif ($self->{next_char} == 0x0022) { # "
1219 wakaba 1.77 !!!cp (84);
1220 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
1221 wakaba 1.1 !!!next-input-character;
1222     redo A;
1223 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1224 wakaba 1.77 !!!cp (85);
1225 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
1226 wakaba 1.1 ## reconsume
1227     redo A;
1228 wakaba 1.76 } elsif ($self->{next_char} == 0x0027) { # '
1229 wakaba 1.77 !!!cp (86);
1230 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
1231 wakaba 1.1 !!!next-input-character;
1232     redo A;
1233 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1234 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1235 wakaba 1.77 !!!cp (87);
1236 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1237 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1238 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1239 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1240 wakaba 1.77 !!!cp (88);
1241 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1242 wakaba 1.77 } else {
1243 wakaba 1.78 ## NOTE: This state should never be reached.
1244 wakaba 1.77 !!!cp (89);
1245 wakaba 1.1 }
1246     } else {
1247     die "$0: $self->{current_token}->{type}: Unknown token type";
1248     }
1249 wakaba 1.57 $self->{state} = DATA_STATE;
1250 wakaba 1.1 !!!next-input-character;
1251    
1252     !!!emit ($self->{current_token}); # start tag or end tag
1253    
1254     redo A;
1255 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1256 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1257 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1258 wakaba 1.77 !!!cp (90);
1259 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1260 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1261 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1262 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1263 wakaba 1.77 !!!cp (91);
1264 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1265 wakaba 1.77 } else {
1266 wakaba 1.78 ## NOTE: This state should never be reached.
1267 wakaba 1.77 !!!cp (92);
1268 wakaba 1.1 }
1269     } else {
1270     die "$0: $self->{current_token}->{type}: Unknown token type";
1271     }
1272 wakaba 1.57 $self->{state} = DATA_STATE;
1273 wakaba 1.1 ## reconsume
1274    
1275     !!!emit ($self->{current_token}); # start tag or end tag
1276    
1277     redo A;
1278     } else {
1279 wakaba 1.76 if ($self->{next_char} == 0x003D) { # =
1280 wakaba 1.77 !!!cp (93);
1281 wakaba 1.72 !!!parse-error (type => 'bad attribute value');
1282 wakaba 1.77 } else {
1283     !!!cp (94);
1284 wakaba 1.72 }
1285 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1286 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
1287 wakaba 1.1 !!!next-input-character;
1288     redo A;
1289     }
1290 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {
1291 wakaba 1.76 if ($self->{next_char} == 0x0022) { # "
1292 wakaba 1.77 !!!cp (95);
1293 wakaba 1.72 $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1294 wakaba 1.1 !!!next-input-character;
1295     redo A;
1296 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1297 wakaba 1.77 !!!cp (96);
1298 wakaba 1.57 $self->{last_attribute_value_state} = $self->{state};
1299     $self->{state} = ENTITY_IN_ATTRIBUTE_VALUE_STATE;
1300 wakaba 1.1 !!!next-input-character;
1301     redo A;
1302 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1303 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
1304 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1305 wakaba 1.77 !!!cp (97);
1306 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1307 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1308 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1309 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1310 wakaba 1.77 !!!cp (98);
1311 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1312 wakaba 1.77 } else {
1313 wakaba 1.78 ## NOTE: This state should never be reached.
1314 wakaba 1.77 !!!cp (99);
1315 wakaba 1.1 }
1316     } else {
1317     die "$0: $self->{current_token}->{type}: Unknown token type";
1318     }
1319 wakaba 1.57 $self->{state} = DATA_STATE;
1320 wakaba 1.1 ## reconsume
1321    
1322     !!!emit ($self->{current_token}); # start tag or end tag
1323    
1324     redo A;
1325     } else {
1326 wakaba 1.77 !!!cp (100);
1327 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1328 wakaba 1.1 ## Stay in the state
1329     !!!next-input-character;
1330     redo A;
1331     }
1332 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {
1333 wakaba 1.76 if ($self->{next_char} == 0x0027) { # '
1334 wakaba 1.77 !!!cp (101);
1335 wakaba 1.72 $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1336 wakaba 1.1 !!!next-input-character;
1337     redo A;
1338 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1339 wakaba 1.77 !!!cp (102);
1340 wakaba 1.57 $self->{last_attribute_value_state} = $self->{state};
1341     $self->{state} = ENTITY_IN_ATTRIBUTE_VALUE_STATE;
1342 wakaba 1.1 !!!next-input-character;
1343     redo A;
1344 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1345 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
1346 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1347 wakaba 1.77 !!!cp (103);
1348 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1349 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1350 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1351 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1352 wakaba 1.77 !!!cp (104);
1353 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1354 wakaba 1.77 } else {
1355 wakaba 1.78 ## NOTE: This state should never be reached.
1356 wakaba 1.77 !!!cp (105);
1357 wakaba 1.1 }
1358     } else {
1359     die "$0: $self->{current_token}->{type}: Unknown token type";
1360     }
1361 wakaba 1.57 $self->{state} = DATA_STATE;
1362 wakaba 1.1 ## reconsume
1363    
1364     !!!emit ($self->{current_token}); # start tag or end tag
1365    
1366     redo A;
1367     } else {
1368 wakaba 1.77 !!!cp (106);
1369 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1370 wakaba 1.1 ## Stay in the state
1371     !!!next-input-character;
1372     redo A;
1373     }
1374 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) {
1375 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1376     $self->{next_char} == 0x000A or # LF
1377     $self->{next_char} == 0x000B or # VT
1378     $self->{next_char} == 0x000C or # FF
1379     $self->{next_char} == 0x0020) { # SP
1380 wakaba 1.77 !!!cp (107);
1381 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1382 wakaba 1.1 !!!next-input-character;
1383     redo A;
1384 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1385 wakaba 1.77 !!!cp (108);
1386 wakaba 1.57 $self->{last_attribute_value_state} = $self->{state};
1387     $self->{state} = ENTITY_IN_ATTRIBUTE_VALUE_STATE;
1388 wakaba 1.1 !!!next-input-character;
1389     redo A;
1390 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1391 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1392 wakaba 1.77 !!!cp (109);
1393 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1394 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1395 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1396 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1397 wakaba 1.77 !!!cp (110);
1398 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1399 wakaba 1.77 } else {
1400 wakaba 1.78 ## NOTE: This state should never be reached.
1401 wakaba 1.77 !!!cp (111);
1402 wakaba 1.1 }
1403     } else {
1404     die "$0: $self->{current_token}->{type}: Unknown token type";
1405     }
1406 wakaba 1.57 $self->{state} = DATA_STATE;
1407 wakaba 1.1 !!!next-input-character;
1408    
1409     !!!emit ($self->{current_token}); # start tag or end tag
1410    
1411     redo A;
1412 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1413 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1414 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1415 wakaba 1.77 !!!cp (112);
1416 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1417 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1418 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1419 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1420 wakaba 1.77 !!!cp (113);
1421 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1422 wakaba 1.77 } else {
1423 wakaba 1.78 ## NOTE: This state should never be reached.
1424 wakaba 1.77 !!!cp (114);
1425 wakaba 1.1 }
1426     } else {
1427     die "$0: $self->{current_token}->{type}: Unknown token type";
1428     }
1429 wakaba 1.57 $self->{state} = DATA_STATE;
1430 wakaba 1.1 ## reconsume
1431    
1432     !!!emit ($self->{current_token}); # start tag or end tag
1433    
1434     redo A;
1435     } else {
1436 wakaba 1.72 if ({
1437     0x0022 => 1, # "
1438     0x0027 => 1, # '
1439     0x003D => 1, # =
1440 wakaba 1.76 }->{$self->{next_char}}) {
1441 wakaba 1.77 !!!cp (115);
1442 wakaba 1.72 !!!parse-error (type => 'bad attribute value');
1443 wakaba 1.77 } else {
1444     !!!cp (116);
1445 wakaba 1.72 }
1446 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1447 wakaba 1.1 ## Stay in the state
1448     !!!next-input-character;
1449     redo A;
1450     }
1451 wakaba 1.57 } elsif ($self->{state} == ENTITY_IN_ATTRIBUTE_VALUE_STATE) {
1452 wakaba 1.72 my $token = $self->_tokenize_attempt_to_consume_an_entity
1453     (1,
1454     $self->{last_attribute_value_state}
1455     == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE ? 0x0022 : # "
1456     $self->{last_attribute_value_state}
1457     == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE ? 0x0027 : # '
1458     -1);
1459 wakaba 1.1
1460     unless (defined $token) {
1461 wakaba 1.77 !!!cp (117);
1462 wakaba 1.1 $self->{current_attribute}->{value} .= '&';
1463     } else {
1464 wakaba 1.77 !!!cp (118);
1465 wakaba 1.1 $self->{current_attribute}->{value} .= $token->{data};
1466 wakaba 1.66 $self->{current_attribute}->{has_reference} = $token->{has_reference};
1467 wakaba 1.1 ## ISSUE: spec says "append the returned character token to the current attribute's value"
1468     }
1469    
1470     $self->{state} = $self->{last_attribute_value_state};
1471     # next-input-character is already done
1472     redo A;
1473 wakaba 1.72 } elsif ($self->{state} == AFTER_ATTRIBUTE_VALUE_QUOTED_STATE) {
1474 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1475     $self->{next_char} == 0x000A or # LF
1476     $self->{next_char} == 0x000B or # VT
1477     $self->{next_char} == 0x000C or # FF
1478     $self->{next_char} == 0x0020) { # SP
1479 wakaba 1.77 !!!cp (118);
1480 wakaba 1.72 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1481     !!!next-input-character;
1482     redo A;
1483 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1484 wakaba 1.72 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1485 wakaba 1.77 !!!cp (119);
1486 wakaba 1.72 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1487     } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1488     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1489     if ($self->{current_token}->{attributes}) {
1490 wakaba 1.77 !!!cp (120);
1491 wakaba 1.72 !!!parse-error (type => 'end tag attribute');
1492 wakaba 1.77 } else {
1493 wakaba 1.78 ## NOTE: This state should never be reached.
1494 wakaba 1.77 !!!cp (121);
1495 wakaba 1.72 }
1496     } else {
1497     die "$0: $self->{current_token}->{type}: Unknown token type";
1498     }
1499     $self->{state} = DATA_STATE;
1500     !!!next-input-character;
1501    
1502     !!!emit ($self->{current_token}); # start tag or end tag
1503    
1504     redo A;
1505 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1506 wakaba 1.125 !!!cp (122);
1507     $self->{state} = SELF_CLOSING_START_TAG_STATE;
1508 wakaba 1.72 !!!next-input-character;
1509 wakaba 1.125 redo A;
1510     } else {
1511     !!!cp ('124.1');
1512     !!!parse-error (type => 'no space between attributes');
1513     $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1514     ## reconsume
1515     redo A;
1516     }
1517     } elsif ($self->{state} == SELF_CLOSING_START_TAG_STATE) {
1518     if ($self->{next_char} == 0x003E) { # >
1519     if ($self->{current_token}->{type} == END_TAG_TOKEN) {
1520     !!!cp ('124.2');
1521     !!!parse-error (type => 'nestc', token => $self->{current_token});
1522     ## TODO: Different type than slash in start tag
1523     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1524     if ($self->{current_token}->{attributes}) {
1525     !!!cp ('124.4');
1526     !!!parse-error (type => 'end tag attribute');
1527     } else {
1528     !!!cp ('124.5');
1529     }
1530     ## TODO: Test |<title></title/>|
1531 wakaba 1.72 } else {
1532 wakaba 1.125 !!!cp ('124.3');
1533     $self->{self_closing} = 1;
1534 wakaba 1.72 }
1535 wakaba 1.125
1536     $self->{state} = DATA_STATE;
1537     !!!next-input-character;
1538    
1539     !!!emit ($self->{current_token}); # start tag or end tag
1540    
1541 wakaba 1.72 redo A;
1542     } else {
1543 wakaba 1.125 !!!cp ('124.4');
1544     !!!parse-error (type => 'nestc');
1545     ## TODO: This error type is wrong.
1546 wakaba 1.72 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1547 wakaba 1.125 ## Reconsume.
1548 wakaba 1.72 redo A;
1549     }
1550 wakaba 1.57 } elsif ($self->{state} == BOGUS_COMMENT_STATE) {
1551 wakaba 1.1 ## (only happens in the PCDATA state)
1552    
1553 wakaba 1.112 ## NOTE: Set by the previous state
1554     #my $token = {type => COMMENT_TOKEN, data => ''};
1555 wakaba 1.1
1556     BC: {
1557 wakaba 1.76 if ($self->{next_char} == 0x003E) { # >
1558 wakaba 1.77 !!!cp (124);
1559 wakaba 1.57 $self->{state} = DATA_STATE;
1560 wakaba 1.1 !!!next-input-character;
1561    
1562 wakaba 1.112 !!!emit ($self->{current_token}); # comment
1563 wakaba 1.1
1564     redo A;
1565 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1566 wakaba 1.77 !!!cp (125);
1567 wakaba 1.57 $self->{state} = DATA_STATE;
1568 wakaba 1.1 ## reconsume
1569    
1570 wakaba 1.112 !!!emit ($self->{current_token}); # comment
1571 wakaba 1.1
1572     redo A;
1573     } else {
1574 wakaba 1.77 !!!cp (126);
1575 wakaba 1.112 $self->{current_token}->{data} .= chr ($self->{next_char}); # comment
1576 wakaba 1.1 !!!next-input-character;
1577     redo BC;
1578     }
1579     } # BC
1580 wakaba 1.77
1581     die "$0: _get_next_token: unexpected case [BC]";
1582 wakaba 1.57 } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {
1583 wakaba 1.1 ## (only happens in the PCDATA state)
1584    
1585 wakaba 1.120 my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1);
1586 wakaba 1.112
1587 wakaba 1.1 my @next_char;
1588 wakaba 1.76 push @next_char, $self->{next_char};
1589 wakaba 1.1
1590 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
1591 wakaba 1.1 !!!next-input-character;
1592 wakaba 1.76 push @next_char, $self->{next_char};
1593     if ($self->{next_char} == 0x002D) { # -
1594 wakaba 1.77 !!!cp (127);
1595 wakaba 1.112 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
1596 wakaba 1.120 line => $l, column => $c,
1597 wakaba 1.118 };
1598 wakaba 1.57 $self->{state} = COMMENT_START_STATE;
1599 wakaba 1.1 !!!next-input-character;
1600     redo A;
1601 wakaba 1.77 } else {
1602     !!!cp (128);
1603 wakaba 1.1 }
1604 wakaba 1.76 } elsif ($self->{next_char} == 0x0044 or # D
1605     $self->{next_char} == 0x0064) { # d
1606 wakaba 1.1 !!!next-input-character;
1607 wakaba 1.76 push @next_char, $self->{next_char};
1608     if ($self->{next_char} == 0x004F or # O
1609     $self->{next_char} == 0x006F) { # o
1610 wakaba 1.1 !!!next-input-character;
1611 wakaba 1.76 push @next_char, $self->{next_char};
1612     if ($self->{next_char} == 0x0043 or # C
1613     $self->{next_char} == 0x0063) { # c
1614 wakaba 1.1 !!!next-input-character;
1615 wakaba 1.76 push @next_char, $self->{next_char};
1616     if ($self->{next_char} == 0x0054 or # T
1617     $self->{next_char} == 0x0074) { # t
1618 wakaba 1.1 !!!next-input-character;
1619 wakaba 1.76 push @next_char, $self->{next_char};
1620     if ($self->{next_char} == 0x0059 or # Y
1621     $self->{next_char} == 0x0079) { # y
1622 wakaba 1.1 !!!next-input-character;
1623 wakaba 1.76 push @next_char, $self->{next_char};
1624     if ($self->{next_char} == 0x0050 or # P
1625     $self->{next_char} == 0x0070) { # p
1626 wakaba 1.1 !!!next-input-character;
1627 wakaba 1.76 push @next_char, $self->{next_char};
1628     if ($self->{next_char} == 0x0045 or # E
1629     $self->{next_char} == 0x0065) { # e
1630 wakaba 1.77 !!!cp (129);
1631     ## TODO: What a stupid code this is!
1632 wakaba 1.57 $self->{state} = DOCTYPE_STATE;
1633 wakaba 1.112 $self->{current_token} = {type => DOCTYPE_TOKEN,
1634     quirks => 1,
1635 wakaba 1.120 line => $l, column => $c,
1636 wakaba 1.118 };
1637 wakaba 1.1 !!!next-input-character;
1638     redo A;
1639 wakaba 1.77 } else {
1640     !!!cp (130);
1641 wakaba 1.1 }
1642 wakaba 1.77 } else {
1643     !!!cp (131);
1644 wakaba 1.1 }
1645 wakaba 1.77 } else {
1646     !!!cp (132);
1647 wakaba 1.1 }
1648 wakaba 1.77 } else {
1649     !!!cp (133);
1650 wakaba 1.1 }
1651 wakaba 1.77 } else {
1652     !!!cp (134);
1653 wakaba 1.1 }
1654 wakaba 1.77 } else {
1655     !!!cp (135);
1656 wakaba 1.1 }
1657 wakaba 1.77 } else {
1658     !!!cp (136);
1659 wakaba 1.1 }
1660    
1661 wakaba 1.30 !!!parse-error (type => 'bogus comment');
1662 wakaba 1.76 $self->{next_char} = shift @next_char;
1663 wakaba 1.1 !!!back-next-input-character (@next_char);
1664 wakaba 1.57 $self->{state} = BOGUS_COMMENT_STATE;
1665 wakaba 1.112 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
1666 wakaba 1.120 line => $l, column => $c,
1667 wakaba 1.118 };
1668 wakaba 1.1 redo A;
1669    
1670     ## ISSUE: typos in spec: chacacters, is is a parse error
1671     ## ISSUE: spec is somewhat unclear on "is the first character that will be in the comment"; what is "that will be in the comment" is what the algorithm defines, isn't it?
1672 wakaba 1.57 } elsif ($self->{state} == COMMENT_START_STATE) {
1673 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
1674 wakaba 1.77 !!!cp (137);
1675 wakaba 1.57 $self->{state} = COMMENT_START_DASH_STATE;
1676 wakaba 1.23 !!!next-input-character;
1677     redo A;
1678 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1679 wakaba 1.77 !!!cp (138);
1680 wakaba 1.23 !!!parse-error (type => 'bogus comment');
1681 wakaba 1.57 $self->{state} = DATA_STATE;
1682 wakaba 1.23 !!!next-input-character;
1683    
1684     !!!emit ($self->{current_token}); # comment
1685    
1686     redo A;
1687 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1688 wakaba 1.77 !!!cp (139);
1689 wakaba 1.23 !!!parse-error (type => 'unclosed comment');
1690 wakaba 1.57 $self->{state} = DATA_STATE;
1691 wakaba 1.23 ## reconsume
1692    
1693     !!!emit ($self->{current_token}); # comment
1694    
1695     redo A;
1696     } else {
1697 wakaba 1.77 !!!cp (140);
1698 wakaba 1.23 $self->{current_token}->{data} # comment
1699 wakaba 1.76 .= chr ($self->{next_char});
1700 wakaba 1.57 $self->{state} = COMMENT_STATE;
1701 wakaba 1.23 !!!next-input-character;
1702     redo A;
1703     }
1704 wakaba 1.57 } elsif ($self->{state} == COMMENT_START_DASH_STATE) {
1705 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
1706 wakaba 1.77 !!!cp (141);
1707 wakaba 1.57 $self->{state} = COMMENT_END_STATE;
1708 wakaba 1.23 !!!next-input-character;
1709     redo A;
1710 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1711 wakaba 1.77 !!!cp (142);
1712 wakaba 1.23 !!!parse-error (type => 'bogus comment');
1713 wakaba 1.57 $self->{state} = DATA_STATE;
1714 wakaba 1.23 !!!next-input-character;
1715    
1716     !!!emit ($self->{current_token}); # comment
1717    
1718     redo A;
1719 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1720 wakaba 1.77 !!!cp (143);
1721 wakaba 1.23 !!!parse-error (type => 'unclosed comment');
1722 wakaba 1.57 $self->{state} = DATA_STATE;
1723 wakaba 1.23 ## reconsume
1724    
1725     !!!emit ($self->{current_token}); # comment
1726    
1727     redo A;
1728     } else {
1729 wakaba 1.77 !!!cp (144);
1730 wakaba 1.23 $self->{current_token}->{data} # comment
1731 wakaba 1.76 .= '-' . chr ($self->{next_char});
1732 wakaba 1.57 $self->{state} = COMMENT_STATE;
1733 wakaba 1.23 !!!next-input-character;
1734     redo A;
1735     }
1736 wakaba 1.57 } elsif ($self->{state} == COMMENT_STATE) {
1737 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
1738 wakaba 1.77 !!!cp (145);
1739 wakaba 1.57 $self->{state} = COMMENT_END_DASH_STATE;
1740 wakaba 1.1 !!!next-input-character;
1741     redo A;
1742 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1743 wakaba 1.77 !!!cp (146);
1744 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1745 wakaba 1.57 $self->{state} = DATA_STATE;
1746 wakaba 1.1 ## reconsume
1747    
1748     !!!emit ($self->{current_token}); # comment
1749    
1750     redo A;
1751     } else {
1752 wakaba 1.77 !!!cp (147);
1753 wakaba 1.76 $self->{current_token}->{data} .= chr ($self->{next_char}); # comment
1754 wakaba 1.1 ## Stay in the state
1755     !!!next-input-character;
1756     redo A;
1757     }
1758 wakaba 1.57 } elsif ($self->{state} == COMMENT_END_DASH_STATE) {
1759 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
1760 wakaba 1.77 !!!cp (148);
1761 wakaba 1.57 $self->{state} = COMMENT_END_STATE;
1762 wakaba 1.1 !!!next-input-character;
1763     redo A;
1764 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1765 wakaba 1.77 !!!cp (149);
1766 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1767 wakaba 1.57 $self->{state} = DATA_STATE;
1768 wakaba 1.1 ## reconsume
1769    
1770     !!!emit ($self->{current_token}); # comment
1771    
1772     redo A;
1773     } else {
1774 wakaba 1.77 !!!cp (150);
1775 wakaba 1.76 $self->{current_token}->{data} .= '-' . chr ($self->{next_char}); # comment
1776 wakaba 1.57 $self->{state} = COMMENT_STATE;
1777 wakaba 1.1 !!!next-input-character;
1778     redo A;
1779     }
1780 wakaba 1.57 } elsif ($self->{state} == COMMENT_END_STATE) {
1781 wakaba 1.76 if ($self->{next_char} == 0x003E) { # >
1782 wakaba 1.77 !!!cp (151);
1783 wakaba 1.57 $self->{state} = DATA_STATE;
1784 wakaba 1.1 !!!next-input-character;
1785    
1786     !!!emit ($self->{current_token}); # comment
1787    
1788     redo A;
1789 wakaba 1.76 } elsif ($self->{next_char} == 0x002D) { # -
1790 wakaba 1.77 !!!cp (152);
1791 wakaba 1.114 !!!parse-error (type => 'dash in comment',
1792     line => $self->{line_prev},
1793     column => $self->{column_prev});
1794 wakaba 1.1 $self->{current_token}->{data} .= '-'; # comment
1795     ## Stay in the state
1796     !!!next-input-character;
1797     redo A;
1798 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1799 wakaba 1.77 !!!cp (153);
1800 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1801 wakaba 1.57 $self->{state} = DATA_STATE;
1802 wakaba 1.1 ## reconsume
1803    
1804     !!!emit ($self->{current_token}); # comment
1805    
1806     redo A;
1807     } else {
1808 wakaba 1.77 !!!cp (154);
1809 wakaba 1.114 !!!parse-error (type => 'dash in comment',
1810     line => $self->{line_prev},
1811     column => $self->{column_prev});
1812 wakaba 1.76 $self->{current_token}->{data} .= '--' . chr ($self->{next_char}); # comment
1813 wakaba 1.57 $self->{state} = COMMENT_STATE;
1814 wakaba 1.1 !!!next-input-character;
1815     redo A;
1816     }
1817 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_STATE) {
1818 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1819     $self->{next_char} == 0x000A or # LF
1820     $self->{next_char} == 0x000B or # VT
1821     $self->{next_char} == 0x000C or # FF
1822     $self->{next_char} == 0x0020) { # SP
1823 wakaba 1.77 !!!cp (155);
1824 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
1825 wakaba 1.1 !!!next-input-character;
1826     redo A;
1827     } else {
1828 wakaba 1.77 !!!cp (156);
1829 wakaba 1.3 !!!parse-error (type => 'no space before DOCTYPE name');
1830 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
1831 wakaba 1.1 ## reconsume
1832     redo A;
1833     }
1834 wakaba 1.57 } elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) {
1835 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1836     $self->{next_char} == 0x000A or # LF
1837     $self->{next_char} == 0x000B or # VT
1838     $self->{next_char} == 0x000C or # FF
1839     $self->{next_char} == 0x0020) { # SP
1840 wakaba 1.77 !!!cp (157);
1841 wakaba 1.1 ## Stay in the state
1842     !!!next-input-character;
1843     redo A;
1844 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1845 wakaba 1.77 !!!cp (158);
1846 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
1847 wakaba 1.57 $self->{state} = DATA_STATE;
1848 wakaba 1.1 !!!next-input-character;
1849    
1850 wakaba 1.112 !!!emit ($self->{current_token}); # DOCTYPE (quirks)
1851 wakaba 1.1
1852     redo A;
1853 wakaba 1.77 } elsif ($self->{next_char} == -1) {
1854     !!!cp (159);
1855 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
1856 wakaba 1.57 $self->{state} = DATA_STATE;
1857 wakaba 1.1 ## reconsume
1858    
1859 wakaba 1.112 !!!emit ($self->{current_token}); # DOCTYPE (quirks)
1860 wakaba 1.1
1861     redo A;
1862     } else {
1863 wakaba 1.77 !!!cp (160);
1864 wakaba 1.112 $self->{current_token}->{name} = chr $self->{next_char};
1865     delete $self->{current_token}->{quirks};
1866 wakaba 1.4 ## ISSUE: "Set the token's name name to the" in the spec
1867 wakaba 1.57 $self->{state} = DOCTYPE_NAME_STATE;
1868 wakaba 1.1 !!!next-input-character;
1869     redo A;
1870     }
1871 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_NAME_STATE) {
1872 wakaba 1.18 ## ISSUE: Redundant "First," in the spec.
1873 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1874     $self->{next_char} == 0x000A or # LF
1875     $self->{next_char} == 0x000B or # VT
1876     $self->{next_char} == 0x000C or # FF
1877     $self->{next_char} == 0x0020) { # SP
1878 wakaba 1.77 !!!cp (161);
1879 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_NAME_STATE;
1880 wakaba 1.1 !!!next-input-character;
1881     redo A;
1882 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1883 wakaba 1.77 !!!cp (162);
1884 wakaba 1.57 $self->{state} = DATA_STATE;
1885 wakaba 1.1 !!!next-input-character;
1886    
1887     !!!emit ($self->{current_token}); # DOCTYPE
1888    
1889     redo A;
1890 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1891 wakaba 1.77 !!!cp (163);
1892 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1893 wakaba 1.57 $self->{state} = DATA_STATE;
1894 wakaba 1.1 ## reconsume
1895    
1896 wakaba 1.75 $self->{current_token}->{quirks} = 1;
1897 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
1898 wakaba 1.1
1899     redo A;
1900     } else {
1901 wakaba 1.77 !!!cp (164);
1902 wakaba 1.1 $self->{current_token}->{name}
1903 wakaba 1.76 .= chr ($self->{next_char}); # DOCTYPE
1904 wakaba 1.1 ## Stay in the state
1905     !!!next-input-character;
1906     redo A;
1907     }
1908 wakaba 1.57 } elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) {
1909 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1910     $self->{next_char} == 0x000A or # LF
1911     $self->{next_char} == 0x000B or # VT
1912     $self->{next_char} == 0x000C or # FF
1913     $self->{next_char} == 0x0020) { # SP
1914 wakaba 1.77 !!!cp (165);
1915 wakaba 1.1 ## Stay in the state
1916     !!!next-input-character;
1917     redo A;
1918 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1919 wakaba 1.77 !!!cp (166);
1920 wakaba 1.57 $self->{state} = DATA_STATE;
1921 wakaba 1.1 !!!next-input-character;
1922    
1923     !!!emit ($self->{current_token}); # DOCTYPE
1924    
1925     redo A;
1926 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1927 wakaba 1.77 !!!cp (167);
1928 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1929 wakaba 1.57 $self->{state} = DATA_STATE;
1930 wakaba 1.1 ## reconsume
1931    
1932 wakaba 1.75 $self->{current_token}->{quirks} = 1;
1933 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
1934    
1935     redo A;
1936 wakaba 1.76 } elsif ($self->{next_char} == 0x0050 or # P
1937     $self->{next_char} == 0x0070) { # p
1938 wakaba 1.18 !!!next-input-character;
1939 wakaba 1.76 if ($self->{next_char} == 0x0055 or # U
1940     $self->{next_char} == 0x0075) { # u
1941 wakaba 1.18 !!!next-input-character;
1942 wakaba 1.76 if ($self->{next_char} == 0x0042 or # B
1943     $self->{next_char} == 0x0062) { # b
1944 wakaba 1.18 !!!next-input-character;
1945 wakaba 1.76 if ($self->{next_char} == 0x004C or # L
1946     $self->{next_char} == 0x006C) { # l
1947 wakaba 1.18 !!!next-input-character;
1948 wakaba 1.76 if ($self->{next_char} == 0x0049 or # I
1949     $self->{next_char} == 0x0069) { # i
1950 wakaba 1.18 !!!next-input-character;
1951 wakaba 1.76 if ($self->{next_char} == 0x0043 or # C
1952     $self->{next_char} == 0x0063) { # c
1953 wakaba 1.77 !!!cp (168);
1954 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
1955 wakaba 1.18 !!!next-input-character;
1956     redo A;
1957 wakaba 1.77 } else {
1958     !!!cp (169);
1959 wakaba 1.18 }
1960 wakaba 1.77 } else {
1961     !!!cp (170);
1962 wakaba 1.18 }
1963 wakaba 1.77 } else {
1964     !!!cp (171);
1965 wakaba 1.18 }
1966 wakaba 1.77 } else {
1967     !!!cp (172);
1968 wakaba 1.18 }
1969 wakaba 1.77 } else {
1970     !!!cp (173);
1971 wakaba 1.18 }
1972    
1973     #
1974 wakaba 1.76 } elsif ($self->{next_char} == 0x0053 or # S
1975     $self->{next_char} == 0x0073) { # s
1976 wakaba 1.18 !!!next-input-character;
1977 wakaba 1.76 if ($self->{next_char} == 0x0059 or # Y
1978     $self->{next_char} == 0x0079) { # y
1979 wakaba 1.18 !!!next-input-character;
1980 wakaba 1.76 if ($self->{next_char} == 0x0053 or # S
1981     $self->{next_char} == 0x0073) { # s
1982 wakaba 1.18 !!!next-input-character;
1983 wakaba 1.76 if ($self->{next_char} == 0x0054 or # T
1984     $self->{next_char} == 0x0074) { # t
1985 wakaba 1.18 !!!next-input-character;
1986 wakaba 1.76 if ($self->{next_char} == 0x0045 or # E
1987     $self->{next_char} == 0x0065) { # e
1988 wakaba 1.18 !!!next-input-character;
1989 wakaba 1.76 if ($self->{next_char} == 0x004D or # M
1990     $self->{next_char} == 0x006D) { # m
1991 wakaba 1.77 !!!cp (174);
1992 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
1993 wakaba 1.18 !!!next-input-character;
1994     redo A;
1995 wakaba 1.77 } else {
1996     !!!cp (175);
1997 wakaba 1.18 }
1998 wakaba 1.77 } else {
1999     !!!cp (176);
2000 wakaba 1.18 }
2001 wakaba 1.77 } else {
2002     !!!cp (177);
2003 wakaba 1.18 }
2004 wakaba 1.77 } else {
2005     !!!cp (178);
2006 wakaba 1.18 }
2007 wakaba 1.77 } else {
2008     !!!cp (179);
2009 wakaba 1.18 }
2010    
2011     #
2012     } else {
2013 wakaba 1.77 !!!cp (180);
2014 wakaba 1.18 !!!next-input-character;
2015     #
2016     }
2017    
2018     !!!parse-error (type => 'string after DOCTYPE name');
2019 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2020 wakaba 1.73
2021 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2022 wakaba 1.18 # next-input-character is already done
2023     redo A;
2024 wakaba 1.57 } elsif ($self->{state} == BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {
2025 wakaba 1.18 if ({
2026     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2027     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2028 wakaba 1.76 }->{$self->{next_char}}) {
2029 wakaba 1.77 !!!cp (181);
2030 wakaba 1.18 ## Stay in the state
2031     !!!next-input-character;
2032     redo A;
2033 wakaba 1.76 } elsif ($self->{next_char} eq 0x0022) { # "
2034 wakaba 1.77 !!!cp (182);
2035 wakaba 1.18 $self->{current_token}->{public_identifier} = ''; # DOCTYPE
2036 wakaba 1.57 $self->{state} = DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE;
2037 wakaba 1.18 !!!next-input-character;
2038     redo A;
2039 wakaba 1.76 } elsif ($self->{next_char} eq 0x0027) { # '
2040 wakaba 1.77 !!!cp (183);
2041 wakaba 1.18 $self->{current_token}->{public_identifier} = ''; # DOCTYPE
2042 wakaba 1.57 $self->{state} = DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE;
2043 wakaba 1.18 !!!next-input-character;
2044     redo A;
2045 wakaba 1.76 } elsif ($self->{next_char} eq 0x003E) { # >
2046 wakaba 1.77 !!!cp (184);
2047 wakaba 1.18 !!!parse-error (type => 'no PUBLIC literal');
2048    
2049 wakaba 1.57 $self->{state} = DATA_STATE;
2050 wakaba 1.18 !!!next-input-character;
2051    
2052 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2053 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2054    
2055     redo A;
2056 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2057 wakaba 1.77 !!!cp (185);
2058 wakaba 1.18 !!!parse-error (type => 'unclosed DOCTYPE');
2059    
2060 wakaba 1.57 $self->{state} = DATA_STATE;
2061 wakaba 1.18 ## reconsume
2062    
2063 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2064 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2065    
2066     redo A;
2067     } else {
2068 wakaba 1.77 !!!cp (186);
2069 wakaba 1.18 !!!parse-error (type => 'string after PUBLIC');
2070 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2071 wakaba 1.73
2072 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2073 wakaba 1.18 !!!next-input-character;
2074     redo A;
2075     }
2076 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE) {
2077 wakaba 1.76 if ($self->{next_char} == 0x0022) { # "
2078 wakaba 1.77 !!!cp (187);
2079 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
2080 wakaba 1.18 !!!next-input-character;
2081     redo A;
2082 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2083 wakaba 1.77 !!!cp (188);
2084 wakaba 1.69 !!!parse-error (type => 'unclosed PUBLIC literal');
2085    
2086     $self->{state} = DATA_STATE;
2087     !!!next-input-character;
2088    
2089 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2090 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
2091    
2092     redo A;
2093 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2094 wakaba 1.77 !!!cp (189);
2095 wakaba 1.18 !!!parse-error (type => 'unclosed PUBLIC literal');
2096    
2097 wakaba 1.57 $self->{state} = DATA_STATE;
2098 wakaba 1.18 ## reconsume
2099    
2100 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2101 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2102    
2103     redo A;
2104     } else {
2105 wakaba 1.77 !!!cp (190);
2106 wakaba 1.18 $self->{current_token}->{public_identifier} # DOCTYPE
2107 wakaba 1.76 .= chr $self->{next_char};
2108 wakaba 1.18 ## Stay in the state
2109     !!!next-input-character;
2110     redo A;
2111     }
2112 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE) {
2113 wakaba 1.76 if ($self->{next_char} == 0x0027) { # '
2114 wakaba 1.77 !!!cp (191);
2115 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
2116 wakaba 1.18 !!!next-input-character;
2117     redo A;
2118 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2119 wakaba 1.77 !!!cp (192);
2120 wakaba 1.69 !!!parse-error (type => 'unclosed PUBLIC literal');
2121    
2122     $self->{state} = DATA_STATE;
2123     !!!next-input-character;
2124    
2125 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2126 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
2127    
2128     redo A;
2129 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2130 wakaba 1.77 !!!cp (193);
2131 wakaba 1.18 !!!parse-error (type => 'unclosed PUBLIC literal');
2132    
2133 wakaba 1.57 $self->{state} = DATA_STATE;
2134 wakaba 1.18 ## reconsume
2135    
2136 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2137 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2138    
2139     redo A;
2140     } else {
2141 wakaba 1.77 !!!cp (194);
2142 wakaba 1.18 $self->{current_token}->{public_identifier} # DOCTYPE
2143 wakaba 1.76 .= chr $self->{next_char};
2144 wakaba 1.18 ## Stay in the state
2145     !!!next-input-character;
2146     redo A;
2147     }
2148 wakaba 1.57 } elsif ($self->{state} == AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {
2149 wakaba 1.18 if ({
2150     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2151     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2152 wakaba 1.76 }->{$self->{next_char}}) {
2153 wakaba 1.77 !!!cp (195);
2154 wakaba 1.18 ## Stay in the state
2155     !!!next-input-character;
2156     redo A;
2157 wakaba 1.76 } elsif ($self->{next_char} == 0x0022) { # "
2158 wakaba 1.77 !!!cp (196);
2159 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2160 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
2161 wakaba 1.18 !!!next-input-character;
2162     redo A;
2163 wakaba 1.76 } elsif ($self->{next_char} == 0x0027) { # '
2164 wakaba 1.77 !!!cp (197);
2165 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2166 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
2167 wakaba 1.18 !!!next-input-character;
2168     redo A;
2169 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2170 wakaba 1.77 !!!cp (198);
2171 wakaba 1.57 $self->{state} = DATA_STATE;
2172 wakaba 1.18 !!!next-input-character;
2173    
2174     !!!emit ($self->{current_token}); # DOCTYPE
2175    
2176     redo A;
2177 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2178 wakaba 1.77 !!!cp (199);
2179 wakaba 1.18 !!!parse-error (type => 'unclosed DOCTYPE');
2180    
2181 wakaba 1.57 $self->{state} = DATA_STATE;
2182 wakaba 1.26 ## reconsume
2183 wakaba 1.18
2184 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2185 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2186    
2187     redo A;
2188     } else {
2189 wakaba 1.77 !!!cp (200);
2190 wakaba 1.18 !!!parse-error (type => 'string after PUBLIC literal');
2191 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2192 wakaba 1.73
2193 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2194 wakaba 1.18 !!!next-input-character;
2195     redo A;
2196     }
2197 wakaba 1.57 } elsif ($self->{state} == BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
2198 wakaba 1.18 if ({
2199     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2200     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2201 wakaba 1.76 }->{$self->{next_char}}) {
2202 wakaba 1.77 !!!cp (201);
2203 wakaba 1.18 ## Stay in the state
2204     !!!next-input-character;
2205     redo A;
2206 wakaba 1.76 } elsif ($self->{next_char} == 0x0022) { # "
2207 wakaba 1.77 !!!cp (202);
2208 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2209 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
2210 wakaba 1.18 !!!next-input-character;
2211     redo A;
2212 wakaba 1.76 } elsif ($self->{next_char} == 0x0027) { # '
2213 wakaba 1.77 !!!cp (203);
2214 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2215 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
2216 wakaba 1.18 !!!next-input-character;
2217     redo A;
2218 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2219 wakaba 1.77 !!!cp (204);
2220 wakaba 1.18 !!!parse-error (type => 'no SYSTEM literal');
2221 wakaba 1.57 $self->{state} = DATA_STATE;
2222 wakaba 1.18 !!!next-input-character;
2223    
2224 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2225 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2226    
2227     redo A;
2228 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2229 wakaba 1.77 !!!cp (205);
2230 wakaba 1.18 !!!parse-error (type => 'unclosed DOCTYPE');
2231    
2232 wakaba 1.57 $self->{state} = DATA_STATE;
2233 wakaba 1.26 ## reconsume
2234 wakaba 1.18
2235 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2236 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2237    
2238     redo A;
2239     } else {
2240 wakaba 1.77 !!!cp (206);
2241 wakaba 1.30 !!!parse-error (type => 'string after SYSTEM');
2242 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2243 wakaba 1.73
2244 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2245 wakaba 1.18 !!!next-input-character;
2246     redo A;
2247     }
2248 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE) {
2249 wakaba 1.76 if ($self->{next_char} == 0x0022) { # "
2250 wakaba 1.77 !!!cp (207);
2251 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
2252 wakaba 1.18 !!!next-input-character;
2253     redo A;
2254 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2255 wakaba 1.77 !!!cp (208);
2256 wakaba 1.69 !!!parse-error (type => 'unclosed PUBLIC literal');
2257    
2258     $self->{state} = DATA_STATE;
2259     !!!next-input-character;
2260    
2261 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2262 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
2263    
2264     redo A;
2265 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2266 wakaba 1.77 !!!cp (209);
2267 wakaba 1.18 !!!parse-error (type => 'unclosed SYSTEM literal');
2268    
2269 wakaba 1.57 $self->{state} = DATA_STATE;
2270 wakaba 1.18 ## reconsume
2271    
2272 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2273 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2274    
2275     redo A;
2276     } else {
2277 wakaba 1.77 !!!cp (210);
2278 wakaba 1.18 $self->{current_token}->{system_identifier} # DOCTYPE
2279 wakaba 1.76 .= chr $self->{next_char};
2280 wakaba 1.18 ## Stay in the state
2281     !!!next-input-character;
2282     redo A;
2283     }
2284 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE) {
2285 wakaba 1.76 if ($self->{next_char} == 0x0027) { # '
2286 wakaba 1.77 !!!cp (211);
2287 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
2288 wakaba 1.18 !!!next-input-character;
2289     redo A;
2290 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2291 wakaba 1.77 !!!cp (212);
2292 wakaba 1.69 !!!parse-error (type => 'unclosed PUBLIC literal');
2293    
2294     $self->{state} = DATA_STATE;
2295     !!!next-input-character;
2296    
2297 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2298 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
2299    
2300     redo A;
2301 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2302 wakaba 1.77 !!!cp (213);
2303 wakaba 1.18 !!!parse-error (type => 'unclosed SYSTEM literal');
2304    
2305 wakaba 1.57 $self->{state} = DATA_STATE;
2306 wakaba 1.18 ## reconsume
2307    
2308 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2309 wakaba 1.1 !!!emit ($self->{current_token}); # DOCTYPE
2310    
2311     redo A;
2312     } else {
2313 wakaba 1.77 !!!cp (214);
2314 wakaba 1.18 $self->{current_token}->{system_identifier} # DOCTYPE
2315 wakaba 1.76 .= chr $self->{next_char};
2316 wakaba 1.18 ## Stay in the state
2317     !!!next-input-character;
2318     redo A;
2319     }
2320 wakaba 1.57 } elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
2321 wakaba 1.18 if ({
2322     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2323     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2324 wakaba 1.76 }->{$self->{next_char}}) {
2325 wakaba 1.77 !!!cp (215);
2326 wakaba 1.18 ## Stay in the state
2327     !!!next-input-character;
2328     redo A;
2329 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2330 wakaba 1.77 !!!cp (216);
2331 wakaba 1.57 $self->{state} = DATA_STATE;
2332 wakaba 1.18 !!!next-input-character;
2333    
2334     !!!emit ($self->{current_token}); # DOCTYPE
2335    
2336     redo A;
2337 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2338 wakaba 1.77 !!!cp (217);
2339 wakaba 1.18 !!!parse-error (type => 'unclosed DOCTYPE');
2340    
2341 wakaba 1.57 $self->{state} = DATA_STATE;
2342 wakaba 1.26 ## reconsume
2343 wakaba 1.18
2344 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2345 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2346    
2347     redo A;
2348     } else {
2349 wakaba 1.77 !!!cp (218);
2350 wakaba 1.18 !!!parse-error (type => 'string after SYSTEM literal');
2351 wakaba 1.75 #$self->{current_token}->{quirks} = 1;
2352 wakaba 1.73
2353 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2354 wakaba 1.1 !!!next-input-character;
2355     redo A;
2356     }
2357 wakaba 1.57 } elsif ($self->{state} == BOGUS_DOCTYPE_STATE) {
2358 wakaba 1.76 if ($self->{next_char} == 0x003E) { # >
2359 wakaba 1.77 !!!cp (219);
2360 wakaba 1.57 $self->{state} = DATA_STATE;
2361 wakaba 1.1 !!!next-input-character;
2362    
2363     !!!emit ($self->{current_token}); # DOCTYPE
2364    
2365     redo A;
2366 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2367 wakaba 1.77 !!!cp (220);
2368 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
2369 wakaba 1.57 $self->{state} = DATA_STATE;
2370 wakaba 1.1 ## reconsume
2371    
2372     !!!emit ($self->{current_token}); # DOCTYPE
2373    
2374     redo A;
2375     } else {
2376 wakaba 1.77 !!!cp (221);
2377 wakaba 1.1 ## Stay in the state
2378     !!!next-input-character;
2379     redo A;
2380     }
2381     } else {
2382     die "$0: $self->{state}: Unknown state";
2383     }
2384     } # A
2385    
2386     die "$0: _get_next_token: unexpected case";
2387     } # _get_next_token
2388    
## Tries to consume one character reference from the input, starting
## from |$self->{next_char}| (presumably the character right after an
## "&" that the caller has already consumed -- confirm at call sites).
##
## Arguments:
##   $in_attr    - If true, a named reference that lacks ";" and is
##                 followed by further name characters is returned as
##                 the literal text "&name..." instead of being expanded.
##   $additional - An extra character code which, like white space, "<",
##                 "&" and EOF (-1), means "not a reference" (no error).
##
## Returns |undef| if no reference is consumed, or a CHARACTER_TOKEN
## hash reference otherwise; |has_reference| is set when the data is
## the result of expanding a reference.  Parse errors are reported at
## the position saved in ($l, $c).
2389 wakaba 1.72 sub _tokenize_attempt_to_consume_an_entity ($$$) {
2390     my ($self, $in_attr, $additional) = @_;
2391 wakaba 1.20
2392 wakaba 1.112 my ($l, $c) = ($self->{line_prev}, $self->{column_prev});
2393    
2394 wakaba 1.20 if ({
2395     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, # HT, LF, VT, FF,
2396     0x0020 => 1, 0x003C => 1, 0x0026 => 1, -1 => 1, # SP, <, & # 0x000D # CR
2397 wakaba 1.72 $additional => 1,
2398 wakaba 1.76 }->{$self->{next_char}}) {
2399 wakaba 1.78 !!!cp (1001);
2400 wakaba 1.20 ## Don't consume
2401     ## No error
2402     return undef;
2403 wakaba 1.76 } elsif ($self->{next_char} == 0x0023) { # #
2404 wakaba 1.1 !!!next-input-character;
2405 wakaba 1.76 if ($self->{next_char} == 0x0078 or # x
2406     $self->{next_char} == 0x0058) { # X
## Hexadecimal reference: digits are accumulated in |$code|, which
## stays undefined until the first hexadecimal digit has been seen.
2407 wakaba 1.26 my $code;
2408 wakaba 1.1 X: {
2409 wakaba 1.76 my $x_char = $self->{next_char};
2410 wakaba 1.1 !!!next-input-character;
2411 wakaba 1.76 if (0x0030 <= $self->{next_char} and
2412     $self->{next_char} <= 0x0039) { # 0..9
2413 wakaba 1.78 !!!cp (1002);
2414 wakaba 1.26 $code ||= 0;
2415     $code *= 0x10;
2416 wakaba 1.76 $code += $self->{next_char} - 0x0030;
2417 wakaba 1.1 redo X;
2418 wakaba 1.76 } elsif (0x0061 <= $self->{next_char} and
2419     $self->{next_char} <= 0x0066) { # a..f
2420 wakaba 1.78 !!!cp (1003);
2421 wakaba 1.26 $code ||= 0;
2422     $code *= 0x10;
2423 wakaba 1.76 $code += $self->{next_char} - 0x0060 + 9;
2424 wakaba 1.1 redo X;
2425 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
2426     $self->{next_char} <= 0x0046) { # A..F
2427 wakaba 1.78 !!!cp (1004);
2428 wakaba 1.26 $code ||= 0;
2429     $code *= 0x10;
2430 wakaba 1.76 $code += $self->{next_char} - 0x0040 + 9;
2431 wakaba 1.1 redo X;
2432 wakaba 1.26 } elsif (not defined $code) { # no hexadecimal digit
## NOTE: This branch can only be taken on the first pass of |X|, so
## |$x_char| is still the "x" or "X" character here.  The characters
## are handed back (presumably re-queued by the macro) and "#" is
## restored as the next character.
2433 wakaba 1.78 !!!cp (1005);
2434 wakaba 1.112 !!!parse-error (type => 'bare hcro', line => $l, column => $c);
2435 wakaba 1.76 !!!back-next-input-character ($x_char, $self->{next_char});
2436     $self->{next_char} = 0x0023; # #
2437 wakaba 1.1 return undef;
2438 wakaba 1.76 } elsif ($self->{next_char} == 0x003B) { # ;
2439 wakaba 1.78 !!!cp (1006);
2440 wakaba 1.1 !!!next-input-character;
2441     } else {
2442 wakaba 1.78 !!!cp (1007);
2443 wakaba 1.112 !!!parse-error (type => 'no refc', line => $l, column => $c);
2444 wakaba 1.1 }
2445    
## Replace code points that are reported as errors: U+0000 and
## surrogates and out-of-range values become U+FFFD, CR becomes LF,
## and 0x80..0x9F are mapped through |$c1_entity_char|.
2446 wakaba 1.26 if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) {
2447 wakaba 1.78 !!!cp (1008);
2448 wakaba 1.112 !!!parse-error (type => (sprintf 'invalid character reference:U+%04X', $code), line => $l, column => $c);
2449 wakaba 1.26 $code = 0xFFFD;
2450     } elsif ($code > 0x10FFFF) {
2451 wakaba 1.78 !!!cp (1009);
2452 wakaba 1.112 !!!parse-error (type => (sprintf 'invalid character reference:U-%08X', $code), line => $l, column => $c);
2453 wakaba 1.26 $code = 0xFFFD;
2454     } elsif ($code == 0x000D) {
2455 wakaba 1.78 !!!cp (1010);
2456 wakaba 1.112 !!!parse-error (type => 'CR character reference', line => $l, column => $c);
2457 wakaba 1.26 $code = 0x000A;
2458     } elsif (0x80 <= $code and $code <= 0x9F) {
2459 wakaba 1.78 !!!cp (1011);
2460 wakaba 1.112 !!!parse-error (type => (sprintf 'C1 character reference:U+%04X', $code), line => $l, column => $c);
2461 wakaba 1.26 $code = $c1_entity_char->{$code};
2462 wakaba 1.1 }
2463    
2464 wakaba 1.66 return {type => CHARACTER_TOKEN, data => chr $code,
2465 wakaba 1.118 has_reference => 1,
2466 wakaba 1.120 line => $l, column => $c,
2467 wakaba 1.118 };
2468 wakaba 1.1 } # X
2469 wakaba 1.76 } elsif (0x0030 <= $self->{next_char} and
2470     $self->{next_char} <= 0x0039) { # 0..9
## Decimal reference: same post-processing as the hexadecimal case.
2471     my $code = $self->{next_char} - 0x0030;
2472 wakaba 1.1 !!!next-input-character;
2473    
2474 wakaba 1.76 while (0x0030 <= $self->{next_char} and
2475     $self->{next_char} <= 0x0039) { # 0..9
2476 wakaba 1.78 !!!cp (1012);
2477 wakaba 1.1 $code *= 10;
2478 wakaba 1.76 $code += $self->{next_char} - 0x0030;
2479 wakaba 1.1
2480     !!!next-input-character;
2481     }
2482    
2483 wakaba 1.76 if ($self->{next_char} == 0x003B) { # ;
2484 wakaba 1.78 !!!cp (1013);
2485 wakaba 1.1 !!!next-input-character;
2486     } else {
2487 wakaba 1.78 !!!cp (1014);
2488 wakaba 1.112 !!!parse-error (type => 'no refc', line => $l, column => $c);
2489 wakaba 1.1 }
2490    
2491 wakaba 1.26 if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) {
2492 wakaba 1.78 !!!cp (1015);
2493 wakaba 1.112 !!!parse-error (type => (sprintf 'invalid character reference:U+%04X', $code), line => $l, column => $c);
2494 wakaba 1.26 $code = 0xFFFD;
2495     } elsif ($code > 0x10FFFF) {
2496 wakaba 1.78 !!!cp (1016);
2497 wakaba 1.112 !!!parse-error (type => (sprintf 'invalid character reference:U-%08X', $code), line => $l, column => $c);
2498 wakaba 1.26 $code = 0xFFFD;
2499     } elsif ($code == 0x000D) {
2500 wakaba 1.78 !!!cp (1017);
2501 wakaba 1.112 !!!parse-error (type => 'CR character reference', line => $l, column => $c);
2502 wakaba 1.26 $code = 0x000A;
2503 wakaba 1.4 } elsif (0x80 <= $code and $code <= 0x9F) {
2504 wakaba 1.78 !!!cp (1018);
2505 wakaba 1.112 !!!parse-error (type => (sprintf 'C1 character reference:U+%04X', $code), line => $l, column => $c);
2506 wakaba 1.4 $code = $c1_entity_char->{$code};
2507 wakaba 1.1 }
2508    
2509 wakaba 1.112 return {type => CHARACTER_TOKEN, data => chr $code, has_reference => 1,
2510 wakaba 1.120 line => $l, column => $c,
2511 wakaba 1.118 };
2512 wakaba 1.1 } else {
2513 wakaba 1.78 !!!cp (1019);
2514 wakaba 1.112 !!!parse-error (type => 'bare nero', line => $l, column => $c);
2515 wakaba 1.76 !!!back-next-input-character ($self->{next_char});
2516     $self->{next_char} = 0x0023; # #
2517 wakaba 1.1 return undef;
2518     }
2519 wakaba 1.76 } elsif ((0x0041 <= $self->{next_char} and
2520     $self->{next_char} <= 0x005A) or
2521     (0x0061 <= $self->{next_char} and
2522     $self->{next_char} <= 0x007A)) {
## Named reference.  |$entity_name| is every character consumed so
## far; |$value| is the replacement text of the longest name matched
## so far followed by the characters consumed after it (or just the
## consumed characters if nothing has matched).
## |$match|: 1 = a name ending with ";" matched; negative = a name
## without ";" matched (-1 right after the match, doubled for each
## character consumed after it); 0 = nothing matched.
2523     my $entity_name = chr $self->{next_char};
2524 wakaba 1.1 !!!next-input-character;
2525    
2526     my $value = $entity_name;
2527 wakaba 1.37 my $match = 0;
2528 wakaba 1.16 require Whatpm::_NamedEntityList;
2529     our $EntityChar;
2530 wakaba 1.1
2531     while (length $entity_name < 10 and
2532     ## NOTE: Some number greater than the maximum length of entity name
2533 wakaba 1.76 ((0x0041 <= $self->{next_char} and # a
2534     $self->{next_char} <= 0x005A) or # x
2535     (0x0061 <= $self->{next_char} and # a
2536     $self->{next_char} <= 0x007A) or # z
2537     (0x0030 <= $self->{next_char} and # 0
2538     $self->{next_char} <= 0x0039) or # 9
2539     $self->{next_char} == 0x003B)) { # ;
2540     $entity_name .= chr $self->{next_char};
2541 wakaba 1.16 if (defined $EntityChar->{$entity_name}) {
2542 wakaba 1.76 if ($self->{next_char} == 0x003B) { # ;
2543 wakaba 1.78 !!!cp (1020);
2544 wakaba 1.26 $value = $EntityChar->{$entity_name};
2545 wakaba 1.16 $match = 1;
2546     !!!next-input-character;
2547     last;
2548 wakaba 1.37 } else {
2549 wakaba 1.78 !!!cp (1021);
2550 wakaba 1.26 $value = $EntityChar->{$entity_name};
2551     $match = -1;
2552 wakaba 1.37 !!!next-input-character;
2553 wakaba 1.16 }
2554 wakaba 1.1 } else {
2555 wakaba 1.78 !!!cp (1022);
2556 wakaba 1.76 $value .= chr $self->{next_char};
2557 wakaba 1.37 $match *= 2;
2558     !!!next-input-character;
2559 wakaba 1.1 }
2560     }
2561    
2562 wakaba 1.16 if ($match > 0) {
2563 wakaba 1.78 !!!cp (1023);
2564 wakaba 1.112 return {type => CHARACTER_TOKEN, data => $value, has_reference => 1,
2565 wakaba 1.120 line => $l, column => $c,
2566 wakaba 1.118 };
2567 wakaba 1.16 } elsif ($match < 0) {
2568 wakaba 1.112 !!!parse-error (type => 'no refc', line => $l, column => $c);
2569 wakaba 1.37 if ($in_attr and $match < -1) {
2570 wakaba 1.78 !!!cp (1024);
2571 wakaba 1.112 return {type => CHARACTER_TOKEN, data => '&'.$entity_name,
2572 wakaba 1.120 line => $l, column => $c,
2573 wakaba 1.118 };
2574 wakaba 1.37 } else {
2575 wakaba 1.78 !!!cp (1025);
2576 wakaba 1.112 return {type => CHARACTER_TOKEN, data => $value, has_reference => 1,
2577 wakaba 1.120 line => $l, column => $c,
2578 wakaba 1.118 };
2579 wakaba 1.37 }
2580 wakaba 1.1 } else {
2581 wakaba 1.78 !!!cp (1026);
2582 wakaba 1.112 !!!parse-error (type => 'bare ero', line => $l, column => $c);
2583 wakaba 1.66 ## NOTE: "No characters are consumed" in the spec.
2584 wakaba 1.112 return {type => CHARACTER_TOKEN, data => '&'.$value,
2585 wakaba 1.120 line => $l, column => $c,
2586 wakaba 1.118 };
2587 wakaba 1.1 }
2588     } else {
2589 wakaba 1.78 !!!cp (1027);
2590 wakaba 1.1 ## no characters are consumed
2591 wakaba 1.112 !!!parse-error (type => 'bare ero', line => $l, column => $c);
2592 wakaba 1.1 return undef;
2593     }
2594     } # _tokenize_attempt_to_consume_an_entity
2595    
## Prepares |$self->{document}| for tree construction: strict error
## checking is turned off and the document is flagged as an HTML
## document.
2596     sub _initialize_tree_constructor ($) {
2597     my $self = shift;
2598     ## NOTE: $self->{document} MUST be specified before this method is called
2599     $self->{document}->strict_error_checking (0);
2600     ## TODO: Turn mutation events off # MUST
2601     ## TODO: Turn loose Document option (manakai extension) on
2602 wakaba 1.18 $self->{document}->manakai_is_html (1); # MUST
2603 wakaba 1.1 } # _initialize_tree_constructor
2604    
## Counterpart of |_initialize_tree_constructor|: turns strict error
## checking of |$self->{document}| back on.
2605     sub _terminate_tree_constructor ($) {
2606     my $self = shift;
2607     $self->{document}->strict_error_checking (1);
2608     ## TODO: Turn mutation events on
2609     } # _terminate_tree_constructor
2610    
2611     ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
2612    
2613 wakaba 1.3 { # tree construction stage
2614     my $token;
2615    
## Entry point of the tree construction stage.  Fetches the first
## token, resets the per-parse state kept on |$self| and then runs the
## "initial" phase, the "before html" phase and, with the insertion
## mode set to BEFORE_HEAD_IM, the main phase.
2616 wakaba 1.1 sub _construct_tree ($) {
2617     my ($self) = @_;
2618    
2619     ## When an interactive UA renders the $self->{document} to the
2620     ## user, or when it begins accepting user input, is not
2621     ## defined.
2622    
2623     ## Append a character: collect it and all subsequent consecutive
2624     ## characters and insert one Text node whose data is concatenation
2625     ## of all those characters. # MUST
2626    
2627     !!!next-token;
2628    
2629 wakaba 1.3 undef $self->{form_element};
2630     undef $self->{head_element};
2631     $self->{open_elements} = [];
2632     undef $self->{inner_html_node};
2633    
2634 wakaba 1.84 ## NOTE: The "initial" insertion mode.
2635 wakaba 1.3 $self->_tree_construction_initial; # MUST
2636 wakaba 1.84
2637     ## NOTE: The "before html" insertion mode.
2638 wakaba 1.3 $self->_tree_construction_root_element;
2639 wakaba 1.84 $self->{insertion_mode} = BEFORE_HEAD_IM;
2640    
2641     ## NOTE: The "before head" insertion mode and so on.
2642 wakaba 1.3 $self->_tree_construction_main;
2643     } # _construct_tree
2644    
## Handles the "initial" insertion mode for the current |$token|.
##
## A DOCTYPE token is turned into a document type node, appended to
## the document and used to choose the document's compatibility mode.
## Start tag, end tag, end-of-file and non-white-space character tokens
## report a "no DOCTYPE" error, switch the document to quirks mode and
## are left as the current token for reprocessing.  Comments are
## appended to the document; leading white space is dropped.
##
## Returns when the parser should go to the "before html" mode.
2645     sub _tree_construction_initial ($) {
2646     my $self = shift;
2647 wakaba 1.84
2648     ## NOTE: "initial" insertion mode
2649    
2650 wakaba 1.18 INITIAL: {
2651 wakaba 1.55 if ($token->{type} == DOCTYPE_TOKEN) {
2652 wakaba 1.18 ## NOTE: Conformance checkers MAY, instead of reporting "not HTML5"
2653     ## error, switch to a conformance checking mode for another
2654     ## language.
2655     my $doctype_name = $token->{name};
2656     $doctype_name = '' unless defined $doctype_name;
2657     $doctype_name =~ tr/a-z/A-Z/;
2658     if (not defined $token->{name} or # <!DOCTYPE>
2659     defined $token->{public_identifier} or
2660     defined $token->{system_identifier}) {
2661 wakaba 1.79 !!!cp ('t1');
2662 wakaba 1.113 !!!parse-error (type => 'not HTML5', token => $token);
2663 wakaba 1.18 } elsif ($doctype_name ne 'HTML') {
2664 wakaba 1.79 !!!cp ('t2');
2665 wakaba 1.18 ## ISSUE: ASCII case-insensitive? (in fact it does not matter)
2666 wakaba 1.113 !!!parse-error (type => 'not HTML5', token => $token);
2667 wakaba 1.79 } else {
2668     !!!cp ('t3');
2669 wakaba 1.18 }
2670    
2671     my $doctype = $self->{document}->create_document_type_definition
2672     ($token->{name}); ## ISSUE: If name is missing (e.g. <!DOCTYPE>)?
2673 wakaba 1.122 ## NOTE: Default value for both |public_id| and |system_id| attributes
2674     ## are empty strings, so that we don't set any value in missing cases.
2675 wakaba 1.18 $doctype->public_id ($token->{public_identifier})
2676     if defined $token->{public_identifier};
2677     $doctype->system_id ($token->{system_identifier})
2678     if defined $token->{system_identifier};
2679     ## NOTE: Other DocumentType attributes are null or empty lists.
2680     ## ISSUE: internalSubset = null??
2681     $self->{document}->append_child ($doctype);
2682    
2683 wakaba 1.75 if ($token->{quirks} or $doctype_name ne 'HTML') {
2684 wakaba 1.79 !!!cp ('t4');
2685 wakaba 1.18 $self->{document}->manakai_compat_mode ('quirks');
2686     } elsif (defined $token->{public_identifier}) {
## Upper-case a copy of the public identifier so that it can be
## compared with the upper-case identifiers listed below.
2687     my $pubid = $token->{public_identifier};
2688     $pubid =~ tr/a-z/A-Z/;
2689     if ({
2690     "+//SILMARIL//DTD HTML PRO V0R11 19970101//EN" => 1,
2691     "-//ADVASOFT LTD//DTD HTML 3.0 ASWEDIT + EXTENSIONS//EN" => 1,
2692     "-//AS//DTD HTML 3.0 ASWEDIT + EXTENSIONS//EN" => 1,
2693     "-//IETF//DTD HTML 2.0 LEVEL 1//EN" => 1,
2694     "-//IETF//DTD HTML 2.0 LEVEL 2//EN" => 1,
2695     "-//IETF//DTD HTML 2.0 STRICT LEVEL 1//EN" => 1,
2696     "-//IETF//DTD HTML 2.0 STRICT LEVEL 2//EN" => 1,
2697     "-//IETF//DTD HTML 2.0 STRICT//EN" => 1,
2698     "-//IETF//DTD HTML 2.0//EN" => 1,
2699     "-//IETF//DTD HTML 2.1E//EN" => 1,
2700     "-//IETF//DTD HTML 3.0//EN" => 1,
2701     "-//IETF//DTD HTML 3.0//EN//" => 1,
2702     "-//IETF//DTD HTML 3.2 FINAL//EN" => 1,
2703     "-//IETF//DTD HTML 3.2//EN" => 1,
2704     "-//IETF//DTD HTML 3//EN" => 1,
2705     "-//IETF//DTD HTML LEVEL 0//EN" => 1,
2706     "-//IETF//DTD HTML LEVEL 0//EN//2.0" => 1,
2707     "-//IETF//DTD HTML LEVEL 1//EN" => 1,
2708     "-//IETF//DTD HTML LEVEL 1//EN//2.0" => 1,
2709     "-//IETF//DTD HTML LEVEL 2//EN" => 1,
2710     "-//IETF//DTD HTML LEVEL 2//EN//2.0" => 1,
2711     "-//IETF//DTD HTML LEVEL 3//EN" => 1,
2712     "-//IETF//DTD HTML LEVEL 3//EN//3.0" => 1,
2713     "-//IETF//DTD HTML STRICT LEVEL 0//EN" => 1,
2714     "-//IETF//DTD HTML STRICT LEVEL 0//EN//2.0" => 1,
2715     "-//IETF//DTD HTML STRICT LEVEL 1//EN" => 1,
2716     "-//IETF//DTD HTML STRICT LEVEL 1//EN//2.0" => 1,
2717     "-//IETF//DTD HTML STRICT LEVEL 2//EN" => 1,
2718     "-//IETF//DTD HTML STRICT LEVEL 2//EN//2.0" => 1,
2719     "-//IETF//DTD HTML STRICT LEVEL 3//EN" => 1,
2720     "-//IETF//DTD HTML STRICT LEVEL 3//EN//3.0" => 1,
2721     "-//IETF//DTD HTML STRICT//EN" => 1,
2722     "-//IETF//DTD HTML STRICT//EN//2.0" => 1,
2723     "-//IETF//DTD HTML STRICT//EN//3.0" => 1,
2724     "-//IETF//DTD HTML//EN" => 1,
2725     "-//IETF//DTD HTML//EN//2.0" => 1,
2726     "-//IETF//DTD HTML//EN//3.0" => 1,
2727     "-//METRIUS//DTD METRIUS PRESENTATIONAL//EN" => 1,
2728     "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML STRICT//EN" => 1,
2729     "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML//EN" => 1,
2730     "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 TABLES//EN" => 1,
2731     "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML STRICT//EN" => 1,
2732     "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML//EN" => 1,
2733     "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 TABLES//EN" => 1,
2734     "-//NETSCAPE COMM. CORP.//DTD HTML//EN" => 1,
2735     "-//NETSCAPE COMM. CORP.//DTD STRICT HTML//EN" => 1,
2736     "-//O'REILLY AND ASSOCIATES//DTD HTML 2.0//EN" => 1,
2737     "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED 1.0//EN" => 1,
2738 wakaba 1.72 "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED RELAXED 1.0//EN" => 1,
2739     "-//SOFTQUAD SOFTWARE//DTD HOTMETAL PRO 6.0::19990601::EXTENSIONS TO HTML 4.0//EN" => 1,
2740     "-//SOFTQUAD//DTD HOTMETAL PRO 4.0::19971010::EXTENSIONS TO HTML 4.0//EN" => 1,
2741 wakaba 1.18 "-//SPYGLASS//DTD HTML 2.0 EXTENDED//EN" => 1,
2742     "-//SQ//DTD HTML 2.0 HOTMETAL + EXTENSIONS//EN" => 1,
2743     "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA HTML//EN" => 1,
2744     "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA STRICT HTML//EN" => 1,
2745     "-//W3C//DTD HTML 3 1995-03-24//EN" => 1,
2746     "-//W3C//DTD HTML 3.2 DRAFT//EN" => 1,
2747     "-//W3C//DTD HTML 3.2 FINAL//EN" => 1,
2748     "-//W3C//DTD HTML 3.2//EN" => 1,
2749     "-//W3C//DTD HTML 3.2S DRAFT//EN" => 1,
2750     "-//W3C//DTD HTML 4.0 FRAMESET//EN" => 1,
2751     "-//W3C//DTD HTML 4.0 TRANSITIONAL//EN" => 1,
2752     "-//W3C//DTD HTML EXPERIMETNAL 19960712//EN" => 1,
2753     "-//W3C//DTD HTML EXPERIMENTAL 970421//EN" => 1,
2754     "-//W3C//DTD W3 HTML//EN" => 1,
2755     "-//W3O//DTD W3 HTML 3.0//EN" => 1,
2756     "-//W3O//DTD W3 HTML 3.0//EN//" => 1,
2757     "-//W3O//DTD W3 HTML STRICT 3.0//EN//" => 1,
2758     "-//WEBTECHS//DTD MOZILLA HTML 2.0//EN" => 1,
2759     "-//WEBTECHS//DTD MOZILLA HTML//EN" => 1,
2760     "-/W3C/DTD HTML 4.0 TRANSITIONAL/EN" => 1,
2761     "HTML" => 1,
2762     }->{$pubid}) {
2763 wakaba 1.79 !!!cp ('t5');
2764 wakaba 1.18 $self->{document}->manakai_compat_mode ('quirks');
2765     } elsif ($pubid eq "-//W3C//DTD HTML 4.01 FRAMESET//EN" or
2766     $pubid eq "-//W3C//DTD HTML 4.01 TRANSITIONAL//EN") {
2767     if (defined $token->{system_identifier}) {
2768 wakaba 1.79 !!!cp ('t6');
2769 wakaba 1.18 $self->{document}->manakai_compat_mode ('quirks');
2770     } else {
2771 wakaba 1.79 !!!cp ('t7');
2772 wakaba 1.18 $self->{document}->manakai_compat_mode ('limited quirks');
2773 wakaba 1.3 }
2774 wakaba 1.80 } elsif ($pubid eq "-//W3C//DTD XHTML 1.0 FRAMESET//EN" or
2775     $pubid eq "-//W3C//DTD XHTML 1.0 TRANSITIONAL//EN") {
2776 wakaba 1.79 !!!cp ('t8');
2777 wakaba 1.18 $self->{document}->manakai_compat_mode ('limited quirks');
2778 wakaba 1.79 } else {
2779     !!!cp ('t9');
2780 wakaba 1.18 }
2781 wakaba 1.79 } else {
2782     !!!cp ('t10');
2783 wakaba 1.18 }
2784     if (defined $token->{system_identifier}) {
2785     my $sysid = $token->{system_identifier};
2786     $sysid =~ tr/A-Z/a-z/;
2787     if ($sysid eq "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
2788 wakaba 1.80 ## TODO: Check the spec: PUBLIC "(limited quirks)" "(quirks)"
2789 wakaba 1.18 $self->{document}->manakai_compat_mode ('quirks');
2790 wakaba 1.79 !!!cp ('t11');
2791     } else {
2792     !!!cp ('t12');
2793 wakaba 1.18 }
2794 wakaba 1.79 } else {
2795     !!!cp ('t13');
2796 wakaba 1.18 }
2797    
2798 wakaba 1.84 ## Go to the "before html" insertion mode.
2799 wakaba 1.18 !!!next-token;
2800     return;
2801     } elsif ({
2802 wakaba 1.55 START_TAG_TOKEN, 1,
2803     END_TAG_TOKEN, 1,
2804     END_OF_FILE_TOKEN, 1,
2805 wakaba 1.18 }->{$token->{type}}) {
2806 wakaba 1.79 !!!cp ('t14');
2807 wakaba 1.113 !!!parse-error (type => 'no DOCTYPE', token => $token);
2808 wakaba 1.18 $self->{document}->manakai_compat_mode ('quirks');
2809 wakaba 1.84 ## Go to the "before html" insertion mode.
2810 wakaba 1.18 ## reprocess
2811 wakaba 1.125 !!!ack-later;
2812 wakaba 1.18 return;
2813 wakaba 1.55 } elsif ($token->{type} == CHARACTER_TOKEN) {
2814 wakaba 1.18 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D
2815     ## Ignore the token
2816 wakaba 1.26
2817 wakaba 1.18 unless (length $token->{data}) {
2818 wakaba 1.79 !!!cp ('t15');
2819 wakaba 1.84 ## Stay in the insertion mode.
2820 wakaba 1.18 !!!next-token;
2821     redo INITIAL;
2822 wakaba 1.79 } else {
2823     !!!cp ('t16');
2824 wakaba 1.3 }
2825 wakaba 1.79 } else {
2826     !!!cp ('t17');
2827 wakaba 1.3 }
2828 wakaba 1.18
2829 wakaba 1.113 !!!parse-error (type => 'no DOCTYPE', token => $token);
2830 wakaba 1.18 $self->{document}->manakai_compat_mode ('quirks');
2831 wakaba 1.84 ## Go to the "before html" insertion mode.
2832 wakaba 1.18 ## reprocess
2833     return;
2834 wakaba 1.55 } elsif ($token->{type} == COMMENT_TOKEN) {
2835 wakaba 1.79 !!!cp ('t18');
2836 wakaba 1.18 my $comment = $self->{document}->create_comment ($token->{data});
2837     $self->{document}->append_child ($comment);
2838    
2839 wakaba 1.84 ## Stay in the insertion mode.
2840 wakaba 1.18 !!!next-token;
2841     redo INITIAL;
2842     } else {
2843 wakaba 1.55 die "$0: $token->{type}: Unknown token type";
2844 wakaba 1.18 }
2845     } # INITIAL
2846 wakaba 1.79
2847     die "$0: _tree_construction_initial: This should be never reached";
2848 wakaba 1.3 } # _tree_construction_initial
2849    
## Handles the "before html" insertion mode for the current |$token|.
##
## An |html| start tag creates the root element from the token; any
## other start tag, an end tag, end-of-file or non-white-space
## character data creates an implied |html| element and leaves the
## token for reprocessing.  In both cases the element is appended to
## the document, pushed onto the stack of open elements, and the
## application cache selection callback is invoked exactly once (with
## the |manifest| attribute value if present, |undef| otherwise).
## DOCTYPE tokens are reported as errors and ignored, comments are
## appended to the document, and leading white space is dropped.
##
## Returns when the parser should go to the "before head" mode.
2850     sub _tree_construction_root_element ($) {
2851     my $self = shift;
2852 wakaba 1.84
2853     ## NOTE: "before html" insertion mode.
2854 wakaba 1.3
2855     B: {
2856 wakaba 1.55 if ($token->{type} == DOCTYPE_TOKEN) {
2857 wakaba 1.79 !!!cp ('t19');
2858 wakaba 1.113 !!!parse-error (type => 'in html:#DOCTYPE', token => $token);
2859 wakaba 1.3 ## Ignore the token
2860 wakaba 1.84 ## Stay in the insertion mode.
2861 wakaba 1.3 !!!next-token;
2862     redo B;
2863 wakaba 1.55 } elsif ($token->{type} == COMMENT_TOKEN) {
2864 wakaba 1.79 !!!cp ('t20');
2865 wakaba 1.3 my $comment = $self->{document}->create_comment ($token->{data});
2866     $self->{document}->append_child ($comment);
2867 wakaba 1.84 ## Stay in the insertion mode.
2868 wakaba 1.3 !!!next-token;
2869     redo B;
2870 wakaba 1.55 } elsif ($token->{type} == CHARACTER_TOKEN) {
2871 wakaba 1.26 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D
2872     ## Ignore the token.
2873    
2874 wakaba 1.3 unless (length $token->{data}) {
2875 wakaba 1.79 !!!cp ('t21');
2876 wakaba 1.84 ## Stay in the insertion mode.
2877 wakaba 1.3 !!!next-token;
2878     redo B;
2879 wakaba 1.79 } else {
2880     !!!cp ('t22');
2881 wakaba 1.3 }
2882 wakaba 1.79 } else {
2883     !!!cp ('t23');
2884 wakaba 1.3 }
## NOTE: The application cache selection callback is not invoked
## here; this branch falls through to the implied |html| element
## code below, which invokes it once after the element is created.
2887    
2888     #
2889     } elsif ($token->{type} == START_TAG_TOKEN) {
2890 wakaba 1.84 if ($token->{tag_name} eq 'html') {
2891     my $root_element;
2892 wakaba 1.116 !!!create-element ($root_element, $token->{tag_name}, $token->{attributes}, $token);
2893 wakaba 1.84 $self->{document}->append_child ($root_element);
2894 wakaba 1.123 push @{$self->{open_elements}},
2895     [$root_element, $el_category->{html}];
2896 wakaba 1.84
2897     if ($token->{attributes}->{manifest}) {
2898     !!!cp ('t24');
2899     $self->{application_cache_selection}
2900     ->($token->{attributes}->{manifest}->{value});
2901 wakaba 1.118 ## ISSUE: Spec is unclear on relative references.
2902     ## According to Hixie (#whatwg 2008-03-19), it should be
2903     ## resolved against the base URI of the document in HTML
2904     ## or xml:base of the element in XHTML.
2905 wakaba 1.84 } else {
2906     !!!cp ('t25');
2907     $self->{application_cache_selection}->(undef);
2908     }
2909    
2910 wakaba 1.125 !!!nack ('t25c');
2911    
2912 wakaba 1.84 !!!next-token;
2913     return; ## Go to the "before head" insertion mode.
2914 wakaba 1.61 } else {
2915 wakaba 1.84 !!!cp ('t25.1');
2916     #
2917 wakaba 1.61 }
2918 wakaba 1.3 } elsif ({
2919 wakaba 1.55 END_TAG_TOKEN, 1,
2920     END_OF_FILE_TOKEN, 1,
2921 wakaba 1.3 }->{$token->{type}}) {
2922 wakaba 1.79 !!!cp ('t26');
2923 wakaba 1.3 #
2924     } else {
2925 wakaba 1.55 die "$0: $token->{type}: Unknown token type";
2926 wakaba 1.3 }
2927 wakaba 1.61
## Implied |html| element: reached for every token that is neither
## ignored above nor an |html| start tag.
2928 wakaba 1.116 my $root_element; !!!create-element ($root_element, 'html',, $token);
2929 wakaba 1.84 $self->{document}->append_child ($root_element);
2930 wakaba 1.123 push @{$self->{open_elements}}, [$root_element, $el_category->{html}];
2931 wakaba 1.84
2932     $self->{application_cache_selection}->(undef);
2933    
2934     ## NOTE: Reprocess the token.
2935 wakaba 1.125 !!!ack-later;
2936 wakaba 1.84 return; ## Go to the "before head" insertion mode.
2937    
2938     ## ISSUE: There is an issue in the spec
2939 wakaba 1.3 } # B
2940 wakaba 1.79
2941     die "$0: _tree_construction_root_element: This should never be reached";
2942 wakaba 1.3 } # _tree_construction_root_element
2943    
## Resets |$self->{insertion_mode}| according to the stack of open
## elements.
##
## Starting from the last entry of |$self->{open_elements}| and moving
## towards the first one, the local name of each node is looked up in
## a table of insertion modes; the first hit decides the mode.  When
## the first entry of the stack is reached, |$self->{inner_html_node}|
## (if defined and not in the TABLE_CELL_EL category) is examined in
## its place.  A node in the HTML_EL category selects BEFORE_HEAD_IM
## or AFTER_HEAD_IM depending on whether |$self->{head_element}| is
## defined; if nothing matched by the time the first entry has been
## examined, the mode becomes IN_BODY_IM.
2944     sub _reset_insertion_mode ($) {
2945     my $self = shift;
2946    
2947     ## Step 1
2948     my $last;
2949    
2950     ## Step 2
2951     my $i = -1;
2952     my $node = $self->{open_elements}->[$i];
2953    
2954     ## Step 3
2955     S3: {
2956 wakaba 1.29 if ($self->{open_elements}->[0]->[0] eq $node->[0]) {
2957     $last = 1;
2958     if (defined $self->{inner_html_node}) {
2959 wakaba 1.123 if ($self->{inner_html_node}->[1] & TABLE_CELL_EL) {
2960 wakaba 1.79 !!!cp ('t27');
2961 wakaba 1.29 #
2962     } else {
2963 wakaba 1.79 !!!cp ('t28');
2964 wakaba 1.29 $node = $self->{inner_html_node};
2965     }
2966 wakaba 1.3 }
2967     }
2968    
2969     ## Step 4..13
2970     my $new_mode = {
2971 wakaba 1.54 select => IN_SELECT_IM,
2972 wakaba 1.83 ## NOTE: |option| and |optgroup| do not set
2973     ## insertion mode to "in select" by themselves.
2974 wakaba 1.54 td => IN_CELL_IM,
2975     th => IN_CELL_IM,
2976     tr => IN_ROW_IM,
2977     tbody => IN_TABLE_BODY_IM,
2978     thead => IN_TABLE_BODY_IM,
2979     tfoot => IN_TABLE_BODY_IM,
2980     caption => IN_CAPTION_IM,
2981     colgroup => IN_COLUMN_GROUP_IM,
2982     table => IN_TABLE_IM,
2983     head => IN_BODY_IM, # not in head!
2984     body => IN_BODY_IM,
2985     frameset => IN_FRAMESET_IM,
2986 wakaba 1.123 }->{$node->[0]->manakai_local_name};
2987     ## TODO: Foreign namespace case OK?
## NOTE: Assign and return as separate statements so that the
## return does not depend on the mode constant being a true value.
2988 wakaba 1.3 if (defined $new_mode) { $self->{insertion_mode} = $new_mode; return; }
2989    
2990     ## Step 14
2991 wakaba 1.123 if ($node->[1] & HTML_EL) {
2992 wakaba 1.3 unless (defined $self->{head_element}) {
2993 wakaba 1.79 !!!cp ('t29');
2994 wakaba 1.54 $self->{insertion_mode} = BEFORE_HEAD_IM;
2995 wakaba 1.3 } else {
2996 wakaba 1.81 ## ISSUE: Can this state be reached?
2997 wakaba 1.79 !!!cp ('t30');
2998 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
2999 wakaba 1.3 }
3000     return;
3001 wakaba 1.79 } else {
3002     !!!cp ('t31');
3003 wakaba 1.3 }
3004    
3005     ## Step 15
3006 wakaba 1.54 if ($last) { $self->{insertion_mode} = IN_BODY_IM; return; }
3007 wakaba 1.3
3008     ## Step 16
3009     $i--;
3010     $node = $self->{open_elements}->[$i];
3011    
3012     ## Step 17
3013     redo S3;
3014     } # S3
3015 wakaba 1.79
3016     die "$0: _reset_insertion_mode: This line should never be reached";
3017 wakaba 1.3 } # _reset_insertion_mode
3018    
3019     sub _tree_construction_main ($) {
3020     my $self = shift;
3021    
3022 wakaba 1.1 my $active_formatting_elements = [];
3023    
## Reconstructs the active formatting elements.
##
## Argument: |$insert| - a code reference called with each newly
## cloned element node (it is expected to insert the node into the
## tree; what it does is not visible here).
##
## Does nothing if the list is empty or if its last entry is a marker
## or is still in the stack of open elements.  Otherwise it rewinds to
## the entry following the nearest earlier entry that is a marker or
## is in the stack (or to the first entry), and from there to the end
## of the list shallow-clones each entry's element, passes the clone
## to |$insert|, pushes it onto the stack of open elements and
## replaces the list entry with the clone's entry.
3024     my $reconstruct_active_formatting_elements = sub { # MUST
3025     my $insert = shift;
3026    
3027     ## Step 1
3028     return unless @$active_formatting_elements;
3029    
3030     ## Step 3
3031     my $i = -1;
3032     my $entry = $active_formatting_elements->[$i];
3033    
3034     ## Step 2
3035     return if $entry->[0] eq '#marker';
3036 wakaba 1.3 for (@{$self->{open_elements}}) {
3037 wakaba 1.1 if ($entry->[0] eq $_->[0]) {
3038 wakaba 1.79 !!!cp ('t32');
3039 wakaba 1.1 return;
3040     }
3041     }
3042    
3043     S4: {
3044     ## Step 4
3045     last S4 if $active_formatting_elements->[0]->[0] eq $entry->[0];
3046    
3047     ## Step 5
3048     $i--;
3049     $entry = $active_formatting_elements->[$i];
3050    
3051     ## Step 6
3052     if ($entry->[0] eq '#marker') {
3053 wakaba 1.81 !!!cp ('t33_1');
3054 wakaba 1.1 #
3055     } else {
3056     my $in_open_elements;
3057 wakaba 1.3 OE: for (@{$self->{open_elements}}) {
3058 wakaba 1.1 if ($entry->[0] eq $_->[0]) {
3059 wakaba 1.79 !!!cp ('t33');
3060 wakaba 1.1 $in_open_elements = 1;
3061     last OE;
3062     }
3063     }
3064     if ($in_open_elements) {
3065 wakaba 1.79 !!!cp ('t34');
3066 wakaba 1.1 #
3067     } else {
3068 wakaba 1.81 ## NOTE: <!DOCTYPE HTML><p><b><i><u></p> <p>X
3069 wakaba 1.79 !!!cp ('t35');
3070 wakaba 1.1 redo S4;
3071     }
3072     }
3073    
3074     ## Step 7
3075     $i++;
3076     $entry = $active_formatting_elements->[$i];
3077     } # S4
3078    
3079     S7: {
3080     ## Step 8
3081     my $clone = [$entry->[0]->clone_node (0), $entry->[1]];
3082    
3083     ## Step 9
3084     $insert->($clone->[0]);
3085 wakaba 1.3 push @{$self->{open_elements}}, $clone;
3086 wakaba 1.1
3087     ## Step 10
3088 wakaba 1.3 $active_formatting_elements->[$i] = $self->{open_elements}->[-1];
3089 wakaba 1.1
3090     ## Step 11
## NOTE: The loop ends once the entry just replaced is the last
## entry of the list.
3091     unless ($clone->[0] eq $active_formatting_elements->[-1]->[0]) {
3092 wakaba 1.79 !!!cp ('t36');
3093 wakaba 1.1 ## Step 7'
3094     $i++;
3095     $entry = $active_formatting_elements->[$i];
3096    
3097     redo S7;
3098     }
3099 wakaba 1.79
3100     !!!cp ('t37');
3101 wakaba 1.1 } # S7
3102     }; # $reconstruct_active_formatting_elements
3103    
## Clears the list of active formatting elements up to the last
## marker: the last '#marker' entry and every entry after it are
## removed.  If the list contains no marker it is left unchanged.
3104     my $clear_up_to_marker = sub {
3105     for (reverse 0..$#$active_formatting_elements) {
3106     if ($active_formatting_elements->[$_]->[0] eq '#marker') {
3107 wakaba 1.79 !!!cp ('t38');
3108 wakaba 1.1 splice @$active_formatting_elements, $_;
3109     return;
3110     }
3111     }
3112 wakaba 1.79
3113     !!!cp ('t39');
3114 wakaba 1.1 }; # $clear_up_to_marker
3115    
3116 wakaba 1.96 my $insert;
3117    
## Handles a start tag whose content is CDATA or RCDATA.
##
## Argument: |$content_model_flag| - CDATA_CONTENT_MODEL or
## RCDATA_CONTENT_MODEL; any other value makes the error path die.
##
## Creates an element from the current start tag |$token|, inserts it
## with |$insert|, switches the tokenizer's content model, gathers
## the following character tokens into one text node appended to the
## element, and restores PCDATA_CONTENT_MODEL.  If the token that ends
## the text is not the matching end tag, a parse error is reported.
## In either case that token is then skipped.
3118     my $parse_rcdata = sub ($) {
3119     my ($content_model_flag) = @_;
3120 wakaba 1.25
3121     ## Step 1
3122     my $start_tag_name = $token->{tag_name};
3123     my $el;
3124 wakaba 1.116 !!!create-element ($el, $start_tag_name, $token->{attributes}, $token);
3125 wakaba 1.25
3126     ## Step 2
3127 wakaba 1.96 $insert->($el);
3128 wakaba 1.25
3129     ## Step 3
3130 wakaba 1.40 $self->{content_model} = $content_model_flag; # CDATA or RCDATA
3131 wakaba 1.13 delete $self->{escape}; # MUST
3132 wakaba 1.25
3133     ## Step 4
3134 wakaba 1.1 my $text = '';
3135 wakaba 1.125 !!!nack ('t40.1');
3136 wakaba 1.1 !!!next-token;
3137 wakaba 1.55 while ($token->{type} == CHARACTER_TOKEN) { # or until stop tokenizing
3138 wakaba 1.79 !!!cp ('t40');
3139 wakaba 1.1 $text .= $token->{data};
3140     !!!next-token;
3141 wakaba 1.25 }
3142    
3143     ## Step 5
3144 wakaba 1.1 if (length $text) {
3145 wakaba 1.79 !!!cp ('t41');
## NOTE: The inner |$text| (a text node) shadows the outer string;
## the right-hand side still refers to the outer string.
3146 wakaba 1.25 my $text = $self->{document}->create_text_node ($text);
3147     $el->append_child ($text);
3148 wakaba 1.1 }
3149 wakaba 1.25
3150     ## Step 6
3151 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL;
3152 wakaba 1.25
3153     ## Step 7
3154 wakaba 1.79 if ($token->{type} == END_TAG_TOKEN and
3155     $token->{tag_name} eq $start_tag_name) {
3156     !!!cp ('t42');
3157 wakaba 1.1 ## Ignore the token
3158     } else {
3159 wakaba 1.96 ## NOTE: An end-of-file token.
3160     if ($content_model_flag == CDATA_CONTENT_MODEL) {
3161     !!!cp ('t43');
3162 wakaba 1.113 !!!parse-error (type => 'in CDATA:#'.$token->{type}, token => $token);
3163 wakaba 1.96 } elsif ($content_model_flag == RCDATA_CONTENT_MODEL) {
3164     !!!cp ('t44');
3165 wakaba 1.113 !!!parse-error (type => 'in RCDATA:#'.$token->{type}, token => $token);
3166 wakaba 1.96 } else {
3167     die "$0: $content_model_flag in parse_rcdata";
3168     }
3169 wakaba 1.1 }
3170     !!!next-token;
3171 wakaba 1.25 }; # $parse_rcdata
3172 wakaba 1.1
## Handles a |script| start tag (the current |$token|).
##
## Creates the |script| element, switches the tokenizer to
## CDATA_CONTENT_MODEL, appends the following character tokens to the
## element as text and restores PCDATA_CONTENT_MODEL.  A parse error
## is reported if the text is not ended by a |script| end tag.  The
## element is inserted with |$insert| only when
## |$self->{inner_html_node}| is not defined.  Finally the token that
## ended the text is skipped.
3173 wakaba 1.96 my $script_start_tag = sub () {
3174 wakaba 1.1 my $script_el;
3175 wakaba 1.116 !!!create-element ($script_el, 'script', $token->{attributes}, $token);
3176 wakaba 1.1 ## TODO: mark as "parser-inserted"
3177    
3178 wakaba 1.40 $self->{content_model} = CDATA_CONTENT_MODEL;
3179 wakaba 1.13 delete $self->{escape}; # MUST
3180 wakaba 1.1
3181     my $text = '';
3182 wakaba 1.125 !!!nack ('t45.1');
3183 wakaba 1.1 !!!next-token;
3184 wakaba 1.55 while ($token->{type} == CHARACTER_TOKEN) {
3185 wakaba 1.79 !!!cp ('t45');
3186 wakaba 1.1 $text .= $token->{data};
3187     !!!next-token;
3188     } # stop if non-character token or tokenizer stops tokenising
3189     if (length $text) {
3190 wakaba 1.79 !!!cp ('t46');
3191 wakaba 1.1 $script_el->manakai_append_text ($text);
3192     }
3193    
3194 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL;
3195 wakaba 1.1
3196 wakaba 1.55 if ($token->{type} == END_TAG_TOKEN and
3197 wakaba 1.1 $token->{tag_name} eq 'script') {
3198 wakaba 1.79 !!!cp ('t47');
3199 wakaba 1.1 ## Ignore the token
3200     } else {
3201 wakaba 1.79 !!!cp ('t48');
3202 wakaba 1.113 !!!parse-error (type => 'in CDATA:#'.$token->{type}, token => $token);
3203 wakaba 1.1 ## ISSUE: And ignore?
3204     ## TODO: mark as "already executed"
3205     }
3206    
3207 wakaba 1.3 if (defined $self->{inner_html_node}) {
3208 wakaba 1.79 !!!cp ('t49');
3209 wakaba 1.3 ## TODO: mark as "already executed"
3210     } else {
3211 wakaba 1.79 !!!cp ('t50');
3212 wakaba 1.1 ## TODO: $old_insertion_point = current insertion point
3213     ## TODO: insertion point = just before the next input character
3214 wakaba 1.25
3215     $insert->($script_el);
3216 wakaba 1.1
3217     ## TODO: insertion point = $old_insertion_point (might be "undefined")
3218    
3219     ## TODO: if there is a script that will execute as soon as the parser resume, then...
3220     }
3221    
3222     !!!next-token;
3223     }; # $script_start_tag
3224    
3225 wakaba 1.102 ## NOTE: $open_tables->[-1]->[0] is the "current table" element node.
3226     ## NOTE: $open_tables->[-1]->[1] is the "tainted" flag.
3227     my $open_tables = [[$self->{open_elements}->[0]->[0]]];
3228    
## Handles an end tag token for a formatting element by running the
## adoption agency algorithm (AAA).
##
## Argument: the end tag token (a hash reference); its |tag_name| is the
## local name of the formatting element being closed.
##
## The body is the block |FET|, which is re-run (|redo FET|) after each
## successful pass of steps 1-13.  The closure returns only through one
## of the early exits below, each of which first advances to the next
## token with |!!!next-token|.
my $formatting_end_tag = sub {
  my $end_tag_token = shift;
  my $tag_name = $end_tag_token->{tag_name};

  ## NOTE: The adoption agency algorithm (AAA).

  FET: {
    ## Step 1
    ## Find the last entry in the list of active formatting elements,
    ## after the last marker, whose local name is the tag name.
    my $formatting_element;
    my $formatting_element_i_in_active;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq '#marker') {
        !!!cp ('t52');
        last AFE;
      } elsif ($active_formatting_elements->[$_]->[0]->manakai_local_name
                   eq $tag_name) {
        !!!cp ('t51');
        $formatting_element = $active_formatting_elements->[$_];
        $formatting_element_i_in_active = $_;
        last AFE;
      }
    } # AFE
    unless (defined $formatting_element) {
      !!!cp ('t53');
      !!!parse-error (type => 'unmatched end tag:'.$tag_name, token => $end_tag_token);
      ## Ignore the token
      !!!next-token;
      return;
    }
    ## has an element in scope
    ## Walk the stack of open elements from the current node upwards;
    ## once a scoping element has been passed, the formatting element is
    ## no longer "in scope".
    my $in_scope = 1;
    my $formatting_element_i_in_open;
    INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if ($node->[0] eq $formatting_element->[0]) {
        if ($in_scope) {
          !!!cp ('t54');
          $formatting_element_i_in_open = $_;
          last INSCOPE;
        } else { # in open elements but not in scope
          !!!cp ('t55');
          ## The tag name is taken from the token passed to this
          ## closure, as in the 't53' error above.
          !!!parse-error (type => 'unmatched end tag:'.$tag_name,
                          token => $end_tag_token);
          ## Ignore the token
          !!!next-token;
          return;
        }
      } elsif ($node->[1] & SCOPING_EL) {
        !!!cp ('t56');
        $in_scope = 0;
      }
    } # INSCOPE
    unless (defined $formatting_element_i_in_open) {
      !!!cp ('t57');
      !!!parse-error (type => 'unmatched end tag:'.$tag_name,
                      token => $end_tag_token);
      ## The formatting element is in the list of active formatting
      ## elements but not in the stack of open elements: remove that
      ## very entry from the list.  It is not necessarily the last
      ## entry, so it is removed by index rather than with |pop|.
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      !!!next-token; ## TODO: ok?
      return;
    }
    if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
      !!!cp ('t58');
      !!!parse-error (type => 'not closed',
                      value => $self->{open_elements}->[-1]->[0]
                          ->manakai_local_name,
                      token => $end_tag_token);
    }

    ## Step 2
    ## Scanning from the current node upwards to the formatting element,
    ## the last candidate seen (i.e. the one nearest to the formatting
    ## element in the stack) is kept as the furthest block.
    my $furthest_block;
    my $furthest_block_i_in_open;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if (not ($node->[1] & FORMATTING_EL) and
          #not $phrasing_category->{$node->[1]} and
          ($node->[1] & SPECIAL_EL or
           $node->[1] & SCOPING_EL)) { ## Scoping is redundant, maybe
        !!!cp ('t59');
        $furthest_block = $node;
        $furthest_block_i_in_open = $_;
      } elsif ($node->[0] eq $formatting_element->[0]) {
        !!!cp ('t60');
        last OE;
      }
    } # OE

    ## Step 3
    ## No furthest block: pop everything from the formatting element
    ## up to the current node, and drop the formatting element from the
    ## list of active formatting elements.
    unless (defined $furthest_block) { # MUST
      !!!cp ('t61');
      splice @{$self->{open_elements}}, $formatting_element_i_in_open;
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      !!!next-token;
      return;
    }

    ## Step 4
    my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1];

    ## Step 5
    my $furthest_block_parent = $furthest_block->[0]->parent_node;
    if (defined $furthest_block_parent) {
      !!!cp ('t62');
      $furthest_block_parent->remove_child ($furthest_block->[0]);
    }

    ## Step 6
    ## The bookmark is remembered as the element of the entry that
    ## precedes the formatting element in the active list.
    my $bookmark_prev_el
        = $active_formatting_elements->[$formatting_element_i_in_active - 1]
            ->[0];

    ## Step 7
    my $node = $furthest_block;
    my $node_i_in_open = $furthest_block_i_in_open;
    my $last_node = $furthest_block;
    S7: {
      ## Step 1
      $node_i_in_open--;
      $node = $self->{open_elements}->[$node_i_in_open];

      ## Step 2
      ## If |$node| is not in the list of active formatting elements,
      ## remove it from the stack of open elements and restart |S7|.
      my $node_i_in_active;
      S7S2: {
        for (reverse 0..$#$active_formatting_elements) {
          if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
            !!!cp ('t63');
            $node_i_in_active = $_;
            last S7S2;
          }
        }
        splice @{$self->{open_elements}}, $node_i_in_open, 1;
        redo S7;
      } # S7S2

      ## Step 3
      last S7 if $node->[0] eq $formatting_element->[0];

      ## Step 4
      if ($last_node->[0] eq $furthest_block->[0]) {
        !!!cp ('t64');
        $bookmark_prev_el = $node->[0];
      }

      ## Step 5
      ## A node that already has children is replaced, in both lists, by
      ## a shallow clone carrying the same category value.
      if ($node->[0]->has_child_nodes ()) {
        !!!cp ('t65');
        my $clone = [$node->[0]->clone_node (0), $node->[1]];
        $active_formatting_elements->[$node_i_in_active] = $clone;
        $self->{open_elements}->[$node_i_in_open] = $clone;
        $node = $clone;
      }

      ## Step 6
      $node->[0]->append_child ($last_node->[0]);

      ## Step 7
      $last_node = $node;

      ## Step 8
      redo S7;
    } # S7

    ## Step 8
    ## If the common ancestor is a table-rows category element, the last
    ## node is foster parented (same logic as |$insert_to_foster|);
    ## otherwise it is appended to the common ancestor.
    if ($common_ancestor_node->[1] & TABLE_ROWS_EL) {
      my $foster_parent_element;
      my $next_sibling;
      OE: for (reverse 0..$#{$self->{open_elements}}) {
        if ($self->{open_elements}->[$_]->[1] & TABLE_EL) {
          my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
          if (defined $parent and $parent->node_type == 1) {
            !!!cp ('t65.1');
            $foster_parent_element = $parent;
            $next_sibling = $self->{open_elements}->[$_]->[0];
          } else {
            !!!cp ('t65.2');
            $foster_parent_element
                = $self->{open_elements}->[$_ - 1]->[0];
          }
          last OE;
        }
      } # OE
      $foster_parent_element = $self->{open_elements}->[0]->[0]
          unless defined $foster_parent_element;
      $foster_parent_element->insert_before ($last_node->[0], $next_sibling);
      $open_tables->[-1]->[1] = 1; # tainted
    } else {
      !!!cp ('t65.3');
      $common_ancestor_node->[0]->append_child ($last_node->[0]);
    }

    ## Step 9
    my $clone = [$formatting_element->[0]->clone_node (0),
                 $formatting_element->[1]];

    ## Step 10
    ## Copy the child list first, since moving the children modifies it.
    my @cn = @{$furthest_block->[0]->child_nodes};
    $clone->[0]->append_child ($_) for @cn;

    ## Step 11
    $furthest_block->[0]->append_child ($clone->[0]);

    ## Step 12
    ## Remove the formatting element from the active list and insert the
    ## clone just after the bookmark entry.  |$i| is adjusted when the
    ## removal happens after the bookmark index has been recorded.
    my $i;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
        !!!cp ('t66');
        splice @$active_formatting_elements, $_, 1;
        $i-- and last AFE if defined $i;
      } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
        !!!cp ('t67');
        $i = $_;
      }
    } # AFE
    splice @$active_formatting_elements, $i + 1, 0, $clone;

    ## Step 13
    undef $i;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
        !!!cp ('t68');
        splice @{$self->{open_elements}}, $_, 1;
        $i-- and last OE if defined $i;
      } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
        !!!cp ('t69');
        $i = $_;
      }
    } # OE
    ## NOTE(review): This |splice| uses a length of 1, so an entry that
    ## follows the furthest block in the stack of open elements, if any,
    ## is replaced by the clone rather than kept.  Step 12 inserts with
    ## a length of 0.  Confirm against the specification whether this
    ## should be an insertion too.
    splice @{$self->{open_elements}}, $i + 1, 1, $clone;

    ## Step 14
    redo FET;
  } # FET
}; # $formatting_end_tag
3461    
## Default insertion routine: appends the given node as the last child
## of the current node (the last entry of the stack of open elements).
## It is also installed as the initial value of |$insert|.
$insert = my $insert_to_current = sub {
  my $current_node = $self->{open_elements}->[-1]->[0];
  $current_node->append_child ($_[0]);
}; # $insert_to_current
3465    
## Insertion routine with foster parenting.
##
## When the current node is a table-rows category element, the node is
## inserted relative to the nearest |table| entry on the stack of open
## elements, and the current table is flagged as tainted.  Otherwise
## the node is appended to the current node.
my $insert_to_foster = sub {
  my $child = shift;
  my $stack = $self->{open_elements};

  if ($stack->[-1]->[1] & TABLE_ROWS_EL) {
    # MUST
    my ($foster_parent_element, $next_sibling);
    OE: for my $i (reverse 0..$#$stack) {
      my $entry = $stack->[$i];
      next OE unless $entry->[1] & TABLE_EL;

      my $table_el = $entry->[0];
      my $parent = $table_el->parent_node;
      if (defined $parent and $parent->node_type == 1) {
        !!!cp ('t70');
        ## The table has an element parent: insert just before the table.
        $foster_parent_element = $parent;
        $next_sibling = $table_el;
      } else {
        !!!cp ('t71');
        ## Otherwise use the entry just above the table in the stack.
        $foster_parent_element = $stack->[$i - 1]->[0];
      }
      last OE;
    } # OE

    ## No |table| entry found: fall back to the first entry of the stack.
    unless (defined $foster_parent_element) {
      $foster_parent_element = $stack->[0]->[0];
    }
    $foster_parent_element->insert_before ($child, $next_sibling);
    $open_tables->[-1]->[1] = 1; # tainted
  } else {
    !!!cp ('t72');
    $stack->[-1]->[0]->append_child ($child);
  }
}; # $insert_to_foster
3497    
3498 wakaba 1.52 B: {
3499 wakaba 1.55 if ($token->{type} == DOCTYPE_TOKEN) {
3500 wakaba 1.79 !!!cp ('t73');
3501 wakaba 1.113 !!!parse-error (type => 'DOCTYPE in the middle', token => $token);
3502 wakaba 1.52 ## Ignore the token
3503     ## Stay in the phase
3504     !!!next-token;
3505     redo B;
3506 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN and
3507 wakaba 1.52 $token->{tag_name} eq 'html') {
3508 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
3509 wakaba 1.79 !!!cp ('t79');
3510 wakaba 1.113 !!!parse-error (type => 'after html:html', token => $token);
3511 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
3512     } elsif ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
3513 wakaba 1.79 !!!cp ('t80');
3514 wakaba 1.113 !!!parse-error (type => 'after html:html', token => $token);
3515 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
3516 wakaba 1.79 } else {
3517     !!!cp ('t81');
3518 wakaba 1.52 }
3519    
3520 wakaba 1.84 !!!cp ('t82');
3521 wakaba 1.113 !!!parse-error (type => 'not first start tag', token => $token);
3522 wakaba 1.52 my $top_el = $self->{open_elements}->[0]->[0];
3523     for my $attr_name (keys %{$token->{attributes}}) {
3524     unless ($top_el->has_attribute_ns (undef, $attr_name)) {
3525 wakaba 1.79 !!!cp ('t84');
3526 wakaba 1.52 $top_el->set_attribute_ns
3527     (undef, [undef, $attr_name],
3528     $token->{attributes}->{$attr_name}->{value});
3529     }
3530     }
3531 wakaba 1.125 !!!nack ('t84.1');
3532 wakaba 1.52 !!!next-token;
3533     redo B;
3534 wakaba 1.55 } elsif ($token->{type} == COMMENT_TOKEN) {
3535 wakaba 1.52 my $comment = $self->{document}->create_comment ($token->{data});
3536 wakaba 1.56 if ($self->{insertion_mode} & AFTER_HTML_IMS) {
3537 wakaba 1.79 !!!cp ('t85');
3538 wakaba 1.52 $self->{document}->append_child ($comment);
3539 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_BODY_IM) {
3540 wakaba 1.79 !!!cp ('t86');
3541 wakaba 1.52 $self->{open_elements}->[0]->[0]->append_child ($comment);
3542     } else {
3543 wakaba 1.79 !!!cp ('t87');
3544 wakaba 1.52 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3545     }
3546     !!!next-token;
3547     redo B;
3548 wakaba 1.56 } elsif ($self->{insertion_mode} & HEAD_IMS) {
3549 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
3550 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3551 wakaba 1.99 unless ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3552     !!!cp ('t88.2');
3553     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3554     } else {
3555     !!!cp ('t88.1');
3556     ## Ignore the token.
3557     !!!next-token;
3558     redo B;
3559     }
3560 wakaba 1.52 unless (length $token->{data}) {
3561 wakaba 1.79 !!!cp ('t88');
3562 wakaba 1.52 !!!next-token;
3563     redo B;
3564 wakaba 1.1 }
3565     }
3566 wakaba 1.52
3567 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3568 wakaba 1.79 !!!cp ('t89');
3569 wakaba 1.52 ## As if <head>
3570 wakaba 1.116 !!!create-element ($self->{head_element}, 'head',, $token);
3571 wakaba 1.52 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3572 wakaba 1.123 push @{$self->{open_elements}},
3573     [$self->{head_element}, $el_category->{head}];
3574 wakaba 1.52
3575     ## Reprocess in the "in head" insertion mode...
3576     pop @{$self->{open_elements}};
3577    
3578     ## Reprocess in the "after head" insertion mode...
3579 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3580 wakaba 1.79 !!!cp ('t90');
3581 wakaba 1.52 ## As if </noscript>
3582     pop @{$self->{open_elements}};
3583 wakaba 1.113 !!!parse-error (type => 'in noscript:#character', token => $token);
3584 wakaba 1.1
3585 wakaba 1.52 ## Reprocess in the "in head" insertion mode...
3586     ## As if </head>
3587     pop @{$self->{open_elements}};
3588    
3589     ## Reprocess in the "after head" insertion mode...
3590 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
3591 wakaba 1.79 !!!cp ('t91');
3592 wakaba 1.52 pop @{$self->{open_elements}};
3593    
3594     ## Reprocess in the "after head" insertion mode...
3595 wakaba 1.79 } else {
3596     !!!cp ('t92');
3597 wakaba 1.1 }
3598 wakaba 1.52
3599 wakaba 1.123 ## "after head" insertion mode
3600     ## As if <body>
3601     !!!insert-element ('body',, $token);
3602     $self->{insertion_mode} = IN_BODY_IM;
3603     ## reprocess
3604     redo B;
3605     } elsif ($token->{type} == START_TAG_TOKEN) {
3606     if ($token->{tag_name} eq 'head') {
3607     if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3608     !!!cp ('t93');
3609     !!!create-element ($self->{head_element}, $token->{tag_name}, $token->{attributes}, $token);
3610     $self->{open_elements}->[-1]->[0]->append_child
3611     ($self->{head_element});
3612     push @{$self->{open_elements}},
3613     [$self->{head_element}, $el_category->{head}];
3614     $self->{insertion_mode} = IN_HEAD_IM;
3615 wakaba 1.125 !!!nack ('t93.1');
3616 wakaba 1.123 !!!next-token;
3617 wakaba 1.52 redo B;
3618 wakaba 1.125 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
3619     !!!cp ('t94');
3620     #
3621     } else {
3622     !!!cp ('t95');
3623     !!!parse-error (type => 'in head:head', token => $token); # or in head noscript
3624     ## Ignore the token
3625     !!!nack ('t95.1');
3626     !!!next-token;
3627     redo B;
3628     }
3629     } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3630 wakaba 1.79 !!!cp ('t96');
3631 wakaba 1.52 ## As if <head>
3632 wakaba 1.116 !!!create-element ($self->{head_element}, 'head',, $token);
3633 wakaba 1.52 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3634 wakaba 1.123 push @{$self->{open_elements}},
3635     [$self->{head_element}, $el_category->{head}];
3636 wakaba 1.52
3637 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
3638 wakaba 1.52 ## Reprocess in the "in head" insertion mode...
3639 wakaba 1.79 } else {
3640     !!!cp ('t97');
3641 wakaba 1.1 }
3642 wakaba 1.52
3643 wakaba 1.49 if ($token->{tag_name} eq 'base') {
3644 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3645 wakaba 1.79 !!!cp ('t98');
3646 wakaba 1.49 ## As if </noscript>
3647     pop @{$self->{open_elements}};
3648 wakaba 1.113 !!!parse-error (type => 'in noscript:base', token => $token);
3649 wakaba 1.49
3650 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
3651 wakaba 1.49 ## Reprocess in the "in head" insertion mode...
3652 wakaba 1.79 } else {
3653     !!!cp ('t99');
3654 wakaba 1.49 }
3655    
3656     ## NOTE: There is a "as if in head" code clone.
3657 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
3658 wakaba 1.79 !!!cp ('t100');
3659 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
3660 wakaba 1.123 push @{$self->{open_elements}},
3661     [$self->{head_element}, $el_category->{head}];
3662 wakaba 1.79 } else {
3663     !!!cp ('t101');
3664 wakaba 1.49 }
3665 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3666 wakaba 1.49 pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
3667 wakaba 1.100 pop @{$self->{open_elements}} # <head>
3668 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
3669 wakaba 1.125 !!!nack ('t101.1');
3670 wakaba 1.49 !!!next-token;
3671     redo B;
3672     } elsif ($token->{tag_name} eq 'link') {
3673 wakaba 1.25 ## NOTE: There is a "as if in head" code clone.
3674 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
3675 wakaba 1.79 !!!cp ('t102');
3676 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
3677 wakaba 1.123 push @{$self->{open_elements}},
3678     [$self->{head_element}, $el_category->{head}];
3679 wakaba 1.79 } else {
3680     !!!cp ('t103');
3681 wakaba 1.25 }
3682 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3683 wakaba 1.25 pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
3684 wakaba 1.100 pop @{$self->{open_elements}} # <head>
3685 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
3686 wakaba 1.125 !!!ack ('t103.1');
3687 wakaba 1.1 !!!next-token;
3688 wakaba 1.25 redo B;
3689 wakaba 1.34 } elsif ($token->{tag_name} eq 'meta') {
3690     ## NOTE: There is a "as if in head" code clone.
3691 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
3692 wakaba 1.79 !!!cp ('t104');
3693 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
3694 wakaba 1.123 push @{$self->{open_elements}},
3695     [$self->{head_element}, $el_category->{head}];
3696 wakaba 1.79 } else {
3697     !!!cp ('t105');
3698 wakaba 1.34 }
3699 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3700 wakaba 1.66 my $meta_el = pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
3701 wakaba 1.34
3702     unless ($self->{confident}) {
3703     if ($token->{attributes}->{charset}) { ## TODO: And if supported
3704 wakaba 1.79 !!!cp ('t106');
3705 wakaba 1.63 $self->{change_encoding}
3706 wakaba 1.114 ->($self, $token->{attributes}->{charset}->{value},
3707     $token);
3708 wakaba 1.66
3709     $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
3710     ->set_user_data (manakai_has_reference =>
3711     $token->{attributes}->{charset}
3712     ->{has_reference});
3713 wakaba 1.63 } elsif ($token->{attributes}->{content}) {
3714 wakaba 1.35 ## ISSUE: Algorithm name in the spec was incorrect so that not linked to the definition.
3715 wakaba 1.63 if ($token->{attributes}->{content}->{value}
3716 wakaba 1.70 =~ /\A[^;]*;[\x09-\x0D\x20]*[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
3717     [\x09-\x0D\x20]*=
3718 wakaba 1.34 [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
3719     ([^"'\x09-\x0D\x20][^\x09-\x0D\x20]*))/x) {
3720 wakaba 1.79 !!!cp ('t107');
3721 wakaba 1.63 $self->{change_encoding}
3722 wakaba 1.114 ->($self, defined $1 ? $1 : defined $2 ? $2 : $3,
3723     $token);
3724 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
3725     ->set_user_data (manakai_has_reference =>
3726     $token->{attributes}->{content}
3727     ->{has_reference});
3728 wakaba 1.79 } else {
3729     !!!cp ('t108');
3730 wakaba 1.63 }
3731 wakaba 1.34 }
3732 wakaba 1.66 } else {
3733     if ($token->{attributes}->{charset}) {
3734 wakaba 1.79 !!!cp ('t109');
3735 wakaba 1.66 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
3736     ->set_user_data (manakai_has_reference =>
3737     $token->{attributes}->{charset}
3738     ->{has_reference});
3739     }
3740 wakaba 1.68 if ($token->{attributes}->{content}) {
3741 wakaba 1.79 !!!cp ('t110');
3742 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
3743     ->set_user_data (manakai_has_reference =>
3744     $token->{attributes}->{content}
3745     ->{has_reference});
3746     }
3747 wakaba 1.34 }
3748    
3749 wakaba 1.100 pop @{$self->{open_elements}} # <head>
3750 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
3751 wakaba 1.125 !!!ack ('t110.1');
3752 wakaba 1.34 !!!next-token;
3753     redo B;
3754 wakaba 1.49 } elsif ($token->{tag_name} eq 'title') {
3755 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3756 wakaba 1.79 !!!cp ('t111');
3757 wakaba 1.49 ## As if </noscript>
3758     pop @{$self->{open_elements}};
3759 wakaba 1.113 !!!parse-error (type => 'in noscript:title', token => $token);
3760 wakaba 1.49
3761 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
3762 wakaba 1.49 ## Reprocess in the "in head" insertion mode...
3763 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
3764 wakaba 1.79 !!!cp ('t112');
3765 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
3766 wakaba 1.123 push @{$self->{open_elements}},
3767     [$self->{head_element}, $el_category->{head}];
3768 wakaba 1.79 } else {
3769     !!!cp ('t113');
3770 wakaba 1.25 }
3771 wakaba 1.49
3772     ## NOTE: There is a "as if in head" code clone.
3773 wakaba 1.31 my $parent = defined $self->{head_element} ? $self->{head_element}
3774     : $self->{open_elements}->[-1]->[0];
3775 wakaba 1.96 $parse_rcdata->(RCDATA_CONTENT_MODEL);
3776 wakaba 1.100 pop @{$self->{open_elements}} # <head>
3777 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
3778 wakaba 1.25 redo B;
3779     } elsif ($token->{tag_name} eq 'style') {
3780     ## NOTE: Or (scripting is enabled and tag_name eq 'noscript' and
3781 wakaba 1.54 ## insertion mode IN_HEAD_IM)
3782 wakaba 1.25 ## NOTE: There is a "as if in head" code clone.
3783 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
3784 wakaba 1.79 !!!cp ('t114');
3785 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
3786 wakaba 1.123 push @{$self->{open_elements}},
3787     [$self->{head_element}, $el_category->{head}];
3788 wakaba 1.79 } else {
3789     !!!cp ('t115');
3790 wakaba 1.25 }
3791 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
3792 wakaba 1.100 pop @{$self->{open_elements}} # <head>
3793 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
3794 wakaba 1.25 redo B;
3795     } elsif ($token->{tag_name} eq 'noscript') {
3796 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_IM) {
3797 wakaba 1.79 !!!cp ('t116');
3798 wakaba 1.25 ## NOTE: and scripting is disabled
3799 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3800 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_NOSCRIPT_IM;
3801 wakaba 1.125 !!!nack ('t116.1');
3802 wakaba 1.1 !!!next-token;
3803 wakaba 1.25 redo B;
3804 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3805 wakaba 1.79 !!!cp ('t117');
3806 wakaba 1.113 !!!parse-error (type => 'in noscript:noscript', token => $token);
3807 wakaba 1.1 ## Ignore the token
3808 wakaba 1.125 !!!nack ('t117.1');
3809 wakaba 1.41 !!!next-token;
3810 wakaba 1.25 redo B;
3811 wakaba 1.1 } else {
3812 wakaba 1.79 !!!cp ('t118');
3813 wakaba 1.25 #
3814 wakaba 1.1 }
3815 wakaba 1.49 } elsif ($token->{tag_name} eq 'script') {
3816 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3817 wakaba 1.79 !!!cp ('t119');
3818 wakaba 1.49 ## As if </noscript>
3819     pop @{$self->{open_elements}};
3820 wakaba 1.113 !!!parse-error (type => 'in noscript:script', token => $token);
3821 wakaba 1.49
3822 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
3823 wakaba 1.49 ## Reprocess in the "in head" insertion mode...
3824 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
3825 wakaba 1.79 !!!cp ('t120');
3826 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
3827 wakaba 1.123 push @{$self->{open_elements}},
3828     [$self->{head_element}, $el_category->{head}];
3829 wakaba 1.79 } else {
3830     !!!cp ('t121');
3831 wakaba 1.25 }
3832 wakaba 1.49
3833 wakaba 1.25 ## NOTE: There is a "as if in head" code clone.
3834 wakaba 1.100 $script_start_tag->();
3835     pop @{$self->{open_elements}} # <head>
3836 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
3837 wakaba 1.1 redo B;
3838 wakaba 1.49 } elsif ($token->{tag_name} eq 'body' or
3839 wakaba 1.25 $token->{tag_name} eq 'frameset') {
3840 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3841 wakaba 1.79 !!!cp ('t122');
3842 wakaba 1.49 ## As if </noscript>
3843     pop @{$self->{open_elements}};
3844 wakaba 1.113 !!!parse-error (type => 'in noscript:'.$token->{tag_name}, token => $token);
3845 wakaba 1.49
3846     ## Reprocess in the "in head" insertion mode...
3847     ## As if </head>
3848     pop @{$self->{open_elements}};
3849    
3850     ## Reprocess in the "after head" insertion mode...
3851 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
3852 wakaba 1.79 !!!cp ('t124');
3853 wakaba 1.49 pop @{$self->{open_elements}};
3854    
3855     ## Reprocess in the "after head" insertion mode...
3856 wakaba 1.79 } else {
3857     !!!cp ('t125');
3858 wakaba 1.49 }
3859    
3860     ## "after head" insertion mode
3861 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3862 wakaba 1.54 if ($token->{tag_name} eq 'body') {
3863 wakaba 1.79 !!!cp ('t126');
3864 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
3865     } elsif ($token->{tag_name} eq 'frameset') {
3866 wakaba 1.79 !!!cp ('t127');
3867 wakaba 1.54 $self->{insertion_mode} = IN_FRAMESET_IM;
3868     } else {
3869     die "$0: tag name: $self->{tag_name}";
3870     }
3871 wakaba 1.125 !!!nack ('t127.1');
3872 wakaba 1.1 !!!next-token;
3873     redo B;
3874     } else {
3875 wakaba 1.79 !!!cp ('t128');
3876 wakaba 1.1 #
3877     }
3878 wakaba 1.49
3879 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3880 wakaba 1.79 !!!cp ('t129');
3881 wakaba 1.49 ## As if </noscript>
3882     pop @{$self->{open_elements}};
3883 wakaba 1.113 !!!parse-error (type => 'in noscript:/'.$token->{tag_name}, token => $token);
3884 wakaba 1.49
3885     ## Reprocess in the "in head" insertion mode...
3886     ## As if </head>
3887 wakaba 1.25 pop @{$self->{open_elements}};
3888 wakaba 1.49
3889     ## Reprocess in the "after head" insertion mode...
3890 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
3891 wakaba 1.79 !!!cp ('t130');
3892 wakaba 1.49 ## As if </head>
3893 wakaba 1.25 pop @{$self->{open_elements}};
3894 wakaba 1.49
3895     ## Reprocess in the "after head" insertion mode...
3896 wakaba 1.79 } else {
3897     !!!cp ('t131');
3898 wakaba 1.49 }
3899    
3900     ## "after head" insertion mode
3901     ## As if <body>
3902 wakaba 1.116 !!!insert-element ('body',, $token);
3903 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
3904 wakaba 1.49 ## reprocess
3905 wakaba 1.125 !!!ack-later;
3906 wakaba 1.49 redo B;
3907 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
3908 wakaba 1.49 if ($token->{tag_name} eq 'head') {
3909 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3910 wakaba 1.79 !!!cp ('t132');
3911 wakaba 1.50 ## As if <head>
3912 wakaba 1.116 !!!create-element ($self->{head_element}, 'head',, $token);
3913 wakaba 1.50 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3914 wakaba 1.123 push @{$self->{open_elements}},
3915     [$self->{head_element}, $el_category->{head}];
3916 wakaba 1.50
3917     ## Reprocess in the "in head" insertion mode...
3918     pop @{$self->{open_elements}};
3919 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
3920 wakaba 1.50 !!!next-token;
3921     redo B;
3922 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3923 wakaba 1.79 !!!cp ('t133');
3924 wakaba 1.49 ## As if </noscript>
3925     pop @{$self->{open_elements}};
3926 wakaba 1.113 !!!parse-error (type => 'in noscript:/head', token => $token);
3927 wakaba 1.49
3928     ## Reprocess in the "in head" insertion mode...
3929 wakaba 1.50 pop @{$self->{open_elements}};
3930 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
3931 wakaba 1.50 !!!next-token;
3932     redo B;
3933 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
3934 wakaba 1.79 !!!cp ('t134');
3935 wakaba 1.49 pop @{$self->{open_elements}};
3936 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
3937 wakaba 1.49 !!!next-token;
3938     redo B;
3939     } else {
3940 wakaba 1.79 !!!cp ('t135');
3941 wakaba 1.49 #
3942     }
3943     } elsif ($token->{tag_name} eq 'noscript') {
3944 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3945 wakaba 1.79 !!!cp ('t136');
3946 wakaba 1.49 pop @{$self->{open_elements}};
3947 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
3948 wakaba 1.49 !!!next-token;
3949     redo B;
3950 wakaba 1.54 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3951 wakaba 1.79 !!!cp ('t137');
3952 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:noscript', token => $token);
3953 wakaba 1.50 ## Ignore the token ## ISSUE: An issue in the spec.
3954     !!!next-token;
3955     redo B;
3956 wakaba 1.49 } else {
3957 wakaba 1.79 !!!cp ('t138');
3958 wakaba 1.49 #
3959     }
3960     } elsif ({
3961 wakaba 1.31 body => 1, html => 1,
3962     }->{$token->{tag_name}}) {
3963 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3964 wakaba 1.79 !!!cp ('t139');
3965 wakaba 1.50 ## As if <head>
3966 wakaba 1.116 !!!create-element ($self->{head_element}, 'head',, $token);
3967 wakaba 1.50 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3968 wakaba 1.123 push @{$self->{open_elements}},
3969     [$self->{head_element}, $el_category->{head}];
3970 wakaba 1.50
3971 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
3972 wakaba 1.50 ## Reprocess in the "in head" insertion mode...
3973 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3974 wakaba 1.79 !!!cp ('t140');
3975 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
3976 wakaba 1.49 ## Ignore the token
3977     !!!next-token;
3978     redo B;
3979 wakaba 1.79 } else {
3980     !!!cp ('t141');
3981 wakaba 1.49 }
3982 wakaba 1.50
3983     #
3984 wakaba 1.49 } elsif ({
3985 wakaba 1.31 p => 1, br => 1,
3986     }->{$token->{tag_name}}) {
3987 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3988 wakaba 1.79 !!!cp ('t142');
3989 wakaba 1.50 ## As if <head>
3990 wakaba 1.116 !!!create-element ($self->{head_element}, 'head',, $token);
3991 wakaba 1.50 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3992 wakaba 1.123 push @{$self->{open_elements}},
3993     [$self->{head_element}, $el_category->{head}];
3994 wakaba 1.50
3995 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
3996 wakaba 1.50 ## Reprocess in the "in head" insertion mode...
3997 wakaba 1.79 } else {
3998     !!!cp ('t143');
3999 wakaba 1.50 }
4000    
4001 wakaba 1.1 #
4002 wakaba 1.25 } else {
4003 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
4004 wakaba 1.79 !!!cp ('t144');
4005 wakaba 1.54 #
4006     } else {
4007 wakaba 1.79 !!!cp ('t145');
4008 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4009 wakaba 1.49 ## Ignore the token
4010     !!!next-token;
4011     redo B;
4012     }
4013     }
4014    
4015 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4016 wakaba 1.79 !!!cp ('t146');
4017 wakaba 1.49 ## As if </noscript>
4018     pop @{$self->{open_elements}};
4019 wakaba 1.113 !!!parse-error (type => 'in noscript:/'.$token->{tag_name}, token => $token);
4020 wakaba 1.49
4021     ## Reprocess in the "in head" insertion mode...
4022     ## As if </head>
4023     pop @{$self->{open_elements}};
4024    
4025     ## Reprocess in the "after head" insertion mode...
4026 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4027 wakaba 1.79 !!!cp ('t147');
4028 wakaba 1.49 ## As if </head>
4029     pop @{$self->{open_elements}};
4030    
4031     ## Reprocess in the "after head" insertion mode...
4032 wakaba 1.54 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4033 wakaba 1.82 ## ISSUE: This case cannot be reached?
4034 wakaba 1.79 !!!cp ('t148');
4035 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4036 wakaba 1.50 ## Ignore the token ## ISSUE: An issue in the spec.
4037     !!!next-token;
4038     redo B;
4039 wakaba 1.79 } else {
4040     !!!cp ('t149');
4041 wakaba 1.1 }
4042    
4043 wakaba 1.49 ## "after head" insertion mode
4044     ## As if <body>
4045 wakaba 1.116 !!!insert-element ('body',, $token);
4046 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
4047 wakaba 1.52 ## reprocess
4048     redo B;
4049 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4050     if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4051     !!!cp ('t149.1');
4052    
4053     ## NOTE: As if <head>
4054 wakaba 1.116 !!!create-element ($self->{head_element}, 'head',, $token);
4055 wakaba 1.104 $self->{open_elements}->[-1]->[0]->append_child
4056     ($self->{head_element});
4057 wakaba 1.123 #push @{$self->{open_elements}},
4058     # [$self->{head_element}, $el_category->{head}];
4059 wakaba 1.104 #$self->{insertion_mode} = IN_HEAD_IM;
4060     ## NOTE: Reprocess.
4061    
4062     ## NOTE: As if </head>
4063     #pop @{$self->{open_elements}};
4064     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
4065     ## NOTE: Reprocess.
4066    
4067     #
4068     } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4069     !!!cp ('t149.2');
4070    
4071     ## NOTE: As if </head>
4072     pop @{$self->{open_elements}};
4073     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
4074     ## NOTE: Reprocess.
4075    
4076     #
4077     } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4078     !!!cp ('t149.3');
4079    
4080 wakaba 1.113 !!!parse-error (type => 'in noscript:#eof', token => $token);
4081 wakaba 1.104
4082     ## As if </noscript>
4083     pop @{$self->{open_elements}};
4084     #$self->{insertion_mode} = IN_HEAD_IM;
4085     ## NOTE: Reprocess.
4086    
4087     ## NOTE: As if </head>
4088     pop @{$self->{open_elements}};
4089     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
4090     ## NOTE: Reprocess.
4091    
4092     #
4093     } else {
4094     !!!cp ('t149.4');
4095     #
4096     }
4097    
4098     ## NOTE: As if <body>
4099 wakaba 1.116 !!!insert-element ('body',, $token);
4100 wakaba 1.104 $self->{insertion_mode} = IN_BODY_IM;
4101     ## NOTE: Reprocess.
4102     redo B;
4103     } else {
4104     die "$0: $token->{type}: Unknown token type";
4105     }
4106 wakaba 1.52
4107     ## ISSUE: An issue in the spec.
4108 wakaba 1.56 } elsif ($self->{insertion_mode} & BODY_IMS) {
4109 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
4110 wakaba 1.79 !!!cp ('t150');
4111 wakaba 1.52 ## NOTE: There is a code clone of "character in body".
4112     $reconstruct_active_formatting_elements->($insert_to_current);
4113    
4114     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4115    
4116     !!!next-token;
4117     redo B;
4118 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
4119 wakaba 1.52 if ({
4120     caption => 1, col => 1, colgroup => 1, tbody => 1,
4121     td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
4122     }->{$token->{tag_name}}) {
4123 wakaba 1.54 if ($self->{insertion_mode} == IN_CELL_IM) {
4124 wakaba 1.52 ## have an element in table scope
4125 wakaba 1.108 for (reverse 0..$#{$self->{open_elements}}) {
4126 wakaba 1.52 my $node = $self->{open_elements}->[$_];
4127 wakaba 1.123 if ($node->[1] & TABLE_CELL_EL) {
4128 wakaba 1.79 !!!cp ('t151');
4129 wakaba 1.108
4130     ## Close the cell
4131 wakaba 1.125 !!!back-token; # <x>
4132 wakaba 1.122 $token = {type => END_TAG_TOKEN,
4133     tag_name => $node->[0]->manakai_local_name,
4134 wakaba 1.114 line => $token->{line},
4135     column => $token->{column}};
4136 wakaba 1.108 redo B;
4137 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4138 wakaba 1.79 !!!cp ('t152');
4139 wakaba 1.108 ## ISSUE: This case can never be reached, maybe.
4140     last;
4141 wakaba 1.52 }
4142 wakaba 1.108 }
4143    
4144     !!!cp ('t153');
4145     !!!parse-error (type => 'start tag not allowed',
4146 wakaba 1.113 value => $token->{tag_name}, token => $token);
4147 wakaba 1.108 ## Ignore the token
4148 wakaba 1.125 !!!nack ('t153.1');
4149 wakaba 1.108 !!!next-token;
4150 wakaba 1.52 redo B;
4151 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_CAPTION_IM) {
4152 wakaba 1.113 !!!parse-error (type => 'not closed:caption', token => $token);
4153 wakaba 1.52
4154 wakaba 1.108 ## NOTE: As if </caption>.
4155 wakaba 1.52 ## have a table element in table scope
4156     my $i;
4157 wakaba 1.108 INSCOPE: {
4158     for (reverse 0..$#{$self->{open_elements}}) {
4159     my $node = $self->{open_elements}->[$_];
4160 wakaba 1.123 if ($node->[1] & CAPTION_EL) {
4161 wakaba 1.108 !!!cp ('t155');
4162     $i = $_;
4163     last INSCOPE;
4164 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4165 wakaba 1.108 !!!cp ('t156');
4166     last;
4167     }
4168 wakaba 1.52 }
4169 wakaba 1.108
4170     !!!cp ('t157');
4171     !!!parse-error (type => 'start tag not allowed',
4172 wakaba 1.113 value => $token->{tag_name}, token => $token);
4173 wakaba 1.108 ## Ignore the token
4174 wakaba 1.125 !!!nack ('t157.1');
4175 wakaba 1.108 !!!next-token;
4176     redo B;
4177 wakaba 1.52 } # INSCOPE
4178    
4179     ## generate implied end tags
4180 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
4181     & END_TAG_OPTIONAL_EL) {
4182 wakaba 1.79 !!!cp ('t158');
4183 wakaba 1.86 pop @{$self->{open_elements}};
4184 wakaba 1.52 }
4185    
4186 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & CAPTION_EL) {
4187 wakaba 1.79 !!!cp ('t159');
4188 wakaba 1.122 !!!parse-error (type => 'not closed',
4189     value => $self->{open_elements}->[-1]->[0]
4190     ->manakai_local_name,
4191     token => $token);
4192 wakaba 1.79 } else {
4193     !!!cp ('t160');
4194 wakaba 1.52 }
4195    
4196     splice @{$self->{open_elements}}, $i;
4197    
4198     $clear_up_to_marker->();
4199    
4200 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
4201 wakaba 1.52
4202     ## reprocess
4203 wakaba 1.125 !!!ack-later;
4204 wakaba 1.52 redo B;
4205     } else {
4206 wakaba 1.79 !!!cp ('t161');
4207 wakaba 1.52 #
4208     }
4209     } else {
4210 wakaba 1.79 !!!cp ('t162');
4211 wakaba 1.52 #
4212     }
4213 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
4214 wakaba 1.52 if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
4215 wakaba 1.54 if ($self->{insertion_mode} == IN_CELL_IM) {
4216 wakaba 1.43 ## have an element in table scope
4217 wakaba 1.52 my $i;
4218 wakaba 1.43 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4219     my $node = $self->{open_elements}->[$_];
4220 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
4221 wakaba 1.79 !!!cp ('t163');
4222 wakaba 1.52 $i = $_;
4223 wakaba 1.43 last INSCOPE;
4224 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4225 wakaba 1.79 !!!cp ('t164');
4226 wakaba 1.43 last INSCOPE;
4227     }
4228     } # INSCOPE
4229 wakaba 1.52 unless (defined $i) {
4230 wakaba 1.79 !!!cp ('t165');
4231 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4232 wakaba 1.43 ## Ignore the token
4233     !!!next-token;
4234     redo B;
4235     }
4236    
4237 wakaba 1.52 ## generate implied end tags
4238 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
4239     & END_TAG_OPTIONAL_EL) {
4240 wakaba 1.79 !!!cp ('t166');
4241 wakaba 1.86 pop @{$self->{open_elements}};
4242 wakaba 1.52 }
4243 wakaba 1.86
4244 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
4245     ne $token->{tag_name}) {
4246 wakaba 1.79 !!!cp ('t167');
4247 wakaba 1.122 !!!parse-error (type => 'not closed',
4248     value => $self->{open_elements}->[-1]->[0]
4249     ->manakai_local_name,
4250     token => $token);
4251 wakaba 1.79 } else {
4252     !!!cp ('t168');
4253 wakaba 1.52 }
4254    
4255     splice @{$self->{open_elements}}, $i;
4256    
4257     $clear_up_to_marker->();
4258    
4259 wakaba 1.54 $self->{insertion_mode} = IN_ROW_IM;
4260 wakaba 1.52
4261     !!!next-token;
4262 wakaba 1.43 redo B;
4263 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_CAPTION_IM) {
4264 wakaba 1.79 !!!cp ('t169');
4265 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4266 wakaba 1.52 ## Ignore the token
4267     !!!next-token;
4268     redo B;
4269     } else {
4270 wakaba 1.79 !!!cp ('t170');
4271 wakaba 1.52 #
4272     }
4273     } elsif ($token->{tag_name} eq 'caption') {
4274 wakaba 1.54 if ($self->{insertion_mode} == IN_CAPTION_IM) {
4275 wakaba 1.43 ## have a table element in table scope
4276     my $i;
4277 wakaba 1.108 INSCOPE: {
4278     for (reverse 0..$#{$self->{open_elements}}) {
4279     my $node = $self->{open_elements}->[$_];
4280 wakaba 1.123 if ($node->[1] & CAPTION_EL) {
4281 wakaba 1.108 !!!cp ('t171');
4282     $i = $_;
4283     last INSCOPE;
4284 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4285 wakaba 1.108 !!!cp ('t172');
4286     last;
4287     }
4288 wakaba 1.43 }
4289 wakaba 1.108
4290     !!!cp ('t173');
4291     !!!parse-error (type => 'unmatched end tag',
4292 wakaba 1.113 value => $token->{tag_name}, token => $token);
4293 wakaba 1.108 ## Ignore the token
4294     !!!next-token;
4295     redo B;
4296 wakaba 1.43 } # INSCOPE
4297    
4298     ## generate implied end tags
4299 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
4300     & END_TAG_OPTIONAL_EL) {
4301 wakaba 1.79 !!!cp ('t174');
4302 wakaba 1.86 pop @{$self->{open_elements}};
4303 wakaba 1.43 }
4304 wakaba 1.52
4305 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & CAPTION_EL) {
4306 wakaba 1.79 !!!cp ('t175');
4307 wakaba 1.122 !!!parse-error (type => 'not closed',
4308     value => $self->{open_elements}->[-1]->[0]
4309     ->manakai_local_name,
4310     token => $token);
4311 wakaba 1.79 } else {
4312     !!!cp ('t176');
4313 wakaba 1.52 }
4314    
4315     splice @{$self->{open_elements}}, $i;
4316    
4317     $clear_up_to_marker->();
4318    
4319 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
4320 wakaba 1.52
4321     !!!next-token;
4322     redo B;
4323 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_CELL_IM) {
4324 wakaba 1.79 !!!cp ('t177');
4325 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4326 wakaba 1.52 ## Ignore the token
4327     !!!next-token;
4328     redo B;
4329     } else {
4330 wakaba 1.79 !!!cp ('t178');
4331 wakaba 1.52 #
4332     }
4333     } elsif ({
4334     table => 1, tbody => 1, tfoot => 1,
4335     thead => 1, tr => 1,
4336     }->{$token->{tag_name}} and
4337 wakaba 1.54 $self->{insertion_mode} == IN_CELL_IM) {
4338 wakaba 1.52 ## have an element in table scope
4339     my $i;
4340     my $tn;
4341 wakaba 1.108 INSCOPE: {
4342     for (reverse 0..$#{$self->{open_elements}}) {
4343     my $node = $self->{open_elements}->[$_];
4344 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
4345 wakaba 1.108 !!!cp ('t179');
4346     $i = $_;
4347    
4348     ## Close the cell
4349 wakaba 1.125 !!!back-token; # </x>
4350 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => $tn,
4351     line => $token->{line},
4352     column => $token->{column}};
4353 wakaba 1.108 redo B;
4354 wakaba 1.123 } elsif ($node->[1] & TABLE_CELL_EL) {
4355 wakaba 1.108 !!!cp ('t180');
4356 wakaba 1.123 $tn = $node->[0]->manakai_local_name;
4357 wakaba 1.108 ## NOTE: There is exactly one |td| or |th| element
4358     ## in scope in the stack of open elements by definition.
4359 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4360 wakaba 1.108 ## ISSUE: Can this be reached?
4361     !!!cp ('t181');
4362     last;
4363     }
4364 wakaba 1.52 }
4365 wakaba 1.108
4366 wakaba 1.79 !!!cp ('t182');
4367 wakaba 1.108 !!!parse-error (type => 'unmatched end tag',
4368 wakaba 1.113 value => $token->{tag_name}, token => $token);
4369 wakaba 1.52 ## Ignore the token
4370     !!!next-token;
4371     redo B;
4372 wakaba 1.108 } # INSCOPE
4373 wakaba 1.52 } elsif ($token->{tag_name} eq 'table' and
4374 wakaba 1.54 $self->{insertion_mode} == IN_CAPTION_IM) {
4375 wakaba 1.113 !!!parse-error (type => 'not closed:caption', token => $token);
4376 wakaba 1.52
4377     ## As if </caption>
4378     ## have a table element in table scope
4379     my $i;
4380     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4381     my $node = $self->{open_elements}->[$_];
4382 wakaba 1.123 if ($node->[1] & CAPTION_EL) {
4383 wakaba 1.79 !!!cp ('t184');
4384 wakaba 1.52 $i = $_;
4385     last INSCOPE;
4386 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4387 wakaba 1.79 !!!cp ('t185');
4388 wakaba 1.52 last INSCOPE;
4389     }
4390     } # INSCOPE
4391     unless (defined $i) {
4392 wakaba 1.79 !!!cp ('t186');
4393 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:caption', token => $token);
4394 wakaba 1.52 ## Ignore the token
4395     !!!next-token;
4396     redo B;
4397     }
4398    
4399     ## generate implied end tags
4400 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
4401 wakaba 1.79 !!!cp ('t187');
4402 wakaba 1.86 pop @{$self->{open_elements}};
4403 wakaba 1.52 }
4404    
4405 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & CAPTION_EL) {
4406 wakaba 1.79 !!!cp ('t188');
4407 wakaba 1.122 !!!parse-error (type => 'not closed',
4408     value => $self->{open_elements}->[-1]->[0]
4409     ->manakai_local_name,
4410     token => $token);
4411 wakaba 1.79 } else {
4412     !!!cp ('t189');
4413 wakaba 1.52 }
4414    
4415     splice @{$self->{open_elements}}, $i;
4416    
4417     $clear_up_to_marker->();
4418    
4419 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
4420 wakaba 1.52
4421     ## reprocess
4422     redo B;
4423     } elsif ({
4424     body => 1, col => 1, colgroup => 1, html => 1,
4425     }->{$token->{tag_name}}) {
4426 wakaba 1.56 if ($self->{insertion_mode} & BODY_TABLE_IMS) {
4427 wakaba 1.79 !!!cp ('t190');
4428 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4429 wakaba 1.52 ## Ignore the token
4430     !!!next-token;
4431     redo B;
4432     } else {
4433 wakaba 1.79 !!!cp ('t191');
4434 wakaba 1.52 #
4435     }
4436     } elsif ({
4437     tbody => 1, tfoot => 1,
4438     thead => 1, tr => 1,
4439     }->{$token->{tag_name}} and
4440 wakaba 1.54 $self->{insertion_mode} == IN_CAPTION_IM) {
4441 wakaba 1.79 !!!cp ('t192');
4442 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4443 wakaba 1.52 ## Ignore the token
4444     !!!next-token;
4445     redo B;
4446     } else {
4447 wakaba 1.79 !!!cp ('t193');
4448 wakaba 1.52 #
4449     }
4450 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4451     for my $entry (@{$self->{open_elements}}) {
4452 wakaba 1.123 unless ($entry->[1] & ALL_END_TAG_OPTIONAL_EL) {
4453 wakaba 1.104 !!!cp ('t75');
4454 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
4455 wakaba 1.104 last;
4456     }
4457     }
4458    
4459     ## Stop parsing.
4460     last B;
4461 wakaba 1.52 } else {
4462     die "$0: $token->{type}: Unknown token type";
4463     }
4464    
4465     $insert = $insert_to_current;
4466     #
4467 wakaba 1.56 } elsif ($self->{insertion_mode} & TABLE_IMS) {
4468 wakaba 1.58 if ($token->{type} == CHARACTER_TOKEN) {
4469 wakaba 1.95 if (not $open_tables->[-1]->[1] and # tainted
4470     $token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4471     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4472 wakaba 1.52
4473 wakaba 1.95 unless (length $token->{data}) {
4474     !!!cp ('t194');
4475     !!!next-token;
4476     redo B;
4477     } else {
4478     !!!cp ('t195');
4479     }
4480     }
4481 wakaba 1.52
4482 wakaba 1.113 !!!parse-error (type => 'in table:#character', token => $token);
4483 wakaba 1.52
4484     ## As if in body, but insert into foster parent element
4485     ## ISSUE: Spec says that "whenever a node would be inserted
4486     ## into the current node" while characters might not
4487     ## result in a new Text node.
4488     $reconstruct_active_formatting_elements->($insert_to_foster);
4489    
4490 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & TABLE_ROWS_EL) {
4491 wakaba 1.52 # MUST
4492     my $foster_parent_element;
4493     my $next_sibling;
4494     my $prev_sibling;
4495     OE: for (reverse 0..$#{$self->{open_elements}}) {
4496 wakaba 1.123 if ($self->{open_elements}->[$_]->[1] & TABLE_EL) {
4497 wakaba 1.52 my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
4498     if (defined $parent and $parent->node_type == 1) {
4499 wakaba 1.79 !!!cp ('t196');
4500 wakaba 1.52 $foster_parent_element = $parent;
4501     $next_sibling = $self->{open_elements}->[$_]->[0];
4502     $prev_sibling = $next_sibling->previous_sibling;
4503     } else {
4504 wakaba 1.79 !!!cp ('t197');
4505 wakaba 1.52 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
4506     $prev_sibling = $foster_parent_element->last_child;
4507     }
4508     last OE;
4509     }
4510     } # OE
4511     $foster_parent_element = $self->{open_elements}->[0]->[0] and
4512     $prev_sibling = $foster_parent_element->last_child
4513     unless defined $foster_parent_element;
4514     if (defined $prev_sibling and
4515     $prev_sibling->node_type == 3) {
4516 wakaba 1.79 !!!cp ('t198');
4517 wakaba 1.52 $prev_sibling->manakai_append_text ($token->{data});
4518     } else {
4519 wakaba 1.79 !!!cp ('t199');
4520 wakaba 1.52 $foster_parent_element->insert_before
4521     ($self->{document}->create_text_node ($token->{data}),
4522     $next_sibling);
4523     }
4524 wakaba 1.95 $open_tables->[-1]->[1] = 1; # tainted
4525     } else {
4526     !!!cp ('t200');
4527     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4528     }
4529 wakaba 1.52
4530 wakaba 1.95 !!!next-token;
4531     redo B;
4532 wakaba 1.58 } elsif ($token->{type} == START_TAG_TOKEN) {
4533 wakaba 1.52 if ({
4534 wakaba 1.54 tr => ($self->{insertion_mode} != IN_ROW_IM),
4535 wakaba 1.52 th => 1, td => 1,
4536     }->{$token->{tag_name}}) {
4537 wakaba 1.54 if ($self->{insertion_mode} == IN_TABLE_IM) {
4538 wakaba 1.52 ## Clear back to table context
4539 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
4540     & TABLE_SCOPING_EL)) {
4541 wakaba 1.79 !!!cp ('t201');
4542 wakaba 1.52 pop @{$self->{open_elements}};
4543 wakaba 1.43 }
4544    
4545 wakaba 1.116 !!!insert-element ('tbody',, $token);
4546 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
4547 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
4548     }
4549    
4550 wakaba 1.54 if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
4551 wakaba 1.52 unless ($token->{tag_name} eq 'tr') {
4552 wakaba 1.79 !!!cp ('t202');
4553 wakaba 1.113 !!!parse-error (type => 'missing start tag:tr', token => $token);
4554 wakaba 1.52 }
4555 wakaba 1.43
4556 wakaba 1.52 ## Clear back to table body context
4557 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
4558     & TABLE_ROWS_SCOPING_EL)) {
4559 wakaba 1.79 !!!cp ('t203');
4560 wakaba 1.83 ## ISSUE: Can this case be reached?
4561 wakaba 1.52 pop @{$self->{open_elements}};
4562     }
4563 wakaba 1.43
4564 wakaba 1.54 $self->{insertion_mode} = IN_ROW_IM;
4565 wakaba 1.52 if ($token->{tag_name} eq 'tr') {
4566 wakaba 1.79 !!!cp ('t204');
4567 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4568 wakaba 1.125 !!!nack ('t204');
4569 wakaba 1.52 !!!next-token;
4570     redo B;
4571     } else {
4572 wakaba 1.79 !!!cp ('t205');
4573 wakaba 1.116 !!!insert-element ('tr',, $token);
4574 wakaba 1.52 ## reprocess in the "in row" insertion mode
4575     }
4576 wakaba 1.79 } else {
4577     !!!cp ('t206');
4578 wakaba 1.52 }
4579    
4580     ## Clear back to table row context
4581 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
4582     & TABLE_ROW_SCOPING_EL)) {
4583 wakaba 1.79 !!!cp ('t207');
4584 wakaba 1.52 pop @{$self->{open_elements}};
4585 wakaba 1.43 }
4586 wakaba 1.52
4587 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4588 wakaba 1.54 $self->{insertion_mode} = IN_CELL_IM;
4589 wakaba 1.52
4590     push @$active_formatting_elements, ['#marker', ''];
4591    
4592 wakaba 1.125 !!!nack ('t207.1');
4593 wakaba 1.52 !!!next-token;
4594     redo B;
4595     } elsif ({
4596     caption => 1, col => 1, colgroup => 1,
4597     tbody => 1, tfoot => 1, thead => 1,
4598 wakaba 1.54 tr => 1, # $self->{insertion_mode} == IN_ROW_IM
4599 wakaba 1.52 }->{$token->{tag_name}}) {
4600 wakaba 1.54 if ($self->{insertion_mode} == IN_ROW_IM) {
4601 wakaba 1.52 ## As if </tr>
4602 wakaba 1.43 ## have an element in table scope
4603     my $i;
4604     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4605     my $node = $self->{open_elements}->[$_];
4606 wakaba 1.123 if ($node->[1] & TABLE_ROW_EL) {
4607 wakaba 1.79 !!!cp ('t208');
4608 wakaba 1.43 $i = $_;
4609     last INSCOPE;
4610 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4611 wakaba 1.79 !!!cp ('t209');
4612 wakaba 1.43 last INSCOPE;
4613     }
4614     } # INSCOPE
4615 wakaba 1.79 unless (defined $i) {
4616 wakaba 1.125 !!!cp ('t210');
4617 wakaba 1.83 ## TODO: This type is wrong.
4618 wakaba 1.125 !!!parse-error (type => 'unmacthed end tag:'.$token->{tag_name}, token => $token);
4619 wakaba 1.52 ## Ignore the token
4620 wakaba 1.125 !!!nack ('t210.1');
4621 wakaba 1.52 !!!next-token;
4622 wakaba 1.43 redo B;
4623     }
4624    
4625 wakaba 1.52 ## Clear back to table row context
4626 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
4627     & TABLE_ROW_SCOPING_EL)) {
4628 wakaba 1.79 !!!cp ('t211');
4629 wakaba 1.83 ## ISSUE: Can this case be reached?
4630 wakaba 1.52 pop @{$self->{open_elements}};
4631 wakaba 1.1 }
4632 wakaba 1.43
4633 wakaba 1.52 pop @{$self->{open_elements}}; # tr
4634 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
4635 wakaba 1.52 if ($token->{tag_name} eq 'tr') {
4636 wakaba 1.79 !!!cp ('t212');
4637 wakaba 1.52 ## reprocess
4638 wakaba 1.125 !!!ack-later;
4639 wakaba 1.52 redo B;
4640     } else {
4641 wakaba 1.79 !!!cp ('t213');
4642 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
4643     }
4644 wakaba 1.1 }
4645 wakaba 1.52
4646 wakaba 1.54 if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
4647 wakaba 1.52 ## have an element in table scope
4648 wakaba 1.43 my $i;
4649     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4650     my $node = $self->{open_elements}->[$_];
4651 wakaba 1.123 if ($node->[1] & TABLE_ROW_GROUP_EL) {
4652 wakaba 1.79 !!!cp ('t214');
4653 wakaba 1.43 $i = $_;
4654     last INSCOPE;
4655 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4656 wakaba 1.79 !!!cp ('t215');
4657 wakaba 1.43 last INSCOPE;
4658     }
4659     } # INSCOPE
4660 wakaba 1.52 unless (defined $i) {
4661 wakaba 1.79 !!!cp ('t216');
4662 wakaba 1.82 ## TODO: This error type is wrong.
4663 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4664 wakaba 1.52 ## Ignore the token
4665 wakaba 1.125 !!!nack ('t216.1');
4666 wakaba 1.52 !!!next-token;
4667 wakaba 1.43 redo B;
4668     }
4669 wakaba 1.52
4670     ## Clear back to table body context
4671 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
4672     & TABLE_ROWS_SCOPING_EL)) {
4673 wakaba 1.79 !!!cp ('t217');
4674 wakaba 1.83 ## ISSUE: Can this state be reached?
4675 wakaba 1.52 pop @{$self->{open_elements}};
4676 wakaba 1.43 }
4677    
4678 wakaba 1.52 ## As if <{current node}>
4679     ## have an element in table scope
4680     ## true by definition
4681 wakaba 1.43
4682 wakaba 1.52 ## Clear back to table body context
4683     ## nop by definition
4684 wakaba 1.43
4685 wakaba 1.52 pop @{$self->{open_elements}};
4686 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
4687 wakaba 1.52 ## reprocess in "in table" insertion mode...
4688 wakaba 1.79 } else {
4689     !!!cp ('t218');
4690 wakaba 1.52 }
4691    
4692     if ($token->{tag_name} eq 'col') {
4693     ## Clear back to table context
4694 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
4695     & TABLE_SCOPING_EL)) {
4696 wakaba 1.79 !!!cp ('t219');
4697 wakaba 1.83 ## ISSUE: Can this state be reached?
4698 wakaba 1.52 pop @{$self->{open_elements}};
4699     }
4700 wakaba 1.43
4701 wakaba 1.116 !!!insert-element ('colgroup',, $token);
4702 wakaba 1.54 $self->{insertion_mode} = IN_COLUMN_GROUP_IM;
4703 wakaba 1.52 ## reprocess
4704 wakaba 1.125 !!!ack-later;
4705 wakaba 1.43 redo B;
4706 wakaba 1.52 } elsif ({
4707     caption => 1,
4708     colgroup => 1,
4709     tbody => 1, tfoot => 1, thead => 1,
4710     }->{$token->{tag_name}}) {
4711     ## Clear back to table context
4712 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
4713     & TABLE_SCOPING_EL)) {
4714 wakaba 1.79 !!!cp ('t220');
4715 wakaba 1.83 ## ISSUE: Can this state be reached?
4716 wakaba 1.52 pop @{$self->{open_elements}};
4717 wakaba 1.1 }
4718 wakaba 1.52
4719     push @$active_formatting_elements, ['#marker', '']
4720     if $token->{tag_name} eq 'caption';
4721    
4722 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4723 wakaba 1.52 $self->{insertion_mode} = {
4724 wakaba 1.54 caption => IN_CAPTION_IM,
4725     colgroup => IN_COLUMN_GROUP_IM,
4726     tbody => IN_TABLE_BODY_IM,
4727     tfoot => IN_TABLE_BODY_IM,
4728     thead => IN_TABLE_BODY_IM,
4729 wakaba 1.52 }->{$token->{tag_name}};
4730 wakaba 1.1 !!!next-token;
4731 wakaba 1.125 !!!nack ('t220.1');
4732 wakaba 1.1 redo B;
4733 wakaba 1.52 } else {
4734     die "$0: in table: <>: $token->{tag_name}";
4735 wakaba 1.1 }
4736 wakaba 1.52 } elsif ($token->{tag_name} eq 'table') {
4737 wakaba 1.122 !!!parse-error (type => 'not closed',
4738     value => $self->{open_elements}->[-1]->[0]
4739     ->manakai_local_name,
4740     token => $token);
4741 wakaba 1.1
4742 wakaba 1.52 ## As if </table>
4743 wakaba 1.1 ## have a table element in table scope
4744     my $i;
4745 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4746     my $node = $self->{open_elements}->[$_];
4747 wakaba 1.123 if ($node->[1] & TABLE_EL) {
4748 wakaba 1.79 !!!cp ('t221');
4749 wakaba 1.1 $i = $_;
4750     last INSCOPE;
4751 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4752 wakaba 1.79 !!!cp ('t222');
4753 wakaba 1.1 last INSCOPE;
4754     }
4755     } # INSCOPE
4756     unless (defined $i) {
4757 wakaba 1.79 !!!cp ('t223');
4758 wakaba 1.83 ## TODO: The following is wrong, maybe.
4759 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:table', token => $token);
4760 wakaba 1.52 ## Ignore tokens </table><table>
4761 wakaba 1.125 !!!nack ('t223.1');
4762 wakaba 1.1 !!!next-token;
4763     redo B;
4764     }
4765    
4766 wakaba 1.106 ## TODO: The following steps have been removed from the latest spec.
4767 wakaba 1.1 ## generate implied end tags
4768 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
4769 wakaba 1.79 !!!cp ('t224');
4770 wakaba 1.86 pop @{$self->{open_elements}};
4771 wakaba 1.1 }
4772    
4773 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & TABLE_EL) {
4774 wakaba 1.79 !!!cp ('t225');
4775 wakaba 1.122 ## NOTE: |<table><tr><table>|
4776     !!!parse-error (type => 'not closed',
4777     value => $self->{open_elements}->[-1]->[0]
4778     ->manakai_local_name,
4779     token => $token);
4780 wakaba 1.79 } else {
4781     !!!cp ('t226');
4782 wakaba 1.1 }
4783    
4784 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4785 wakaba 1.95 pop @{$open_tables};
4786 wakaba 1.1
4787 wakaba 1.52 $self->_reset_insertion_mode;
4788 wakaba 1.1
4789 wakaba 1.125 ## reprocess
4790     !!!ack-later;
4791     redo B;
4792 wakaba 1.100 } elsif ($token->{tag_name} eq 'style') {
4793     if (not $open_tables->[-1]->[1]) { # tainted
4794     !!!cp ('t227.8');
4795     ## NOTE: This is an "as if in head" code clone.
4796     $parse_rcdata->(CDATA_CONTENT_MODEL);
4797     redo B;
4798     } else {
4799     !!!cp ('t227.7');
4800     #
4801     }
4802     } elsif ($token->{tag_name} eq 'script') {
4803     if (not $open_tables->[-1]->[1]) { # tainted
4804     !!!cp ('t227.6');
4805     ## NOTE: This is an "as if in head" code clone.
4806     $script_start_tag->();
4807     redo B;
4808     } else {
4809     !!!cp ('t227.5');
4810     #
4811     }
4812 wakaba 1.98 } elsif ($token->{tag_name} eq 'input') {
4813     if (not $open_tables->[-1]->[1]) { # tainted
4814     if ($token->{attributes}->{type}) { ## TODO: case
4815     my $type = lc $token->{attributes}->{type}->{value};
4816     if ($type eq 'hidden') {
4817     !!!cp ('t227.3');
4818 wakaba 1.113 !!!parse-error (type => 'in table:'.$token->{tag_name}, token => $token);
4819 wakaba 1.98
4820 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4821 wakaba 1.98
4822     ## TODO: form element pointer
4823    
4824     pop @{$self->{open_elements}};
4825    
4826     !!!next-token;
4827 wakaba 1.125 !!!ack ('t227.2.1');
4828 wakaba 1.98 redo B;
4829     } else {
4830     !!!cp ('t227.2');
4831     #
4832     }
4833     } else {
4834     !!!cp ('t227.1');
4835     #
4836     }
4837     } else {
4838     !!!cp ('t227.4');
4839     #
4840     }
4841 wakaba 1.58 } else {
4842 wakaba 1.79 !!!cp ('t227');
4843 wakaba 1.58 #
4844     }
4845 wakaba 1.98
4846 wakaba 1.113 !!!parse-error (type => 'in table:'.$token->{tag_name}, token => $token);
4847 wakaba 1.98
4848     $insert = $insert_to_foster;
4849     #
4850 wakaba 1.58 } elsif ($token->{type} == END_TAG_TOKEN) {
4851 wakaba 1.52 if ($token->{tag_name} eq 'tr' and
4852 wakaba 1.54 $self->{insertion_mode} == IN_ROW_IM) {
4853 wakaba 1.52 ## have an element in table scope
4854     my $i;
4855     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4856     my $node = $self->{open_elements}->[$_];
4857 wakaba 1.123 if ($node->[1] & TABLE_ROW_EL) {
4858 wakaba 1.79 !!!cp ('t228');
4859 wakaba 1.52 $i = $_;
4860     last INSCOPE;
4861 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4862 wakaba 1.79 !!!cp ('t229');
4863 wakaba 1.52 last INSCOPE;
4864     }
4865     } # INSCOPE
4866     unless (defined $i) {
4867 wakaba 1.79 !!!cp ('t230');
4868 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4869 wakaba 1.52 ## Ignore the token
4870 wakaba 1.125 !!!nack ('t230.1');
4871 wakaba 1.42 !!!next-token;
4872     redo B;
4873 wakaba 1.79 } else {
4874     !!!cp ('t232');
4875 wakaba 1.42 }
4876    
4877 wakaba 1.52 ## Clear back to table row context
4878 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
4879     & TABLE_ROW_SCOPING_EL)) {
4880 wakaba 1.79 !!!cp ('t231');
4881 wakaba 1.83 ## ISSUE: Can this state be reached?
4882 wakaba 1.52 pop @{$self->{open_elements}};
4883     }
4884 wakaba 1.42
4885 wakaba 1.52 pop @{$self->{open_elements}}; # tr
4886 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
4887 wakaba 1.52 !!!next-token;
4888 wakaba 1.125 !!!nack ('t231.1');
4889 wakaba 1.52 redo B;
4890     } elsif ($token->{tag_name} eq 'table') {
4891 wakaba 1.54 if ($self->{insertion_mode} == IN_ROW_IM) {
4892 wakaba 1.52 ## As if </tr>
4893     ## have an element in table scope
4894     my $i;
4895     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4896     my $node = $self->{open_elements}->[$_];
4897 wakaba 1.123 if ($node->[1] & TABLE_ROW_EL) {
4898 wakaba 1.79 !!!cp ('t233');
4899 wakaba 1.52 $i = $_;
4900     last INSCOPE;
4901 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4902 wakaba 1.79 !!!cp ('t234');
4903 wakaba 1.52 last INSCOPE;
4904 wakaba 1.42 }
4905 wakaba 1.52 } # INSCOPE
4906     unless (defined $i) {
4907 wakaba 1.79 !!!cp ('t235');
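4908 wakaba 1.83 ## TODO: The following is wrong: the error type is built from $token->{type} (the token type constant) instead of $token->{tag_name}, unlike the other 'unmatched end tag:' errors in this branch.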
4909 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{type}, token => $token);
4910 wakaba 1.52 ## Ignore the token
4911 wakaba 1.125 !!!nack ('t236.1');
4912 wakaba 1.52 !!!next-token;
4913     redo B;
4914 wakaba 1.42 }
4915 wakaba 1.52
4916     ## Clear back to table row context
4917 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
4918     & TABLE_ROW_SCOPING_EL)) {
4919 wakaba 1.79 !!!cp ('t236');
4920 wakaba 1.83 ## ISSUE: Can this state be reached?
4921 wakaba 1.46 pop @{$self->{open_elements}};
4922 wakaba 1.1 }
4923 wakaba 1.46
4924 wakaba 1.52 pop @{$self->{open_elements}}; # tr
4925 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
4926 wakaba 1.46 ## reprocess in the "in table body" insertion mode...
4927 wakaba 1.1 }
4928    
4929 wakaba 1.54 if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
4930 wakaba 1.52 ## have an element in table scope
4931     my $i;
4932     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4933     my $node = $self->{open_elements}->[$_];
4934 wakaba 1.123 if ($node->[1] & TABLE_ROW_GROUP_EL) {
4935 wakaba 1.79 !!!cp ('t237');
4936 wakaba 1.52 $i = $_;
4937     last INSCOPE;
4938 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4939 wakaba 1.79 !!!cp ('t238');
4940 wakaba 1.52 last INSCOPE;
4941     }
4942     } # INSCOPE
4943     unless (defined $i) {
4944 wakaba 1.79 !!!cp ('t239');
4945 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4946 wakaba 1.52 ## Ignore the token
4947 wakaba 1.125 !!!nack ('t239.1');
4948 wakaba 1.52 !!!next-token;
4949     redo B;
4950 wakaba 1.47 }
4951    
4952     ## Clear back to table body context
4953 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
4954     & TABLE_ROWS_SCOPING_EL)) {
4955 wakaba 1.79 !!!cp ('t240');
4956 wakaba 1.47 pop @{$self->{open_elements}};
4957     }
4958    
4959 wakaba 1.52 ## As if <{current node}>
4960     ## have an element in table scope
4961     ## true by definition
4962    
4963     ## Clear back to table body context
4964     ## nop by definition
4965    
4966     pop @{$self->{open_elements}};
4967 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
4968 wakaba 1.52 ## reprocess in the "in table" insertion mode...
4969     }
4970    
4971 wakaba 1.94 ## NOTE: </table> in the "in table" insertion mode.
4972     ## When you edit the code fragment below, please ensure that
4973     ## the code for <table> in the "in table" insertion mode
4974     ## is synced with it.
4975    
4976 wakaba 1.52 ## have a table element in table scope
4977     my $i;
4978     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4979     my $node = $self->{open_elements}->[$_];
4980 wakaba 1.123 if ($node->[1] & TABLE_EL) {
4981 wakaba 1.79 !!!cp ('t241');
4982 wakaba 1.52 $i = $_;
4983     last INSCOPE;
4984 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4985 wakaba 1.79 !!!cp ('t242');
4986 wakaba 1.52 last INSCOPE;
4987 wakaba 1.47 }
4988 wakaba 1.52 } # INSCOPE
4989     unless (defined $i) {
4990 wakaba 1.79 !!!cp ('t243');
4991 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4992 wakaba 1.52 ## Ignore the token
4993 wakaba 1.125 !!!nack ('t243.1');
4994 wakaba 1.52 !!!next-token;
4995     redo B;
4996 wakaba 1.3 }
4997 wakaba 1.52
4998     splice @{$self->{open_elements}}, $i;
4999 wakaba 1.95 pop @{$open_tables};
5000 wakaba 1.1
5001 wakaba 1.52 $self->_reset_insertion_mode;
5002 wakaba 1.47
5003     !!!next-token;
5004     redo B;
5005     } elsif ({
5006 wakaba 1.48 tbody => 1, tfoot => 1, thead => 1,
5007 wakaba 1.52 }->{$token->{tag_name}} and
5008 wakaba 1.56 $self->{insertion_mode} & ROW_IMS) {
5009 wakaba 1.54 if ($self->{insertion_mode} == IN_ROW_IM) {
5010 wakaba 1.52 ## have an element in table scope
5011     my $i;
5012     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5013     my $node = $self->{open_elements}->[$_];
5014 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
5015 wakaba 1.79 !!!cp ('t247');
5016 wakaba 1.52 $i = $_;
5017     last INSCOPE;
5018 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5019 wakaba 1.79 !!!cp ('t248');
5020 wakaba 1.52 last INSCOPE;
5021     }
5022     } # INSCOPE
5023     unless (defined $i) {
5024 wakaba 1.79 !!!cp ('t249');
5025 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5026 wakaba 1.52 ## Ignore the token
5027 wakaba 1.125 !!!nack ('t249.1');
5028 wakaba 1.52 !!!next-token;
5029     redo B;
5030     }
5031    
5032 wakaba 1.48 ## As if </tr>
5033     ## have an element in table scope
5034     my $i;
5035     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5036     my $node = $self->{open_elements}->[$_];
5037 wakaba 1.123 if ($node->[1] & TABLE_ROW_EL) {
5038 wakaba 1.79 !!!cp ('t250');
5039 wakaba 1.48 $i = $_;
5040     last INSCOPE;
5041 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5042 wakaba 1.79 !!!cp ('t251');
5043 wakaba 1.48 last INSCOPE;
5044     }
5045     } # INSCOPE
5046 wakaba 1.52 unless (defined $i) {
5047 wakaba 1.79 !!!cp ('t252');
5048 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:tr', token => $token);
5049 wakaba 1.52 ## Ignore the token
5050 wakaba 1.125 !!!nack ('t252.1');
5051 wakaba 1.52 !!!next-token;
5052     redo B;
5053     }
5054 wakaba 1.48
5055     ## Clear back to table row context
5056 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5057     & TABLE_ROW_SCOPING_EL)) {
5058 wakaba 1.79 !!!cp ('t253');
5059 wakaba 1.83 ## ISSUE: Can this case be reached?
5060 wakaba 1.48 pop @{$self->{open_elements}};
5061     }
5062    
5063     pop @{$self->{open_elements}}; # tr
5064 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
5065 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
5066     }
5067    
5068     ## have an element in table scope
5069     my $i;
5070     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5071     my $node = $self->{open_elements}->[$_];
5072 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
5073 wakaba 1.79 !!!cp ('t254');
5074 wakaba 1.52 $i = $_;
5075     last INSCOPE;
5076 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5077 wakaba 1.79 !!!cp ('t255');
5078 wakaba 1.52 last INSCOPE;
5079     }
5080     } # INSCOPE
5081     unless (defined $i) {
5082 wakaba 1.79 !!!cp ('t256');
5083 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5084 wakaba 1.52 ## Ignore the token
5085 wakaba 1.125 !!!nack ('t256.1');
5086 wakaba 1.52 !!!next-token;
5087     redo B;
5088     }
5089    
5090     ## Clear back to table body context
5091 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5092     & TABLE_ROWS_SCOPING_EL)) {
5093 wakaba 1.79 !!!cp ('t257');
5094 wakaba 1.83 ## ISSUE: Can this case be reached?
5095 wakaba 1.52 pop @{$self->{open_elements}};
5096     }
5097    
5098     pop @{$self->{open_elements}};
5099 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5100 wakaba 1.125 !!!nack ('t257.1');
5101 wakaba 1.52 !!!next-token;
5102     redo B;
5103     } elsif ({
5104     body => 1, caption => 1, col => 1, colgroup => 1,
5105     html => 1, td => 1, th => 1,
5106 wakaba 1.54 tr => 1, # $self->{insertion_mode} == IN_ROW_IM
5107     tbody => 1, tfoot => 1, thead => 1, # $self->{insertion_mode} == IN_TABLE_IM
5108 wakaba 1.52 }->{$token->{tag_name}}) {
5109 wakaba 1.125 !!!cp ('t258');
5110     !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5111     ## Ignore the token
5112     !!!nack ('t258.1');
5113     !!!next-token;
5114     redo B;
5115 wakaba 1.58 } else {
5116 wakaba 1.79 !!!cp ('t259');
5117 wakaba 1.113 !!!parse-error (type => 'in table:/'.$token->{tag_name}, token => $token);
5118 wakaba 1.52
5119 wakaba 1.58 $insert = $insert_to_foster;
5120     #
5121     }
5122 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
5123 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & HTML_EL and
5124 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
5125 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
5126 wakaba 1.104 !!!cp ('t259.1');
5127 wakaba 1.105 #
5128 wakaba 1.104 } else {
5129     !!!cp ('t259.2');
5130 wakaba 1.105 #
5131 wakaba 1.104 }
5132    
5133     ## Stop parsing
5134     last B;
5135 wakaba 1.58 } else {
5136     die "$0: $token->{type}: Unknown token type";
5137     }
5138 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_COLUMN_GROUP_IM) {
5139 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
5140 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5141     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5142     unless (length $token->{data}) {
5143 wakaba 1.79 !!!cp ('t260');
5144 wakaba 1.52 !!!next-token;
5145     redo B;
5146     }
5147     }
5148    
5149 wakaba 1.79 !!!cp ('t261');
5150 wakaba 1.52 #
5151 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
5152 wakaba 1.52 if ($token->{tag_name} eq 'col') {
5153 wakaba 1.79 !!!cp ('t262');
5154 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5155 wakaba 1.52 pop @{$self->{open_elements}};
5156 wakaba 1.125 !!!ack ('t262.1');
5157 wakaba 1.52 !!!next-token;
5158     redo B;
5159     } else {
5160 wakaba 1.79 !!!cp ('t263');
5161 wakaba 1.52 #
5162     }
5163 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
5164 wakaba 1.52 if ($token->{tag_name} eq 'colgroup') {
5165 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & HTML_EL) {
5166 wakaba 1.79 !!!cp ('t264');
5167 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:colgroup', token => $token);
5168 wakaba 1.52 ## Ignore the token
5169     !!!next-token;
5170     redo B;
5171     } else {
5172 wakaba 1.79 !!!cp ('t265');
5173 wakaba 1.52 pop @{$self->{open_elements}}; # colgroup
5174 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5175 wakaba 1.52 !!!next-token;
5176     redo B;
5177     }
5178     } elsif ($token->{tag_name} eq 'col') {
5179 wakaba 1.79 !!!cp ('t266');
5180 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:col', token => $token);
5181 wakaba 1.52 ## Ignore the token
5182     !!!next-token;
5183     redo B;
5184     } else {
5185 wakaba 1.79 !!!cp ('t267');
5186 wakaba 1.52 #
5187     }
5188 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
5189 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & HTML_EL and
5190 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
5191     !!!cp ('t270.2');
5192     ## Stop parsing.
5193     last B;
5194     } else {
5195     ## NOTE: As if </colgroup>.
5196     !!!cp ('t270.1');
5197     pop @{$self->{open_elements}}; # colgroup
5198     $self->{insertion_mode} = IN_TABLE_IM;
5199     ## Reprocess.
5200     redo B;
5201     }
5202     } else {
5203     die "$0: $token->{type}: Unknown token type";
5204     }
5205 wakaba 1.52
5206     ## As if </colgroup>
5207 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & HTML_EL) {
5208 wakaba 1.79 !!!cp ('t269');
5209 wakaba 1.104 ## TODO: Wrong error type?
5210 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:colgroup', token => $token);
5211 wakaba 1.52 ## Ignore the token
5212 wakaba 1.125 !!!nack ('t269.1');
5213 wakaba 1.52 !!!next-token;
5214     redo B;
5215     } else {
5216 wakaba 1.79 !!!cp ('t270');
5217 wakaba 1.52 pop @{$self->{open_elements}}; # colgroup
5218 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5219 wakaba 1.125 !!!ack-later;
5220 wakaba 1.52 ## reprocess
5221     redo B;
5222     }
5223 wakaba 1.101 } elsif ($self->{insertion_mode} & SELECT_IMS) {
5224 wakaba 1.58 if ($token->{type} == CHARACTER_TOKEN) {
5225 wakaba 1.79 !!!cp ('t271');
5226 wakaba 1.58 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
5227     !!!next-token;
5228     redo B;
5229     } elsif ($token->{type} == START_TAG_TOKEN) {
5230 wakaba 1.123 if ($token->{tag_name} eq 'option') {
5231     if ($self->{open_elements}->[-1]->[1] & OPTION_EL) {
5232     !!!cp ('t272');
5233     ## As if </option>
5234     pop @{$self->{open_elements}};
5235     } else {
5236     !!!cp ('t273');
5237     }
5238 wakaba 1.52
5239 wakaba 1.123 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5240 wakaba 1.125 !!!nack ('t273.1');
5241 wakaba 1.123 !!!next-token;
5242     redo B;
5243     } elsif ($token->{tag_name} eq 'optgroup') {
5244     if ($self->{open_elements}->[-1]->[1] & OPTION_EL) {
5245     !!!cp ('t274');
5246     ## As if </option>
5247     pop @{$self->{open_elements}};
5248     } else {
5249     !!!cp ('t275');
5250     }
5251 wakaba 1.52
5252 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & OPTGROUP_EL) {
5253     !!!cp ('t276');
5254     ## As if </optgroup>
5255     pop @{$self->{open_elements}};
5256     } else {
5257     !!!cp ('t277');
5258     }
5259 wakaba 1.52
5260 wakaba 1.123 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5261 wakaba 1.125 !!!nack ('t277.1');
5262 wakaba 1.123 !!!next-token;
5263     redo B;
5264 wakaba 1.101 } elsif ($token->{tag_name} eq 'select' or
5265     $token->{tag_name} eq 'input' or
5266     ($self->{insertion_mode} == IN_SELECT_IN_TABLE_IM and
5267     {
5268     caption => 1, table => 1,
5269     tbody => 1, tfoot => 1, thead => 1,
5270     tr => 1, td => 1, th => 1,
5271     }->{$token->{tag_name}})) {
5272     ## TODO: The type below is not good - <select> is replaced by </select>
5273 wakaba 1.113 !!!parse-error (type => 'not closed:select', token => $token);
5274 wakaba 1.101 ## NOTE: As if the token were </select> (<select> case) or
5275     ## as if there were </select> (otherwise).
5276 wakaba 1.123 ## have an element in table scope
5277     my $i;
5278     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5279     my $node = $self->{open_elements}->[$_];
5280     if ($node->[1] & SELECT_EL) {
5281     !!!cp ('t278');
5282     $i = $_;
5283     last INSCOPE;
5284     } elsif ($node->[1] & TABLE_SCOPING_EL) {
5285     !!!cp ('t279');
5286     last INSCOPE;
5287     }
5288     } # INSCOPE
5289     unless (defined $i) {
5290     !!!cp ('t280');
5291     !!!parse-error (type => 'unmatched end tag:select', token => $token);
5292     ## Ignore the token
5293 wakaba 1.125 !!!nack ('t280.1');
5294 wakaba 1.123 !!!next-token;
5295     redo B;
5296     }
5297 wakaba 1.52
5298 wakaba 1.123 !!!cp ('t281');
5299     splice @{$self->{open_elements}}, $i;
5300 wakaba 1.52
5301 wakaba 1.123 $self->_reset_insertion_mode;
5302 wakaba 1.47
5303 wakaba 1.101 if ($token->{tag_name} eq 'select') {
5304 wakaba 1.125 !!!nack ('t281.2');
5305 wakaba 1.101 !!!next-token;
5306     redo B;
5307     } else {
5308     !!!cp ('t281.1');
5309 wakaba 1.125 !!!ack-later;
5310 wakaba 1.101 ## Reprocess the token.
5311     redo B;
5312     }
5313 wakaba 1.58 } else {
5314 wakaba 1.79 !!!cp ('t282');
5315 wakaba 1.113 !!!parse-error (type => 'in select:'.$token->{tag_name}, token => $token);
5316 wakaba 1.58 ## Ignore the token
5317 wakaba 1.125 !!!nack ('t282.1');
5318 wakaba 1.58 !!!next-token;
5319     redo B;
5320     }
5321     } elsif ($token->{type} == END_TAG_TOKEN) {
5322 wakaba 1.123 if ($token->{tag_name} eq 'optgroup') {
5323     if ($self->{open_elements}->[-1]->[1] & OPTION_EL and
5324     $self->{open_elements}->[-2]->[1] & OPTGROUP_EL) {
5325     !!!cp ('t283');
5326     ## As if </option>
5327     splice @{$self->{open_elements}}, -2;
5328     } elsif ($self->{open_elements}->[-1]->[1] & OPTGROUP_EL) {
5329     !!!cp ('t284');
5330     pop @{$self->{open_elements}};
5331     } else {
5332     !!!cp ('t285');
5333     !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5334     ## Ignore the token
5335     }
5336 wakaba 1.125 !!!nack ('t285.1');
5337 wakaba 1.123 !!!next-token;
5338     redo B;
5339     } elsif ($token->{tag_name} eq 'option') {
5340     if ($self->{open_elements}->[-1]->[1] & OPTION_EL) {
5341     !!!cp ('t286');
5342     pop @{$self->{open_elements}};
5343     } else {
5344     !!!cp ('t287');
5345     !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5346     ## Ignore the token
5347     }
5348 wakaba 1.125 !!!nack ('t287.1');
5349 wakaba 1.123 !!!next-token;
5350     redo B;
5351     } elsif ($token->{tag_name} eq 'select') {
5352     ## have an element in table scope
5353     my $i;
5354     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5355     my $node = $self->{open_elements}->[$_];
5356     if ($node->[1] & SELECT_EL) {
5357     !!!cp ('t288');
5358     $i = $_;
5359     last INSCOPE;
5360     } elsif ($node->[1] & TABLE_SCOPING_EL) {
5361     !!!cp ('t289');
5362     last INSCOPE;
5363     }
5364     } # INSCOPE
5365     unless (defined $i) {
5366     !!!cp ('t290');
5367     !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5368     ## Ignore the token
5369 wakaba 1.125 !!!nack ('t290.1');
5370 wakaba 1.123 !!!next-token;
5371     redo B;
5372     }
5373 wakaba 1.52
5374 wakaba 1.123 !!!cp ('t291');
5375     splice @{$self->{open_elements}}, $i;
5376 wakaba 1.52
5377 wakaba 1.123 $self->_reset_insertion_mode;
5378 wakaba 1.52
5379 wakaba 1.125 !!!nack ('t291.1');
5380 wakaba 1.123 !!!next-token;
5381     redo B;
5382 wakaba 1.101 } elsif ($self->{insertion_mode} == IN_SELECT_IN_TABLE_IM and
5383     {
5384     caption => 1, table => 1, tbody => 1,
5385     tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
5386     }->{$token->{tag_name}}) {
5387 wakaba 1.83 ## TODO: The following is wrong?
5388 wakaba 1.123 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5389 wakaba 1.52
5390 wakaba 1.123 ## have an element in table scope
5391     my $i;
5392     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5393     my $node = $self->{open_elements}->[$_];
5394     if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
5395     !!!cp ('t292');
5396     $i = $_;
5397     last INSCOPE;
5398     } elsif ($node->[1] & TABLE_SCOPING_EL) {
5399     !!!cp ('t293');
5400     last INSCOPE;
5401     }
5402     } # INSCOPE
5403     unless (defined $i) {
5404     !!!cp ('t294');
5405     ## Ignore the token
5406 wakaba 1.125 !!!nack ('t294.1');
5407 wakaba 1.123 !!!next-token;
5408     redo B;
5409     }
5410 wakaba 1.52
5411 wakaba 1.123 ## As if </select>
5412     ## have an element in table scope
5413     undef $i;
5414     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5415     my $node = $self->{open_elements}->[$_];
5416     if ($node->[1] & SELECT_EL) {
5417     !!!cp ('t295');
5418     $i = $_;
5419     last INSCOPE;
5420     } elsif ($node->[1] & TABLE_SCOPING_EL) {
5421 wakaba 1.83 ## ISSUE: Can this state be reached?
5422 wakaba 1.123 !!!cp ('t296');
5423     last INSCOPE;
5424     }
5425     } # INSCOPE
5426     unless (defined $i) {
5427     !!!cp ('t297');
5428 wakaba 1.83 ## TODO: The following error type is correct?
5429 wakaba 1.123 !!!parse-error (type => 'unmatched end tag:select', token => $token);
5430     ## Ignore the </select> token
5431 wakaba 1.125 !!!nack ('t297.1');
5432 wakaba 1.123 !!!next-token; ## TODO: ok?
5433     redo B;
5434     }
5435 wakaba 1.52
5436 wakaba 1.123 !!!cp ('t298');
5437     splice @{$self->{open_elements}}, $i;
5438 wakaba 1.52
5439 wakaba 1.123 $self->_reset_insertion_mode;
5440 wakaba 1.52
5441 wakaba 1.125 !!!ack-later;
5442 wakaba 1.123 ## reprocess
5443     redo B;
5444 wakaba 1.58 } else {
5445 wakaba 1.79 !!!cp ('t299');
5446 wakaba 1.113 !!!parse-error (type => 'in select:/'.$token->{tag_name}, token => $token);
5447 wakaba 1.52 ## Ignore the token
5448 wakaba 1.125 !!!nack ('t299.3');
5449 wakaba 1.52 !!!next-token;
5450     redo B;
5451 wakaba 1.58 }
5452 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
5453 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & HTML_EL and
5454 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
5455     !!!cp ('t299.1');
5456 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
5457 wakaba 1.104 } else {
5458     !!!cp ('t299.2');
5459     }
5460    
5461     ## Stop parsing.
5462     last B;
5463 wakaba 1.58 } else {
5464     die "$0: $token->{type}: Unknown token type";
5465     }
5466 wakaba 1.56 } elsif ($self->{insertion_mode} & BODY_AFTER_IMS) {
5467 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
5468 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5469     my $data = $1;
5470     ## As if in body
5471     $reconstruct_active_formatting_elements->($insert_to_current);
5472    
5473     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5474    
5475     unless (length $token->{data}) {
5476 wakaba 1.79 !!!cp ('t300');
5477 wakaba 1.52 !!!next-token;
5478     redo B;
5479     }
5480     }
5481    
5482 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
5483 wakaba 1.79 !!!cp ('t301');
5484 wakaba 1.113 !!!parse-error (type => 'after html:#character', token => $token);
5485 wakaba 1.52
5486 wakaba 1.84 ## Reprocess in the "after body" insertion mode.
5487 wakaba 1.79 } else {
5488     !!!cp ('t302');
5489 wakaba 1.52 }
5490    
5491     ## "after body" insertion mode
5492 wakaba 1.113 !!!parse-error (type => 'after body:#character', token => $token);
5493 wakaba 1.52
5494 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
5495 wakaba 1.52 ## reprocess
5496     redo B;
5497 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
5498 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
5499 wakaba 1.79 !!!cp ('t303');
5500 wakaba 1.113 !!!parse-error (type => 'after html:'.$token->{tag_name}, token => $token);
5501 wakaba 1.52
5502 wakaba 1.84 ## Reprocess in the "after body" insertion mode.
5503 wakaba 1.79 } else {
5504     !!!cp ('t304');
5505 wakaba 1.52 }
5506    
5507     ## "after body" insertion mode
5508 wakaba 1.113 !!!parse-error (type => 'after body:'.$token->{tag_name}, token => $token);
5509 wakaba 1.52
5510 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
5511 wakaba 1.125 !!!ack-later;
5512 wakaba 1.52 ## reprocess
5513     redo B;
5514 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
5515 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
5516 wakaba 1.79 !!!cp ('t305');
5517 wakaba 1.113 !!!parse-error (type => 'after html:/'.$token->{tag_name}, token => $token);
5518 wakaba 1.52
5519 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
5520 wakaba 1.84 ## Reprocess in the "after body" insertion mode.
5521 wakaba 1.79 } else {
5522     !!!cp ('t306');
5523 wakaba 1.52 }
5524    
5525     ## "after body" insertion mode
5526     if ($token->{tag_name} eq 'html') {
5527     if (defined $self->{inner_html_node}) {
5528 wakaba 1.79 !!!cp ('t307');
5529 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:html', token => $token);
5530 wakaba 1.52 ## Ignore the token
5531     !!!next-token;
5532     redo B;
5533     } else {
5534 wakaba 1.79 !!!cp ('t308');
5535 wakaba 1.54 $self->{insertion_mode} = AFTER_HTML_BODY_IM;
5536 wakaba 1.52 !!!next-token;
5537     redo B;
5538     }
5539     } else {
5540 wakaba 1.79 !!!cp ('t309');
5541 wakaba 1.113 !!!parse-error (type => 'after body:/'.$token->{tag_name}, token => $token);
5542 wakaba 1.52
5543 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
5544 wakaba 1.52 ## reprocess
5545     redo B;
5546     }
5547 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
5548     !!!cp ('t309.2');
5549     ## Stop parsing
5550     last B;
5551 wakaba 1.52 } else {
5552     die "$0: $token->{type}: Unknown token type";
5553     }
5554 wakaba 1.56 } elsif ($self->{insertion_mode} & FRAME_IMS) {
5555 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
5556 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5557     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5558    
5559     unless (length $token->{data}) {
5560 wakaba 1.79 !!!cp ('t310');
5561 wakaba 1.52 !!!next-token;
5562     redo B;
5563     }
5564     }
5565    
5566     if ($token->{data} =~ s/^[^\x09\x0A\x0B\x0C\x20]+//) {
5567 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
5568 wakaba 1.79 !!!cp ('t311');
5569 wakaba 1.113 !!!parse-error (type => 'in frameset:#character', token => $token);
5570 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
5571 wakaba 1.79 !!!cp ('t312');
5572 wakaba 1.113 !!!parse-error (type => 'after frameset:#character', token => $token);
5573 wakaba 1.52 } else { # "after html frameset"
5574 wakaba 1.79 !!!cp ('t313');
5575 wakaba 1.113 !!!parse-error (type => 'after html:#character', token => $token);
5576 wakaba 1.52
5577 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
5578 wakaba 1.84 ## Reprocess in the "after frameset" insertion mode.
5579 wakaba 1.113 !!!parse-error (type => 'after frameset:#character', token => $token);
5580 wakaba 1.52 }
5581    
5582     ## Ignore the token.
5583     if (length $token->{data}) {
5584 wakaba 1.79 !!!cp ('t314');
5585 wakaba 1.52 ## reprocess the rest of characters
5586     } else {
5587 wakaba 1.79 !!!cp ('t315');
5588 wakaba 1.52 !!!next-token;
5589     }
5590     redo B;
5591     }
5592    
5593     die qq[$0: Character "$token->{data}"];
5594 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
5595 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
5596 wakaba 1.79 !!!cp ('t316');
5597 wakaba 1.113 !!!parse-error (type => 'after html:'.$token->{tag_name}, token => $token);
5598 wakaba 1.1
5599 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
5600 wakaba 1.84 ## Process in the "after frameset" insertion mode.
5601 wakaba 1.79 } else {
5602     !!!cp ('t317');
5603     }
5604 wakaba 1.1
5605 wakaba 1.52 if ($token->{tag_name} eq 'frameset' and
5606 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
5607 wakaba 1.79 !!!cp ('t318');
5608 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5609 wakaba 1.125 !!!nack ('t318.1');
5610 wakaba 1.52 !!!next-token;
5611     redo B;
5612     } elsif ($token->{tag_name} eq 'frame' and
5613 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
5614 wakaba 1.79 !!!cp ('t319');
5615 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5616 wakaba 1.52 pop @{$self->{open_elements}};
5617 wakaba 1.125 !!!ack ('t319.1');
5618 wakaba 1.52 !!!next-token;
5619     redo B;
5620     } elsif ($token->{tag_name} eq 'noframes') {
5621 wakaba 1.79 !!!cp ('t320');
5622 wakaba 1.52 ## NOTE: As if in body.
5623 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
5624 wakaba 1.52 redo B;
5625     } else {
5626 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
5627 wakaba 1.79 !!!cp ('t321');
5628 wakaba 1.113 !!!parse-error (type => 'in frameset:'.$token->{tag_name}, token => $token);
5629 wakaba 1.52 } else {
5630 wakaba 1.79 !!!cp ('t322');
5631 wakaba 1.113 !!!parse-error (type => 'after frameset:'.$token->{tag_name}, token => $token);
5632 wakaba 1.52 }
5633     ## Ignore the token
5634 wakaba 1.125 !!!nack ('t322.1');
5635 wakaba 1.52 !!!next-token;
5636     redo B;
5637     }
5638 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
5639 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
5640 wakaba 1.79 !!!cp ('t323');
5641 wakaba 1.113 !!!parse-error (type => 'after html:/'.$token->{tag_name}, token => $token);
5642 wakaba 1.1
5643 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
5644 wakaba 1.84 ## Process in the "after frameset" insertion mode.
5645 wakaba 1.79 } else {
5646     !!!cp ('t324');
5647 wakaba 1.52 }
5648 wakaba 1.1
5649 wakaba 1.52 if ($token->{tag_name} eq 'frameset' and
5650 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
5651 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & HTML_EL and
5652 wakaba 1.52 @{$self->{open_elements}} == 1) {
5653 wakaba 1.79 !!!cp ('t325');
5654 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5655 wakaba 1.52 ## Ignore the token
5656     !!!next-token;
5657     } else {
5658 wakaba 1.79 !!!cp ('t326');
5659 wakaba 1.52 pop @{$self->{open_elements}};
5660     !!!next-token;
5661     }
5662 wakaba 1.47
5663 wakaba 1.52 if (not defined $self->{inner_html_node} and
5664 wakaba 1.123 not ($self->{open_elements}->[-1]->[1] & FRAMESET_EL)) {
5665 wakaba 1.79 !!!cp ('t327');
5666 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
5667 wakaba 1.79 } else {
5668     !!!cp ('t328');
5669 wakaba 1.52 }
5670     redo B;
5671     } elsif ($token->{tag_name} eq 'html' and
5672 wakaba 1.54 $self->{insertion_mode} == AFTER_FRAMESET_IM) {
5673 wakaba 1.79 !!!cp ('t329');
5674 wakaba 1.54 $self->{insertion_mode} = AFTER_HTML_FRAMESET_IM;
5675 wakaba 1.52 !!!next-token;
5676     redo B;
5677     } else {
5678 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
5679 wakaba 1.79 !!!cp ('t330');
5680 wakaba 1.113 !!!parse-error (type => 'in frameset:/'.$token->{tag_name}, token => $token);
5681 wakaba 1.52 } else {
5682 wakaba 1.79 !!!cp ('t331');
5683 wakaba 1.113 !!!parse-error (type => 'after frameset:/'.$token->{tag_name}, token => $token);
5684 wakaba 1.52 }
5685     ## Ignore the token
5686     !!!next-token;
5687     redo B;
5688     }
5689 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
5690 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & HTML_EL and
5691 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
5692     !!!cp ('t331.1');
5693 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
5694 wakaba 1.104 } else {
5695     !!!cp ('t331.2');
5696     }
5697    
5698     ## Stop parsing
5699     last B;
5700 wakaba 1.52 } else {
5701     die "$0: $token->{type}: Unknown token type";
5702     }
5703 wakaba 1.47
5704 wakaba 1.52 ## ISSUE: An issue in spec here
5705     } else {
5706     die "$0: $self->{insertion_mode}: Unknown insertion mode";
5707     }
5708 wakaba 1.47
5709 wakaba 1.52 ## "in body" insertion mode
5710 wakaba 1.55 if ($token->{type} == START_TAG_TOKEN) {
5711 wakaba 1.52 if ($token->{tag_name} eq 'script') {
5712 wakaba 1.79 !!!cp ('t332');
5713 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
5714 wakaba 1.100 $script_start_tag->();
5715 wakaba 1.53 redo B;
5716 wakaba 1.52 } elsif ($token->{tag_name} eq 'style') {
5717 wakaba 1.79 !!!cp ('t333');
5718 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
5719 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
5720 wakaba 1.53 redo B;
5721 wakaba 1.52 } elsif ({
5722     base => 1, link => 1,
5723     }->{$token->{tag_name}}) {
5724 wakaba 1.79 !!!cp ('t334');
5725 wakaba 1.52 ## NOTE: This is an "as if in head" code clone, only "-t" differs
5726 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
5727 wakaba 1.52 pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
5728 wakaba 1.125 !!!ack ('t334.1');
5729 wakaba 1.52 !!!next-token;
5730 wakaba 1.53 redo B;
5731 wakaba 1.52 } elsif ($token->{tag_name} eq 'meta') {
5732     ## NOTE: This is an "as if in head" code clone, only "-t" differs
5733 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
5734 wakaba 1.66 my $meta_el = pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
5735 wakaba 1.46
5736 wakaba 1.52 unless ($self->{confident}) {
5737     if ($token->{attributes}->{charset}) { ## TODO: And if supported
5738 wakaba 1.79 !!!cp ('t335');
5739 wakaba 1.63 $self->{change_encoding}
5740 wakaba 1.114 ->($self, $token->{attributes}->{charset}->{value}, $token);
5741 wakaba 1.66
5742     $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
5743     ->set_user_data (manakai_has_reference =>
5744     $token->{attributes}->{charset}
5745     ->{has_reference});
5746 wakaba 1.63 } elsif ($token->{attributes}->{content}) {
5747 wakaba 1.52 ## ISSUE: The algorithm name in the spec was incorrect, so it was not linked to its definition.
5748 wakaba 1.63 if ($token->{attributes}->{content}->{value}
5749 wakaba 1.70 =~ /\A[^;]*;[\x09-\x0D\x20]*[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
5750     [\x09-\x0D\x20]*=
5751 wakaba 1.52 [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
5752     ([^"'\x09-\x0D\x20][^\x09-\x0D\x20]*))/x) {
5753 wakaba 1.79 !!!cp ('t336');
5754 wakaba 1.63 $self->{change_encoding}
5755 wakaba 1.114 ->($self, defined $1 ? $1 : defined $2 ? $2 : $3, $token);
5756 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
5757     ->set_user_data (manakai_has_reference =>
5758     $token->{attributes}->{content}
5759     ->{has_reference});
5760 wakaba 1.63 }
5761 wakaba 1.52 }
5762 wakaba 1.66 } else {
5763     if ($token->{attributes}->{charset}) {
5764 wakaba 1.79 !!!cp ('t337');
5765 wakaba 1.66 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
5766     ->set_user_data (manakai_has_reference =>
5767     $token->{attributes}->{charset}
5768     ->{has_reference});
5769     }
5770 wakaba 1.68 if ($token->{attributes}->{content}) {
5771 wakaba 1.79 !!!cp ('t338');
5772 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
5773     ->set_user_data (manakai_has_reference =>
5774     $token->{attributes}->{content}
5775     ->{has_reference});
5776     }
5777 wakaba 1.52 }
5778 wakaba 1.1
5779 wakaba 1.125 !!!ack ('t338.1');
5780 wakaba 1.52 !!!next-token;
5781 wakaba 1.53 redo B;
5782 wakaba 1.52 } elsif ($token->{tag_name} eq 'title') {
5783 wakaba 1.79 !!!cp ('t341');
5784 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
5785 wakaba 1.96 $parse_rcdata->(RCDATA_CONTENT_MODEL);
5786 wakaba 1.53 redo B;
5787 wakaba 1.52 } elsif ($token->{tag_name} eq 'body') {
5788 wakaba 1.113 !!!parse-error (type => 'in body:body', token => $token);
5789 wakaba 1.46
5790 wakaba 1.52 if (@{$self->{open_elements}} == 1 or
5791 wakaba 1.123 not ($self->{open_elements}->[1]->[1] & BODY_EL)) {
5792 wakaba 1.79 !!!cp ('t342');
5793 wakaba 1.52 ## Ignore the token
5794     } else {
5795     my $body_el = $self->{open_elements}->[1]->[0];
5796     for my $attr_name (keys %{$token->{attributes}}) {
5797     unless ($body_el->has_attribute_ns (undef, $attr_name)) {
5798 wakaba 1.79 !!!cp ('t343');
5799 wakaba 1.52 $body_el->set_attribute_ns
5800     (undef, [undef, $attr_name],
5801     $token->{attributes}->{$attr_name}->{value});
5802     }
5803     }
5804     }
5805 wakaba 1.125 !!!nack ('t343.1');
5806 wakaba 1.52 !!!next-token;
5807 wakaba 1.53 redo B;
5808 wakaba 1.52 } elsif ({
5809     address => 1, blockquote => 1, center => 1, dir => 1,
5810 wakaba 1.85 div => 1, dl => 1, fieldset => 1,
5811     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
5812 wakaba 1.97 menu => 1, ol => 1, p => 1, ul => 1,
5813     pre => 1, listing => 1,
5814 wakaba 1.109 form => 1,
5815     table => 1,
5816     hr => 1,
5817 wakaba 1.52 }->{$token->{tag_name}}) {
5818 wakaba 1.109 if ($token->{tag_name} eq 'form' and defined $self->{form_element}) {
5819     !!!cp ('t350');
5820 wakaba 1.113 !!!parse-error (type => 'in form:form', token => $token);
5821 wakaba 1.109 ## Ignore the token
5822 wakaba 1.125 !!!nack ('t350.1');
5823 wakaba 1.109 !!!next-token;
5824     redo B;
5825     }
5826    
5827 wakaba 1.52 ## has a p element in scope
5828     INSCOPE: for (reverse @{$self->{open_elements}}) {
5829 wakaba 1.123 if ($_->[1] & P_EL) {
5830 wakaba 1.79 !!!cp ('t344');
5831 wakaba 1.125 !!!back-token; # <form>
5832 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'p',
5833     line => $token->{line}, column => $token->{column}};
5834 wakaba 1.53 redo B;
5835 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
5836 wakaba 1.79 !!!cp ('t345');
5837 wakaba 1.52 last INSCOPE;
5838     }
5839     } # INSCOPE
5840    
5841 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
5842 wakaba 1.97 if ($token->{tag_name} eq 'pre' or $token->{tag_name} eq 'listing') {
5843 wakaba 1.125 !!!nack ('t346.1');
5844 wakaba 1.52 !!!next-token;
5845 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
5846 wakaba 1.52 $token->{data} =~ s/^\x0A//;
5847     unless (length $token->{data}) {
5848 wakaba 1.79 !!!cp ('t346');
5849 wakaba 1.1 !!!next-token;
5850 wakaba 1.79 } else {
5851     !!!cp ('t349');
5852 wakaba 1.52 }
5853 wakaba 1.79 } else {
5854     !!!cp ('t348');
5855 wakaba 1.52 }
5856 wakaba 1.109 } elsif ($token->{tag_name} eq 'form') {
5857     !!!cp ('t347.1');
5858     $self->{form_element} = $self->{open_elements}->[-1]->[0];
5859    
5860 wakaba 1.125 !!!nack ('t347.2');
5861 wakaba 1.109 !!!next-token;
5862     } elsif ($token->{tag_name} eq 'table') {
5863     !!!cp ('t382');
5864     push @{$open_tables}, [$self->{open_elements}->[-1]->[0]];
5865    
5866     $self->{insertion_mode} = IN_TABLE_IM;
5867    
5868 wakaba 1.125 !!!nack ('t382.1');
5869 wakaba 1.109 !!!next-token;
5870     } elsif ($token->{tag_name} eq 'hr') {
5871     !!!cp ('t386');
5872     pop @{$self->{open_elements}};
5873    
5874 wakaba 1.125 !!!nack ('t386.1');
5875 wakaba 1.109 !!!next-token;
5876 wakaba 1.52 } else {
5877 wakaba 1.125 !!!nack ('t347.1');
5878 wakaba 1.52 !!!next-token;
5879     }
5880 wakaba 1.53 redo B;
5881 wakaba 1.109 } elsif ({li => 1, dt => 1, dd => 1}->{$token->{tag_name}}) {
5882 wakaba 1.52 ## has a p element in scope
5883     INSCOPE: for (reverse @{$self->{open_elements}}) {
5884 wakaba 1.123 if ($_->[1] & P_EL) {
5885 wakaba 1.79 !!!cp ('t353');
5886 wakaba 1.125 !!!back-token; # <x>
5887 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'p',
5888     line => $token->{line}, column => $token->{column}};
5889 wakaba 1.53 redo B;
5890 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
5891 wakaba 1.79 !!!cp ('t354');
5892 wakaba 1.52 last INSCOPE;
5893     }
5894     } # INSCOPE
5895    
5896     ## Step 1
5897     my $i = -1;
5898     my $node = $self->{open_elements}->[$i];
5899 wakaba 1.109 my $li_or_dtdd = {li => {li => 1},
5900     dt => {dt => 1, dd => 1},
5901     dd => {dt => 1, dd => 1}}->{$token->{tag_name}};
5902 wakaba 1.52 LI: {
5903     ## Step 2
5904 wakaba 1.123 if ($li_or_dtdd->{$node->[0]->manakai_local_name}) {
5905 wakaba 1.52 if ($i != -1) {
5906 wakaba 1.79 !!!cp ('t355');
5907 wakaba 1.122 !!!parse-error (type => 'not closed',
5908     value => $self->{open_elements}->[-1]->[0]
5909     ->manakai_local_name,
5910     token => $token);
5911 wakaba 1.79 } else {
5912     !!!cp ('t356');
5913 wakaba 1.52 }
5914     splice @{$self->{open_elements}}, $i;
5915     last LI;
5916 wakaba 1.79 } else {
5917     !!!cp ('t357');
5918 wakaba 1.52 }
5919    
5920     ## Step 3
5921 wakaba 1.123 if (not ($node->[1] & FORMATTING_EL) and
5922 wakaba 1.52 #not $phrasing_category->{$node->[1]} and
5923 wakaba 1.123 ($node->[1] & SPECIAL_EL or
5924     $node->[1] & SCOPING_EL) and
5925     not ($node->[1] & ADDRESS_EL) and
5926     not ($node->[1] & DIV_EL)) {
5927 wakaba 1.79 !!!cp ('t358');
5928 wakaba 1.52 last LI;
5929     }
5930    
5931 wakaba 1.79 !!!cp ('t359');
5932 wakaba 1.52 ## Step 4
5933     $i--;
5934     $node = $self->{open_elements}->[$i];
5935     redo LI;
5936     } # LI
5937    
5938 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
5939 wakaba 1.125 !!!nack ('t359.1');
5940 wakaba 1.52 !!!next-token;
5941 wakaba 1.53 redo B;
5942 wakaba 1.52 } elsif ($token->{tag_name} eq 'plaintext') {
5943     ## has a p element in scope
5944     INSCOPE: for (reverse @{$self->{open_elements}}) {
5945 wakaba 1.123 if ($_->[1] & P_EL) {
5946 wakaba 1.79 !!!cp ('t367');
5947 wakaba 1.125 !!!back-token; # <plaintext>
5948 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'p',
5949     line => $token->{line}, column => $token->{column}};
5950 wakaba 1.53 redo B;
5951 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
5952 wakaba 1.79 !!!cp ('t368');
5953 wakaba 1.52 last INSCOPE;
5954 wakaba 1.46 }
5955 wakaba 1.52 } # INSCOPE
5956    
5957 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
5958 wakaba 1.52
5959     $self->{content_model} = PLAINTEXT_CONTENT_MODEL;
5960    
5961 wakaba 1.125 !!!nack ('t368.1');
5962 wakaba 1.52 !!!next-token;
5963 wakaba 1.53 redo B;
5964 wakaba 1.52 } elsif ($token->{tag_name} eq 'a') {
5965     AFE: for my $i (reverse 0..$#$active_formatting_elements) {
5966     my $node = $active_formatting_elements->[$i];
5967 wakaba 1.123 if ($node->[1] & A_EL) {
5968 wakaba 1.79 !!!cp ('t371');
5969 wakaba 1.113 !!!parse-error (type => 'in a:a', token => $token);
5970 wakaba 1.52
5971 wakaba 1.125 !!!back-token; # <a>
5972 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'a',
5973     line => $token->{line}, column => $token->{column}};
5974 wakaba 1.113 $formatting_end_tag->($token);
5975 wakaba 1.52
5976     AFE2: for (reverse 0..$#$active_formatting_elements) {
5977     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
5978 wakaba 1.79 !!!cp ('t372');
5979 wakaba 1.52 splice @$active_formatting_elements, $_, 1;
5980     last AFE2;
5981 wakaba 1.1 }
5982 wakaba 1.52 } # AFE2
5983     OE: for (reverse 0..$#{$self->{open_elements}}) {
5984     if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
5985 wakaba 1.79 !!!cp ('t373');
5986 wakaba 1.52 splice @{$self->{open_elements}}, $_, 1;
5987     last OE;
5988 wakaba 1.1 }
5989 wakaba 1.52 } # OE
5990     last AFE;
5991     } elsif ($node->[0] eq '#marker') {
5992 wakaba 1.79 !!!cp ('t374');
5993 wakaba 1.52 last AFE;
5994     }
5995     } # AFE
5996    
5997     $reconstruct_active_formatting_elements->($insert_to_current);
5998 wakaba 1.1
5999 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6000 wakaba 1.52 push @$active_formatting_elements, $self->{open_elements}->[-1];
6001 wakaba 1.1
6002 wakaba 1.125 !!!nack ('t374.1');
6003 wakaba 1.52 !!!next-token;
6004 wakaba 1.53 redo B;
6005 wakaba 1.52 } elsif ($token->{tag_name} eq 'nobr') {
6006     $reconstruct_active_formatting_elements->($insert_to_current);
6007 wakaba 1.1
6008 wakaba 1.52 ## has a |nobr| element in scope
6009     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6010     my $node = $self->{open_elements}->[$_];
6011 wakaba 1.123 if ($node->[1] & NOBR_EL) {
6012 wakaba 1.79 !!!cp ('t376');
6013 wakaba 1.113 !!!parse-error (type => 'in nobr:nobr', token => $token);
6014 wakaba 1.125 !!!back-token; # <nobr>
6015 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'nobr',
6016     line => $token->{line}, column => $token->{column}};
6017 wakaba 1.53 redo B;
6018 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
6019 wakaba 1.79 !!!cp ('t377');
6020 wakaba 1.52 last INSCOPE;
6021     }
6022     } # INSCOPE
6023    
6024 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6025 wakaba 1.52 push @$active_formatting_elements, $self->{open_elements}->[-1];
6026    
6027 wakaba 1.125 !!!nack ('t377.1');
6028 wakaba 1.52 !!!next-token;
6029 wakaba 1.53 redo B;
6030 wakaba 1.52 } elsif ($token->{tag_name} eq 'button') {
6031     ## has a button element in scope
6032     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6033     my $node = $self->{open_elements}->[$_];
6034 wakaba 1.123 if ($node->[1] & BUTTON_EL) {
6035 wakaba 1.79 !!!cp ('t378');
6036 wakaba 1.113 !!!parse-error (type => 'in button:button', token => $token);
6037 wakaba 1.125 !!!back-token; # <button>
6038 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'button',
6039     line => $token->{line}, column => $token->{column}};
6040 wakaba 1.53 redo B;
6041 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
6042 wakaba 1.79 !!!cp ('t379');
6043 wakaba 1.52 last INSCOPE;
6044     }
6045     } # INSCOPE
6046    
6047     $reconstruct_active_formatting_elements->($insert_to_current);
6048    
6049 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6050 wakaba 1.85
6051     ## TODO: associate with $self->{form_element} if defined
6052    
6053 wakaba 1.52 push @$active_formatting_elements, ['#marker', ''];
6054 wakaba 1.1
6055 wakaba 1.125 !!!nack ('t379.1');
6056 wakaba 1.52 !!!next-token;
6057 wakaba 1.53 redo B;
6058 wakaba 1.103 } elsif ({
6059 wakaba 1.109 xmp => 1,
6060     iframe => 1,
6061     noembed => 1,
6062     noframes => 1,
6063     noscript => 0, ## TODO: 1 if scripting is enabled
6064 wakaba 1.103 }->{$token->{tag_name}}) {
6065 wakaba 1.109 if ($token->{tag_name} eq 'xmp') {
6066     !!!cp ('t381');
6067     $reconstruct_active_formatting_elements->($insert_to_current);
6068     } else {
6069     !!!cp ('t399');
6070     }
6071     ## NOTE: There is an "as if in body" code clone.
6072 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
6073 wakaba 1.53 redo B;
6074 wakaba 1.52 } elsif ($token->{tag_name} eq 'isindex') {
6075 wakaba 1.113 !!!parse-error (type => 'isindex', token => $token);
6076 wakaba 1.52
6077     if (defined $self->{form_element}) {
6078 wakaba 1.79 !!!cp ('t389');
6079 wakaba 1.52 ## Ignore the token
6080 wakaba 1.125 !!!nack ('t389'); ## NOTE: Not acknowledged.
6081 wakaba 1.52 !!!next-token;
6082 wakaba 1.53 redo B;
6083 wakaba 1.52 } else {
6084     my $at = $token->{attributes};
6085     my $form_attrs;
6086     $form_attrs->{action} = $at->{action} if $at->{action};
6087     my $prompt_attr = $at->{prompt};
6088     $at->{name} = {name => 'name', value => 'isindex'};
6089     delete $at->{action};
6090     delete $at->{prompt};
6091     my @tokens = (
6092 wakaba 1.55 {type => START_TAG_TOKEN, tag_name => 'form',
6093 wakaba 1.114 attributes => $form_attrs,
6094     line => $token->{line}, column => $token->{column}},
6095     {type => START_TAG_TOKEN, tag_name => 'hr',
6096     line => $token->{line}, column => $token->{column}},
6097     {type => START_TAG_TOKEN, tag_name => 'p',
6098     line => $token->{line}, column => $token->{column}},
6099     {type => START_TAG_TOKEN, tag_name => 'label',
6100     line => $token->{line}, column => $token->{column}},
6101 wakaba 1.52 );
6102     if ($prompt_attr) {
6103 wakaba 1.79 !!!cp ('t390');
6104 wakaba 1.114 push @tokens, {type => CHARACTER_TOKEN, data => $prompt_attr->{value},
6105 wakaba 1.118 #line => $token->{line}, column => $token->{column},
6106     };
6107 wakaba 1.1 } else {
6108 wakaba 1.79 !!!cp ('t391');
6109 wakaba 1.55 push @tokens, {type => CHARACTER_TOKEN,
6110 wakaba 1.114 data => 'This is a searchable index. Insert your search keywords here: ',
6111 wakaba 1.118 #line => $token->{line}, column => $token->{column},
6112     }; # SHOULD
6113 wakaba 1.52 ## TODO: make this configurable
6114 wakaba 1.1 }
6115 wakaba 1.52 push @tokens,
6116 wakaba 1.114 {type => START_TAG_TOKEN, tag_name => 'input', attributes => $at,
6117     line => $token->{line}, column => $token->{column}},
6118 wakaba 1.55 #{type => CHARACTER_TOKEN, data => ''}, # SHOULD
6119 wakaba 1.114 {type => END_TAG_TOKEN, tag_name => 'label',
6120     line => $token->{line}, column => $token->{column}},
6121     {type => END_TAG_TOKEN, tag_name => 'p',
6122     line => $token->{line}, column => $token->{column}},
6123     {type => START_TAG_TOKEN, tag_name => 'hr',
6124     line => $token->{line}, column => $token->{column}},
6125     {type => END_TAG_TOKEN, tag_name => 'form',
6126     line => $token->{line}, column => $token->{column}};
6127 wakaba 1.125 !!!nack ('t391.1'); ## NOTE: Not acknowledged.
6128 wakaba 1.52 !!!back-token (@tokens);
6129 wakaba 1.125 !!!next-token;
6130 wakaba 1.53 redo B;
6131 wakaba 1.52 }
6132     } elsif ($token->{tag_name} eq 'textarea') {
6133     my $tag_name = $token->{tag_name};
6134     my $el;
6135 wakaba 1.116 !!!create-element ($el, $token->{tag_name}, $token->{attributes}, $token);
6136 wakaba 1.52
6137     ## TODO: $self->{form_element} if defined
6138     $self->{content_model} = RCDATA_CONTENT_MODEL;
6139     delete $self->{escape}; # MUST
6140    
6141     $insert->($el);
6142    
6143     my $text = '';
6144 wakaba 1.125 !!!nack ('t392.1');
6145 wakaba 1.52 !!!next-token;
6146 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
6147 wakaba 1.52 $token->{data} =~ s/^\x0A//;
6148 wakaba 1.51 unless (length $token->{data}) {
6149 wakaba 1.79 !!!cp ('t392');
6150 wakaba 1.51 !!!next-token;
6151 wakaba 1.79 } else {
6152     !!!cp ('t393');
6153 wakaba 1.51 }
6154 wakaba 1.79 } else {
6155     !!!cp ('t394');
6156 wakaba 1.51 }
6157 wakaba 1.55 while ($token->{type} == CHARACTER_TOKEN) {
6158 wakaba 1.79 !!!cp ('t395');
6159 wakaba 1.52 $text .= $token->{data};
6160     !!!next-token;
6161     }
6162     if (length $text) {
6163 wakaba 1.79 !!!cp ('t396');
6164 wakaba 1.52 $el->manakai_append_text ($text);
6165     }
6166    
6167     $self->{content_model} = PCDATA_CONTENT_MODEL;
6168 wakaba 1.51
6169 wakaba 1.55 if ($token->{type} == END_TAG_TOKEN and
6170 wakaba 1.52 $token->{tag_name} eq $tag_name) {
6171 wakaba 1.79 !!!cp ('t397');
6172 wakaba 1.52 ## Ignore the token
6173     } else {
6174 wakaba 1.79 !!!cp ('t398');
6175 wakaba 1.113 !!!parse-error (type => 'in RCDATA:#'.$token->{type}, token => $token);
6176 wakaba 1.51 }
6177 wakaba 1.52 !!!next-token;
6178 wakaba 1.53 redo B;
6179 wakaba 1.52 } elsif ({
6180     caption => 1, col => 1, colgroup => 1, frame => 1,
6181     frameset => 1, head => 1, option => 1, optgroup => 1,
6182     tbody => 1, td => 1, tfoot => 1, th => 1,
6183     thead => 1, tr => 1,
6184     }->{$token->{tag_name}}) {
6185 wakaba 1.79 !!!cp ('t401');
6186 wakaba 1.113 !!!parse-error (type => 'in body:'.$token->{tag_name}, token => $token);
6187 wakaba 1.52 ## Ignore the token
6188 wakaba 1.125 !!!nack ('t401.1'); ## NOTE: |<col/>| or |<frame/>| here is an error.
6189 wakaba 1.52 !!!next-token;
6190 wakaba 1.53 redo B;
6191 wakaba 1.52
6192     ## ISSUE: An issue on HTML5 new elements in the spec.
6193     } else {
6194 wakaba 1.110 if ($token->{tag_name} eq 'image') {
6195     !!!cp ('t384');
6196 wakaba 1.113 !!!parse-error (type => 'image', token => $token);
6197 wakaba 1.110 $token->{tag_name} = 'img';
6198     } else {
6199     !!!cp ('t385');
6200     }
6201    
6202     ## NOTE: There is an "as if <br>" code clone.
6203 wakaba 1.52 $reconstruct_active_formatting_elements->($insert_to_current);
6204    
6205 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6206 wakaba 1.109
6207 wakaba 1.110 if ({
6208     applet => 1, marquee => 1, object => 1,
6209     }->{$token->{tag_name}}) {
6210     !!!cp ('t380');
6211     push @$active_formatting_elements, ['#marker', ''];
6212 wakaba 1.125 !!!nack ('t380.1');
6213 wakaba 1.110 } elsif ({
6214     b => 1, big => 1, em => 1, font => 1, i => 1,
6215     s => 1, small => 1, strile => 1,
6216     strong => 1, tt => 1, u => 1,
6217     }->{$token->{tag_name}}) {
6218     !!!cp ('t375');
6219     push @$active_formatting_elements, $self->{open_elements}->[-1];
6220 wakaba 1.125 !!!nack ('t375.1');
6221 wakaba 1.110 } elsif ($token->{tag_name} eq 'input') {
6222     !!!cp ('t388');
6223     ## TODO: associate with $self->{form_element} if defined
6224     pop @{$self->{open_elements}};
6225 wakaba 1.125 !!!ack ('t388.2');
6226 wakaba 1.110 } elsif ({
6227     area => 1, basefont => 1, bgsound => 1, br => 1,
6228     embed => 1, img => 1, param => 1, spacer => 1, wbr => 1,
6229     #image => 1,
6230     }->{$token->{tag_name}}) {
6231     !!!cp ('t388.1');
6232     pop @{$self->{open_elements}};
6233 wakaba 1.125 !!!ack ('t388.3');
6234 wakaba 1.110 } elsif ($token->{tag_name} eq 'select') {
6235 wakaba 1.109 ## TODO: associate with $self->{form_element} if defined
6236    
6237     if ($self->{insertion_mode} & TABLE_IMS or
6238     $self->{insertion_mode} & BODY_TABLE_IMS or
6239     $self->{insertion_mode} == IN_COLUMN_GROUP_IM) {
6240     !!!cp ('t400.1');
6241     $self->{insertion_mode} = IN_SELECT_IN_TABLE_IM;
6242     } else {
6243     !!!cp ('t400.2');
6244     $self->{insertion_mode} = IN_SELECT_IM;
6245     }
6246 wakaba 1.125 !!!nack ('t400.3');
6247 wakaba 1.110 } else {
6248 wakaba 1.125 !!!nack ('t402');
6249 wakaba 1.109 }
6250 wakaba 1.51
6251 wakaba 1.52 !!!next-token;
6252 wakaba 1.53 redo B;
6253 wakaba 1.52 }
6254 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
6255 wakaba 1.52 if ($token->{tag_name} eq 'body') {
6256 wakaba 1.107 ## has a |body| element in scope
6257     my $i;
6258 wakaba 1.111 INSCOPE: {
6259     for (reverse @{$self->{open_elements}}) {
6260 wakaba 1.123 if ($_->[1] & BODY_EL) {
6261 wakaba 1.111 !!!cp ('t405');
6262     $i = $_;
6263     last INSCOPE;
6264 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
6265 wakaba 1.111 !!!cp ('t405.1');
6266     last;
6267     }
6268 wakaba 1.52 }
6269 wakaba 1.111
6270     !!!parse-error (type => 'start tag not allowed',
6271 wakaba 1.113 value => $token->{tag_name}, token => $token);
6272 wakaba 1.107 ## NOTE: Ignore the token.
6273 wakaba 1.52 !!!next-token;
6274 wakaba 1.53 redo B;
6275 wakaba 1.111 } # INSCOPE
6276 wakaba 1.107
6277     for (@{$self->{open_elements}}) {
6278 wakaba 1.123 unless ($_->[1] & ALL_END_TAG_OPTIONAL_EL) {
6279 wakaba 1.107 !!!cp ('t403');
6280 wakaba 1.122 !!!parse-error (type => 'not closed',
6281     value => $_->[0]->manakai_local_name,
6282     token => $token);
6283 wakaba 1.107 last;
6284     } else {
6285     !!!cp ('t404');
6286     }
6287     }
6288    
6289     $self->{insertion_mode} = AFTER_BODY_IM;
6290     !!!next-token;
6291     redo B;
6292 wakaba 1.52 } elsif ($token->{tag_name} eq 'html') {
6293 wakaba 1.122 ## TODO: Update this code. It seems that the code below is not
6294     ## up-to-date, though it has the same effect as specified.
6295 wakaba 1.123 if (@{$self->{open_elements}} > 1 and
6296     $self->{open_elements}->[1]->[1] & BODY_EL) {
6297 wakaba 1.52 ## ISSUE: There is an issue in the spec.
6298 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & BODY_EL) {
6299 wakaba 1.79 !!!cp ('t406');
6300 wakaba 1.122 !!!parse-error (type => 'not closed',
6301     value => $self->{open_elements}->[1]->[0]
6302     ->manakai_local_name,
6303     token => $token);
6304 wakaba 1.79 } else {
6305     !!!cp ('t407');
6306 wakaba 1.1 }
6307 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
6308 wakaba 1.52 ## reprocess
6309 wakaba 1.53 redo B;
6310 wakaba 1.51 } else {
6311 wakaba 1.79 !!!cp ('t408');
6312 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6313 wakaba 1.52 ## Ignore the token
6314     !!!next-token;
6315 wakaba 1.53 redo B;
6316 wakaba 1.51 }
6317 wakaba 1.52 } elsif ({
6318     address => 1, blockquote => 1, center => 1, dir => 1,
6319     div => 1, dl => 1, fieldset => 1, listing => 1,
6320     menu => 1, ol => 1, pre => 1, ul => 1,
6321     dd => 1, dt => 1, li => 1,
6322 wakaba 1.103 applet => 1, button => 1, marquee => 1, object => 1,
6323 wakaba 1.52 }->{$token->{tag_name}}) {
6324     ## has an element in scope
6325     my $i;
6326     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6327     my $node = $self->{open_elements}->[$_];
6328 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
6329 wakaba 1.79 !!!cp ('t410');
6330 wakaba 1.52 $i = $_;
6331 wakaba 1.87 last INSCOPE;
6332 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
6333 wakaba 1.79 !!!cp ('t411');
6334 wakaba 1.52 last INSCOPE;
6335 wakaba 1.51 }
6336 wakaba 1.52 } # INSCOPE
6337 wakaba 1.89
6338     unless (defined $i) { # has an element in scope
6339     !!!cp ('t413');
6340 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6341 wakaba 1.89 } else {
6342     ## Step 1. generate implied end tags
6343     while ({
6344     dd => ($token->{tag_name} ne 'dd'),
6345     dt => ($token->{tag_name} ne 'dt'),
6346     li => ($token->{tag_name} ne 'li'),
6347     p => 1,
6348 wakaba 1.123 }->{$self->{open_elements}->[-1]->[0]->manakai_local_name}) {
6349 wakaba 1.89 !!!cp ('t409');
6350     pop @{$self->{open_elements}};
6351     }
6352    
6353     ## Step 2.
6354 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
6355     ne $token->{tag_name}) {
6356 wakaba 1.79 !!!cp ('t412');
6357 wakaba 1.122 !!!parse-error (type => 'not closed',
6358     value => $self->{open_elements}->[-1]->[0]
6359     ->manakai_local_name,
6360     token => $token);
6361 wakaba 1.51 } else {
6362 wakaba 1.89 !!!cp ('t414');
6363 wakaba 1.51 }
6364 wakaba 1.89
6365     ## Step 3.
6366 wakaba 1.52 splice @{$self->{open_elements}}, $i;
6367 wakaba 1.89
6368     ## Step 4.
6369     $clear_up_to_marker->()
6370     if {
6371 wakaba 1.103 applet => 1, button => 1, marquee => 1, object => 1,
6372 wakaba 1.89 }->{$token->{tag_name}};
6373 wakaba 1.51 }
6374 wakaba 1.52 !!!next-token;
6375 wakaba 1.53 redo B;
6376 wakaba 1.52 } elsif ($token->{tag_name} eq 'form') {
6377 wakaba 1.92 undef $self->{form_element};
6378    
6379 wakaba 1.52 ## has an element in scope
6380 wakaba 1.92 my $i;
6381 wakaba 1.52 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6382     my $node = $self->{open_elements}->[$_];
6383 wakaba 1.123 if ($node->[1] & FORM_EL) {
6384 wakaba 1.79 !!!cp ('t418');
6385 wakaba 1.92 $i = $_;
6386 wakaba 1.52 last INSCOPE;
6387 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
6388 wakaba 1.79 !!!cp ('t419');
6389 wakaba 1.52 last INSCOPE;
6390     }
6391     } # INSCOPE
6392 wakaba 1.92
6393     unless (defined $i) { # has an element in scope
6394 wakaba 1.79 !!!cp ('t421');
6395 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6396 wakaba 1.92 } else {
6397     ## Step 1. generate implied end tags
6398 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
6399 wakaba 1.92 !!!cp ('t417');
6400     pop @{$self->{open_elements}};
6401     }
6402    
6403     ## Step 2.
6404 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
6405     ne $token->{tag_name}) {
6406 wakaba 1.92 !!!cp ('t417.1');
6407 wakaba 1.122 !!!parse-error (type => 'not closed',
6408     value => $self->{open_elements}->[-1]->[0]
6409     ->manakai_local_name,
6410     token => $token);
6411 wakaba 1.92 } else {
6412     !!!cp ('t420');
6413     }
6414    
6415     ## Step 3.
6416     splice @{$self->{open_elements}}, $i;
6417 wakaba 1.52 }
6418    
6419     !!!next-token;
6420 wakaba 1.53 redo B;
6421 wakaba 1.52 } elsif ({
6422     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
6423     }->{$token->{tag_name}}) {
6424     ## has an element in scope
6425     my $i;
6426     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6427     my $node = $self->{open_elements}->[$_];
6428 wakaba 1.123 if ($node->[1] & HEADING_EL) {
6429 wakaba 1.79 !!!cp ('t423');
6430 wakaba 1.52 $i = $_;
6431     last INSCOPE;
6432 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
6433 wakaba 1.79 !!!cp ('t424');
6434 wakaba 1.52 last INSCOPE;
6435 wakaba 1.51 }
6436 wakaba 1.52 } # INSCOPE
6437 wakaba 1.93
6438     unless (defined $i) { # has an element in scope
6439     !!!cp ('t425.1');
6440 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6441 wakaba 1.79 } else {
6442 wakaba 1.93 ## Step 1. generate implied end tags
6443 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
6444 wakaba 1.93 !!!cp ('t422');
6445     pop @{$self->{open_elements}};
6446     }
6447    
6448     ## Step 2.
6449 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
6450     ne $token->{tag_name}) {
6451 wakaba 1.93 !!!cp ('t425');
6452 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6453 wakaba 1.93 } else {
6454     !!!cp ('t426');
6455     }
6456    
6457     ## Step 3.
6458     splice @{$self->{open_elements}}, $i;
6459 wakaba 1.36 }
6460 wakaba 1.52
6461     !!!next-token;
6462 wakaba 1.53 redo B;
6463 wakaba 1.87 } elsif ($token->{tag_name} eq 'p') {
6464     ## has an element in scope
6465     my $i;
6466     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6467     my $node = $self->{open_elements}->[$_];
6468 wakaba 1.123 if ($node->[1] & P_EL) {
6469 wakaba 1.87 !!!cp ('t410.1');
6470     $i = $_;
6471 wakaba 1.88 last INSCOPE;
6472 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
6473 wakaba 1.87 !!!cp ('t411.1');
6474     last INSCOPE;
6475     }
6476     } # INSCOPE
6477 wakaba 1.91
6478     if (defined $i) {
6479 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
6480     ne $token->{tag_name}) {
6481 wakaba 1.87 !!!cp ('t412.1');
6482 wakaba 1.122 !!!parse-error (type => 'not closed',
6483     value => $self->{open_elements}->[-1]->[0]
6484     ->manakai_local_name,
6485     token => $token);
6486 wakaba 1.87 } else {
6487 wakaba 1.91 !!!cp ('t414.1');
6488 wakaba 1.87 }
6489 wakaba 1.91
6490 wakaba 1.87 splice @{$self->{open_elements}}, $i;
6491     } else {
6492 wakaba 1.91 !!!cp ('t413.1');
6493 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6494 wakaba 1.91
6495 wakaba 1.87 !!!cp ('t415.1');
6496     ## As if <p>, then reprocess the current token (net effect here: an empty |p| element is inserted and this end tag is consumed).
6497     my $el;
6498 wakaba 1.116 !!!create-element ($el, 'p',, $token);
6499 wakaba 1.87 $insert->($el);
6500 wakaba 1.91 ## NOTE: Not inserted into |$self->{open_elements}|.
6501 wakaba 1.87 }
6502 wakaba 1.91
6503 wakaba 1.87 !!!next-token;
6504     redo B;
6505 wakaba 1.52 } elsif ({
6506     a => 1,
6507     b => 1, big => 1, em => 1, font => 1, i => 1,
6508     nobr => 1, s => 1, small => 1, strile => 1,
6509     strong => 1, tt => 1, u => 1,
6510     }->{$token->{tag_name}}) {
6511 wakaba 1.79 !!!cp ('t427');
6512 wakaba 1.113 $formatting_end_tag->($token);
6513 wakaba 1.53 redo B;
6514 wakaba 1.52 } elsif ($token->{tag_name} eq 'br') {
6515 wakaba 1.79 !!!cp ('t428');
6516 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:br', token => $token);
6517 wakaba 1.52
6518     ## As if <br>
6519     $reconstruct_active_formatting_elements->($insert_to_current);
6520    
6521     my $el;
6522 wakaba 1.116 !!!create-element ($el, 'br',, $token);
6523 wakaba 1.52 $insert->($el);
6524    
6525     ## Ignore the token.
6526     !!!next-token;
6527 wakaba 1.53 redo B;
6528 wakaba 1.52 } elsif ({
6529     caption => 1, col => 1, colgroup => 1, frame => 1,
6530     frameset => 1, head => 1, option => 1, optgroup => 1,
6531     tbody => 1, td => 1, tfoot => 1, th => 1,
6532     thead => 1, tr => 1,
6533     area => 1, basefont => 1, bgsound => 1,
6534     embed => 1, hr => 1, iframe => 1, image => 1,
6535     img => 1, input => 1, isindex => 1, noembed => 1,
6536     noframes => 1, param => 1, select => 1, spacer => 1,
6537     table => 1, textarea => 1, wbr => 1,
6538     noscript => 0, ## TODO: if scripting is enabled
6539     }->{$token->{tag_name}}) {
6540 wakaba 1.79 !!!cp ('t429');
6541 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6542 wakaba 1.52 ## Ignore the token
6543     !!!next-token;
6544 wakaba 1.53 redo B;
6545 wakaba 1.52
6546     ## ISSUE: Issue on HTML5 new elements in spec
6547    
6548     } else {
6549     ## Step 1
6550     my $node_i = -1;
6551     my $node = $self->{open_elements}->[$node_i];
6552 wakaba 1.51
6553 wakaba 1.52 ## Step 2
6554     S2: {
6555 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
6556 wakaba 1.52 ## Step 1
6557     ## generate implied end tags
6558 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
6559 wakaba 1.79 !!!cp ('t430');
6560 wakaba 1.83 ## ISSUE: Can this case be reached?
6561 wakaba 1.86 pop @{$self->{open_elements}};
6562 wakaba 1.52 }
6563    
6564     ## Step 2
6565 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
6566     ne $token->{tag_name}) {
6567 wakaba 1.79 !!!cp ('t431');
6568 wakaba 1.58 ## NOTE: <x><y></x>
6569 wakaba 1.122 !!!parse-error (type => 'not closed',
6570     value => $self->{open_elements}->[-1]->[0]
6571     ->manakai_local_name,
6572     token => $token);
6573 wakaba 1.79 } else {
6574     !!!cp ('t432');
6575 wakaba 1.52 }
6576    
6577     ## Step 3
6578     splice @{$self->{open_elements}}, $node_i;
6579 wakaba 1.51
6580 wakaba 1.1 !!!next-token;
6581 wakaba 1.52 last S2;
6582 wakaba 1.1 } else {
6583 wakaba 1.52 ## Step 3
6584 wakaba 1.123 if (not ($node->[1] & FORMATTING_EL) and
6585 wakaba 1.52 #not $phrasing_category->{$node->[1]} and
6586 wakaba 1.123 ($node->[1] & SPECIAL_EL or
6587     $node->[1] & SCOPING_EL)) {
6588 wakaba 1.79 !!!cp ('t433');
6589 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6590 wakaba 1.52 ## Ignore the token
6591     !!!next-token;
6592     last S2;
6593     }
6594 wakaba 1.79
6595     !!!cp ('t434');
6596 wakaba 1.1 }
6597 wakaba 1.52
6598     ## Step 4
6599     $node_i--;
6600     $node = $self->{open_elements}->[$node_i];
6601    
6602     ## Step 5;
6603     redo S2;
6604     } # S2
6605 wakaba 1.53 redo B;
6606 wakaba 1.1 }
6607     }
6608 wakaba 1.52 redo B;
6609 wakaba 1.1 } # B
6610    
6611     ## Stop parsing # MUST
6612    
6613     ## TODO: script stuffs
6614 wakaba 1.3 } # _tree_construct_main
6615    
## set_inner_html ($class, $node, $string, $onerror)
##
## Replaces the children of |$node| with the result of parsing |$string|
## as HTML.
##
##   $class   - Parser class; used for |parse_string| and |new|.
##   $node    - Target node.  Node type 9 (DOM document) and node type 1
##              (DOM element) are supported.
##   $string  - Markup to parse.  It is accessed through a reference to
##              the caller's argument, so it is not copied.
##   $onerror - Optional code reference receiving parse errors as a
##              key/value list (|type|, |line|, |column|, |token|, ...).
##              If omitted, element-mode errors are reported by |warn|.
##
## Dies if |$node| is neither a document nor an element.
##
## NOTE: Lines starting with |!!!| are macros of this .src file; they are
## presumably expanded when the real module is generated (confirm against
## mkhtmlparser.pl).
sub set_inner_html ($$$) {
  my $class = shift;
  my $node = shift;
  my $s = \$_[0];
  my $onerror = $_[1];

  ## ISSUE: Should {confident} be true?

  my $nt = $node->node_type;
  if ($nt == 9) {
    # MUST

    ## Step 1 # MUST
    ## TODO: If the document has an active parser, ...
    ## ISSUE: There is an issue in the spec.

    ## Step 2 # MUST
    ## Snapshot the child list first, since removing nodes while walking
    ## the node list itself may skip entries.
    my @cn = @{$node->child_nodes};
    for (@cn) {
      $node->remove_child ($_);
    }

    ## Step 3, 4, 5 # MUST
    $class->parse_string ($$s => $node, $onerror);
  } elsif ($nt == 1) {
    ## TODO: If non-html element

    ## NOTE: Most of this code is copied from |parse_string|

    ## Step 1 # MUST
    ## Parse into a scratch document created from the same DOM
    ## implementation as the target element's owner document.
    my $this_doc = $node->owner_document;
    my $doc = $this_doc->implementation->create_document;
    $doc->manakai_is_html (1);
    my $p = $class->new;
    $p->{document} = $doc;

    ## Step 8 # MUST
    ## Input stream: |$i| is the offset of the next unread character.
    my $i = 0;
    $p->{line_prev} = $p->{line} = 1;
    $p->{column_prev} = $p->{column} = 0;
    ## NOTE: This closure captures |$p| and is stored in |$p|, which forms
    ## a reference cycle.  The cycle is broken at the end of this branch.
    $p->{set_next_char} = sub {
      my $self = shift;

      ## Shift the history of previously read characters.
      pop @{$self->{prev_char}};
      unshift @{$self->{prev_char}}, $self->{next_char};

      ## -1 represents the end of the input.
      $self->{next_char} = -1 and return if $i >= length $$s;
      $self->{next_char} = ord substr $$s, $i++, 1;

      ($p->{line_prev}, $p->{column_prev}) = ($p->{line}, $p->{column});
      $p->{column}++;

      if ($self->{next_char} == 0x000A) { # LF
        $p->{line}++;
        $p->{column} = 0;
        !!!cp ('i1');
      } elsif ($self->{next_char} == 0x000D) { # CR
        ## CR LF and lone CR are both normalized to a single LF.
        $i++ if substr ($$s, $i, 1) eq "\x0A";
        $self->{next_char} = 0x000A; # LF # MUST
        $p->{line}++;
        $p->{column} = 0;
        !!!cp ('i2');
      } elsif ($self->{next_char} > 0x10FFFF) {
        $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
        !!!cp ('i3');
      } elsif ($self->{next_char} == 0x0000) { # NULL
        !!!cp ('i4');
        !!!parse-error (type => 'NULL');
        $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
      }
    };
    $p->{prev_char} = [-1, -1, -1];
    $p->{next_char} = -1;

    ## Default error handler: report by |warn|, preferring the position
    ## recorded in the token (if any) over the current input position.
    my $ponerror = $onerror || sub {
      my (%opt) = @_;
      my $line = $opt{line};
      my $column = $opt{column};
      if (defined $opt{token} and defined $opt{token}->{line}) {
        $line = $opt{token}->{line};
        $column = $opt{token}->{column};
      }
      warn "Parse error ($opt{type}) at line $line column $column\n";
    };
    ## NOTE: This closure captures |$p| as well (second reference cycle).
    $p->{parse_error} = sub {
      $ponerror->(line => $p->{line}, column => $p->{column}, @_);
    };

    $p->_initialize_tokenizer;
    $p->_initialize_tree_constructor;

    ## Step 2
    ## The initial content model depends on the context element's name;
    ## any name not listed here is parsed as PCDATA.
    my $node_ln = $node->manakai_local_name;
    $p->{content_model} = {
      title => RCDATA_CONTENT_MODEL,
      textarea => RCDATA_CONTENT_MODEL,
      style => CDATA_CONTENT_MODEL,
      script => CDATA_CONTENT_MODEL,
      xmp => CDATA_CONTENT_MODEL,
      iframe => CDATA_CONTENT_MODEL,
      noembed => CDATA_CONTENT_MODEL,
      noframes => CDATA_CONTENT_MODEL,
      noscript => CDATA_CONTENT_MODEL,
      plaintext => PLAINTEXT_CONTENT_MODEL,
    }->{$node_ln};
    $p->{content_model} = PCDATA_CONTENT_MODEL
        unless defined $p->{content_model};
    ## ISSUE: What is "the name of the element"? local name?

    $p->{inner_html_node} = [$node, $el_category->{$node_ln}];
    ## TODO: Foreign element OK?

    ## Step 3
    my $root = $doc->create_element_ns
        ('http://www.w3.org/1999/xhtml', [undef, 'html']);

    ## Step 4 # MUST
    $doc->append_child ($root);

    ## Step 5 # MUST
    push @{$p->{open_elements}}, [$root, $el_category->{html}];

    undef $p->{head_element};

    ## Step 6 # MUST
    $p->_reset_insertion_mode;

    ## Step 7 # MUST
    ## Use the nearest XHTML |form| element among the context element and
    ## its ancestors as the form element pointer.
    my $anode = $node;
    AN: while (defined $anode) {
      if ($anode->node_type == 1) {
        my $nsuri = $anode->namespace_uri;
        if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml') {
          if ($anode->manakai_local_name eq 'form') {
            !!!cp ('i5');
            $p->{form_element} = $anode;
            last AN;
          }
        }
      }
      $anode = $anode->parent_node;
    } # AN

    ## Step 9 # MUST
    {
      ## The macro below refers to the parser as |$self|.
      my $self = $p;
      !!!next-token;
    }
    $p->_tree_construction_main;

    ## Step 10 # MUST
    my @cn = @{$node->child_nodes};
    for (@cn) {
      $node->remove_child ($_);
    }
    ## ISSUE: mutation events? read-only?

    ## Step 11 # MUST
    ## Move the parsed nodes from the scratch root into the target element.
    @cn = @{$root->child_nodes};
    for (@cn) {
      $this_doc->adopt_node ($_);
      $node->append_child ($_);
    }
    ## ISSUE: mutation events?

    $p->_terminate_tree_constructor;

    ## Remove self references: both closures below capture |$p| and are
    ## stored in |$p|, so the parser object would never be freed otherwise.
    delete $p->{set_next_char}; # delete loop
    delete $p->{parse_error}; # delete loop
  } else {
    die "$0: |set_inner_html| is not defined for node of type $nt";
  }
} # set_inner_html
6788    
6789     } # tree construction stage
6790 wakaba 1.1
## Whatpm::HTML::RestartParser - a subclass of |Error|.
## NOTE(review): judging from its name, it is presumably thrown to make
## the parser restart; confirm against the code that throws it.
package Whatpm::HTML::RestartParser;
our @ISA;
push @ISA, 'Error';
6793    
6794 wakaba 1.1 1;
6795 wakaba 1.125 # $Date: 2008/04/06 10:34:11 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24