/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Contents of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.100 - (hide annotations) (download) (as text)
Sun Mar 9 04:08:41 2008 UTC (16 years, 7 months ago) by wakaba
Branch: MAIN
Changes since 1.99: +30 -10 lines
File MIME type: application/x-wais-source
++ whatpm/t/ChangeLog	9 Mar 2008 04:04:19 -0000
	* tree-test-1.dat: Test data on <script> or <style>
	in <table> are added.

2008-03-09  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	9 Mar 2008 04:04:52 -0000
	* HTML.pm.src: No foster parenting for <script> and <style>
	in non-tainted <table>s (HTML5 revision 1336).

2008-03-09  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.2 package Whatpm::HTML;
2 wakaba 1.1 use strict;
3 wakaba 1.100 our $VERSION=do{my @r=(q$Revision: 1.99 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.63 use Error qw(:try);
5 wakaba 1.1
6 wakaba 1.18 ## ISSUE:
7     ## var doc = implementation.createDocument (null, null, null);
8     ## doc.write ('');
9     ## alert (doc.compatMode);
10 wakaba 1.1
11 wakaba 1.70 ## TODO: Control characters and noncharacters are not allowed (HTML5 revision 1263)
12     ## TODO: 1252 parse error (revision 1264)
13     ## TODO: 8859-11 = 874 (revision 1271)
14    
## Tag names (lowercase) whose start tag may end with "/>".  The
## tokenizer consults this table when it sees "/" directly followed by
## ">" in a start tag; for any other tag name it reports a |nestc|
## parse error.
my $permitted_slash_tag_name = {
  map { ($_ => 1) } qw(
    base link meta hr br img embed param area col input
  )
};
28    
## Replacement code points for the C1 range U+0080..U+009F.  Keys are
## the original code point numbers and values are the code points to
## use instead; U+FFFD marks positions without a replacement.
## NOTE(review): the values look like the Windows-1252 mapping and
## the table is presumably used for numeric character references --
## the consumer is not visible in this part of the file; confirm.
my $c1_entity_char = do {
  my @replacement = (
    ## 0x80 .. 0x87
    0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
    ## 0x88 .. 0x8F
    0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD,
    ## 0x90 .. 0x97
    0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
    ## 0x98 .. 0x9F
    0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178,
  );
  my %table;
  @table{0x80 .. 0x9F} = @replacement;
  \%table;
}; # $c1_entity_char
63 wakaba 1.1
## Element category tables.  Each table is a hash reference keyed by
## lowercase element name with a true value for every member.
## NOTE(review): presumably these mirror the "special", "scoping" and
## "formatting" element categories of the HTML5 tree construction
## algorithm; the code consulting them is not in this part of the file.

## "Special" elements.
my $special_category = {
  address => 1, area => 1, base => 1, basefont => 1, bgsound => 1,
  blockquote => 1, body => 1, br => 1, center => 1, col => 1, colgroup => 1,
  dd => 1, dir => 1, div => 1, dl => 1, dt => 1, embed => 1, fieldset => 1,
  form => 1, frame => 1, frameset => 1, h1 => 1, h2 => 1, h3 => 1,
  h4 => 1, h5 => 1, h6 => 1, head => 1, hr => 1, iframe => 1, image => 1,
  img => 1, input => 1, isindex => 1, li => 1, link => 1, listing => 1,
  menu => 1, meta => 1, noembed => 1, noframes => 1, noscript => 1,
  ol => 1, optgroup => 1, option => 1, p => 1, param => 1, plaintext => 1,
  pre => 1, script => 1, select => 1, spacer => 1, style => 1, tbody => 1,
  textarea => 1, tfoot => 1, thead => 1, title => 1, tr => 1, ul => 1, wbr => 1,
};

## "Scoping" elements.
my $scoping_category = {
  button => 1, caption => 1, html => 1, marquee => 1, object => 1,
  table => 1, td => 1, th => 1,
};

## "Formatting" elements.  The |strike| entry used to be misspelled
## |strile|, so <strike> was never recognized as a formatting element.
my $formatting_category = {
  a => 1, b => 1, big => 1, em => 1, font => 1, i => 1, nobr => 1,
  s => 1, small => 1, strike => 1, strong => 1, tt => 1, u => 1,
};
# $phrasing_category: all other elements
85    
## Parse a string of bytes as an HTML document.
##
## Arguments:
##   invocant - A parser object, or a class name (in which case a new
##              parser is created with |new|).
##   $charset - The name of the character encoding of the bytes, or
##              |undef| to let the encoding be sniffed from the first
##              1024 bytes (falling back to |windows-1252|).
##   $bytes   - The byte string, or a reference to it.
##   others   - Passed to |parse_char_string| unchanged, after the
##              decoded string.
##
## Returns what |parse_char_string| returns.
##
## When the encoding was only sniffed, |$self->{confident}| is false
## and the |change_encoding| callback installed here may throw
## |Whatpm::HTML::RestartParser|; the bytes are then decoded again
## with the newly reported encoding and the parse is redone once.
sub parse_byte_string ($$$$;$) {
  my $self = ref $_[0] ? shift : shift->new;
  my $charset = shift;
  my $bytes_s = ref $_[0] ? $_[0] : \($_[0]);
  my $s;

  ## |Encode::decode| is called on every code path below (explicit
  ## charset, sniffed charset and parser restart), so the module is
  ## loaded up front rather than only in the explicit-charset branch.
  require Encode; ## TODO: decode(utf8) don't delete BOM

  if (defined $charset) {
    $s = \ (Encode::decode ($charset, $$bytes_s));
    $self->{input_encoding} = lc $charset; ## TODO: normalize name
    $self->{confident} = 1;
  } else {
    ## TODO: Implement HTML5 detection algorithm
    require Whatpm::Charset::UniversalCharDet;
    $charset = Whatpm::Charset::UniversalCharDet->detect_byte_string
        (substr ($$bytes_s, 0, 1024));
    $charset ||= 'windows-1252';
    $s = \ (Encode::decode ($charset, $$bytes_s));
    $self->{input_encoding} = $charset;
    $self->{confident} = 0;
  }

  ## Callback invoked with the parser and an encoding name; it either
  ## confirms the current encoding or aborts the parse by throwing.
  $self->{change_encoding} = sub {
    my $self = shift;
    my $charset = lc shift;
    ## TODO: if $charset is supported
    ## TODO: normalize charset name

    ## "Change the encoding" algorithm:

    ## Step 1
    if ($charset eq 'utf-16') { ## ISSUE: UTF-16BE -> UTF-8? UTF-16LE -> UTF-8?
      $charset = 'utf-8';
    }

    ## Step 2
    if (defined $self->{input_encoding} and
        $self->{input_encoding} eq $charset) {
      $self->{confident} = 1;
      return;
    }

    !!!parse-error (type => 'charset label detected:'.$self->{input_encoding}.
        ':'.$charset, level => 'w');

    ## Step 3
    # if (can) {
      ## change the encoding on the fly.
      #$self->{confident} = 1;
      #return;
    # }

    ## Step 4
    throw Whatpm::HTML::RestartParser (charset => $charset);
  }; # $self->{change_encoding}

  ## The byte string is still the first element of |@_|; drop it so
  ## that only the pass-through arguments remain.
  my @args = @_; shift @args; # $s
  my $return;
  try {
    $return = $self->parse_char_string ($s, @args);
  } catch Whatpm::HTML::RestartParser with {
    ## Decode again with the encoding carried by the exception.
    my $charset = shift->{charset};
    $s = \ (Encode::decode ($charset, $$bytes_s));
    $self->{input_encoding} = $charset; ## TODO: normalize
    $self->{confident} = 1;
    $return = $self->parse_char_string ($s, @args);
  };
  return $return;
} # parse_byte_string
155    
156 wakaba 1.71 ## NOTE: HTML5 spec says that the encoding layer MUST NOT strip BOM
157     ## and the HTML layer MUST ignore it. However, we do strip the BOM in
158     ## the encoding layer and the HTML layer does not ignore any U+FEFF,
159     ## because the core part of our HTML parser expects a string of characters,
160     ## not a string of bytes or code units or anything which might contain a BOM.
161     ## Therefore, any parser interface that accepts a string of bytes,
162     ## such as |parse_byte_string| in this module, must ensure that it does
163     ## strip the BOM and never strip any ZWNBSP.
164    
## Parse a string of characters as an HTML document.
##
## Arguments:
##   invocant - A parser object, or a class name (in which case a new
##              parser is created with |new|).
##   $_[0]    - The character string, or a reference to it.
##   $_[1]    - The document object to fill; its current child nodes
##              are removed first.
##   $_[2]    - Optional error handler, called with |type => ...| and
##              other name/value pairs plus |line| and |column|.  The
##              default handler |warn|s.
##
## Returns the document object.
sub parse_string ($$$;$) {
  my $self = ref $_[0] ? shift : shift->new;
  my $s = ref $_[0] ? $_[0] : \($_[0]);
  $self->{document} = $_[1];
  @{$self->{document}->child_nodes} = ();

  ## NOTE: |set_inner_html| copies most of this method's code

  unless (exists $self->{confident}) {
    $self->{confident} = 1;
  }
  if (defined $self->{input_encoding}) {
    $self->{document}->input_encoding ($self->{input_encoding});
  }

  ## Read position in |$$s| and the location reported with errors.
  my $pos = 0;
  my $line = 1;
  my $column = 0;

  ## Move to the next input character: |{next_char}| receives its code
  ## point (or -1 at the end of the input) and |{prev_char}| keeps the
  ## three values |{next_char}| held before, most recent first.
  $self->{set_next_char} = sub {
    my $self = shift;

    pop @{$self->{prev_char}};
    unshift @{$self->{prev_char}}, $self->{next_char};

    if ($pos >= length $$s) {
      $self->{next_char} = -1;
      return;
    }

    my $c = ord substr $$s, $pos++, 1;
    $self->{next_char} = $c;
    $column++;

    if ($c == 0x000A) { # LF
      $line++;
      $column = 0;
    } elsif ($c == 0x000D) { # CR
      ## A CR, or a CR LF pair, is delivered as a single LF.
      if (substr ($$s, $pos, 1) eq "\x0A") {
        $pos++;
      }
      $self->{next_char} = 0x000A; # LF # MUST
      $line++;
      $column = 0;
    } elsif ($c > 0x10FFFF) {
      $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    } elsif ($c == 0x0000) { # NULL
      !!!parse-error (type => 'NULL');
      $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    }
  };
  $self->{prev_char} = [(-1) x 3];
  $self->{next_char} = -1;

  my $onerror = $_[2];
  $onerror ||= sub {
    my %opt = @_;
    warn "Parse error ($opt{type}) at line $opt{line} column $opt{column}\n";
  };
  ## Every reported error is tagged with the current read location.
  $self->{parse_error} = sub {
    $onerror->(@_, line => $line, column => $column);
  };

  $self->_initialize_tokenizer;
  $self->_initialize_tree_constructor;
  $self->_construct_tree;
  $self->_terminate_tree_constructor;

  return $self->{document};
} # parse_string

## |parse_char_string| is another name for |parse_string|.
*parse_char_string = \&parse_string;
225    
## Create a parser object with do-nothing default callbacks.
##
## Arguments:
##   $class - The class to bless the new object into.
##
## Returns the new parser.  The |set_next_char|, |parse_error|,
## |change_encoding| and |application_cache_selection| members hold
## code references; |parse_string| and |parse_byte_string| replace
## some of them with working implementations.
sub new ($) {
  my $class = shift;
  my $self = bless {}, $class;

  ## The default |set_next_char| refers back to the parser.  A strong
  ## reference captured by a closure stored in the parser itself would
  ## form a cycle and the parser would never be freed, so the closure
  ## only holds a weakened copy.
  require Scalar::Util;
  my $weak_self = $self;
  Scalar::Util::weaken ($weak_self);
  $self->{set_next_char} = sub {
    $weak_self->{next_char} = -1;
  };
  $self->{parse_error} = sub {
    #
  };
  $self->{change_encoding} = sub {
    # if ($_[0] is a supported encoding) {
    #   run "change the encoding" algorithm;
    #   throw Whatpm::HTML::RestartParser (charset => $new_encoding);
    # }
  };
  $self->{application_cache_selection} = sub {
    #
  };
  return $self;
} # new
246    
## Content model flags: which markup is recognized in character data.
sub CM_ENTITY () { 1 << 0 }          # "&" markup in data
sub CM_LIMITED_MARKUP () { 1 << 1 }  # "<" markup in data (limited)
sub CM_FULL_MARKUP () { 1 << 2 }     # "<" markup in data (any)

## Content models, expressed as combinations of the flags above.
sub PLAINTEXT_CONTENT_MODEL () { 0 }
sub CDATA_CONTENT_MODEL () { CM_LIMITED_MARKUP }
sub RCDATA_CONTENT_MODEL () { CM_LIMITED_MARKUP | CM_ENTITY }
sub PCDATA_CONTENT_MODEL () { CM_FULL_MARKUP | CM_ENTITY }
255    
## Tokenizer states.  Each state is a distinct small integer that is
## stored in |$self->{state}|.

## Data and tags
sub DATA_STATE ()                                     {  0 }
sub ENTITY_DATA_STATE ()                              {  1 }
sub TAG_OPEN_STATE ()                                 {  2 }
sub CLOSE_TAG_OPEN_STATE ()                           {  3 }
sub TAG_NAME_STATE ()                                 {  4 }

## Attributes
sub BEFORE_ATTRIBUTE_NAME_STATE ()                    {  5 }
sub ATTRIBUTE_NAME_STATE ()                           {  6 }
sub AFTER_ATTRIBUTE_NAME_STATE ()                     {  7 }
sub BEFORE_ATTRIBUTE_VALUE_STATE ()                   {  8 }
sub ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE ()            {  9 }
sub ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE ()            { 10 }
sub ATTRIBUTE_VALUE_UNQUOTED_STATE ()                 { 11 }
sub ENTITY_IN_ATTRIBUTE_VALUE_STATE ()                { 12 }

## Markup declarations and comments
sub MARKUP_DECLARATION_OPEN_STATE ()                  { 13 }
sub COMMENT_START_STATE ()                            { 14 }
sub COMMENT_START_DASH_STATE ()                       { 15 }
sub COMMENT_STATE ()                                  { 16 }
sub COMMENT_END_STATE ()                              { 17 }
sub COMMENT_END_DASH_STATE ()                         { 18 }
sub BOGUS_COMMENT_STATE ()                            { 19 }

## DOCTYPE
sub DOCTYPE_STATE ()                                  { 20 }
sub BEFORE_DOCTYPE_NAME_STATE ()                      { 21 }
sub DOCTYPE_NAME_STATE ()                             { 22 }
sub AFTER_DOCTYPE_NAME_STATE ()                       { 23 }
sub BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE ()         { 24 }
sub DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE ()  { 25 }
sub DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE ()  { 26 }
sub AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE ()          { 27 }
sub BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE ()         { 28 }
sub DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE ()  { 29 }
sub DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE ()  { 30 }
sub AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE ()          { 31 }
sub BOGUS_DOCTYPE_STATE ()                            { 32 }

## Numbered out of sequence; belongs with the attribute states.
sub AFTER_ATTRIBUTE_VALUE_QUOTED_STATE ()             { 33 }
290 wakaba 1.57
## Token types, stored in the |type| member of a token.
sub DOCTYPE_TOKEN ()     { 1 }
sub COMMENT_TOKEN ()     { 2 }
sub START_TAG_TOKEN ()   { 3 }
sub END_TAG_TOKEN ()     { 4 }
sub END_OF_FILE_TOKEN () { 5 }
sub CHARACTER_TOKEN ()   { 6 }
297    
## Insertion mode group flags.  Each |*_IMS| constant is a single bit;
## the low two bits are left free to number the modes within a group.
sub AFTER_HTML_IMS () { 1 << 2 }
sub HEAD_IMS ()       { 1 << 3 }
sub BODY_IMS ()       { 1 << 4 }
sub BODY_TABLE_IMS () { 1 << 5 }
sub TABLE_IMS ()      { 1 << 6 }
sub ROW_IMS ()        { 1 << 7 }
sub BODY_AFTER_IMS () { 1 << 8 }
sub FRAME_IMS ()      { 1 << 9 }

## NOTE: "initial" and "before html" insertion modes have no constants.

## NOTE: "after after body" insertion mode.
sub AFTER_HTML_BODY_IM () { BODY_AFTER_IMS | AFTER_HTML_IMS }

## NOTE: "after after frameset" insertion mode.
sub AFTER_HTML_FRAMESET_IM () { FRAME_IMS | AFTER_HTML_IMS }

## Modes of the head group.
sub IN_HEAD_IM ()          { HEAD_IMS | 0 }
sub IN_HEAD_NOSCRIPT_IM () { HEAD_IMS | 1 }
sub AFTER_HEAD_IM ()       { HEAD_IMS | 2 }
sub BEFORE_HEAD_IM ()      { HEAD_IMS | 3 }

## Modes of the body group; cells and captions also carry the
## body-in-table flag.
sub IN_BODY_IM ()    { BODY_IMS }
sub IN_CELL_IM ()    { BODY_IMS | BODY_TABLE_IMS | 1 }
sub IN_CAPTION_IM () { BODY_IMS | BODY_TABLE_IMS | 2 }

## Modes of the table group.
sub IN_ROW_IM ()        { TABLE_IMS | ROW_IMS | 1 }
sub IN_TABLE_BODY_IM () { TABLE_IMS | ROW_IMS | 2 }
sub IN_TABLE_IM ()      { TABLE_IMS }

sub AFTER_BODY_IM () { BODY_AFTER_IMS }

## Modes of the frameset group.
sub IN_FRAMESET_IM ()    { FRAME_IMS | 1 }
sub AFTER_FRAMESET_IM () { FRAME_IMS | 2 }

## Modes that belong to no group.
sub IN_SELECT_IM ()       { 1 }
sub IN_COLUMN_GROUP_IM () { 2 }
330    
331 wakaba 1.1 ## Implementations MUST act as if state machine in the spec
332    
## Reset the tokenizer to its start condition: data state, PCDATA
## content model, no token or attribute under construction, empty
## character and token queues, and the first input character loaded.
sub _initialize_tokenizer ($) {
  my $self = shift;

  $self->{state} = DATA_STATE;
  $self->{content_model} = PCDATA_CONTENT_MODEL;

  ## These members are kept in the object but reset to |undef|.
  ## |{current_token}| is a start tag, end tag, comment, or DOCTYPE.
  @{$self}{qw(
    current_token
    current_attribute
    last_emitted_start_tag_name
    last_attribute_value_state
  )} = ();

  $self->{char} = [];
  ## |$self->{next_char}| is set by reading the first character.
  !!!next-input-character;
  $self->{token} = [];
  ## |$self->{escape}| is deliberately left untouched.
} # _initialize_tokenizer
347    
348     ## A token has:
349 wakaba 1.55 ## ->{type} == DOCTYPE_TOKEN, START_TAG_TOKEN, END_TAG_TOKEN, COMMENT_TOKEN,
350     ## CHARACTER_TOKEN, or END_OF_FILE_TOKEN
351     ## ->{name} (DOCTYPE_TOKEN)
352     ## ->{tag_name} (START_TAG_TOKEN, END_TAG_TOKEN)
353     ## ->{public_identifier} (DOCTYPE_TOKEN)
354     ## ->{system_identifier} (DOCTYPE_TOKEN)
355 wakaba 1.75 ## ->{quirks} == 1 or 0 (DOCTYPE_TOKEN): "force-quirks" flag
356 wakaba 1.55 ## ->{attributes} isa HASH (START_TAG_TOKEN, END_TAG_TOKEN)
357 wakaba 1.66 ## ->{name}
358     ## ->{value}
359     ## ->{has_reference} == 1 or 0
360 wakaba 1.55 ## ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN)
361 wakaba 1.1
362     ## Emitted token MUST immediately be handled by the tree construction state.
363    
364     ## Before each step, UA MAY check to see if either one of the scripts in
365     ## "list of scripts that will execute as soon as possible" or the first
366     ## script in the "list of scripts that will execute asynchronously",
367     ## has completed loading. If one has, then it MUST be executed
368     ## and removed from the list.
369    
370 wakaba 1.59 ## NOTE: HTML5 "Writing HTML documents" section, applied to
371     ## documents and not to user agents and conformance checkers,
372     ## contains some requirements that are not detected by the
373     ## parsing algorithm:
374     ## - Some requirements on character encoding declarations. ## TODO
375     ## - "Elements MUST NOT contain content that their content model disallows."
376     ## ... Some are parse error, some are not (will be reported by c.c.).
377     ## - Polytheistic slash SHOULD NOT be used. (Applied only to atheists.) ## TODO
378     ## - Text (in elements, attributes, and comments) SHOULD NOT contain
379     ## control characters other than space characters. ## TODO: (what is control character? C0, C1 and DEL? Unicode control character?)
380    
381     ## TODO: HTML5 poses authors two SHOULD-level requirements that cannot
382     ## be detected by the HTML5 parsing algorithm:
383     ## - Text,
384    
385 wakaba 1.1 sub _get_next_token ($) {
386     my $self = shift;
387     if (@{$self->{token}}) {
388     return shift @{$self->{token}};
389     }
390    
391     A: {
392 wakaba 1.57 if ($self->{state} == DATA_STATE) {
393 wakaba 1.76 if ($self->{next_char} == 0x0026) { # &
394 wakaba 1.72 if ($self->{content_model} & CM_ENTITY and # PCDATA | RCDATA
395     not $self->{escape}) {
396 wakaba 1.77 !!!cp (1);
397 wakaba 1.57 $self->{state} = ENTITY_DATA_STATE;
398 wakaba 1.1 !!!next-input-character;
399     redo A;
400     } else {
401 wakaba 1.77 !!!cp (2);
402 wakaba 1.1 #
403     }
404 wakaba 1.76 } elsif ($self->{next_char} == 0x002D) { # -
405 wakaba 1.40 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
406 wakaba 1.13 unless ($self->{escape}) {
407 wakaba 1.76 if ($self->{prev_char}->[0] == 0x002D and # -
408     $self->{prev_char}->[1] == 0x0021 and # !
409     $self->{prev_char}->[2] == 0x003C) { # <
410 wakaba 1.77 !!!cp (3);
411 wakaba 1.13 $self->{escape} = 1;
412 wakaba 1.77 } else {
413     !!!cp (4);
414 wakaba 1.13 }
415 wakaba 1.77 } else {
416     !!!cp (5);
417 wakaba 1.13 }
418     }
419    
420     #
421 wakaba 1.76 } elsif ($self->{next_char} == 0x003C) { # <
422 wakaba 1.40 if ($self->{content_model} & CM_FULL_MARKUP or # PCDATA
423     (($self->{content_model} & CM_LIMITED_MARKUP) and # CDATA | RCDATA
424 wakaba 1.13 not $self->{escape})) {
425 wakaba 1.77 !!!cp (6);
426 wakaba 1.57 $self->{state} = TAG_OPEN_STATE;
427 wakaba 1.1 !!!next-input-character;
428     redo A;
429     } else {
430 wakaba 1.77 !!!cp (7);
431 wakaba 1.1 #
432     }
433 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
434 wakaba 1.13 if ($self->{escape} and
435 wakaba 1.40 ($self->{content_model} & CM_LIMITED_MARKUP)) { # RCDATA | CDATA
436 wakaba 1.76 if ($self->{prev_char}->[0] == 0x002D and # -
437     $self->{prev_char}->[1] == 0x002D) { # -
438 wakaba 1.77 !!!cp (8);
439 wakaba 1.13 delete $self->{escape};
440 wakaba 1.77 } else {
441     !!!cp (9);
442 wakaba 1.13 }
443 wakaba 1.77 } else {
444     !!!cp (10);
445 wakaba 1.13 }
446    
447     #
448 wakaba 1.76 } elsif ($self->{next_char} == -1) {
449 wakaba 1.77 !!!cp (11);
450 wakaba 1.55 !!!emit ({type => END_OF_FILE_TOKEN});
451 wakaba 1.1 last A; ## TODO: ok?
452 wakaba 1.77 } else {
453     !!!cp (12);
454 wakaba 1.1 }
455     # Anything else
456 wakaba 1.55 my $token = {type => CHARACTER_TOKEN,
457 wakaba 1.76 data => chr $self->{next_char}};
458 wakaba 1.1 ## Stay in the data state
459     !!!next-input-character;
460    
461     !!!emit ($token);
462    
463     redo A;
464 wakaba 1.57 } elsif ($self->{state} == ENTITY_DATA_STATE) {
465 wakaba 1.1 ## (cannot happen in CDATA state)
466    
467 wakaba 1.72 my $token = $self->_tokenize_attempt_to_consume_an_entity (0, -1);
468 wakaba 1.1
469 wakaba 1.57 $self->{state} = DATA_STATE;
470 wakaba 1.1 # next-input-character is already done
471    
472     unless (defined $token) {
473 wakaba 1.77 !!!cp (13);
474 wakaba 1.55 !!!emit ({type => CHARACTER_TOKEN, data => '&'});
475 wakaba 1.1 } else {
476 wakaba 1.77 !!!cp (14);
477 wakaba 1.1 !!!emit ($token);
478     }
479    
480     redo A;
481 wakaba 1.57 } elsif ($self->{state} == TAG_OPEN_STATE) {
482 wakaba 1.40 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
483 wakaba 1.76 if ($self->{next_char} == 0x002F) { # /
484 wakaba 1.77 !!!cp (15);
485 wakaba 1.1 !!!next-input-character;
486 wakaba 1.57 $self->{state} = CLOSE_TAG_OPEN_STATE;
487 wakaba 1.1 redo A;
488     } else {
489 wakaba 1.77 !!!cp (16);
490 wakaba 1.1 ## reconsume
491 wakaba 1.57 $self->{state} = DATA_STATE;
492 wakaba 1.1
493 wakaba 1.55 !!!emit ({type => CHARACTER_TOKEN, data => '<'});
494 wakaba 1.1
495     redo A;
496     }
497 wakaba 1.40 } elsif ($self->{content_model} & CM_FULL_MARKUP) { # PCDATA
498 wakaba 1.76 if ($self->{next_char} == 0x0021) { # !
499 wakaba 1.77 !!!cp (17);
500 wakaba 1.57 $self->{state} = MARKUP_DECLARATION_OPEN_STATE;
501 wakaba 1.1 !!!next-input-character;
502     redo A;
503 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
504 wakaba 1.77 !!!cp (18);
505 wakaba 1.57 $self->{state} = CLOSE_TAG_OPEN_STATE;
506 wakaba 1.1 !!!next-input-character;
507     redo A;
508 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
509     $self->{next_char} <= 0x005A) { # A..Z
510 wakaba 1.77 !!!cp (19);
511 wakaba 1.1 $self->{current_token}
512 wakaba 1.55 = {type => START_TAG_TOKEN,
513 wakaba 1.76 tag_name => chr ($self->{next_char} + 0x0020)};
514 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
515 wakaba 1.1 !!!next-input-character;
516     redo A;
517 wakaba 1.76 } elsif (0x0061 <= $self->{next_char} and
518     $self->{next_char} <= 0x007A) { # a..z
519 wakaba 1.77 !!!cp (20);
520 wakaba 1.55 $self->{current_token} = {type => START_TAG_TOKEN,
521 wakaba 1.76 tag_name => chr ($self->{next_char})};
522 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
523 wakaba 1.1 !!!next-input-character;
524     redo A;
525 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
526 wakaba 1.77 !!!cp (21);
527 wakaba 1.3 !!!parse-error (type => 'empty start tag');
528 wakaba 1.57 $self->{state} = DATA_STATE;
529 wakaba 1.1 !!!next-input-character;
530    
531 wakaba 1.55 !!!emit ({type => CHARACTER_TOKEN, data => '<>'});
532 wakaba 1.1
533     redo A;
534 wakaba 1.76 } elsif ($self->{next_char} == 0x003F) { # ?
535 wakaba 1.77 !!!cp (22);
536 wakaba 1.3 !!!parse-error (type => 'pio');
537 wakaba 1.57 $self->{state} = BOGUS_COMMENT_STATE;
538 wakaba 1.76 ## $self->{next_char} is intentionally left as is
539 wakaba 1.1 redo A;
540     } else {
541 wakaba 1.77 !!!cp (23);
542 wakaba 1.3 !!!parse-error (type => 'bare stago');
543 wakaba 1.57 $self->{state} = DATA_STATE;
544 wakaba 1.1 ## reconsume
545    
546 wakaba 1.55 !!!emit ({type => CHARACTER_TOKEN, data => '<'});
547 wakaba 1.1
548     redo A;
549     }
550     } else {
551 wakaba 1.40 die "$0: $self->{content_model} in tag open";
552 wakaba 1.1 }
553 wakaba 1.57 } elsif ($self->{state} == CLOSE_TAG_OPEN_STATE) {
554 wakaba 1.40 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
555 wakaba 1.23 if (defined $self->{last_emitted_start_tag_name}) {
556 wakaba 1.30 ## NOTE: <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>
557 wakaba 1.23 my @next_char;
558     TAGNAME: for (my $i = 0; $i < length $self->{last_emitted_start_tag_name}; $i++) {
559 wakaba 1.76 push @next_char, $self->{next_char};
560 wakaba 1.23 my $c = ord substr ($self->{last_emitted_start_tag_name}, $i, 1);
561     my $C = 0x0061 <= $c && $c <= 0x007A ? $c - 0x0020 : $c;
562 wakaba 1.76 if ($self->{next_char} == $c or $self->{next_char} == $C) {
563 wakaba 1.77 !!!cp (24);
564 wakaba 1.23 !!!next-input-character;
565     next TAGNAME;
566     } else {
567 wakaba 1.77 !!!cp (25);
568 wakaba 1.76 $self->{next_char} = shift @next_char; # reconsume
569 wakaba 1.23 !!!back-next-input-character (@next_char);
570 wakaba 1.57 $self->{state} = DATA_STATE;
571 wakaba 1.23
572 wakaba 1.55 !!!emit ({type => CHARACTER_TOKEN, data => '</'});
573 wakaba 1.23
574     redo A;
575     }
576     }
577 wakaba 1.76 push @next_char, $self->{next_char};
578 wakaba 1.23
579 wakaba 1.76 unless ($self->{next_char} == 0x0009 or # HT
580     $self->{next_char} == 0x000A or # LF
581     $self->{next_char} == 0x000B or # VT
582     $self->{next_char} == 0x000C or # FF
583     $self->{next_char} == 0x0020 or # SP
584     $self->{next_char} == 0x003E or # >
585     $self->{next_char} == 0x002F or # /
586     $self->{next_char} == -1) {
587 wakaba 1.77 !!!cp (26);
588 wakaba 1.76 $self->{next_char} = shift @next_char; # reconsume
589 wakaba 1.1 !!!back-next-input-character (@next_char);
590 wakaba 1.57 $self->{state} = DATA_STATE;
591 wakaba 1.55 !!!emit ({type => CHARACTER_TOKEN, data => '</'});
592 wakaba 1.1 redo A;
593 wakaba 1.23 } else {
594 wakaba 1.77 !!!cp (27);
595 wakaba 1.76 $self->{next_char} = shift @next_char;
596 wakaba 1.23 !!!back-next-input-character (@next_char);
597     # and consume...
598 wakaba 1.1 }
599 wakaba 1.23 } else {
600     ## No start tag token has ever been emitted
601 wakaba 1.77 !!!cp (28);
602 wakaba 1.23 # next-input-character is already done
603 wakaba 1.57 $self->{state} = DATA_STATE;
604 wakaba 1.55 !!!emit ({type => CHARACTER_TOKEN, data => '</'});
605 wakaba 1.1 redo A;
606     }
607     }
608    
609 wakaba 1.76 if (0x0041 <= $self->{next_char} and
610     $self->{next_char} <= 0x005A) { # A..Z
611 wakaba 1.77 !!!cp (29);
612 wakaba 1.55 $self->{current_token} = {type => END_TAG_TOKEN,
613 wakaba 1.76 tag_name => chr ($self->{next_char} + 0x0020)};
614 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
615 wakaba 1.1 !!!next-input-character;
616     redo A;
617 wakaba 1.76 } elsif (0x0061 <= $self->{next_char} and
618     $self->{next_char} <= 0x007A) { # a..z
619 wakaba 1.77 !!!cp (30);
620 wakaba 1.55 $self->{current_token} = {type => END_TAG_TOKEN,
621 wakaba 1.76 tag_name => chr ($self->{next_char})};
622 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
623 wakaba 1.1 !!!next-input-character;
624     redo A;
625 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
626 wakaba 1.77 !!!cp (31);
627 wakaba 1.3 !!!parse-error (type => 'empty end tag');
628 wakaba 1.57 $self->{state} = DATA_STATE;
629 wakaba 1.1 !!!next-input-character;
630     redo A;
631 wakaba 1.76 } elsif ($self->{next_char} == -1) {
632 wakaba 1.77 !!!cp (32);
633 wakaba 1.3 !!!parse-error (type => 'bare etago');
634 wakaba 1.57 $self->{state} = DATA_STATE;
635 wakaba 1.1 # reconsume
636    
637 wakaba 1.55 !!!emit ({type => CHARACTER_TOKEN, data => '</'});
638 wakaba 1.1
639     redo A;
640     } else {
641 wakaba 1.77 !!!cp (33);
642 wakaba 1.3 !!!parse-error (type => 'bogus end tag');
643 wakaba 1.57 $self->{state} = BOGUS_COMMENT_STATE;
644 wakaba 1.76 ## $self->{next_char} is intentionally left as is
645 wakaba 1.1 redo A;
646     }
647 wakaba 1.57 } elsif ($self->{state} == TAG_NAME_STATE) {
648 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
649     $self->{next_char} == 0x000A or # LF
650     $self->{next_char} == 0x000B or # VT
651     $self->{next_char} == 0x000C or # FF
652     $self->{next_char} == 0x0020) { # SP
653 wakaba 1.77 !!!cp (34);
654 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
655 wakaba 1.1 !!!next-input-character;
656     redo A;
657 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
658 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
659 wakaba 1.77 !!!cp (35);
660 wakaba 1.28 $self->{current_token}->{first_start_tag}
661     = not defined $self->{last_emitted_start_tag_name};
662 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
663 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
664 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
665 wakaba 1.78 #if ($self->{current_token}->{attributes}) {
666     # ## NOTE: This should never be reached.
667     # !!! cp (36);
668     # !!! parse-error (type => 'end tag attribute');
669     #} else {
670 wakaba 1.77 !!!cp (37);
671 wakaba 1.78 #}
672 wakaba 1.1 } else {
673     die "$0: $self->{current_token}->{type}: Unknown token type";
674     }
675 wakaba 1.57 $self->{state} = DATA_STATE;
676 wakaba 1.1 !!!next-input-character;
677    
678     !!!emit ($self->{current_token}); # start tag or end tag
679    
680     redo A;
681 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
682     $self->{next_char} <= 0x005A) { # A..Z
683 wakaba 1.77 !!!cp (38);
684 wakaba 1.76 $self->{current_token}->{tag_name} .= chr ($self->{next_char} + 0x0020);
685 wakaba 1.1 # start tag or end tag
686     ## Stay in this state
687     !!!next-input-character;
688     redo A;
689 wakaba 1.76 } elsif ($self->{next_char} == -1) {
690 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
691 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
692 wakaba 1.77 !!!cp (39);
693 wakaba 1.28 $self->{current_token}->{first_start_tag}
694     = not defined $self->{last_emitted_start_tag_name};
695 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
696 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
697 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
698 wakaba 1.78 #if ($self->{current_token}->{attributes}) {
699     # ## NOTE: This state should never be reached.
700     # !!! cp (40);
701     # !!! parse-error (type => 'end tag attribute');
702     #} else {
703 wakaba 1.77 !!!cp (41);
704 wakaba 1.78 #}
705 wakaba 1.1 } else {
706     die "$0: $self->{current_token}->{type}: Unknown token type";
707     }
708 wakaba 1.57 $self->{state} = DATA_STATE;
709 wakaba 1.1 # reconsume
710    
711     !!!emit ($self->{current_token}); # start tag or end tag
712    
713     redo A;
714 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
715 wakaba 1.1 !!!next-input-character;
716 wakaba 1.76 if ($self->{next_char} == 0x003E and # >
717 wakaba 1.55 $self->{current_token}->{type} == START_TAG_TOKEN and
718 wakaba 1.1 $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
719     # permitted slash
720 wakaba 1.77 !!!cp (42);
721 wakaba 1.1 #
722     } else {
723 wakaba 1.77 !!!cp (43);
724 wakaba 1.3 !!!parse-error (type => 'nestc');
725 wakaba 1.1 }
726 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
727 wakaba 1.1 # next-input-character is already done
728     redo A;
729     } else {
730 wakaba 1.77 !!!cp (44);
731 wakaba 1.76 $self->{current_token}->{tag_name} .= chr $self->{next_char};
732 wakaba 1.1 # start tag or end tag
733     ## Stay in the state
734     !!!next-input-character;
735     redo A;
736     }
737 wakaba 1.57 } elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) {
738 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
739     $self->{next_char} == 0x000A or # LF
740     $self->{next_char} == 0x000B or # VT
741     $self->{next_char} == 0x000C or # FF
742     $self->{next_char} == 0x0020) { # SP
743 wakaba 1.77 !!!cp (45);
744 wakaba 1.1 ## Stay in the state
745     !!!next-input-character;
746     redo A;
747 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
748 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
749 wakaba 1.77 !!!cp (46);
750 wakaba 1.28 $self->{current_token}->{first_start_tag}
751     = not defined $self->{last_emitted_start_tag_name};
752 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
753 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
754 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
755 wakaba 1.1 if ($self->{current_token}->{attributes}) {
756 wakaba 1.77 !!!cp (47);
757 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
758 wakaba 1.77 } else {
759     !!!cp (48);
760 wakaba 1.1 }
761     } else {
762     die "$0: $self->{current_token}->{type}: Unknown token type";
763     }
764 wakaba 1.57 $self->{state} = DATA_STATE;
765 wakaba 1.1 !!!next-input-character;
766    
767     !!!emit ($self->{current_token}); # start tag or end tag
768    
769     redo A;
770 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
771     $self->{next_char} <= 0x005A) { # A..Z
772 wakaba 1.77 !!!cp (49);
773 wakaba 1.76 $self->{current_attribute} = {name => chr ($self->{next_char} + 0x0020),
774 wakaba 1.1 value => ''};
775 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
776 wakaba 1.1 !!!next-input-character;
777     redo A;
778 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
779 wakaba 1.1 !!!next-input-character;
780 wakaba 1.76 if ($self->{next_char} == 0x003E and # >
781 wakaba 1.55 $self->{current_token}->{type} == START_TAG_TOKEN and
782 wakaba 1.1 $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
783     # permitted slash
784 wakaba 1.77 !!!cp (50);
785 wakaba 1.1 #
786     } else {
787 wakaba 1.77 !!!cp (51);
788 wakaba 1.3 !!!parse-error (type => 'nestc');
789 wakaba 1.1 }
790     ## Stay in the state
791     # next-input-character is already done
792     redo A;
793 wakaba 1.76 } elsif ($self->{next_char} == -1) {
794 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
795 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
796 wakaba 1.77 !!!cp (52);
797 wakaba 1.28 $self->{current_token}->{first_start_tag}
798     = not defined $self->{last_emitted_start_tag_name};
799 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
800 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
801 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
802 wakaba 1.1 if ($self->{current_token}->{attributes}) {
803 wakaba 1.77 !!!cp (53);
804 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
805 wakaba 1.77 } else {
806     !!!cp (54);
807 wakaba 1.1 }
808     } else {
809     die "$0: $self->{current_token}->{type}: Unknown token type";
810     }
811 wakaba 1.57 $self->{state} = DATA_STATE;
812 wakaba 1.1 # reconsume
813    
814     !!!emit ($self->{current_token}); # start tag or end tag
815    
816     redo A;
817     } else {
818 wakaba 1.72 if ({
819     0x0022 => 1, # "
820     0x0027 => 1, # '
821     0x003D => 1, # =
822 wakaba 1.76 }->{$self->{next_char}}) {
823 wakaba 1.77 !!!cp (55);
824 wakaba 1.72 !!!parse-error (type => 'bad attribute name');
825 wakaba 1.77 } else {
826     !!!cp (56);
827 wakaba 1.72 }
828 wakaba 1.76 $self->{current_attribute} = {name => chr ($self->{next_char}),
829 wakaba 1.1 value => ''};
830 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
831 wakaba 1.1 !!!next-input-character;
832     redo A;
833     }
834 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_NAME_STATE) {
835 wakaba 1.1 my $before_leave = sub {
836     if (exists $self->{current_token}->{attributes} # start tag or end tag
837     ->{$self->{current_attribute}->{name}}) { # MUST
838 wakaba 1.77 !!!cp (57);
839 wakaba 1.39 !!!parse-error (type => 'duplicate attribute:'.$self->{current_attribute}->{name});
840 wakaba 1.1 ## Discard $self->{current_attribute} # MUST
841     } else {
842 wakaba 1.77 !!!cp (58);
843 wakaba 1.1 $self->{current_token}->{attributes}->{$self->{current_attribute}->{name}}
844     = $self->{current_attribute};
845     }
846     }; # $before_leave
847    
848 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
849     $self->{next_char} == 0x000A or # LF
850     $self->{next_char} == 0x000B or # VT
851     $self->{next_char} == 0x000C or # FF
852     $self->{next_char} == 0x0020) { # SP
853 wakaba 1.77 !!!cp (59);
854 wakaba 1.1 $before_leave->();
855 wakaba 1.57 $self->{state} = AFTER_ATTRIBUTE_NAME_STATE;
856 wakaba 1.1 !!!next-input-character;
857     redo A;
858 wakaba 1.76 } elsif ($self->{next_char} == 0x003D) { # =
859 wakaba 1.77 !!!cp (60);
860 wakaba 1.1 $before_leave->();
861 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_VALUE_STATE;
862 wakaba 1.1 !!!next-input-character;
863     redo A;
864 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
865 wakaba 1.1 $before_leave->();
866 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
867 wakaba 1.77 !!!cp (61);
868 wakaba 1.28 $self->{current_token}->{first_start_tag}
869     = not defined $self->{last_emitted_start_tag_name};
870 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
871 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
872 wakaba 1.77 !!!cp (62);
873 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
874 wakaba 1.1 if ($self->{current_token}->{attributes}) {
875 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
876 wakaba 1.1 }
877     } else {
878     die "$0: $self->{current_token}->{type}: Unknown token type";
879     }
880 wakaba 1.57 $self->{state} = DATA_STATE;
881 wakaba 1.1 !!!next-input-character;
882    
883     !!!emit ($self->{current_token}); # start tag or end tag
884    
885     redo A;
886 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
887     $self->{next_char} <= 0x005A) { # A..Z
888 wakaba 1.77 !!!cp (63);
889 wakaba 1.76 $self->{current_attribute}->{name} .= chr ($self->{next_char} + 0x0020);
890 wakaba 1.1 ## Stay in the state
891     !!!next-input-character;
892     redo A;
893 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
894 wakaba 1.1 $before_leave->();
895     !!!next-input-character;
896 wakaba 1.76 if ($self->{next_char} == 0x003E and # >
897 wakaba 1.55 $self->{current_token}->{type} == START_TAG_TOKEN and
898 wakaba 1.1 $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
899     # permitted slash
900 wakaba 1.77 !!!cp (64);
901 wakaba 1.1 #
902     } else {
903 wakaba 1.77 !!!cp (65);
904 wakaba 1.3 !!!parse-error (type => 'nestc');
905 wakaba 1.1 }
906 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
907 wakaba 1.1 # next-input-character is already done
908     redo A;
909 wakaba 1.76 } elsif ($self->{next_char} == -1) {
910 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
911 wakaba 1.1 $before_leave->();
912 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
913 wakaba 1.77 !!!cp (66);
914 wakaba 1.28 $self->{current_token}->{first_start_tag}
915     = not defined $self->{last_emitted_start_tag_name};
916 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
917 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
918 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
919 wakaba 1.1 if ($self->{current_token}->{attributes}) {
920 wakaba 1.77 !!!cp (67);
921 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
922 wakaba 1.77 } else {
923 wakaba 1.78 ## NOTE: This state should never be reached.
924 wakaba 1.77 !!!cp (68);
925 wakaba 1.1 }
926     } else {
927     die "$0: $self->{current_token}->{type}: Unknown token type";
928     }
929 wakaba 1.57 $self->{state} = DATA_STATE;
930 wakaba 1.1 # reconsume
931    
932     !!!emit ($self->{current_token}); # start tag or end tag
933    
934     redo A;
935     } else {
936 wakaba 1.76 if ($self->{next_char} == 0x0022 or # "
937     $self->{next_char} == 0x0027) { # '
938 wakaba 1.77 !!!cp (69);
939 wakaba 1.72 !!!parse-error (type => 'bad attribute name');
940 wakaba 1.77 } else {
941     !!!cp (70);
942 wakaba 1.72 }
943 wakaba 1.76 $self->{current_attribute}->{name} .= chr ($self->{next_char});
944 wakaba 1.1 ## Stay in the state
945     !!!next-input-character;
946     redo A;
947     }
948 wakaba 1.57 } elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) {
949 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
950     $self->{next_char} == 0x000A or # LF
951     $self->{next_char} == 0x000B or # VT
952     $self->{next_char} == 0x000C or # FF
953     $self->{next_char} == 0x0020) { # SP
954 wakaba 1.77 !!!cp (71);
955 wakaba 1.1 ## Stay in the state
956     !!!next-input-character;
957     redo A;
958 wakaba 1.76 } elsif ($self->{next_char} == 0x003D) { # =
959 wakaba 1.77 !!!cp (72);
960 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_VALUE_STATE;
961 wakaba 1.1 !!!next-input-character;
962     redo A;
963 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
964 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
965 wakaba 1.77 !!!cp (73);
966 wakaba 1.28 $self->{current_token}->{first_start_tag}
967     = not defined $self->{last_emitted_start_tag_name};
968 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
969 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
970 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
971 wakaba 1.1 if ($self->{current_token}->{attributes}) {
972 wakaba 1.77 !!!cp (74);
973 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
974 wakaba 1.77 } else {
975 wakaba 1.78 ## NOTE: This state should never be reached.
976 wakaba 1.77 !!!cp (75);
977 wakaba 1.1 }
978     } else {
979     die "$0: $self->{current_token}->{type}: Unknown token type";
980     }
981 wakaba 1.57 $self->{state} = DATA_STATE;
982 wakaba 1.1 !!!next-input-character;
983    
984     !!!emit ($self->{current_token}); # start tag or end tag
985    
986     redo A;
987 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
988     $self->{next_char} <= 0x005A) { # A..Z
989 wakaba 1.77 !!!cp (76);
990 wakaba 1.76 $self->{current_attribute} = {name => chr ($self->{next_char} + 0x0020),
991 wakaba 1.1 value => ''};
992 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
993 wakaba 1.1 !!!next-input-character;
994     redo A;
995 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
996 wakaba 1.1 !!!next-input-character;
997 wakaba 1.76 if ($self->{next_char} == 0x003E and # >
998 wakaba 1.55 $self->{current_token}->{type} == START_TAG_TOKEN and
999 wakaba 1.1 $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
1000     # permitted slash
1001 wakaba 1.77 !!!cp (77);
1002 wakaba 1.1 #
1003     } else {
1004 wakaba 1.77 !!!cp (78);
1005 wakaba 1.3 !!!parse-error (type => 'nestc');
1006 wakaba 1.33 ## TODO: Different error type for <aa / bb> than <aa/>
1007 wakaba 1.1 }
1008 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1009 wakaba 1.1 # next-input-character is already done
1010     redo A;
1011 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1012 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1013 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1014 wakaba 1.77 !!!cp (79);
1015 wakaba 1.28 $self->{current_token}->{first_start_tag}
1016     = not defined $self->{last_emitted_start_tag_name};
1017 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1018 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1019 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1020 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1021 wakaba 1.77 !!!cp (80);
1022 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1023 wakaba 1.77 } else {
1024 wakaba 1.78 ## NOTE: This state should never be reached.
1025 wakaba 1.77 !!!cp (81);
1026 wakaba 1.1 }
1027     } else {
1028     die "$0: $self->{current_token}->{type}: Unknown token type";
1029     }
1030 wakaba 1.57 $self->{state} = DATA_STATE;
1031 wakaba 1.1 # reconsume
1032    
1033     !!!emit ($self->{current_token}); # start tag or end tag
1034    
1035     redo A;
1036     } else {
1037 wakaba 1.77 !!!cp (82);
1038 wakaba 1.76 $self->{current_attribute} = {name => chr ($self->{next_char}),
1039 wakaba 1.1 value => ''};
1040 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
1041 wakaba 1.1 !!!next-input-character;
1042     redo A;
1043     }
1044 wakaba 1.57 } elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) {
1045 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1046     $self->{next_char} == 0x000A or # LF
1047     $self->{next_char} == 0x000B or # VT
1048     $self->{next_char} == 0x000C or # FF
1049     $self->{next_char} == 0x0020) { # SP
1050 wakaba 1.77 !!!cp (83);
1051 wakaba 1.1 ## Stay in the state
1052     !!!next-input-character;
1053     redo A;
1054 wakaba 1.76 } elsif ($self->{next_char} == 0x0022) { # "
1055 wakaba 1.77 !!!cp (84);
1056 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
1057 wakaba 1.1 !!!next-input-character;
1058     redo A;
1059 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1060 wakaba 1.77 !!!cp (85);
1061 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
1062 wakaba 1.1 ## reconsume
1063     redo A;
1064 wakaba 1.76 } elsif ($self->{next_char} == 0x0027) { # '
1065 wakaba 1.77 !!!cp (86);
1066 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
1067 wakaba 1.1 !!!next-input-character;
1068     redo A;
1069 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1070 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1071 wakaba 1.77 !!!cp (87);
1072 wakaba 1.28 $self->{current_token}->{first_start_tag}
1073     = not defined $self->{last_emitted_start_tag_name};
1074 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1075 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1076 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1077 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1078 wakaba 1.77 !!!cp (88);
1079 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1080 wakaba 1.77 } else {
1081 wakaba 1.78 ## NOTE: This state should never be reached.
1082 wakaba 1.77 !!!cp (89);
1083 wakaba 1.1 }
1084     } else {
1085     die "$0: $self->{current_token}->{type}: Unknown token type";
1086     }
1087 wakaba 1.57 $self->{state} = DATA_STATE;
1088 wakaba 1.1 !!!next-input-character;
1089    
1090     !!!emit ($self->{current_token}); # start tag or end tag
1091    
1092     redo A;
1093 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1094 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1095 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1096 wakaba 1.77 !!!cp (90);
1097 wakaba 1.28 $self->{current_token}->{first_start_tag}
1098     = not defined $self->{last_emitted_start_tag_name};
1099 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1100 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1101 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1102 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1103 wakaba 1.77 !!!cp (91);
1104 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1105 wakaba 1.77 } else {
1106 wakaba 1.78 ## NOTE: This state should never be reached.
1107 wakaba 1.77 !!!cp (92);
1108 wakaba 1.1 }
1109     } else {
1110     die "$0: $self->{current_token}->{type}: Unknown token type";
1111     }
1112 wakaba 1.57 $self->{state} = DATA_STATE;
1113 wakaba 1.1 ## reconsume
1114    
1115     !!!emit ($self->{current_token}); # start tag or end tag
1116    
1117     redo A;
1118     } else {
1119 wakaba 1.76 if ($self->{next_char} == 0x003D) { # =
1120 wakaba 1.77 !!!cp (93);
1121 wakaba 1.72 !!!parse-error (type => 'bad attribute value');
1122 wakaba 1.77 } else {
1123     !!!cp (94);
1124 wakaba 1.72 }
1125 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1126 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
1127 wakaba 1.1 !!!next-input-character;
1128     redo A;
1129     }
1130 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {
1131 wakaba 1.76 if ($self->{next_char} == 0x0022) { # "
1132 wakaba 1.77 !!!cp (95);
1133 wakaba 1.72 $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1134 wakaba 1.1 !!!next-input-character;
1135     redo A;
1136 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1137 wakaba 1.77 !!!cp (96);
1138 wakaba 1.57 $self->{last_attribute_value_state} = $self->{state};
1139     $self->{state} = ENTITY_IN_ATTRIBUTE_VALUE_STATE;
1140 wakaba 1.1 !!!next-input-character;
1141     redo A;
1142 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1143 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
1144 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1145 wakaba 1.77 !!!cp (97);
1146 wakaba 1.28 $self->{current_token}->{first_start_tag}
1147     = not defined $self->{last_emitted_start_tag_name};
1148 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1149 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1150 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1151 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1152 wakaba 1.77 !!!cp (98);
1153 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1154 wakaba 1.77 } else {
1155 wakaba 1.78 ## NOTE: This state should never be reached.
1156 wakaba 1.77 !!!cp (99);
1157 wakaba 1.1 }
1158     } else {
1159     die "$0: $self->{current_token}->{type}: Unknown token type";
1160     }
1161 wakaba 1.57 $self->{state} = DATA_STATE;
1162 wakaba 1.1 ## reconsume
1163    
1164     !!!emit ($self->{current_token}); # start tag or end tag
1165    
1166     redo A;
1167     } else {
1168 wakaba 1.77 !!!cp (100);
1169 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1170 wakaba 1.1 ## Stay in the state
1171     !!!next-input-character;
1172     redo A;
1173     }
1174 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {
1175 wakaba 1.76 if ($self->{next_char} == 0x0027) { # '
1176 wakaba 1.77 !!!cp (101);
1177 wakaba 1.72 $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1178 wakaba 1.1 !!!next-input-character;
1179     redo A;
1180 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1181 wakaba 1.77 !!!cp (102);
1182 wakaba 1.57 $self->{last_attribute_value_state} = $self->{state};
1183     $self->{state} = ENTITY_IN_ATTRIBUTE_VALUE_STATE;
1184 wakaba 1.1 !!!next-input-character;
1185     redo A;
1186 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1187 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
1188 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1189 wakaba 1.77 !!!cp (103);
1190 wakaba 1.28 $self->{current_token}->{first_start_tag}
1191     = not defined $self->{last_emitted_start_tag_name};
1192 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1193 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1194 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1195 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1196 wakaba 1.77 !!!cp (104);
1197 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1198 wakaba 1.77 } else {
1199 wakaba 1.78 ## NOTE: This state should never be reached.
1200 wakaba 1.77 !!!cp (105);
1201 wakaba 1.1 }
1202     } else {
1203     die "$0: $self->{current_token}->{type}: Unknown token type";
1204     }
1205 wakaba 1.57 $self->{state} = DATA_STATE;
1206 wakaba 1.1 ## reconsume
1207    
1208     !!!emit ($self->{current_token}); # start tag or end tag
1209    
1210     redo A;
1211     } else {
1212 wakaba 1.77 !!!cp (106);
1213 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1214 wakaba 1.1 ## Stay in the state
1215     !!!next-input-character;
1216     redo A;
1217     }
1218 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) {
1219 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1220     $self->{next_char} == 0x000A or # LF
1221     $self->{next_char} == 0x000B or # VT
1222     $self->{next_char} == 0x000C or # FF
1223     $self->{next_char} == 0x0020) { # SP
1224 wakaba 1.77 !!!cp (107);
1225 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1226 wakaba 1.1 !!!next-input-character;
1227     redo A;
1228 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1229 wakaba 1.77 !!!cp (108);
1230 wakaba 1.57 $self->{last_attribute_value_state} = $self->{state};
1231     $self->{state} = ENTITY_IN_ATTRIBUTE_VALUE_STATE;
1232 wakaba 1.1 !!!next-input-character;
1233     redo A;
1234 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1235 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1236 wakaba 1.77 !!!cp (109);
1237 wakaba 1.28 $self->{current_token}->{first_start_tag}
1238     = not defined $self->{last_emitted_start_tag_name};
1239 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1240 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1241 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1242 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1243 wakaba 1.77 !!!cp (110);
1244 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1245 wakaba 1.77 } else {
1246 wakaba 1.78 ## NOTE: This state should never be reached.
1247 wakaba 1.77 !!!cp (111);
1248 wakaba 1.1 }
1249     } else {
1250     die "$0: $self->{current_token}->{type}: Unknown token type";
1251     }
1252 wakaba 1.57 $self->{state} = DATA_STATE;
1253 wakaba 1.1 !!!next-input-character;
1254    
1255     !!!emit ($self->{current_token}); # start tag or end tag
1256    
1257     redo A;
1258 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1259 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1260 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1261 wakaba 1.77 !!!cp (112);
1262 wakaba 1.28 $self->{current_token}->{first_start_tag}
1263     = not defined $self->{last_emitted_start_tag_name};
1264 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1265 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1266 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1267 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1268 wakaba 1.77 !!!cp (113);
1269 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1270 wakaba 1.77 } else {
1271 wakaba 1.78 ## NOTE: This state should never be reached.
1272 wakaba 1.77 !!!cp (114);
1273 wakaba 1.1 }
1274     } else {
1275     die "$0: $self->{current_token}->{type}: Unknown token type";
1276     }
1277 wakaba 1.57 $self->{state} = DATA_STATE;
1278 wakaba 1.1 ## reconsume
1279    
1280     !!!emit ($self->{current_token}); # start tag or end tag
1281    
1282     redo A;
1283     } else {
1284 wakaba 1.72 if ({
1285     0x0022 => 1, # "
1286     0x0027 => 1, # '
1287     0x003D => 1, # =
1288 wakaba 1.76 }->{$self->{next_char}}) {
1289 wakaba 1.77 !!!cp (115);
1290 wakaba 1.72 !!!parse-error (type => 'bad attribute value');
1291 wakaba 1.77 } else {
1292     !!!cp (116);
1293 wakaba 1.72 }
1294 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1295 wakaba 1.1 ## Stay in the state
1296     !!!next-input-character;
1297     redo A;
1298     }
1299 wakaba 1.57 } elsif ($self->{state} == ENTITY_IN_ATTRIBUTE_VALUE_STATE) {
1300 wakaba 1.72 my $token = $self->_tokenize_attempt_to_consume_an_entity
1301     (1,
1302     $self->{last_attribute_value_state}
1303     == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE ? 0x0022 : # "
1304     $self->{last_attribute_value_state}
1305     == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE ? 0x0027 : # '
1306     -1);
1307 wakaba 1.1
1308     unless (defined $token) {
1309 wakaba 1.77 !!!cp (117);
1310 wakaba 1.1 $self->{current_attribute}->{value} .= '&';
1311     } else {
1312 wakaba 1.77 !!!cp (118);
1313 wakaba 1.1 $self->{current_attribute}->{value} .= $token->{data};
1314 wakaba 1.66 $self->{current_attribute}->{has_reference} = $token->{has_reference};
1315 wakaba 1.1 ## ISSUE: spec says "append the returned character token to the current attribute's value"
1316     }
1317    
1318     $self->{state} = $self->{last_attribute_value_state};
1319     # next-input-character is already done
1320     redo A;
1321 wakaba 1.72 } elsif ($self->{state} == AFTER_ATTRIBUTE_VALUE_QUOTED_STATE) {
1322 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1323     $self->{next_char} == 0x000A or # LF
1324     $self->{next_char} == 0x000B or # VT
1325     $self->{next_char} == 0x000C or # FF
1326     $self->{next_char} == 0x0020) { # SP
1327 wakaba 1.77 !!!cp (118);
1328 wakaba 1.72 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1329     !!!next-input-character;
1330     redo A;
1331 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1332 wakaba 1.72 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1333 wakaba 1.77 !!!cp (119);
1334 wakaba 1.72 $self->{current_token}->{first_start_tag}
1335     = not defined $self->{last_emitted_start_tag_name};
1336     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1337     } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1338     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1339     if ($self->{current_token}->{attributes}) {
1340 wakaba 1.77 !!!cp (120);
1341 wakaba 1.72 !!!parse-error (type => 'end tag attribute');
1342 wakaba 1.77 } else {
1343 wakaba 1.78 ## NOTE: This state should never be reached.
1344 wakaba 1.77 !!!cp (121);
1345 wakaba 1.72 }
1346     } else {
1347     die "$0: $self->{current_token}->{type}: Unknown token type";
1348     }
1349     $self->{state} = DATA_STATE;
1350     !!!next-input-character;
1351    
1352     !!!emit ($self->{current_token}); # start tag or end tag
1353    
1354     redo A;
1355 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1356 wakaba 1.72 !!!next-input-character;
1357 wakaba 1.76 if ($self->{next_char} == 0x003E and # >
1358 wakaba 1.72 $self->{current_token}->{type} == START_TAG_TOKEN and
1359     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
1360     # permitted slash
1361 wakaba 1.77 !!!cp (122);
1362 wakaba 1.72 #
1363     } else {
1364 wakaba 1.77 !!!cp (123);
1365 wakaba 1.72 !!!parse-error (type => 'nestc');
1366     }
1367     $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1368     # next-input-character is already done
1369     redo A;
1370     } else {
1371 wakaba 1.77 !!!cp (124);
1372 wakaba 1.72 !!!parse-error (type => 'no space between attributes');
1373     $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1374     ## reconsume
1375     redo A;
1376     }
1377 wakaba 1.57 } elsif ($self->{state} == BOGUS_COMMENT_STATE) {
1378 wakaba 1.1 ## (only happen if PCDATA state)
1379    
1380 wakaba 1.55 my $token = {type => COMMENT_TOKEN, data => ''};
1381 wakaba 1.1
1382     BC: {
1383 wakaba 1.76 if ($self->{next_char} == 0x003E) { # >
1384 wakaba 1.77 !!!cp (124);
1385 wakaba 1.57 $self->{state} = DATA_STATE;
1386 wakaba 1.1 !!!next-input-character;
1387    
1388     !!!emit ($token);
1389    
1390     redo A;
1391 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1392 wakaba 1.77 !!!cp (125);
1393 wakaba 1.57 $self->{state} = DATA_STATE;
1394 wakaba 1.1 ## reconsume
1395    
1396     !!!emit ($token);
1397    
1398     redo A;
1399     } else {
1400 wakaba 1.77 !!!cp (126);
1401 wakaba 1.76 $token->{data} .= chr ($self->{next_char});
1402 wakaba 1.1 !!!next-input-character;
1403     redo BC;
1404     }
1405     } # BC
1406 wakaba 1.77
1407     die "$0: _get_next_token: unexpected case [BC]";
1408 wakaba 1.57 } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {
1409 wakaba 1.1 ## (only happen if PCDATA state)
1410    
1411     my @next_char;
1412 wakaba 1.76 push @next_char, $self->{next_char};
1413 wakaba 1.1
1414 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
1415 wakaba 1.1 !!!next-input-character;
1416 wakaba 1.76 push @next_char, $self->{next_char};
1417     if ($self->{next_char} == 0x002D) { # -
1418 wakaba 1.77 !!!cp (127);
1419 wakaba 1.55 $self->{current_token} = {type => COMMENT_TOKEN, data => ''};
1420 wakaba 1.57 $self->{state} = COMMENT_START_STATE;
1421 wakaba 1.1 !!!next-input-character;
1422     redo A;
1423 wakaba 1.77 } else {
1424     !!!cp (128);
1425 wakaba 1.1 }
1426 wakaba 1.76 } elsif ($self->{next_char} == 0x0044 or # D
1427     $self->{next_char} == 0x0064) { # d
1428 wakaba 1.1 !!!next-input-character;
1429 wakaba 1.76 push @next_char, $self->{next_char};
1430     if ($self->{next_char} == 0x004F or # O
1431     $self->{next_char} == 0x006F) { # o
1432 wakaba 1.1 !!!next-input-character;
1433 wakaba 1.76 push @next_char, $self->{next_char};
1434     if ($self->{next_char} == 0x0043 or # C
1435     $self->{next_char} == 0x0063) { # c
1436 wakaba 1.1 !!!next-input-character;
1437 wakaba 1.76 push @next_char, $self->{next_char};
1438     if ($self->{next_char} == 0x0054 or # T
1439     $self->{next_char} == 0x0074) { # t
1440 wakaba 1.1 !!!next-input-character;
1441 wakaba 1.76 push @next_char, $self->{next_char};
1442     if ($self->{next_char} == 0x0059 or # Y
1443     $self->{next_char} == 0x0079) { # y
1444 wakaba 1.1 !!!next-input-character;
1445 wakaba 1.76 push @next_char, $self->{next_char};
1446     if ($self->{next_char} == 0x0050 or # P
1447     $self->{next_char} == 0x0070) { # p
1448 wakaba 1.1 !!!next-input-character;
1449 wakaba 1.76 push @next_char, $self->{next_char};
1450     if ($self->{next_char} == 0x0045 or # E
1451     $self->{next_char} == 0x0065) { # e
1452 wakaba 1.77 !!!cp (129);
1453     ## TODO: What a stupid code this is!
1454 wakaba 1.57 $self->{state} = DOCTYPE_STATE;
1455 wakaba 1.1 !!!next-input-character;
1456     redo A;
1457 wakaba 1.77 } else {
1458     !!!cp (130);
1459 wakaba 1.1 }
1460 wakaba 1.77 } else {
1461     !!!cp (131);
1462 wakaba 1.1 }
1463 wakaba 1.77 } else {
1464     !!!cp (132);
1465 wakaba 1.1 }
1466 wakaba 1.77 } else {
1467     !!!cp (133);
1468 wakaba 1.1 }
1469 wakaba 1.77 } else {
1470     !!!cp (134);
1471 wakaba 1.1 }
1472 wakaba 1.77 } else {
1473     !!!cp (135);
1474 wakaba 1.1 }
1475 wakaba 1.77 } else {
1476     !!!cp (136);
1477 wakaba 1.1 }
1478    
1479 wakaba 1.30 !!!parse-error (type => 'bogus comment');
1480 wakaba 1.76 $self->{next_char} = shift @next_char;
1481 wakaba 1.1 !!!back-next-input-character (@next_char);
1482 wakaba 1.57 $self->{state} = BOGUS_COMMENT_STATE;
1483 wakaba 1.1 redo A;
1484    
1485     ## ISSUE: typos in spec: chacacters, is is a parse error
1486     ## ISSUE: spec is somewhat unclear on "is the first character that will be in the comment"; what is "that will be in the comment" is what the algorithm defines, isn't it?
1487 wakaba 1.57 } elsif ($self->{state} == COMMENT_START_STATE) {
1488 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
1489 wakaba 1.77 !!!cp (137);
1490 wakaba 1.57 $self->{state} = COMMENT_START_DASH_STATE;
1491 wakaba 1.23 !!!next-input-character;
1492     redo A;
1493 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1494 wakaba 1.77 !!!cp (138);
1495 wakaba 1.23 !!!parse-error (type => 'bogus comment');
1496 wakaba 1.57 $self->{state} = DATA_STATE;
1497 wakaba 1.23 !!!next-input-character;
1498    
1499     !!!emit ($self->{current_token}); # comment
1500    
1501     redo A;
1502 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1503 wakaba 1.77 !!!cp (139);
1504 wakaba 1.23 !!!parse-error (type => 'unclosed comment');
1505 wakaba 1.57 $self->{state} = DATA_STATE;
1506 wakaba 1.23 ## reconsume
1507    
1508     !!!emit ($self->{current_token}); # comment
1509    
1510     redo A;
1511     } else {
1512 wakaba 1.77 !!!cp (140);
1513 wakaba 1.23 $self->{current_token}->{data} # comment
1514 wakaba 1.76 .= chr ($self->{next_char});
1515 wakaba 1.57 $self->{state} = COMMENT_STATE;
1516 wakaba 1.23 !!!next-input-character;
1517     redo A;
1518     }
1519 wakaba 1.57 } elsif ($self->{state} == COMMENT_START_DASH_STATE) {
1520 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
1521 wakaba 1.77 !!!cp (141);
1522 wakaba 1.57 $self->{state} = COMMENT_END_STATE;
1523 wakaba 1.23 !!!next-input-character;
1524     redo A;
1525 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1526 wakaba 1.77 !!!cp (142);
1527 wakaba 1.23 !!!parse-error (type => 'bogus comment');
1528 wakaba 1.57 $self->{state} = DATA_STATE;
1529 wakaba 1.23 !!!next-input-character;
1530    
1531     !!!emit ($self->{current_token}); # comment
1532    
1533     redo A;
1534 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1535 wakaba 1.77 !!!cp (143);
1536 wakaba 1.23 !!!parse-error (type => 'unclosed comment');
1537 wakaba 1.57 $self->{state} = DATA_STATE;
1538 wakaba 1.23 ## reconsume
1539    
1540     !!!emit ($self->{current_token}); # comment
1541    
1542     redo A;
1543     } else {
1544 wakaba 1.77 !!!cp (144);
1545 wakaba 1.23 $self->{current_token}->{data} # comment
1546 wakaba 1.76 .= '-' . chr ($self->{next_char});
1547 wakaba 1.57 $self->{state} = COMMENT_STATE;
1548 wakaba 1.23 !!!next-input-character;
1549     redo A;
1550     }
1551 wakaba 1.57 } elsif ($self->{state} == COMMENT_STATE) {
1552 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
1553 wakaba 1.77 !!!cp (145);
1554 wakaba 1.57 $self->{state} = COMMENT_END_DASH_STATE;
1555 wakaba 1.1 !!!next-input-character;
1556     redo A;
1557 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1558 wakaba 1.77 !!!cp (146);
1559 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1560 wakaba 1.57 $self->{state} = DATA_STATE;
1561 wakaba 1.1 ## reconsume
1562    
1563     !!!emit ($self->{current_token}); # comment
1564    
1565     redo A;
1566     } else {
1567 wakaba 1.77 !!!cp (147);
1568 wakaba 1.76 $self->{current_token}->{data} .= chr ($self->{next_char}); # comment
1569 wakaba 1.1 ## Stay in the state
1570     !!!next-input-character;
1571     redo A;
1572     }
1573 wakaba 1.57 } elsif ($self->{state} == COMMENT_END_DASH_STATE) {
1574 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
1575 wakaba 1.77 !!!cp (148);
1576 wakaba 1.57 $self->{state} = COMMENT_END_STATE;
1577 wakaba 1.1 !!!next-input-character;
1578     redo A;
1579 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1580 wakaba 1.77 !!!cp (149);
1581 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1582 wakaba 1.57 $self->{state} = DATA_STATE;
1583 wakaba 1.1 ## reconsume
1584    
1585     !!!emit ($self->{current_token}); # comment
1586    
1587     redo A;
1588     } else {
1589 wakaba 1.77 !!!cp (150);
1590 wakaba 1.76 $self->{current_token}->{data} .= '-' . chr ($self->{next_char}); # comment
1591 wakaba 1.57 $self->{state} = COMMENT_STATE;
1592 wakaba 1.1 !!!next-input-character;
1593     redo A;
1594     }
1595 wakaba 1.57 } elsif ($self->{state} == COMMENT_END_STATE) {
1596 wakaba 1.76 if ($self->{next_char} == 0x003E) { # >
1597 wakaba 1.77 !!!cp (151);
1598 wakaba 1.57 $self->{state} = DATA_STATE;
1599 wakaba 1.1 !!!next-input-character;
1600    
1601     !!!emit ($self->{current_token}); # comment
1602    
1603     redo A;
1604 wakaba 1.76 } elsif ($self->{next_char} == 0x002D) { # -
1605 wakaba 1.77 !!!cp (152);
1606 wakaba 1.3 !!!parse-error (type => 'dash in comment');
1607 wakaba 1.1 $self->{current_token}->{data} .= '-'; # comment
1608     ## Stay in the state
1609     !!!next-input-character;
1610     redo A;
1611 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1612 wakaba 1.77 !!!cp (153);
1613 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1614 wakaba 1.57 $self->{state} = DATA_STATE;
1615 wakaba 1.1 ## reconsume
1616    
1617     !!!emit ($self->{current_token}); # comment
1618    
1619     redo A;
1620     } else {
1621 wakaba 1.77 !!!cp (154);
1622 wakaba 1.3 !!!parse-error (type => 'dash in comment');
1623 wakaba 1.76 $self->{current_token}->{data} .= '--' . chr ($self->{next_char}); # comment
1624 wakaba 1.57 $self->{state} = COMMENT_STATE;
1625 wakaba 1.1 !!!next-input-character;
1626     redo A;
1627     }
1628 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_STATE) {
1629 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1630     $self->{next_char} == 0x000A or # LF
1631     $self->{next_char} == 0x000B or # VT
1632     $self->{next_char} == 0x000C or # FF
1633     $self->{next_char} == 0x0020) { # SP
1634 wakaba 1.77 !!!cp (155);
1635 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
1636 wakaba 1.1 !!!next-input-character;
1637     redo A;
1638     } else {
1639 wakaba 1.77 !!!cp (156);
1640 wakaba 1.3 !!!parse-error (type => 'no space before DOCTYPE name');
1641 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
1642 wakaba 1.1 ## reconsume
1643     redo A;
1644     }
1645 wakaba 1.57 } elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) {
1646 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1647     $self->{next_char} == 0x000A or # LF
1648     $self->{next_char} == 0x000B or # VT
1649     $self->{next_char} == 0x000C or # FF
1650     $self->{next_char} == 0x0020) { # SP
1651 wakaba 1.77 !!!cp (157);
1652 wakaba 1.1 ## Stay in the state
1653     !!!next-input-character;
1654     redo A;
1655 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1656 wakaba 1.77 !!!cp (158);
1657 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
1658 wakaba 1.57 $self->{state} = DATA_STATE;
1659 wakaba 1.1 !!!next-input-character;
1660    
1661 wakaba 1.75 !!!emit ({type => DOCTYPE_TOKEN, quirks => 1});
1662 wakaba 1.1
1663     redo A;
1664 wakaba 1.77 } elsif ($self->{next_char} == -1) {
1665     !!!cp (159);
1666 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
1667 wakaba 1.57 $self->{state} = DATA_STATE;
1668 wakaba 1.1 ## reconsume
1669    
1670 wakaba 1.75 !!!emit ({type => DOCTYPE_TOKEN, quirks => 1});
1671 wakaba 1.1
1672     redo A;
1673     } else {
1674 wakaba 1.77 !!!cp (160);
1675 wakaba 1.18 $self->{current_token}
1676 wakaba 1.55 = {type => DOCTYPE_TOKEN,
1677 wakaba 1.76 name => chr ($self->{next_char}),
1678 wakaba 1.75 #quirks => 0,
1679     };
1680 wakaba 1.4 ## ISSUE: "Set the token's name name to the" in the spec
1681 wakaba 1.57 $self->{state} = DOCTYPE_NAME_STATE;
1682 wakaba 1.1 !!!next-input-character;
1683     redo A;
1684     }
1685 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_NAME_STATE) {
1686 wakaba 1.18 ## ISSUE: Redundant "First," in the spec.
1687 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1688     $self->{next_char} == 0x000A or # LF
1689     $self->{next_char} == 0x000B or # VT
1690     $self->{next_char} == 0x000C or # FF
1691     $self->{next_char} == 0x0020) { # SP
1692 wakaba 1.77 !!!cp (161);
1693 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_NAME_STATE;
1694 wakaba 1.1 !!!next-input-character;
1695     redo A;
1696 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1697 wakaba 1.77 !!!cp (162);
1698 wakaba 1.57 $self->{state} = DATA_STATE;
1699 wakaba 1.1 !!!next-input-character;
1700    
1701     !!!emit ($self->{current_token}); # DOCTYPE
1702    
1703     redo A;
1704 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1705 wakaba 1.77 !!!cp (163);
1706 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1707 wakaba 1.57 $self->{state} = DATA_STATE;
1708 wakaba 1.1 ## reconsume
1709    
1710 wakaba 1.75 $self->{current_token}->{quirks} = 1;
1711 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
1712 wakaba 1.1
1713     redo A;
1714     } else {
1715 wakaba 1.77 !!!cp (164);
1716 wakaba 1.1 $self->{current_token}->{name}
1717 wakaba 1.76 .= chr ($self->{next_char}); # DOCTYPE
1718 wakaba 1.1 ## Stay in the state
1719     !!!next-input-character;
1720     redo A;
1721     }
1722 wakaba 1.57 } elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) {
1723 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1724     $self->{next_char} == 0x000A or # LF
1725     $self->{next_char} == 0x000B or # VT
1726     $self->{next_char} == 0x000C or # FF
1727     $self->{next_char} == 0x0020) { # SP
1728 wakaba 1.77 !!!cp (165);
1729 wakaba 1.1 ## Stay in the state
1730     !!!next-input-character;
1731     redo A;
1732 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1733 wakaba 1.77 !!!cp (166);
1734 wakaba 1.57 $self->{state} = DATA_STATE;
1735 wakaba 1.1 !!!next-input-character;
1736    
1737     !!!emit ($self->{current_token}); # DOCTYPE
1738    
1739     redo A;
1740 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1741 wakaba 1.77 !!!cp (167);
1742 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1743 wakaba 1.57 $self->{state} = DATA_STATE;
1744 wakaba 1.1 ## reconsume
1745    
1746 wakaba 1.75 $self->{current_token}->{quirks} = 1;
1747 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
1748    
1749     redo A;
1750 wakaba 1.76 } elsif ($self->{next_char} == 0x0050 or # P
1751     $self->{next_char} == 0x0070) { # p
1752 wakaba 1.18 !!!next-input-character;
1753 wakaba 1.76 if ($self->{next_char} == 0x0055 or # U
1754     $self->{next_char} == 0x0075) { # u
1755 wakaba 1.18 !!!next-input-character;
1756 wakaba 1.76 if ($self->{next_char} == 0x0042 or # B
1757     $self->{next_char} == 0x0062) { # b
1758 wakaba 1.18 !!!next-input-character;
1759 wakaba 1.76 if ($self->{next_char} == 0x004C or # L
1760     $self->{next_char} == 0x006C) { # l
1761 wakaba 1.18 !!!next-input-character;
1762 wakaba 1.76 if ($self->{next_char} == 0x0049 or # I
1763     $self->{next_char} == 0x0069) { # i
1764 wakaba 1.18 !!!next-input-character;
1765 wakaba 1.76 if ($self->{next_char} == 0x0043 or # C
1766     $self->{next_char} == 0x0063) { # c
1767 wakaba 1.77 !!!cp (168);
1768 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
1769 wakaba 1.18 !!!next-input-character;
1770     redo A;
1771 wakaba 1.77 } else {
1772     !!!cp (169);
1773 wakaba 1.18 }
1774 wakaba 1.77 } else {
1775     !!!cp (170);
1776 wakaba 1.18 }
1777 wakaba 1.77 } else {
1778     !!!cp (171);
1779 wakaba 1.18 }
1780 wakaba 1.77 } else {
1781     !!!cp (172);
1782 wakaba 1.18 }
1783 wakaba 1.77 } else {
1784     !!!cp (173);
1785 wakaba 1.18 }
1786    
1787     #
1788 wakaba 1.76 } elsif ($self->{next_char} == 0x0053 or # S
1789     $self->{next_char} == 0x0073) { # s
1790 wakaba 1.18 !!!next-input-character;
1791 wakaba 1.76 if ($self->{next_char} == 0x0059 or # Y
1792     $self->{next_char} == 0x0079) { # y
1793 wakaba 1.18 !!!next-input-character;
1794 wakaba 1.76 if ($self->{next_char} == 0x0053 or # S
1795     $self->{next_char} == 0x0073) { # s
1796 wakaba 1.18 !!!next-input-character;
1797 wakaba 1.76 if ($self->{next_char} == 0x0054 or # T
1798     $self->{next_char} == 0x0074) { # t
1799 wakaba 1.18 !!!next-input-character;
1800 wakaba 1.76 if ($self->{next_char} == 0x0045 or # E
1801     $self->{next_char} == 0x0065) { # e
1802 wakaba 1.18 !!!next-input-character;
1803 wakaba 1.76 if ($self->{next_char} == 0x004D or # M
1804     $self->{next_char} == 0x006D) { # m
1805 wakaba 1.77 !!!cp (174);
1806 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
1807 wakaba 1.18 !!!next-input-character;
1808     redo A;
1809 wakaba 1.77 } else {
1810     !!!cp (175);
1811 wakaba 1.18 }
1812 wakaba 1.77 } else {
1813     !!!cp (176);
1814 wakaba 1.18 }
1815 wakaba 1.77 } else {
1816     !!!cp (177);
1817 wakaba 1.18 }
1818 wakaba 1.77 } else {
1819     !!!cp (178);
1820 wakaba 1.18 }
1821 wakaba 1.77 } else {
1822     !!!cp (179);
1823 wakaba 1.18 }
1824    
1825     #
1826     } else {
1827 wakaba 1.77 !!!cp (180);
1828 wakaba 1.18 !!!next-input-character;
1829     #
1830     }
1831    
1832     !!!parse-error (type => 'string after DOCTYPE name');
1833 wakaba 1.75 $self->{current_token}->{quirks} = 1;
1834 wakaba 1.73
1835 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
1836 wakaba 1.18 # next-input-character is already done
1837     redo A;
1838 wakaba 1.57 } elsif ($self->{state} == BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {
1839 wakaba 1.18 if ({
1840     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
1841     #0x000D => 1, # HT, LF, VT, FF, SP, CR
1842 wakaba 1.76 }->{$self->{next_char}}) {
1843 wakaba 1.77 !!!cp (181);
1844 wakaba 1.18 ## Stay in the state
1845     !!!next-input-character;
1846     redo A;
1847 wakaba 1.76 } elsif ($self->{next_char} eq 0x0022) { # "
1848 wakaba 1.77 !!!cp (182);
1849 wakaba 1.18 $self->{current_token}->{public_identifier} = ''; # DOCTYPE
1850 wakaba 1.57 $self->{state} = DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE;
1851 wakaba 1.18 !!!next-input-character;
1852     redo A;
1853 wakaba 1.76 } elsif ($self->{next_char} eq 0x0027) { # '
1854 wakaba 1.77 !!!cp (183);
1855 wakaba 1.18 $self->{current_token}->{public_identifier} = ''; # DOCTYPE
1856 wakaba 1.57 $self->{state} = DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE;
1857 wakaba 1.18 !!!next-input-character;
1858     redo A;
1859 wakaba 1.76 } elsif ($self->{next_char} eq 0x003E) { # >
1860 wakaba 1.77 !!!cp (184);
1861 wakaba 1.18 !!!parse-error (type => 'no PUBLIC literal');
1862    
1863 wakaba 1.57 $self->{state} = DATA_STATE;
1864 wakaba 1.18 !!!next-input-character;
1865    
1866 wakaba 1.75 $self->{current_token}->{quirks} = 1;
1867 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
1868    
1869     redo A;
1870 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1871 wakaba 1.77 !!!cp (185);
1872 wakaba 1.18 !!!parse-error (type => 'unclosed DOCTYPE');
1873    
1874 wakaba 1.57 $self->{state} = DATA_STATE;
1875 wakaba 1.18 ## reconsume
1876    
1877 wakaba 1.75 $self->{current_token}->{quirks} = 1;
1878 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
1879    
1880     redo A;
1881     } else {
1882 wakaba 1.77 !!!cp (186);
1883 wakaba 1.18 !!!parse-error (type => 'string after PUBLIC');
1884 wakaba 1.75 $self->{current_token}->{quirks} = 1;
1885 wakaba 1.73
1886 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
1887 wakaba 1.18 !!!next-input-character;
1888     redo A;
1889     }
1890 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE) {
1891 wakaba 1.76 if ($self->{next_char} == 0x0022) { # "
1892 wakaba 1.77 !!!cp (187);
1893 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
1894 wakaba 1.18 !!!next-input-character;
1895     redo A;
1896 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1897 wakaba 1.77 !!!cp (188);
1898 wakaba 1.69 !!!parse-error (type => 'unclosed PUBLIC literal');
1899    
1900     $self->{state} = DATA_STATE;
1901     !!!next-input-character;
1902    
1903 wakaba 1.75 $self->{current_token}->{quirks} = 1;
1904 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
1905    
1906     redo A;
1907 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1908 wakaba 1.77 !!!cp (189);
1909 wakaba 1.18 !!!parse-error (type => 'unclosed PUBLIC literal');
1910    
1911 wakaba 1.57 $self->{state} = DATA_STATE;
1912 wakaba 1.18 ## reconsume
1913    
1914 wakaba 1.75 $self->{current_token}->{quirks} = 1;
1915 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
1916    
1917     redo A;
1918     } else {
1919 wakaba 1.77 !!!cp (190);
1920 wakaba 1.18 $self->{current_token}->{public_identifier} # DOCTYPE
1921 wakaba 1.76 .= chr $self->{next_char};
1922 wakaba 1.18 ## Stay in the state
1923     !!!next-input-character;
1924     redo A;
1925     }
1926 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE) {
1927 wakaba 1.76 if ($self->{next_char} == 0x0027) { # '
1928 wakaba 1.77 !!!cp (191);
1929 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
1930 wakaba 1.18 !!!next-input-character;
1931     redo A;
1932 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1933 wakaba 1.77 !!!cp (192);
1934 wakaba 1.69 !!!parse-error (type => 'unclosed PUBLIC literal');
1935    
1936     $self->{state} = DATA_STATE;
1937     !!!next-input-character;
1938    
1939 wakaba 1.75 $self->{current_token}->{quirks} = 1;
1940 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
1941    
1942     redo A;
1943 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1944 wakaba 1.77 !!!cp (193);
1945 wakaba 1.18 !!!parse-error (type => 'unclosed PUBLIC literal');
1946    
1947 wakaba 1.57 $self->{state} = DATA_STATE;
1948 wakaba 1.18 ## reconsume
1949    
1950 wakaba 1.75 $self->{current_token}->{quirks} = 1;
1951 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
1952    
1953     redo A;
1954     } else {
1955 wakaba 1.77 !!!cp (194);
1956 wakaba 1.18 $self->{current_token}->{public_identifier} # DOCTYPE
1957 wakaba 1.76 .= chr $self->{next_char};
1958 wakaba 1.18 ## Stay in the state
1959     !!!next-input-character;
1960     redo A;
1961     }
1962 wakaba 1.57 } elsif ($self->{state} == AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {
1963 wakaba 1.18 if ({
1964     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
1965     #0x000D => 1, # HT, LF, VT, FF, SP, CR
1966 wakaba 1.76 }->{$self->{next_char}}) {
1967 wakaba 1.77 !!!cp (195);
1968 wakaba 1.18 ## Stay in the state
1969     !!!next-input-character;
1970     redo A;
1971 wakaba 1.76 } elsif ($self->{next_char} == 0x0022) { # "
1972 wakaba 1.77 !!!cp (196);
1973 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
1974 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
1975 wakaba 1.18 !!!next-input-character;
1976     redo A;
1977 wakaba 1.76 } elsif ($self->{next_char} == 0x0027) { # '
1978 wakaba 1.77 !!!cp (197);
1979 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
1980 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
1981 wakaba 1.18 !!!next-input-character;
1982     redo A;
1983 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1984 wakaba 1.77 !!!cp (198);
1985 wakaba 1.57 $self->{state} = DATA_STATE;
1986 wakaba 1.18 !!!next-input-character;
1987    
1988     !!!emit ($self->{current_token}); # DOCTYPE
1989    
1990     redo A;
1991 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1992 wakaba 1.77 !!!cp (199);
1993 wakaba 1.18 !!!parse-error (type => 'unclosed DOCTYPE');
1994    
1995 wakaba 1.57 $self->{state} = DATA_STATE;
1996 wakaba 1.26 ## reconsume
1997 wakaba 1.18
1998 wakaba 1.75 $self->{current_token}->{quirks} = 1;
1999 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2000    
2001     redo A;
2002     } else {
2003 wakaba 1.77 !!!cp (200);
2004 wakaba 1.18 !!!parse-error (type => 'string after PUBLIC literal');
2005 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2006 wakaba 1.73
2007 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2008 wakaba 1.18 !!!next-input-character;
2009     redo A;
2010     }
2011 wakaba 1.57 } elsif ($self->{state} == BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
2012 wakaba 1.18 if ({
2013     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2014     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2015 wakaba 1.76 }->{$self->{next_char}}) {
2016 wakaba 1.77 !!!cp (201);
2017 wakaba 1.18 ## Stay in the state
2018     !!!next-input-character;
2019     redo A;
2020 wakaba 1.76 } elsif ($self->{next_char} == 0x0022) { # "
2021 wakaba 1.77 !!!cp (202);
2022 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2023 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
2024 wakaba 1.18 !!!next-input-character;
2025     redo A;
2026 wakaba 1.76 } elsif ($self->{next_char} == 0x0027) { # '
2027 wakaba 1.77 !!!cp (203);
2028 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2029 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
2030 wakaba 1.18 !!!next-input-character;
2031     redo A;
2032 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2033 wakaba 1.77 !!!cp (204);
2034 wakaba 1.18 !!!parse-error (type => 'no SYSTEM literal');
2035 wakaba 1.57 $self->{state} = DATA_STATE;
2036 wakaba 1.18 !!!next-input-character;
2037    
2038 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2039 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2040    
2041     redo A;
2042 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2043 wakaba 1.77 !!!cp (205);
2044 wakaba 1.18 !!!parse-error (type => 'unclosed DOCTYPE');
2045    
2046 wakaba 1.57 $self->{state} = DATA_STATE;
2047 wakaba 1.26 ## reconsume
2048 wakaba 1.18
2049 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2050 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2051    
2052     redo A;
2053     } else {
2054 wakaba 1.77 !!!cp (206);
2055 wakaba 1.30 !!!parse-error (type => 'string after SYSTEM');
2056 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2057 wakaba 1.73
2058 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2059 wakaba 1.18 !!!next-input-character;
2060     redo A;
2061     }
2062 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE) {
2063 wakaba 1.76 if ($self->{next_char} == 0x0022) { # "
2064 wakaba 1.77 !!!cp (207);
2065 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
2066 wakaba 1.18 !!!next-input-character;
2067     redo A;
2068 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2069 wakaba 1.77 !!!cp (208);
2070 wakaba 1.69 !!!parse-error (type => 'unclosed PUBLIC literal');
2071    
2072     $self->{state} = DATA_STATE;
2073     !!!next-input-character;
2074    
2075 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2076 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
2077    
2078     redo A;
2079 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2080 wakaba 1.77 !!!cp (209);
2081 wakaba 1.18 !!!parse-error (type => 'unclosed SYSTEM literal');
2082    
2083 wakaba 1.57 $self->{state} = DATA_STATE;
2084 wakaba 1.18 ## reconsume
2085    
2086 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2087 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2088    
2089     redo A;
2090     } else {
2091 wakaba 1.77 !!!cp (210);
2092 wakaba 1.18 $self->{current_token}->{system_identifier} # DOCTYPE
2093 wakaba 1.76 .= chr $self->{next_char};
2094 wakaba 1.18 ## Stay in the state
2095     !!!next-input-character;
2096     redo A;
2097     }
2098 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE) {
2099 wakaba 1.76 if ($self->{next_char} == 0x0027) { # '
2100 wakaba 1.77 !!!cp (211);
2101 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
2102 wakaba 1.18 !!!next-input-character;
2103     redo A;
2104 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2105 wakaba 1.77 !!!cp (212);
2106 wakaba 1.69 !!!parse-error (type => 'unclosed PUBLIC literal');
2107    
2108     $self->{state} = DATA_STATE;
2109     !!!next-input-character;
2110    
2111 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2112 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
2113    
2114     redo A;
2115 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2116 wakaba 1.77 !!!cp (213);
2117 wakaba 1.18 !!!parse-error (type => 'unclosed SYSTEM literal');
2118    
2119 wakaba 1.57 $self->{state} = DATA_STATE;
2120 wakaba 1.18 ## reconsume
2121    
2122 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2123 wakaba 1.1 !!!emit ($self->{current_token}); # DOCTYPE
2124    
2125     redo A;
2126     } else {
2127 wakaba 1.77 !!!cp (214);
2128 wakaba 1.18 $self->{current_token}->{system_identifier} # DOCTYPE
2129 wakaba 1.76 .= chr $self->{next_char};
2130 wakaba 1.18 ## Stay in the state
2131     !!!next-input-character;
2132     redo A;
2133     }
2134 wakaba 1.57 } elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
2135 wakaba 1.18 if ({
2136     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2137     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2138 wakaba 1.76 }->{$self->{next_char}}) {
2139 wakaba 1.77 !!!cp (215);
2140 wakaba 1.18 ## Stay in the state
2141     !!!next-input-character;
2142     redo A;
2143 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2144 wakaba 1.77 !!!cp (216);
2145 wakaba 1.57 $self->{state} = DATA_STATE;
2146 wakaba 1.18 !!!next-input-character;
2147    
2148     !!!emit ($self->{current_token}); # DOCTYPE
2149    
2150     redo A;
2151 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2152 wakaba 1.77 !!!cp (217);
2153 wakaba 1.18 !!!parse-error (type => 'unclosed DOCTYPE');
2154    
2155 wakaba 1.57 $self->{state} = DATA_STATE;
2156 wakaba 1.26 ## reconsume
2157 wakaba 1.18
2158 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2159 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2160    
2161     redo A;
2162     } else {
2163 wakaba 1.77 !!!cp (218);
2164 wakaba 1.18 !!!parse-error (type => 'string after SYSTEM literal');
2165 wakaba 1.75 #$self->{current_token}->{quirks} = 1;
2166 wakaba 1.73
2167 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2168 wakaba 1.1 !!!next-input-character;
2169     redo A;
2170     }
2171 wakaba 1.57 } elsif ($self->{state} == BOGUS_DOCTYPE_STATE) {
2172 wakaba 1.76 if ($self->{next_char} == 0x003E) { # >
2173 wakaba 1.77 !!!cp (219);
2174 wakaba 1.57 $self->{state} = DATA_STATE;
2175 wakaba 1.1 !!!next-input-character;
2176    
2177     !!!emit ($self->{current_token}); # DOCTYPE
2178    
2179     redo A;
2180 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2181 wakaba 1.77 !!!cp (220);
2182 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
2183 wakaba 1.57 $self->{state} = DATA_STATE;
2184 wakaba 1.1 ## reconsume
2185    
2186     !!!emit ($self->{current_token}); # DOCTYPE
2187    
2188     redo A;
2189     } else {
2190 wakaba 1.77 !!!cp (221);
2191 wakaba 1.1 ## Stay in the state
2192     !!!next-input-character;
2193     redo A;
2194     }
2195     } else {
2196     die "$0: $self->{state}: Unknown state";
2197     }
2198     } # A
2199    
2200     die "$0: _get_next_token: unexpected case";
2201     } # _get_next_token
2202    
## Try to consume a character reference (numeric or named) starting at
## the current input character, |$self->{next_char}|.
## The leading "&" itself is never read here; it is presumably consumed
## by the caller beforehand -- TODO confirm against the call sites.
##
## Arguments:
##   $in_attr    - when true, an unterminated named reference that is
##                 followed by further name characters is returned as
##                 literal text ("&" + name) instead of being expanded.
##   $additional - one extra code point that, like HT/LF/VT/FF/SP, "<",
##                 "&" and EOF (-1), makes this sub return immediately.
##
## Returns |undef| when nothing is recognised (checkpoints 1001, 1005,
## 1019 and 1027), otherwise a hash reference whose |type| is
## CHARACTER_TOKEN.  |has_reference| is set only when |data| is the
## result of resolving a reference.
##
## NOTE: The directives introduced by three exclamation marks are
## macros whose definitions are not part of this section of the file.
2203 wakaba 1.72 sub _tokenize_attempt_to_consume_an_entity ($$$) {
2204     my ($self, $in_attr, $additional) = @_;
2205 wakaba 1.20
## Characters that can never start a reference: leave the input
## untouched and report nothing.
2206     if ({
2207     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, # HT, LF, VT, FF,
2208     0x0020 => 1, 0x003C => 1, 0x0026 => 1, -1 => 1, # SP, <, & # 0x000D # CR
2209 wakaba 1.72 $additional => 1,
2210 wakaba 1.76 }->{$self->{next_char}}) {
2211 wakaba 1.78 !!!cp (1001);
2212 wakaba 1.20 ## Don't consume
2213     ## No error
2214     return undef;
2215 wakaba 1.76 } elsif ($self->{next_char} == 0x0023) { # #
## Numeric character reference: "#x..." / "#X..." (hexadecimal) or
## "#..." (decimal).
2216 wakaba 1.1 !!!next-input-character;
2217 wakaba 1.76 if ($self->{next_char} == 0x0078 or # x
2218     $self->{next_char} == 0x0058) { # X
## |$code| stays undefined until the first hexadecimal digit is seen.
2219 wakaba 1.26 my $code;
## NOTE: |redo X| restarts this block from its first statement, so
## |$x_char| is reassigned and the next character is fetched on every
## pass.  |$x_char| is only used on the first pass (while |$code| is
## still undefined), where it holds the "x" or "X".
2220 wakaba 1.1 X: {
2221 wakaba 1.76 my $x_char = $self->{next_char};
2222 wakaba 1.1 !!!next-input-character;
2223 wakaba 1.76 if (0x0030 <= $self->{next_char} and
2224     $self->{next_char} <= 0x0039) { # 0..9
2225 wakaba 1.78 !!!cp (1002);
2226 wakaba 1.26 $code ||= 0;
2227     $code *= 0x10;
2228 wakaba 1.76 $code += $self->{next_char} - 0x0030;
2229 wakaba 1.1 redo X;
2230 wakaba 1.76 } elsif (0x0061 <= $self->{next_char} and
2231     $self->{next_char} <= 0x0066) { # a..f
2232 wakaba 1.78 !!!cp (1003);
2233 wakaba 1.26 $code ||= 0;
2234     $code *= 0x10;
## "a" (0x61) - 0x60 + 9 = 10, ..., "f" (0x66) - 0x60 + 9 = 15.
2235 wakaba 1.76 $code += $self->{next_char} - 0x0060 + 9;
2236 wakaba 1.1 redo X;
2237 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
2238     $self->{next_char} <= 0x0046) { # A..F
2239 wakaba 1.78 !!!cp (1004);
2240 wakaba 1.26 $code ||= 0;
2241     $code *= 0x10;
## "A" (0x41) - 0x40 + 9 = 10, ..., "F" (0x46) - 0x40 + 9 = 15.
2242 wakaba 1.76 $code += $self->{next_char} - 0x0040 + 9;
2243 wakaba 1.1 redo X;
2244 wakaba 1.26 } elsif (not defined $code) { # no hexadecimal digit
## "#x" with no digit at all: hand back the "x"/"X" and the current
## character (presumably pushing them back onto the input -- the macro
## is not visible here) and make "#" the current character again.
2245 wakaba 1.78 !!!cp (1005);
2246 wakaba 1.3 !!!parse-error (type => 'bare hcro');
2247 wakaba 1.76 !!!back-next-input-character ($x_char, $self->{next_char});
2248     $self->{next_char} = 0x0023; # #
2249 wakaba 1.1 return undef;
2250 wakaba 1.76 } elsif ($self->{next_char} == 0x003B) { # ;
2251 wakaba 1.78 !!!cp (1006);
2252 wakaba 1.1 !!!next-input-character;
2253     } else {
2254 wakaba 1.78 !!!cp (1007);
2255 wakaba 1.3 !!!parse-error (type => 'no refc');
2256 wakaba 1.1 }
2257    
## Replace code points that must not be produced by a reference:
## zero and surrogates, values above U+10FFFF, CR, and the 0x80..0x9F
## range (mapped through |$c1_entity_char|).
2258 wakaba 1.26 if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) {
2259 wakaba 1.78 !!!cp (1008);
2260 wakaba 1.26 !!!parse-error (type => sprintf 'invalid character reference:U+%04X', $code);
2261     $code = 0xFFFD;
2262     } elsif ($code > 0x10FFFF) {
2263 wakaba 1.78 !!!cp (1009);
2264 wakaba 1.26 !!!parse-error (type => sprintf 'invalid character reference:U-%08X', $code);
2265     $code = 0xFFFD;
2266     } elsif ($code == 0x000D) {
2267 wakaba 1.78 !!!cp (1010);
2268 wakaba 1.26 !!!parse-error (type => 'CR character reference');
2269     $code = 0x000A;
2270     } elsif (0x80 <= $code and $code <= 0x9F) {
2271 wakaba 1.78 !!!cp (1011);
2272 wakaba 1.30 !!!parse-error (type => sprintf 'C1 character reference:U+%04X', $code);
2273 wakaba 1.26 $code = $c1_entity_char->{$code};
2274 wakaba 1.1 }
2275    
2276 wakaba 1.66 return {type => CHARACTER_TOKEN, data => chr $code,
2277     has_reference => 1};
2278 wakaba 1.1 } # X
2279 wakaba 1.76 } elsif (0x0030 <= $self->{next_char} and
2280     $self->{next_char} <= 0x0039) { # 0..9
## Decimal reference: accumulate digits while they last.
2281     my $code = $self->{next_char} - 0x0030;
2282 wakaba 1.1 !!!next-input-character;
2283    
2284 wakaba 1.76 while (0x0030 <= $self->{next_char} and
2285     $self->{next_char} <= 0x0039) { # 0..9
2286 wakaba 1.78 !!!cp (1012);
2287 wakaba 1.1 $code *= 10;
2288 wakaba 1.76 $code += $self->{next_char} - 0x0030;
2289 wakaba 1.1
2290     !!!next-input-character;
2291     }
2292    
2293 wakaba 1.76 if ($self->{next_char} == 0x003B) { # ;
2294 wakaba 1.78 !!!cp (1013);
2295 wakaba 1.1 !!!next-input-character;
2296     } else {
2297 wakaba 1.78 !!!cp (1014);
2298 wakaba 1.3 !!!parse-error (type => 'no refc');
2299 wakaba 1.1 }
2300    
## Same replacement rules as in the hexadecimal branch above.
2301 wakaba 1.26 if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) {
2302 wakaba 1.78 !!!cp (1015);
2303 wakaba 1.26 !!!parse-error (type => sprintf 'invalid character reference:U+%04X', $code);
2304     $code = 0xFFFD;
2305     } elsif ($code > 0x10FFFF) {
2306 wakaba 1.78 !!!cp (1016);
2307 wakaba 1.26 !!!parse-error (type => sprintf 'invalid character reference:U-%08X', $code);
2308     $code = 0xFFFD;
2309     } elsif ($code == 0x000D) {
2310 wakaba 1.78 !!!cp (1017);
2311 wakaba 1.26 !!!parse-error (type => 'CR character reference');
2312     $code = 0x000A;
2313 wakaba 1.4 } elsif (0x80 <= $code and $code <= 0x9F) {
2314 wakaba 1.78 !!!cp (1018);
2315 wakaba 1.30 !!!parse-error (type => sprintf 'C1 character reference:U+%04X', $code);
2316 wakaba 1.4 $code = $c1_entity_char->{$code};
2317 wakaba 1.1 }
2318    
2319 wakaba 1.66 return {type => CHARACTER_TOKEN, data => chr $code, has_reference => 1};
2320 wakaba 1.1 } else {
## "#" followed by neither "x"/"X" nor a digit: hand back the current
## character and make "#" the current character again.
2321 wakaba 1.78 !!!cp (1019);
2322 wakaba 1.3 !!!parse-error (type => 'bare nero');
2323 wakaba 1.76 !!!back-next-input-character ($self->{next_char});
2324     $self->{next_char} = 0x0023; # #
2325 wakaba 1.1 return undef;
2326     }
2327 wakaba 1.76 } elsif ((0x0041 <= $self->{next_char} and
2328     $self->{next_char} <= 0x005A) or
2329     (0x0061 <= $self->{next_char} and
2330     $self->{next_char} <= 0x007A)) {
## Named reference: starts with an ASCII letter.
2331     my $entity_name = chr $self->{next_char};
2332 wakaba 1.1 !!!next-input-character;
2333    
## |$value| is the text to return: the raw characters read so far, or
## the replacement text of the most recent name found in |$EntityChar|
## plus any characters read after it.
## |$match|: 0 = no name matched, 1 = matched a name ending with ";",
## -1 = matched a name without ";"; every later non-matching character
## doubles it, so a value below -1 means "unterminated match followed
## by more name characters".
2334     my $value = $entity_name;
2335 wakaba 1.37 my $match = 0;
2336 wakaba 1.16 require Whatpm::_NamedEntityList;
2337     our $EntityChar;
2338 wakaba 1.1
2339     while (length $entity_name < 10 and
2340     ## NOTE: Some number greater than the maximum length of entity name
2341 wakaba 1.76 ((0x0041 <= $self->{next_char} and # a
2342     $self->{next_char} <= 0x005A) or # x
2343     (0x0061 <= $self->{next_char} and # a
2344     $self->{next_char} <= 0x007A) or # z
2345     (0x0030 <= $self->{next_char} and # 0
2346     $self->{next_char} <= 0x0039) or # 9
2347     $self->{next_char} == 0x003B)) { # ;
2348     $entity_name .= chr $self->{next_char};
2349 wakaba 1.16 if (defined $EntityChar->{$entity_name}) {
2350 wakaba 1.76 if ($self->{next_char} == 0x003B) { # ;
2351 wakaba 1.78 !!!cp (1020);
2352 wakaba 1.26 $value = $EntityChar->{$entity_name};
2353 wakaba 1.16 $match = 1;
2354     !!!next-input-character;
2355     last;
2356 wakaba 1.37 } else {
## Matched without ";": remember it but keep reading, since a longer
## name may still match.
2357 wakaba 1.78 !!!cp (1021);
2358 wakaba 1.26 $value = $EntityChar->{$entity_name};
2359     $match = -1;
2360 wakaba 1.37 !!!next-input-character;
2361 wakaba 1.16 }
2362 wakaba 1.1 } else {
2363 wakaba 1.78 !!!cp (1022);
2364 wakaba 1.76 $value .= chr $self->{next_char};
2365 wakaba 1.37 $match *= 2;
2366     !!!next-input-character;
2367 wakaba 1.1 }
2368     }
2369    
2370 wakaba 1.16 if ($match > 0) {
2371 wakaba 1.78 !!!cp (1023);
2372 wakaba 1.66 return {type => CHARACTER_TOKEN, data => $value, has_reference => 1};
2373 wakaba 1.16 } elsif ($match < 0) {
2374 wakaba 1.30 !!!parse-error (type => 'no refc');
2375 wakaba 1.37 if ($in_attr and $match < -1) {
## In attribute mode an unterminated name followed by more name
## characters is not expanded; the text read is returned verbatim.
2376 wakaba 1.78 !!!cp (1024);
2377 wakaba 1.55 return {type => CHARACTER_TOKEN, data => '&'.$entity_name};
2378 wakaba 1.37 } else {
2379 wakaba 1.78 !!!cp (1025);
2380 wakaba 1.66 return {type => CHARACTER_TOKEN, data => $value, has_reference => 1};
2381 wakaba 1.37 }
2382 wakaba 1.1 } else {
2383 wakaba 1.78 !!!cp (1026);
2384 wakaba 1.3 !!!parse-error (type => 'bare ero');
2385 wakaba 1.66 ## NOTE: "No characters are consumed" in the spec.
## Here the characters already read are returned as literal text
## after "&" instead.
2386 wakaba 1.55 return {type => CHARACTER_TOKEN, data => '&'.$value};
2387 wakaba 1.1 }
2388     } else {
2389 wakaba 1.78 !!!cp (1027);
2390 wakaba 1.1 ## no characters are consumed
2391 wakaba 1.3 !!!parse-error (type => 'bare ero');
2392 wakaba 1.1 return undef;
2393     }
2394     } # _tokenize_attempt_to_consume_an_entity
2395    
2396     sub _initialize_tree_constructor ($) {
         ## Prepares |$self->{document}| for the tree construction stage:
         ## turns its strict error checking off and flags the document as
         ## an HTML document.  Takes only the invocant.
2397     my $self = shift;
2398     ## NOTE: $self->{document} MUST be specified before this method is called
2399     $self->{document}->strict_error_checking (0);
2400     ## TODO: Turn mutation events off # MUST
2401     ## TODO: Turn loose Document option (manakai extension) on
2402 wakaba 1.18 $self->{document}->manakai_is_html (1); # MUST
2403 wakaba 1.1 } # _initialize_tree_constructor
2404    
2405     sub _terminate_tree_constructor ($) {
         ## Counterpart of |_initialize_tree_constructor|: turns the strict
         ## error checking of |$self->{document}| back on.  Takes only the
         ## invocant.
2406     my $self = shift;
2407     $self->{document}->strict_error_checking (1);
2408     ## TODO: Turn mutation events on
2409     } # _terminate_tree_constructor
2410    
2411     ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
2412    
2413 wakaba 1.3 { # tree construction stage
2414     my $token;
2415    
2416 wakaba 1.1 sub _construct_tree ($) {
         ## Entry point of the tree construction stage.  Resets the
         ## per-parse state kept on |$self| (form element pointer, head
         ## element pointer, stack of open elements, inner HTML node) and
         ## then runs the three phases in order: "initial", "before html"
         ## and, starting from the "before head" insertion mode, the main
         ## loop.
2417     my ($self) = @_;
2418    
2419     ## When an interactive UA renders the $self->{document} available
2420     ## to the user, or when it begins accepting user input, is
2421     ## not defined.
2422    
2423     ## Append a character: collect it and all subsequent consecutive
2424     ## characters and insert one Text node whose data is concatenation
2425     ## of all those characters. # MUST
2426    
         ## NOTE(review): |!!!next-token| looks like a macro expanded when
         ## this .src file is processed; presumably it loads the next token
         ## into the shared |$token| lexical -- confirm.
2427     !!!next-token;
2428    
2429 wakaba 1.3 undef $self->{form_element};
2430     undef $self->{head_element};
2431     $self->{open_elements} = [];
2432     undef $self->{inner_html_node};
2433    
2434 wakaba 1.84 ## NOTE: The "initial" insertion mode.
2435 wakaba 1.3 $self->_tree_construction_initial; # MUST
2436 wakaba 1.84
2437     ## NOTE: The "before html" insertion mode.
2438 wakaba 1.3 $self->_tree_construction_root_element;
2439 wakaba 1.84 $self->{insertion_mode} = BEFORE_HEAD_IM;
2440    
2441     ## NOTE: The "before head" insertion mode and so on.
2442 wakaba 1.3 $self->_tree_construction_main;
2443     } # _construct_tree
2444    
2445     sub _tree_construction_initial ($) {
         ## Implements the "initial" insertion mode.  Works on the |$token|
         ## lexical shared by the tree construction subs and returns as soon
         ## as the insertion mode can be left:
         ##
         ##   - DOCTYPE token: reports 'not HTML5' when the name is missing,
         ##     is not "HTML" (compared after upper-casing a-z), or when a
         ##     public or system identifier is present; appends a document
         ##     type definition node to the document; derives the compat
         ##     mode from the quirks flag, name, public identifier and
         ##     system identifier; moves to the next token and returns.
         ##   - Start tag, end tag or end-of-file token: reports
         ##     'no DOCTYPE', selects quirks mode and returns with the token
         ##     left for reprocessing.
         ##   - Character token: leading whitespace is dropped; if nothing
         ##     remains the next token is examined, otherwise the token is
         ##     handled like the missing DOCTYPE case above.
         ##   - Comment token: a comment node is appended to the document
         ##     and the next token is examined.
         ##
         ## Dies on any other token type.
2446     my $self = shift;
2447 wakaba 1.84
2448     ## NOTE: "initial" insertion mode
2449    
2450 wakaba 1.18 INITIAL: {
2451 wakaba 1.55 if ($token->{type} == DOCTYPE_TOKEN) {
2452 wakaba 1.18 ## NOTE: Conformance checkers MAY, instead of reporting "not HTML5"
2453     ## error, switch to a conformance checking mode for another
2454     ## language.
2455     my $doctype_name = $token->{name};
2456     $doctype_name = '' unless defined $doctype_name;
2457     $doctype_name =~ tr/a-z/A-Z/;
2458     if (not defined $token->{name} or # <!DOCTYPE>
2459     defined $token->{public_identifier} or
2460     defined $token->{system_identifier}) {
2461 wakaba 1.79 !!!cp ('t1');
2462 wakaba 1.18 !!!parse-error (type => 'not HTML5');
2463     } elsif ($doctype_name ne 'HTML') {
2464 wakaba 1.79 !!!cp ('t2');
2465 wakaba 1.18 ## ISSUE: ASCII case-insensitive? (in fact it does not matter)
2466     !!!parse-error (type => 'not HTML5');
2467 wakaba 1.79 } else {
2468     !!!cp ('t3');
2469 wakaba 1.18 }
2470    
2471     my $doctype = $self->{document}->create_document_type_definition
2472     ($token->{name}); ## ISSUE: If name is missing (e.g. <!DOCTYPE>)?
2473     $doctype->public_id ($token->{public_identifier})
2474     if defined $token->{public_identifier};
2475     $doctype->system_id ($token->{system_identifier})
2476     if defined $token->{system_identifier};
2477     ## NOTE: Other DocumentType attributes are null or empty lists.
2478     ## ISSUE: internalSubset = null??
2479     $self->{document}->append_child ($doctype);
2480    
2481 wakaba 1.75 if ($token->{quirks} or $doctype_name ne 'HTML') {
2482 wakaba 1.79 !!!cp ('t4');
2483 wakaba 1.18 $self->{document}->manakai_compat_mode ('quirks');
2484     } elsif (defined $token->{public_identifier}) {
2485     my $pubid = $token->{public_identifier};
         ## Upper-case a-z so that the comparison with the upper-case
         ## identifiers below ignores ASCII case.
2486     $pubid =~ tr/a-z/A-Z/;
2487     if ({
2488     "+//SILMARIL//DTD HTML PRO V0R11 19970101//EN" => 1,
2489     "-//ADVASOFT LTD//DTD HTML 3.0 ASWEDIT + EXTENSIONS//EN" => 1,
2490     "-//AS//DTD HTML 3.0 ASWEDIT + EXTENSIONS//EN" => 1,
2491     "-//IETF//DTD HTML 2.0 LEVEL 1//EN" => 1,
2492     "-//IETF//DTD HTML 2.0 LEVEL 2//EN" => 1,
2493     "-//IETF//DTD HTML 2.0 STRICT LEVEL 1//EN" => 1,
2494     "-//IETF//DTD HTML 2.0 STRICT LEVEL 2//EN" => 1,
2495     "-//IETF//DTD HTML 2.0 STRICT//EN" => 1,
2496     "-//IETF//DTD HTML 2.0//EN" => 1,
2497     "-//IETF//DTD HTML 2.1E//EN" => 1,
2498     "-//IETF//DTD HTML 3.0//EN" => 1,
2499     "-//IETF//DTD HTML 3.0//EN//" => 1,
2500     "-//IETF//DTD HTML 3.2 FINAL//EN" => 1,
2501     "-//IETF//DTD HTML 3.2//EN" => 1,
2502     "-//IETF//DTD HTML 3//EN" => 1,
2503     "-//IETF//DTD HTML LEVEL 0//EN" => 1,
2504     "-//IETF//DTD HTML LEVEL 0//EN//2.0" => 1,
2505     "-//IETF//DTD HTML LEVEL 1//EN" => 1,
2506     "-//IETF//DTD HTML LEVEL 1//EN//2.0" => 1,
2507     "-//IETF//DTD HTML LEVEL 2//EN" => 1,
2508     "-//IETF//DTD HTML LEVEL 2//EN//2.0" => 1,
2509     "-//IETF//DTD HTML LEVEL 3//EN" => 1,
2510     "-//IETF//DTD HTML LEVEL 3//EN//3.0" => 1,
2511     "-//IETF//DTD HTML STRICT LEVEL 0//EN" => 1,
2512     "-//IETF//DTD HTML STRICT LEVEL 0//EN//2.0" => 1,
2513     "-//IETF//DTD HTML STRICT LEVEL 1//EN" => 1,
2514     "-//IETF//DTD HTML STRICT LEVEL 1//EN//2.0" => 1,
2515     "-//IETF//DTD HTML STRICT LEVEL 2//EN" => 1,
2516     "-//IETF//DTD HTML STRICT LEVEL 2//EN//2.0" => 1,
2517     "-//IETF//DTD HTML STRICT LEVEL 3//EN" => 1,
2518     "-//IETF//DTD HTML STRICT LEVEL 3//EN//3.0" => 1,
2519     "-//IETF//DTD HTML STRICT//EN" => 1,
2520     "-//IETF//DTD HTML STRICT//EN//2.0" => 1,
2521     "-//IETF//DTD HTML STRICT//EN//3.0" => 1,
2522     "-//IETF//DTD HTML//EN" => 1,
2523     "-//IETF//DTD HTML//EN//2.0" => 1,
2524     "-//IETF//DTD HTML//EN//3.0" => 1,
2525     "-//METRIUS//DTD METRIUS PRESENTATIONAL//EN" => 1,
2526     "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML STRICT//EN" => 1,
2527     "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML//EN" => 1,
2528     "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 TABLES//EN" => 1,
2529     "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML STRICT//EN" => 1,
2530     "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML//EN" => 1,
2531     "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 TABLES//EN" => 1,
2532     "-//NETSCAPE COMM. CORP.//DTD HTML//EN" => 1,
2533     "-//NETSCAPE COMM. CORP.//DTD STRICT HTML//EN" => 1,
2534     "-//O'REILLY AND ASSOCIATES//DTD HTML 2.0//EN" => 1,
2535     "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED 1.0//EN" => 1,
2536 wakaba 1.72 "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED RELAXED 1.0//EN" => 1,
2537     "-//SOFTQUAD SOFTWARE//DTD HOTMETAL PRO 6.0::19990601::EXTENSIONS TO HTML 4.0//EN" => 1,
2538     "-//SOFTQUAD//DTD HOTMETAL PRO 4.0::19971010::EXTENSIONS TO HTML 4.0//EN" => 1,
2539 wakaba 1.18 "-//SPYGLASS//DTD HTML 2.0 EXTENDED//EN" => 1,
2540     "-//SQ//DTD HTML 2.0 HOTMETAL + EXTENSIONS//EN" => 1,
2541     "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA HTML//EN" => 1,
2542     "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA STRICT HTML//EN" => 1,
2543     "-//W3C//DTD HTML 3 1995-03-24//EN" => 1,
2544     "-//W3C//DTD HTML 3.2 DRAFT//EN" => 1,
2545     "-//W3C//DTD HTML 3.2 FINAL//EN" => 1,
2546     "-//W3C//DTD HTML 3.2//EN" => 1,
2547     "-//W3C//DTD HTML 3.2S DRAFT//EN" => 1,
2548     "-//W3C//DTD HTML 4.0 FRAMESET//EN" => 1,
2549     "-//W3C//DTD HTML 4.0 TRANSITIONAL//EN" => 1,
2550     "-//W3C//DTD HTML EXPERIMETNAL 19960712//EN" => 1,
2551     "-//W3C//DTD HTML EXPERIMENTAL 970421//EN" => 1,
2552     "-//W3C//DTD W3 HTML//EN" => 1,
2553     "-//W3O//DTD W3 HTML 3.0//EN" => 1,
2554     "-//W3O//DTD W3 HTML 3.0//EN//" => 1,
2555     "-//W3O//DTD W3 HTML STRICT 3.0//EN//" => 1,
2556     "-//WEBTECHS//DTD MOZILLA HTML 2.0//EN" => 1,
2557     "-//WEBTECHS//DTD MOZILLA HTML//EN" => 1,
2558     "-/W3C/DTD HTML 4.0 TRANSITIONAL/EN" => 1,
2559     "HTML" => 1,
2560     }->{$pubid}) {
2561 wakaba 1.79 !!!cp ('t5');
2562 wakaba 1.18 $self->{document}->manakai_compat_mode ('quirks');
2563     } elsif ($pubid eq "-//W3C//DTD HTML 4.01 FRAMESET//EN" or
2564     $pubid eq "-//W3C//DTD HTML 4.01 TRANSITIONAL//EN") {
         ## For these two public identifiers the system identifier
         ## decides: present -> limited quirks mode, missing -> quirks
         ## mode (HTML5 "initial" insertion mode).
2565     if (defined $token->{system_identifier}) {
2566 wakaba 1.79 !!!cp ('t6');
2567 wakaba 1.18 $self->{document}->manakai_compat_mode ('limited quirks');
2568     } else {
2569 wakaba 1.79 !!!cp ('t7');
2570 wakaba 1.18 $self->{document}->manakai_compat_mode ('quirks');
2571 wakaba 1.3 }
2572 wakaba 1.80 } elsif ($pubid eq "-//W3C//DTD XHTML 1.0 FRAMESET//EN" or
2573     $pubid eq "-//W3C//DTD XHTML 1.0 TRANSITIONAL//EN") {
2574 wakaba 1.79 !!!cp ('t8');
2575 wakaba 1.18 $self->{document}->manakai_compat_mode ('limited quirks');
2576 wakaba 1.79 } else {
2577     !!!cp ('t9');
2578 wakaba 1.18 }
2579 wakaba 1.79 } else {
2580     !!!cp ('t10');
2581 wakaba 1.18 }
         ## The system identifier check runs after (and independently of)
         ## the public identifier checks above, so it can override a
         ## compat mode selected there.
2582     if (defined $token->{system_identifier}) {
2583     my $sysid = $token->{system_identifier};
2584     $sysid =~ tr/A-Z/a-z/;
2585     if ($sysid eq "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
2586 wakaba 1.80 ## TODO: Check the spec: PUBLIC "(limited quirks)" "(quirks)"
2587 wakaba 1.18 $self->{document}->manakai_compat_mode ('quirks');
2588 wakaba 1.79 !!!cp ('t11');
2589     } else {
2590     !!!cp ('t12');
2591 wakaba 1.18 }
2592 wakaba 1.79 } else {
2593     !!!cp ('t13');
2594 wakaba 1.18 }
2595    
2596 wakaba 1.84 ## Go to the "before html" insertion mode.
2597 wakaba 1.18 !!!next-token;
2598     return;
2599     } elsif ({
2600 wakaba 1.55 START_TAG_TOKEN, 1,
2601     END_TAG_TOKEN, 1,
2602     END_OF_FILE_TOKEN, 1,
2603 wakaba 1.18 }->{$token->{type}}) {
2604 wakaba 1.79 !!!cp ('t14');
2605 wakaba 1.18 !!!parse-error (type => 'no DOCTYPE');
2606     $self->{document}->manakai_compat_mode ('quirks');
2607 wakaba 1.84 ## Go to the "before html" insertion mode.
2608 wakaba 1.18 ## reprocess
2609     return;
2610 wakaba 1.55 } elsif ($token->{type} == CHARACTER_TOKEN) {
2611 wakaba 1.18 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D
2612     ## Ignore the token
2613 wakaba 1.26
2614 wakaba 1.18 unless (length $token->{data}) {
2615 wakaba 1.79 !!!cp ('t15');
2616 wakaba 1.84 ## Stay in the insertion mode.
2617 wakaba 1.18 !!!next-token;
2618     redo INITIAL;
2619 wakaba 1.79 } else {
2620     !!!cp ('t16');
2621 wakaba 1.3 }
2622 wakaba 1.79 } else {
2623     !!!cp ('t17');
2624 wakaba 1.3 }
2625 wakaba 1.18
2626     !!!parse-error (type => 'no DOCTYPE');
2627     $self->{document}->manakai_compat_mode ('quirks');
2628 wakaba 1.84 ## Go to the "before html" insertion mode.
2629 wakaba 1.18 ## reprocess
2630     return;
2631 wakaba 1.55 } elsif ($token->{type} == COMMENT_TOKEN) {
2632 wakaba 1.79 !!!cp ('t18');
2633 wakaba 1.18 my $comment = $self->{document}->create_comment ($token->{data});
2634     $self->{document}->append_child ($comment);
2635    
2636 wakaba 1.84 ## Stay in the insertion mode.
2637 wakaba 1.18 !!!next-token;
2638     redo INITIAL;
2639     } else {
2640 wakaba 1.55 die "$0: $token->{type}: Unknown token type";
2641 wakaba 1.18 }
2642     } # INITIAL
2643 wakaba 1.79
2644     die "$0: _tree_construction_initial: This should be never reached";
2645 wakaba 1.3 } # _tree_construction_initial
2646    
2647     sub _tree_construction_root_element ($) {
         ## Implements the "before html" insertion mode.  Works on the
         ## |$token| lexical shared by the tree construction subs and
         ## returns once the root element has been created:
         ##
         ##   - DOCTYPE token: reports 'in html:#DOCTYPE' and skips it.
         ##   - Comment token: appended to the document as a comment node.
         ##   - Character token: leading whitespace is dropped; a token
         ##     that becomes empty is skipped.
         ##   - <html> start tag: the root element is created from the
         ##     token, and the |application_cache_selection| callback
         ##     receives the value of the |manifest| attribute (or |undef|
         ##     when there is none); the token is consumed.
         ##   - Anything else (remaining character data, other start tags,
         ##     end tags, end of file): an |html| element without
         ##     attributes is created, the callback is invoked once with
         ##     |undef|, and the token is left for reprocessing.
         ##
         ## In both element-creating cases the element is appended to the
         ## document and pushed onto the stack of open elements as
         ## |[$element, 'html']|.  Dies on an unknown token type.
2648     my $self = shift;
2649 wakaba 1.84
2650     ## NOTE: "before html" insertion mode.
2651 wakaba 1.3
2652     B: {
2653 wakaba 1.55 if ($token->{type} == DOCTYPE_TOKEN) {
2654 wakaba 1.79 !!!cp ('t19');
2655 wakaba 1.3 !!!parse-error (type => 'in html:#DOCTYPE');
2656     ## Ignore the token
2657 wakaba 1.84 ## Stay in the insertion mode.
2658 wakaba 1.3 !!!next-token;
2659     redo B;
2660 wakaba 1.55 } elsif ($token->{type} == COMMENT_TOKEN) {
2661 wakaba 1.79 !!!cp ('t20');
2662 wakaba 1.3 my $comment = $self->{document}->create_comment ($token->{data});
2663     $self->{document}->append_child ($comment);
2664 wakaba 1.84 ## Stay in the insertion mode.
2665 wakaba 1.3 !!!next-token;
2666     redo B;
2667 wakaba 1.55 } elsif ($token->{type} == CHARACTER_TOKEN) {
2668 wakaba 1.26 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D
2669     ## Ignore the token.
2670    
2671 wakaba 1.3 unless (length $token->{data}) {
2672 wakaba 1.79 !!!cp ('t21');
2673 wakaba 1.84 ## Stay in the insertion mode.
2674 wakaba 1.3 !!!next-token;
2675     redo B;
2676 wakaba 1.79 } else {
2677     !!!cp ('t22');
2678 wakaba 1.3 }
2679 wakaba 1.79 } else {
2680     !!!cp ('t23');
2681 wakaba 1.3 }
2682 wakaba 1.61
2683     ## NOTE: The application cache selection callback is not invoked
         ## here; it is invoked exactly once below, after the implied
         ## root element has been created, as for every other token
         ## that falls through.
2684    
2685     #
2686     } elsif ($token->{type} == START_TAG_TOKEN) {
2687 wakaba 1.84 if ($token->{tag_name} eq 'html') {
2688     my $root_element;
2689     !!!create-element ($root_element, $token->{tag_name}, $token->{attributes});
2690     $self->{document}->append_child ($root_element);
2691     push @{$self->{open_elements}}, [$root_element, 'html'];
2692    
2693     if ($token->{attributes}->{manifest}) {
2694     !!!cp ('t24');
2695     $self->{application_cache_selection}
2696     ->($token->{attributes}->{manifest}->{value});
2697     ## ISSUE: No relative reference resolution?
2698     } else {
2699     !!!cp ('t25');
2700     $self->{application_cache_selection}->(undef);
2701     }
2702    
2703     !!!next-token;
2704     return; ## Go to the "before head" insertion mode.
2705 wakaba 1.61 } else {
2706 wakaba 1.84 !!!cp ('t25.1');
2707     #
2708 wakaba 1.61 }
2709 wakaba 1.3 } elsif ({
2710 wakaba 1.55 END_TAG_TOKEN, 1,
2711     END_OF_FILE_TOKEN, 1,
2712 wakaba 1.3 }->{$token->{type}}) {
2713 wakaba 1.79 !!!cp ('t26');
2714 wakaba 1.3 #
2715     } else {
2716 wakaba 1.55 die "$0: $token->{type}: Unknown token type";
2717 wakaba 1.3 }
2718 wakaba 1.61
         ## Fall-through: create the implied root element.
2719 wakaba 1.84 my $root_element; !!!create-element ($root_element, 'html');
2720     $self->{document}->append_child ($root_element);
2721     push @{$self->{open_elements}}, [$root_element, 'html'];
2722    
2723     $self->{application_cache_selection}->(undef);
2724    
2725     ## NOTE: Reprocess the token.
2726     return; ## Go to the "before head" insertion mode.
2727    
2728     ## ISSUE: There is an issue in the spec
2729 wakaba 1.3 } # B
2730 wakaba 1.79
2731     die "$0: _tree_construction_root_element: This should never be reached";
2732 wakaba 1.3 } # _tree_construction_root_element
2733    
2734     sub _reset_insertion_mode ($) {
         ## Recomputes |$self->{insertion_mode}| from the stack of open
         ## elements.  The stack is walked from its last entry towards its
         ## first one; each entry is |[node, tag_name]| and the tag name
         ## selects the mode.  Returns as soon as a mode has been chosen.
2735     my $self = shift;
2736    
2737     ## Step 1
         ## |$last| becomes true once the first entry of the stack has been
         ## reached.
2738     my $last;
2739    
2740     ## Step 2
2741     my $i = -1;
2742     my $node = $self->{open_elements}->[$i];
2743    
2744     ## Step 3
2745     S3: {
         ## At the first stack entry: when |inner_html_node| is defined
         ## and is neither |td| nor |th|, it is examined in place of the
         ## stack entry.
2746 wakaba 1.29 if ($self->{open_elements}->[0]->[0] eq $node->[0]) {
2747     $last = 1;
2748     if (defined $self->{inner_html_node}) {
2749     if ($self->{inner_html_node}->[1] eq 'td' or
2750     $self->{inner_html_node}->[1] eq 'th') {
2751 wakaba 1.79 !!!cp ('t27');
2752 wakaba 1.29 #
2753     } else {
2754 wakaba 1.79 !!!cp ('t28');
2755 wakaba 1.29 $node = $self->{inner_html_node};
2756     }
2757 wakaba 1.3 }
2758     }
2759    
2760     ## Step 4..13
2761     my $new_mode = {
2762 wakaba 1.54 select => IN_SELECT_IM,
2763 wakaba 1.83 ## NOTE: |option| and |optgroup| do not set
2764     ## insertion mode to "in select" by themselves.
2765 wakaba 1.54 td => IN_CELL_IM,
2766     th => IN_CELL_IM,
2767     tr => IN_ROW_IM,
2768     tbody => IN_TABLE_BODY_IM,
2769     thead => IN_TABLE_BODY_IM,
2770     tfoot => IN_TABLE_BODY_IM,
2771     caption => IN_CAPTION_IM,
2772     colgroup => IN_COLUMN_GROUP_IM,
2773     table => IN_TABLE_IM,
2774     head => IN_BODY_IM, # not in head!
2775     body => IN_BODY_IM,
2776     frameset => IN_FRAMESET_IM,
2777 wakaba 1.3 }->{$node->[1]};
         ## NOTE(review): |... and return| returns only when the assigned
         ## value is true; this assumes that none of the *_IM constants
         ## used above is 0 -- confirm against their definitions.
2778     $self->{insertion_mode} = $new_mode and return if defined $new_mode;
2779    
2780     ## Step 14
2781     if ($node->[1] eq 'html') {
2782     unless (defined $self->{head_element}) {
2783 wakaba 1.79 !!!cp ('t29');
2784 wakaba 1.54 $self->{insertion_mode} = BEFORE_HEAD_IM;
2785 wakaba 1.3 } else {
2786 wakaba 1.81 ## ISSUE: Can this state be reached?
2787 wakaba 1.79 !!!cp ('t30');
2788 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
2789 wakaba 1.3 }
2790     return;
2791 wakaba 1.79 } else {
2792     !!!cp ('t31');
2793 wakaba 1.3 }
2794    
2795     ## Step 15
2796 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM and return if $last;
2797 wakaba 1.3
2798     ## Step 16
         ## Move one entry towards the beginning of the stack.
2799     $i--;
2800     $node = $self->{open_elements}->[$i];
2801    
2802     ## Step 17
2803     redo S3;
2804     } # S3
2805 wakaba 1.79
2806     die "$0: _reset_insertion_mode: This line should never be reached";
2807 wakaba 1.3 } # _reset_insertion_mode
2808    
2809     sub _tree_construction_main ($) {
2810     my $self = shift;
2811    
2812 wakaba 1.1 my $active_formatting_elements = [];
2813    
2814     my $reconstruct_active_formatting_elements = sub { # MUST
         ## Reconstructs the active formatting elements.  The only argument
         ## is a code reference that inserts a node into the tree.
         ##
         ## Entries of |$active_formatting_elements| have the same
         ## |[node, tag_name]| shape as entries of the stack of open
         ## elements; a marker has the string '#marker' as its first item.
         ##
         ## Starting from the last entry, the list is rewound to just after
         ## the nearest entry that is a marker or is in the stack of open
         ## elements.  From there to the end of the list each entry is
         ## cloned with |clone_node (0)|, the clone is inserted through the
         ## callback, pushed onto the stack of open elements and stored in
         ## the list in place of the original entry.
2815     my $insert = shift;
2816    
2817     ## Step 1
2818     return unless @$active_formatting_elements;
2819    
2820     ## Step 3
2821     my $i = -1;
2822     my $entry = $active_formatting_elements->[$i];
2823    
2824     ## Step 2
         ## Nothing to do when the last entry is a marker or is still in
         ## the stack of open elements.
2825     return if $entry->[0] eq '#marker';
2826 wakaba 1.3 for (@{$self->{open_elements}}) {
2827 wakaba 1.1 if ($entry->[0] eq $_->[0]) {
2828 wakaba 1.79 !!!cp ('t32');
2829 wakaba 1.1 return;
2830     }
2831     }
2832    
2833     S4: {
2834     ## Step 4
         ## No earlier entry: start creating clones from the current one.
2835     last S4 if $active_formatting_elements->[0]->[0] eq $entry->[0];
2836    
2837     ## Step 5
2838     $i--;
2839     $entry = $active_formatting_elements->[$i];
2840    
2841     ## Step 6
2842     if ($entry->[0] eq '#marker') {
2843 wakaba 1.81 !!!cp ('t33_1');
2844 wakaba 1.1 #
2845     } else {
2846     my $in_open_elements;
2847 wakaba 1.3 OE: for (@{$self->{open_elements}}) {
2848 wakaba 1.1 if ($entry->[0] eq $_->[0]) {
2849 wakaba 1.79 !!!cp ('t33');
2850 wakaba 1.1 $in_open_elements = 1;
2851     last OE;
2852     }
2853     }
2854     if ($in_open_elements) {
2855 wakaba 1.79 !!!cp ('t34');
2856 wakaba 1.1 #
2857     } else {
2858 wakaba 1.81 ## NOTE: <!DOCTYPE HTML><p><b><i><u></p> <p>X
2859 wakaba 1.79 !!!cp ('t35');
2860 wakaba 1.1 redo S4;
2861     }
2862     }
2863    
2864     ## Step 7
         ## The entry just examined is a marker or is open, so step
         ## forward to the first entry that has to be recreated.
2865     $i++;
2866     $entry = $active_formatting_elements->[$i];
2867     } # S4
2868    
2869     S7: {
2870     ## Step 8
2871     my $clone = [$entry->[0]->clone_node (0), $entry->[1]];
2872    
2873     ## Step 9
2874     $insert->($clone->[0]);
2875 wakaba 1.3 push @{$self->{open_elements}}, $clone;
2876 wakaba 1.1
2877     ## Step 10
2878 wakaba 1.3 $active_formatting_elements->[$i] = $self->{open_elements}->[-1];
2879 wakaba 1.1
2880     ## Step 11
         ## Continue with the following entry until the clone just stored
         ## is the last entry of the list.
2881     unless ($clone->[0] eq $active_formatting_elements->[-1]->[0]) {
2882 wakaba 1.79 !!!cp ('t36');
2883 wakaba 1.1 ## Step 7'
2884     $i++;
2885     $entry = $active_formatting_elements->[$i];
2886    
2887     redo S7;
2888     }
2889 wakaba 1.79
2890     !!!cp ('t37');
2891 wakaba 1.1 } # S7
2892     }; # $reconstruct_active_formatting_elements
2893    
2894     my $clear_up_to_marker = sub {
         ## Clears |$active_formatting_elements| up to the last marker:
         ## the last '#marker' entry and every entry after it are removed.
         ## The list is left untouched when it contains no marker.
2895     for (reverse 0..$#$active_formatting_elements) {
2896     if ($active_formatting_elements->[$_]->[0] eq '#marker') {
2897 wakaba 1.79 !!!cp ('t38');
2898 wakaba 1.1 splice @$active_formatting_elements, $_;
2899     return;
2900     }
2901     }
2902 wakaba 1.79
2903     !!!cp ('t39');
2904 wakaba 1.1 }; # $clear_up_to_marker
2905    
2906 wakaba 1.96 my $insert;
2907    
2908     my $parse_rcdata = sub ($) {
         ## Handles a start tag whose element content is CDATA or RCDATA.
         ## The only argument is the content model flag to switch to
         ## (|CDATA_CONTENT_MODEL| or |RCDATA_CONTENT_MODEL|; any other
         ## value makes the error branch below die).
         ##
         ## The element is created from the current start tag token and
         ## inserted through the |$insert| callback.  The following
         ## character tokens are concatenated and, if not empty, appended
         ## to the element as one text node.  The content model is then
         ## set back to |PCDATA_CONTENT_MODEL|.  A token other than the
         ## matching end tag is reported as a parse error.  In either case
         ## the sub finishes by advancing to the next token.
2909     my ($content_model_flag) = @_;
2910 wakaba 1.25
2911     ## Step 1
2912     my $start_tag_name = $token->{tag_name};
2913     my $el;
2914     !!!create-element ($el, $start_tag_name, $token->{attributes});
2915    
2916     ## Step 2
2917 wakaba 1.96 $insert->($el);
2918 wakaba 1.25
2919     ## Step 3
2920 wakaba 1.40 $self->{content_model} = $content_model_flag; # CDATA or RCDATA
2921 wakaba 1.13 delete $self->{escape}; # MUST
2922 wakaba 1.25
2923     ## Step 4
2924 wakaba 1.1 my $text = '';
2925     !!!next-token;
2926 wakaba 1.55 while ($token->{type} == CHARACTER_TOKEN) { # or until stop tokenizing
2927 wakaba 1.79 !!!cp ('t40');
2928 wakaba 1.1 $text .= $token->{data};
2929     !!!next-token;
2930 wakaba 1.25 }
2931    
2932     ## Step 5
2933 wakaba 1.1 if (length $text) {
2934 wakaba 1.79 !!!cp ('t41');
         ## The inner |$text| (a text node) shadows the outer string.
2935 wakaba 1.25 my $text = $self->{document}->create_text_node ($text);
2936     $el->append_child ($text);
2937 wakaba 1.1 }
2938 wakaba 1.25
2939     ## Step 6
2940 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL;
2941 wakaba 1.25
2942     ## Step 7
2943 wakaba 1.79 if ($token->{type} == END_TAG_TOKEN and
2944     $token->{tag_name} eq $start_tag_name) {
2945     !!!cp ('t42');
2946 wakaba 1.1 ## Ignore the token
2947     } else {
2948 wakaba 1.96 ## NOTE: An end-of-file token.
2949     if ($content_model_flag == CDATA_CONTENT_MODEL) {
2950     !!!cp ('t43');
2951     !!!parse-error (type => 'in CDATA:#'.$token->{type});
2952     } elsif ($content_model_flag == RCDATA_CONTENT_MODEL) {
2953     !!!cp ('t44');
2954     !!!parse-error (type => 'in RCDATA:#'.$token->{type});
2955     } else {
2956     die "$0: $content_model_flag in parse_rcdata";
2957     }
2958 wakaba 1.1 }
2959     !!!next-token;
2960 wakaba 1.25 }; # $parse_rcdata
2961 wakaba 1.1
2962 wakaba 1.96 my $script_start_tag = sub () {
         ## Handles a |script| start tag.  A |script| element is created
         ## from the attributes of the current token, the content model is
         ## switched to CDATA, and the following character tokens are
         ## collected and appended to the element as text.  The content
         ## model is then set back to PCDATA, and a token other than the
         ## |script| end tag is reported as a parse error.
         ##
         ## The element is inserted through |$insert| only when
         ## |$self->{inner_html_node}| is not defined; in the other branch
         ## nothing is inserted by this sub.  Finishes by advancing to the
         ## next token.
2963 wakaba 1.1 my $script_el;
2964     !!!create-element ($script_el, 'script', $token->{attributes});
2965     ## TODO: mark as "parser-inserted"
2966    
2967 wakaba 1.40 $self->{content_model} = CDATA_CONTENT_MODEL;
2968 wakaba 1.13 delete $self->{escape}; # MUST
2969 wakaba 1.1
2970     my $text = '';
2971     !!!next-token;
2972 wakaba 1.55 while ($token->{type} == CHARACTER_TOKEN) {
2973 wakaba 1.79 !!!cp ('t45');
2974 wakaba 1.1 $text .= $token->{data};
2975     !!!next-token;
2976     } # stop if non-character token or tokenizer stops tokenising
2977     if (length $text) {
2978 wakaba 1.79 !!!cp ('t46');
2979 wakaba 1.1 $script_el->manakai_append_text ($text);
2980     }
2981    
2982 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL;
2983 wakaba 1.1
2984 wakaba 1.55 if ($token->{type} == END_TAG_TOKEN and
2985 wakaba 1.1 $token->{tag_name} eq 'script') {
2986 wakaba 1.79 !!!cp ('t47');
2987 wakaba 1.1 ## Ignore the token
2988     } else {
2989 wakaba 1.79 !!!cp ('t48');
2990 wakaba 1.3 !!!parse-error (type => 'in CDATA:#'.$token->{type});
2991 wakaba 1.1 ## ISSUE: And ignore?
2992     ## TODO: mark as "already executed"
2993     }
2994    
2995 wakaba 1.3 if (defined $self->{inner_html_node}) {
2996 wakaba 1.79 !!!cp ('t49');
2997 wakaba 1.3 ## TODO: mark as "already executed"
2998     } else {
2999 wakaba 1.79 !!!cp ('t50');
3000 wakaba 1.1 ## TODO: $old_insertion_point = current insertion point
3001     ## TODO: insertion point = just before the next input character
3002 wakaba 1.25
3003     $insert->($script_el);
3004 wakaba 1.1
3005     ## TODO: insertion point = $old_insertion_point (might be "undefined")
3006    
3007     ## TODO: if there is a script that will execute as soon as the parser resume, then...
3008     }
3009    
3010     !!!next-token;
3011     }; # $script_start_tag
3012    
3013     my $formatting_end_tag = sub {
         ## Handles the end tag of a formatting element.  The only argument
         ## is the tag name.
         ##
         ## The FET block is repeated (|redo FET| in step 14) until one of
         ## the early exits is taken.  Each exit advances to the next token
         ## and returns:
         ##   - no entry with the tag name after the last marker in
         ##     |$active_formatting_elements| (parse error);
         ##   - the entry is in the stack of open elements but not in scope
         ##     (parse error);
         ##   - the entry is not in the stack of open elements (parse
         ##     error, an entry is removed from the list);
         ##   - there is no furthest block: the stack is cut at the
         ##     formatting element and its entry is removed from the list.
         ## Otherwise the subtree is rearranged in steps 4 to 13.
3014     my $tag_name = shift;
3015    
3016     FET: {
3017     ## Step 1
         ## Find the last entry with this tag name, not looking past the
         ## last marker.
3018     my $formatting_element;
3019     my $formatting_element_i_in_active;
3020     AFE: for (reverse 0..$#$active_formatting_elements) {
3021     if ($active_formatting_elements->[$_]->[1] eq $tag_name) {
3022 wakaba 1.79 !!!cp ('t51');
3023 wakaba 1.1 $formatting_element = $active_formatting_elements->[$_];
3024     $formatting_element_i_in_active = $_;
3025     last AFE;
3026     } elsif ($active_formatting_elements->[$_]->[0] eq '#marker') {
3027 wakaba 1.79 !!!cp ('t52');
3028 wakaba 1.1 last AFE;
3029     }
3030     } # AFE
3031     unless (defined $formatting_element) {
3032 wakaba 1.79 !!!cp ('t53');
3033 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$tag_name);
3034 wakaba 1.1 ## Ignore the token
3035     !!!next-token;
3036     return;
3037     }
3038     ## has an element in scope
         ## Walk the stack from its end; |$in_scope| is cleared as soon as
         ## one of the scope-limiting elements listed below is passed.
3039     my $in_scope = 1;
3040     my $formatting_element_i_in_open;
3041 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3042     my $node = $self->{open_elements}->[$_];
3043 wakaba 1.1 if ($node->[0] eq $formatting_element->[0]) {
3044     if ($in_scope) {
3045 wakaba 1.79 !!!cp ('t54');
3046 wakaba 1.1 $formatting_element_i_in_open = $_;
3047     last INSCOPE;
3048     } else { # in open elements but not in scope
3049 wakaba 1.79 !!!cp ('t55');
3050 wakaba 1.4 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3051 wakaba 1.1 ## Ignore the token
3052     !!!next-token;
3053     return;
3054     }
3055     } elsif ({
3056     table => 1, caption => 1, td => 1, th => 1,
3057     button => 1, marquee => 1, object => 1, html => 1,
3058     }->{$node->[1]}) {
3059 wakaba 1.79 !!!cp ('t56');
3060 wakaba 1.1 $in_scope = 0;
3061     }
3062     } # INSCOPE
3063     unless (defined $formatting_element_i_in_open) {
3064 wakaba 1.79 !!!cp ('t57');
3065 wakaba 1.4 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
         ## NOTE(review): |pop| removes the last entry of the list, which
         ## is the formatting element only when the entry found in step 1
         ## is the last one -- confirm whether a |splice| at
         ## |$formatting_element_i_in_active| is intended.
3066 wakaba 1.1 pop @$active_formatting_elements; # $formatting_element
3067     !!!next-token; ## TODO: ok?
3068     return;
3069     }
3070 wakaba 1.3 if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
3071 wakaba 1.79 !!!cp ('t58');
3072 wakaba 1.4 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3073 wakaba 1.1 }
3074    
3075     ## Step 2
         ## Scanning from the end of the stack down to the formatting
         ## element, remember the qualifying node that is nearest to the
         ## formatting element (the last one assigned wins).
3076     my $furthest_block;
3077     my $furthest_block_i_in_open;
3078 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3079     my $node = $self->{open_elements}->[$_];
3080 wakaba 1.1 if (not $formatting_category->{$node->[1]} and
3081     #not $phrasing_category->{$node->[1]} and
3082     ($special_category->{$node->[1]} or
3083     $scoping_category->{$node->[1]})) {
3084 wakaba 1.79 !!!cp ('t59');
3085 wakaba 1.1 $furthest_block = $node;
3086     $furthest_block_i_in_open = $_;
3087     } elsif ($node->[0] eq $formatting_element->[0]) {
3088 wakaba 1.79 !!!cp ('t60');
3089 wakaba 1.1 last OE;
3090     }
3091     } # OE
3092    
3093     ## Step 3
         ## No furthest block: pop the stack up to and including the
         ## formatting element and drop its entry from the list.
3094     unless (defined $furthest_block) { # MUST
3095 wakaba 1.79 !!!cp ('t61');
3096 wakaba 1.3 splice @{$self->{open_elements}}, $formatting_element_i_in_open;
3097 wakaba 1.1 splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
3098     !!!next-token;
3099     return;
3100     }
3101    
3102     ## Step 4
3103 wakaba 1.3 my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1];
3104 wakaba 1.1
3105     ## Step 5
3106     my $furthest_block_parent = $furthest_block->[0]->parent_node;
3107     if (defined $furthest_block_parent) {
3108 wakaba 1.79 !!!cp ('t62');
3109 wakaba 1.1 $furthest_block_parent->remove_child ($furthest_block->[0]);
3110     }
3111    
3112     ## Step 6
         ## The bookmark is kept as the node of the entry preceding the
         ## formatting element in the list.
         ## NOTE(review): when |$formatting_element_i_in_active| is 0 the
         ## index -1 selects the last entry of the list -- confirm that
         ## this case is handled as intended.
3113     my $bookmark_prev_el
3114     = $active_formatting_elements->[$formatting_element_i_in_active - 1]
3115     ->[0];
3116    
3117     ## Step 7
3118     my $node = $furthest_block;
3119     my $node_i_in_open = $furthest_block_i_in_open;
3120     my $last_node = $furthest_block;
3121     S7: {
3122     ## Step 1
3123     $node_i_in_open--;
3124 wakaba 1.3 $node = $self->{open_elements}->[$node_i_in_open];
3125 wakaba 1.1
3126     ## Step 2
         ## A node that is not in the list of active formatting elements
         ## is removed from the stack and the next node towards the
         ## beginning of the stack is examined.
3127     my $node_i_in_active;
3128     S7S2: {
3129     for (reverse 0..$#$active_formatting_elements) {
3130     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
3131 wakaba 1.79 !!!cp ('t63');
3132 wakaba 1.1 $node_i_in_active = $_;
3133     last S7S2;
3134     }
3135     }
3136 wakaba 1.3 splice @{$self->{open_elements}}, $node_i_in_open, 1;
3137 wakaba 1.1 redo S7;
3138     } # S7S2
3139    
3140     ## Step 3
3141     last S7 if $node->[0] eq $formatting_element->[0];
3142    
3143     ## Step 4
3144     if ($last_node->[0] eq $furthest_block->[0]) {
3145 wakaba 1.79 !!!cp ('t64');
3146 wakaba 1.1 $bookmark_prev_el = $node->[0];
3147     }
3148    
3149     ## Step 5
         ## A node that already has children is replaced, both in the
         ## list and in the stack, by a clone made with |clone_node (0)|.
3150     if ($node->[0]->has_child_nodes ()) {
3151 wakaba 1.79 !!!cp ('t65');
3152 wakaba 1.1 my $clone = [$node->[0]->clone_node (0), $node->[1]];
3153     $active_formatting_elements->[$node_i_in_active] = $clone;
3154 wakaba 1.3 $self->{open_elements}->[$node_i_in_open] = $clone;
3155 wakaba 1.1 $node = $clone;
3156     }
3157    
3158     ## Step 6
3159     $node->[0]->append_child ($last_node->[0]);
3160    
3161     ## Step 7
3162     $last_node = $node;
3163    
3164     ## Step 8
3165     redo S7;
3166     } # S7
3167    
3168     ## Step 8
3169     $common_ancestor_node->[0]->append_child ($last_node->[0]);
3170    
3171     ## Step 9
3172     my $clone = [$formatting_element->[0]->clone_node (0),
3173     $formatting_element->[1]];
3174    
3175     ## Step 10
         ## The child list is copied into |@cn| before the children are
         ## moved to the clone.
3176     my @cn = @{$furthest_block->[0]->child_nodes};
3177     $clone->[0]->append_child ($_) for @cn;
3178    
3179     ## Step 11
3180     $furthest_block->[0]->append_child ($clone->[0]);
3181    
3182     ## Step 12
         ## Remove the formatting element from the list and insert the
         ## clone after the bookmark.  When the bookmark has already been
         ## seen (it is later in the list), its index is decremented to
         ## account for the removal.
3183     my $i;
3184     AFE: for (reverse 0..$#$active_formatting_elements) {
3185     if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
3186 wakaba 1.79 !!!cp ('t66');
3187 wakaba 1.1 splice @$active_formatting_elements, $_, 1;
3188     $i-- and last AFE if defined $i;
3189     } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
3190 wakaba 1.79 !!!cp ('t67');
3191 wakaba 1.1 $i = $_;
3192     }
3193     } # AFE
3194     splice @$active_formatting_elements, $i + 1, 0, $clone;
3195    
3196     ## Step 13
         ## Remove the formatting element from the stack and place the
         ## clone right after the furthest block.
3197     undef $i;
3198 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3199     if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
3200 wakaba 1.79 !!!cp ('t68');
3201 wakaba 1.3 splice @{$self->{open_elements}}, $_, 1;
3202 wakaba 1.1 $i-- and last OE if defined $i;
3203 wakaba 1.3 } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
3204 wakaba 1.79 !!!cp ('t69');
3205 wakaba 1.1 $i = $_;
3206     }
3207     } # OE
         ## NOTE(review): the length argument is 1 here whereas step 12
         ## uses 0, so an entry following the furthest block (if there is
         ## one) is replaced by the clone instead of being kept -- confirm
         ## that this is intended.
3208 wakaba 1.3 splice @{$self->{open_elements}}, $i + 1, 1, $clone;
3209 wakaba 1.1
3210     ## Step 14
3211     redo FET;
3212     } # FET
3213     }; # $formatting_end_tag
3214    
3215 wakaba 1.95 ## NOTE: $open_tables->[-1]->[0] is the "current table".
3216     ## NOTE: $open_tables->[-1]->[1] is the "tainted" flag.
3217     my $open_tables = [[$self->{open_elements}->[0]->[0]]];
3218    
3219 wakaba 1.96 $insert = my $insert_to_current = sub {
         ## Insertion callback that appends the node given as the first
         ## argument to the current node, i.e. the node of the last entry
         ## of the stack of open elements.  It is also stored in |$insert|
         ## by this statement.
3220 wakaba 1.25 $self->{open_elements}->[-1]->[0]->append_child ($_[0]);
3221 wakaba 1.1 }; # $insert_to_current
3222    
3223     my $insert_to_foster = sub {
         ## Insertion callback with foster parenting.  The only argument is
         ## the node to insert.
         ##
         ## When the current node is |table|, |tbody|, |tfoot|, |thead| or
         ## |tr|, the last |table| entry of the stack of open elements is
         ## looked up.  If its parent is an element node
         ## (|node_type == 1|), the child is inserted into that parent
         ## before the table; otherwise the node of the stack entry
         ## preceding the table becomes the foster parent.  Without any
         ## |table| entry the first stack entry is used.  The current table
         ## is then flagged as tainted.  For any other current node the
         ## child is simply appended to it.
3224 wakaba 1.95 my $child = shift;
3225     if ({
3226     table => 1, tbody => 1, tfoot => 1, thead => 1, tr => 1,
3227     }->{$self->{open_elements}->[-1]->[1]}) {
3228     # MUST
3229     my $foster_parent_element;
3230     my $next_sibling;
3231 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3232     if ($self->{open_elements}->[$_]->[1] eq 'table') {
3233     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
3234 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
3235 wakaba 1.79 !!!cp ('t70');
3236 wakaba 1.1 $foster_parent_element = $parent;
3237 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
3238 wakaba 1.1 } else {
3239 wakaba 1.79 !!!cp ('t71');
3240 wakaba 1.1 $foster_parent_element
3241 wakaba 1.3 = $self->{open_elements}->[$_ - 1]->[0];
3242 wakaba 1.1 }
3243     last OE;
3244     }
3245     } # OE
3246 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0]
3247 wakaba 1.1 unless defined $foster_parent_element;
         ## NOTE(review): |$next_sibling| is undefined unless branch t70
         ## was taken; presumably |insert_before| then appends the child
         ## at the end of the foster parent -- confirm.
3248     $foster_parent_element->insert_before
3249     ($child, $next_sibling);
3250 wakaba 1.95 $open_tables->[-1]->[1] = 1; # tainted
3251     } else {
3252     !!!cp ('t72');
3253     $self->{open_elements}->[-1]->[0]->append_child ($child);
3254     }
3255 wakaba 1.1 }; # $insert_to_foster
3256    
3257 wakaba 1.52 B: {
3258 wakaba 1.55 if ($token->{type} == DOCTYPE_TOKEN) {
3259 wakaba 1.79 !!!cp ('t73');
3260 wakaba 1.52 !!!parse-error (type => 'DOCTYPE in the middle');
3261     ## Ignore the token
3262     ## Stay in the phase
3263     !!!next-token;
3264     redo B;
3265 wakaba 1.55 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
3266 wakaba 1.56 if ($self->{insertion_mode} & AFTER_HTML_IMS) {
3267 wakaba 1.79 !!!cp ('t74');
3268 wakaba 1.52 #
3269     } else {
3270     ## Generate implied end tags
3271 wakaba 1.86 while ({
3272     dd => 1, dt => 1, li => 1, p => 1,
3273     }->{$self->{open_elements}->[-1]->[1]}) {
3274 wakaba 1.79 !!!cp ('t75');
3275 wakaba 1.86 pop @{$self->{open_elements}};
3276 wakaba 1.52 }
3277    
3278     if (@{$self->{open_elements}} > 2 or
3279     (@{$self->{open_elements}} == 2 and $self->{open_elements}->[1]->[1] ne 'body')) {
3280 wakaba 1.79 !!!cp ('t76');
3281 wakaba 1.52 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3282     } elsif (defined $self->{inner_html_node} and
3283     @{$self->{open_elements}} > 1 and
3284     $self->{open_elements}->[1]->[1] ne 'body') {
3285 wakaba 1.81 ## ISSUE: This case is never reached.
3286 wakaba 1.79 !!!cp ('t77');
3287 wakaba 1.52 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3288 wakaba 1.79 } else {
3289     !!!cp ('t78');
3290 wakaba 1.34 }
3291    
3292 wakaba 1.52 ## ISSUE: There is an issue in the spec.
3293     }
3294    
3295     ## Stop parsing
3296     last B;
3297 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN and
3298 wakaba 1.52 $token->{tag_name} eq 'html') {
3299 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
3300 wakaba 1.79 !!!cp ('t79');
3301 wakaba 1.52 !!!parse-error (type => 'after html:html');
3302 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
3303     } elsif ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
3304 wakaba 1.79 !!!cp ('t80');
3305 wakaba 1.52 !!!parse-error (type => 'after html:html');
3306 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
3307 wakaba 1.79 } else {
3308     !!!cp ('t81');
3309 wakaba 1.52 }
3310    
3311 wakaba 1.84 !!!cp ('t82');
3312     !!!parse-error (type => 'not first start tag');
3313 wakaba 1.52 my $top_el = $self->{open_elements}->[0]->[0];
3314     for my $attr_name (keys %{$token->{attributes}}) {
3315     unless ($top_el->has_attribute_ns (undef, $attr_name)) {
3316 wakaba 1.79 !!!cp ('t84');
3317 wakaba 1.52 $top_el->set_attribute_ns
3318     (undef, [undef, $attr_name],
3319     $token->{attributes}->{$attr_name}->{value});
3320     }
3321     }
3322     !!!next-token;
3323     redo B;
3324 wakaba 1.55 } elsif ($token->{type} == COMMENT_TOKEN) {
3325 wakaba 1.52 my $comment = $self->{document}->create_comment ($token->{data});
3326 wakaba 1.56 if ($self->{insertion_mode} & AFTER_HTML_IMS) {
3327 wakaba 1.79 !!!cp ('t85');
3328 wakaba 1.52 $self->{document}->append_child ($comment);
3329 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_BODY_IM) {
3330 wakaba 1.79 !!!cp ('t86');
3331 wakaba 1.52 $self->{open_elements}->[0]->[0]->append_child ($comment);
3332     } else {
3333 wakaba 1.79 !!!cp ('t87');
3334 wakaba 1.52 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3335     }
3336     !!!next-token;
3337     redo B;
3338 wakaba 1.56 } elsif ($self->{insertion_mode} & HEAD_IMS) {
3339 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
3340 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3341 wakaba 1.99 unless ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3342     !!!cp ('t88.2');
3343     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3344     } else {
3345     !!!cp ('t88.1');
3346     ## Ignore the token.
3347     !!!next-token;
3348     redo B;
3349     }
3350 wakaba 1.52 unless (length $token->{data}) {
3351 wakaba 1.79 !!!cp ('t88');
3352 wakaba 1.52 !!!next-token;
3353     redo B;
3354 wakaba 1.1 }
3355     }
3356 wakaba 1.52
3357 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3358 wakaba 1.79 !!!cp ('t89');
3359 wakaba 1.52 ## As if <head>
3360     !!!create-element ($self->{head_element}, 'head');
3361     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3362     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3363    
3364     ## Reprocess in the "in head" insertion mode...
3365     pop @{$self->{open_elements}};
3366    
3367     ## Reprocess in the "after head" insertion mode...
3368 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3369 wakaba 1.79 !!!cp ('t90');
3370 wakaba 1.52 ## As if </noscript>
3371     pop @{$self->{open_elements}};
3372     !!!parse-error (type => 'in noscript:#character');
3373 wakaba 1.1
3374 wakaba 1.52 ## Reprocess in the "in head" insertion mode...
3375     ## As if </head>
3376     pop @{$self->{open_elements}};
3377    
3378     ## Reprocess in the "after head" insertion mode...
3379 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
3380 wakaba 1.79 !!!cp ('t91');
3381 wakaba 1.52 pop @{$self->{open_elements}};
3382    
3383     ## Reprocess in the "after head" insertion mode...
3384 wakaba 1.79 } else {
3385     !!!cp ('t92');
3386 wakaba 1.1 }
3387 wakaba 1.52
3388     ## "after head" insertion mode
3389     ## As if <body>
3390     !!!insert-element ('body');
3391 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
3392 wakaba 1.52 ## reprocess
3393     redo B;
3394 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
3395 wakaba 1.52 if ($token->{tag_name} eq 'head') {
3396 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3397 wakaba 1.79 !!!cp ('t93');
3398 wakaba 1.52 !!!create-element ($self->{head_element}, $token->{tag_name}, $token->{attributes});
3399     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3400     push @{$self->{open_elements}}, [$self->{head_element}, $token->{tag_name}];
3401 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
3402 wakaba 1.52 !!!next-token;
3403     redo B;
3404 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
3405 wakaba 1.79 !!!cp ('t94');
3406 wakaba 1.54 #
3407     } else {
3408 wakaba 1.79 !!!cp ('t95');
3409 wakaba 1.52 !!!parse-error (type => 'in head:head'); # or in head noscript
3410     ## Ignore the token
3411     !!!next-token;
3412     redo B;
3413     }
3414 wakaba 1.54 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3415 wakaba 1.79 !!!cp ('t96');
3416 wakaba 1.52 ## As if <head>
3417     !!!create-element ($self->{head_element}, 'head');
3418     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3419     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3420    
3421 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
3422 wakaba 1.52 ## Reprocess in the "in head" insertion mode...
3423 wakaba 1.79 } else {
3424     !!!cp ('t97');
3425 wakaba 1.1 }
3426 wakaba 1.52
3427 wakaba 1.49 if ($token->{tag_name} eq 'base') {
3428 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3429 wakaba 1.79 !!!cp ('t98');
3430 wakaba 1.49 ## As if </noscript>
3431     pop @{$self->{open_elements}};
3432     !!!parse-error (type => 'in noscript:base');
3433    
3434 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
3435 wakaba 1.49 ## Reprocess in the "in head" insertion mode...
3436 wakaba 1.79 } else {
3437     !!!cp ('t99');
3438 wakaba 1.49 }
3439    
3440     ## NOTE: There is a "as if in head" code clone.
3441 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
3442 wakaba 1.79 !!!cp ('t100');
3443 wakaba 1.49 !!!parse-error (type => 'after head:'.$token->{tag_name});
3444     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3445 wakaba 1.79 } else {
3446     !!!cp ('t101');
3447 wakaba 1.49 }
3448     !!!insert-element ($token->{tag_name}, $token->{attributes});
3449     pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
3450 wakaba 1.100 pop @{$self->{open_elements}} # <head>
3451 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
3452 wakaba 1.49 !!!next-token;
3453     redo B;
3454     } elsif ($token->{tag_name} eq 'link') {
3455 wakaba 1.25 ## NOTE: There is a "as if in head" code clone.
3456 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
3457 wakaba 1.79 !!!cp ('t102');
3458 wakaba 1.25 !!!parse-error (type => 'after head:'.$token->{tag_name});
3459     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3460 wakaba 1.79 } else {
3461     !!!cp ('t103');
3462 wakaba 1.25 }
3463     !!!insert-element ($token->{tag_name}, $token->{attributes});
3464     pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
3465 wakaba 1.100 pop @{$self->{open_elements}} # <head>
3466 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
3467 wakaba 1.1 !!!next-token;
3468 wakaba 1.25 redo B;
3469 wakaba 1.34 } elsif ($token->{tag_name} eq 'meta') {
3470     ## NOTE: There is a "as if in head" code clone.
3471 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
3472 wakaba 1.79 !!!cp ('t104');
3473 wakaba 1.34 !!!parse-error (type => 'after head:'.$token->{tag_name});
3474     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3475 wakaba 1.79 } else {
3476     !!!cp ('t105');
3477 wakaba 1.34 }
3478     !!!insert-element ($token->{tag_name}, $token->{attributes});
3479 wakaba 1.66 my $meta_el = pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
3480 wakaba 1.34
3481     unless ($self->{confident}) {
3482     if ($token->{attributes}->{charset}) { ## TODO: And if supported
3483 wakaba 1.79 !!!cp ('t106');
3484 wakaba 1.63 $self->{change_encoding}
3485     ->($self, $token->{attributes}->{charset}->{value});
3486 wakaba 1.66
3487     $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
3488     ->set_user_data (manakai_has_reference =>
3489     $token->{attributes}->{charset}
3490     ->{has_reference});
3491 wakaba 1.63 } elsif ($token->{attributes}->{content}) {
3492 wakaba 1.35 ## ISSUE: Algorithm name in the spec was incorrect so that not linked to the definition.
3493 wakaba 1.63 if ($token->{attributes}->{content}->{value}
3494 wakaba 1.70 =~ /\A[^;]*;[\x09-\x0D\x20]*[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
3495     [\x09-\x0D\x20]*=
3496 wakaba 1.34 [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
3497     ([^"'\x09-\x0D\x20][^\x09-\x0D\x20]*))/x) {
3498 wakaba 1.79 !!!cp ('t107');
3499 wakaba 1.63 $self->{change_encoding}
3500     ->($self, defined $1 ? $1 : defined $2 ? $2 : $3);
3501 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
3502     ->set_user_data (manakai_has_reference =>
3503     $token->{attributes}->{content}
3504     ->{has_reference});
3505 wakaba 1.79 } else {
3506     !!!cp ('t108');
3507 wakaba 1.63 }
3508 wakaba 1.34 }
3509 wakaba 1.66 } else {
3510     if ($token->{attributes}->{charset}) {
3511 wakaba 1.79 !!!cp ('t109');
3512 wakaba 1.66 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
3513     ->set_user_data (manakai_has_reference =>
3514     $token->{attributes}->{charset}
3515     ->{has_reference});
3516     }
3517 wakaba 1.68 if ($token->{attributes}->{content}) {
3518 wakaba 1.79 !!!cp ('t110');
3519 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
3520     ->set_user_data (manakai_has_reference =>
3521     $token->{attributes}->{content}
3522     ->{has_reference});
3523     }
3524 wakaba 1.34 }
3525    
3526 wakaba 1.100 pop @{$self->{open_elements}} # <head>
3527 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
3528 wakaba 1.34 !!!next-token;
3529     redo B;
3530 wakaba 1.49 } elsif ($token->{tag_name} eq 'title') {
3531 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3532 wakaba 1.79 !!!cp ('t111');
3533 wakaba 1.49 ## As if </noscript>
3534     pop @{$self->{open_elements}};
3535     !!!parse-error (type => 'in noscript:title');
3536    
3537 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
3538 wakaba 1.49 ## Reprocess in the "in head" insertion mode...
3539 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
3540 wakaba 1.79 !!!cp ('t112');
3541 wakaba 1.25 !!!parse-error (type => 'after head:'.$token->{tag_name});
3542     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3543 wakaba 1.79 } else {
3544     !!!cp ('t113');
3545 wakaba 1.25 }
3546 wakaba 1.49
3547     ## NOTE: There is a "as if in head" code clone.
3548 wakaba 1.31 my $parent = defined $self->{head_element} ? $self->{head_element}
3549     : $self->{open_elements}->[-1]->[0];
3550 wakaba 1.96 $parse_rcdata->(RCDATA_CONTENT_MODEL);
3551 wakaba 1.100 pop @{$self->{open_elements}} # <head>
3552 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
3553 wakaba 1.25 redo B;
3554     } elsif ($token->{tag_name} eq 'style') {
3555     ## NOTE: Or (scripting is enabled and tag_name eq 'noscript' and
3556 wakaba 1.54 ## insertion mode IN_HEAD_IM)
3557 wakaba 1.25 ## NOTE: There is a "as if in head" code clone.
3558 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
3559 wakaba 1.79 !!!cp ('t114');
3560 wakaba 1.25 !!!parse-error (type => 'after head:'.$token->{tag_name});
3561     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3562 wakaba 1.79 } else {
3563     !!!cp ('t115');
3564 wakaba 1.25 }
3565 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
3566 wakaba 1.100 pop @{$self->{open_elements}} # <head>
3567 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
3568 wakaba 1.25 redo B;
3569     } elsif ($token->{tag_name} eq 'noscript') {
3570 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_IM) {
3571 wakaba 1.79 !!!cp ('t116');
3572 wakaba 1.25 ## NOTE: and scripting is disabled
3573     !!!insert-element ($token->{tag_name}, $token->{attributes});
3574 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_NOSCRIPT_IM;
3575 wakaba 1.1 !!!next-token;
3576 wakaba 1.25 redo B;
3577 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3578 wakaba 1.79 !!!cp ('t117');
3579 wakaba 1.30 !!!parse-error (type => 'in noscript:noscript');
3580 wakaba 1.1 ## Ignore the token
3581 wakaba 1.41 !!!next-token;
3582 wakaba 1.25 redo B;
3583 wakaba 1.1 } else {
3584 wakaba 1.79 !!!cp ('t118');
3585 wakaba 1.25 #
3586 wakaba 1.1 }
3587 wakaba 1.49 } elsif ($token->{tag_name} eq 'script') {
3588 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3589 wakaba 1.79 !!!cp ('t119');
3590 wakaba 1.49 ## As if </noscript>
3591     pop @{$self->{open_elements}};
3592     !!!parse-error (type => 'in noscript:script');
3593    
3594 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
3595 wakaba 1.49 ## Reprocess in the "in head" insertion mode...
3596 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
3597 wakaba 1.79 !!!cp ('t120');
3598 wakaba 1.25 !!!parse-error (type => 'after head:'.$token->{tag_name});
3599     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3600 wakaba 1.79 } else {
3601     !!!cp ('t121');
3602 wakaba 1.25 }
3603 wakaba 1.49
3604 wakaba 1.25 ## NOTE: There is a "as if in head" code clone.
3605 wakaba 1.100 $script_start_tag->();
3606     pop @{$self->{open_elements}} # <head>
3607 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
3608 wakaba 1.1 redo B;
3609 wakaba 1.49 } elsif ($token->{tag_name} eq 'body' or
3610 wakaba 1.25 $token->{tag_name} eq 'frameset') {
3611 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3612 wakaba 1.79 !!!cp ('t122');
3613 wakaba 1.49 ## As if </noscript>
3614     pop @{$self->{open_elements}};
3615     !!!parse-error (type => 'in noscript:'.$token->{tag_name});
3616    
3617     ## Reprocess in the "in head" insertion mode...
3618     ## As if </head>
3619     pop @{$self->{open_elements}};
3620    
3621     ## Reprocess in the "after head" insertion mode...
3622 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
3623 wakaba 1.79 !!!cp ('t124');
3624 wakaba 1.49 pop @{$self->{open_elements}};
3625    
3626     ## Reprocess in the "after head" insertion mode...
3627 wakaba 1.79 } else {
3628     !!!cp ('t125');
3629 wakaba 1.49 }
3630    
3631     ## "after head" insertion mode
3632     !!!insert-element ($token->{tag_name}, $token->{attributes});
3633 wakaba 1.54 if ($token->{tag_name} eq 'body') {
3634 wakaba 1.79 !!!cp ('t126');
3635 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
3636     } elsif ($token->{tag_name} eq 'frameset') {
3637 wakaba 1.79 !!!cp ('t127');
3638 wakaba 1.54 $self->{insertion_mode} = IN_FRAMESET_IM;
3639     } else {
3640     die "$0: tag name: $self->{tag_name}";
3641     }
3642 wakaba 1.1 !!!next-token;
3643     redo B;
3644     } else {
3645 wakaba 1.79 !!!cp ('t128');
3646 wakaba 1.1 #
3647     }
3648 wakaba 1.49
3649 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3650 wakaba 1.79 !!!cp ('t129');
3651 wakaba 1.49 ## As if </noscript>
3652     pop @{$self->{open_elements}};
3653     !!!parse-error (type => 'in noscript:/'.$token->{tag_name});
3654    
3655     ## Reprocess in the "in head" insertion mode...
3656     ## As if </head>
3657 wakaba 1.25 pop @{$self->{open_elements}};
3658 wakaba 1.49
3659     ## Reprocess in the "after head" insertion mode...
3660 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
3661 wakaba 1.79 !!!cp ('t130');
3662 wakaba 1.49 ## As if </head>
3663 wakaba 1.25 pop @{$self->{open_elements}};
3664 wakaba 1.49
3665     ## Reprocess in the "after head" insertion mode...
3666 wakaba 1.79 } else {
3667     !!!cp ('t131');
3668 wakaba 1.49 }
3669    
3670     ## "after head" insertion mode
3671     ## As if <body>
3672     !!!insert-element ('body');
3673 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
3674 wakaba 1.49 ## reprocess
3675     redo B;
3676 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
3677 wakaba 1.49 if ($token->{tag_name} eq 'head') {
3678 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3679 wakaba 1.79 !!!cp ('t132');
3680 wakaba 1.50 ## As if <head>
3681     !!!create-element ($self->{head_element}, 'head');
3682     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3683     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3684    
3685     ## Reprocess in the "in head" insertion mode...
3686     pop @{$self->{open_elements}};
3687 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
3688 wakaba 1.50 !!!next-token;
3689     redo B;
3690 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3691 wakaba 1.79 !!!cp ('t133');
3692 wakaba 1.49 ## As if </noscript>
3693     pop @{$self->{open_elements}};
3694 wakaba 1.82 !!!parse-error (type => 'in noscript:/head');
3695 wakaba 1.49
3696     ## Reprocess in the "in head" insertion mode...
3697 wakaba 1.50 pop @{$self->{open_elements}};
3698 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
3699 wakaba 1.50 !!!next-token;
3700     redo B;
3701 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
3702 wakaba 1.79 !!!cp ('t134');
3703 wakaba 1.49 pop @{$self->{open_elements}};
3704 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
3705 wakaba 1.49 !!!next-token;
3706     redo B;
3707     } else {
3708 wakaba 1.79 !!!cp ('t135');
3709 wakaba 1.49 #
3710     }
3711     } elsif ($token->{tag_name} eq 'noscript') {
3712 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3713 wakaba 1.79 !!!cp ('t136');
3714 wakaba 1.49 pop @{$self->{open_elements}};
3715 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
3716 wakaba 1.49 !!!next-token;
3717     redo B;
3718 wakaba 1.54 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3719 wakaba 1.79 !!!cp ('t137');
3720 wakaba 1.50 !!!parse-error (type => 'unmatched end tag:noscript');
3721     ## Ignore the token ## ISSUE: An issue in the spec.
3722     !!!next-token;
3723     redo B;
3724 wakaba 1.49 } else {
3725 wakaba 1.79 !!!cp ('t138');
3726 wakaba 1.49 #
3727     }
3728     } elsif ({
3729 wakaba 1.31 body => 1, html => 1,
3730     }->{$token->{tag_name}}) {
3731 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3732 wakaba 1.79 !!!cp ('t139');
3733 wakaba 1.50 ## As if <head>
3734     !!!create-element ($self->{head_element}, 'head');
3735     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3736     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3737    
3738 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
3739 wakaba 1.50 ## Reprocess in the "in head" insertion mode...
3740 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3741 wakaba 1.79 !!!cp ('t140');
3742 wakaba 1.49 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3743     ## Ignore the token
3744     !!!next-token;
3745     redo B;
3746 wakaba 1.79 } else {
3747     !!!cp ('t141');
3748 wakaba 1.49 }
3749 wakaba 1.50
3750     #
3751 wakaba 1.49 } elsif ({
3752 wakaba 1.31 p => 1, br => 1,
3753     }->{$token->{tag_name}}) {
3754 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3755 wakaba 1.79 !!!cp ('t142');
3756 wakaba 1.50 ## As if <head>
3757     !!!create-element ($self->{head_element}, 'head');
3758     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3759     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3760    
3761 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
3762 wakaba 1.50 ## Reprocess in the "in head" insertion mode...
3763 wakaba 1.79 } else {
3764     !!!cp ('t143');
3765 wakaba 1.50 }
3766    
3767 wakaba 1.1 #
3768 wakaba 1.25 } else {
3769 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
3770 wakaba 1.79 !!!cp ('t144');
3771 wakaba 1.54 #
3772     } else {
3773 wakaba 1.79 !!!cp ('t145');
3774 wakaba 1.49 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3775     ## Ignore the token
3776     !!!next-token;
3777     redo B;
3778     }
3779     }
3780    
3781 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3782 wakaba 1.79 !!!cp ('t146');
3783 wakaba 1.49 ## As if </noscript>
3784     pop @{$self->{open_elements}};
3785     !!!parse-error (type => 'in noscript:/'.$token->{tag_name});
3786    
3787     ## Reprocess in the "in head" insertion mode...
3788     ## As if </head>
3789     pop @{$self->{open_elements}};
3790    
3791     ## Reprocess in the "after head" insertion mode...
3792 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
3793 wakaba 1.79 !!!cp ('t147');
3794 wakaba 1.49 ## As if </head>
3795     pop @{$self->{open_elements}};
3796    
3797     ## Reprocess in the "after head" insertion mode...
3798 wakaba 1.54 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3799 wakaba 1.82 ## ISSUE: This case cannot be reached?
3800 wakaba 1.79 !!!cp ('t148');
3801 wakaba 1.50 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3802     ## Ignore the token ## ISSUE: An issue in the spec.
3803     !!!next-token;
3804     redo B;
3805 wakaba 1.79 } else {
3806     !!!cp ('t149');
3807 wakaba 1.1 }
3808    
3809 wakaba 1.49 ## "after head" insertion mode
3810     ## As if <body>
3811 wakaba 1.52 !!!insert-element ('body');
3812 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
3813 wakaba 1.52 ## reprocess
3814     redo B;
3815     } else {
3816     die "$0: $token->{type}: Unknown token type";
3817     }
3818    
3819     ## ISSUE: An issue in the spec.
3820 wakaba 1.56 } elsif ($self->{insertion_mode} & BODY_IMS) {
3821 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
3822 wakaba 1.79 !!!cp ('t150');
3823 wakaba 1.52 ## NOTE: There is a code clone of "character in body".
3824     $reconstruct_active_formatting_elements->($insert_to_current);
3825    
3826     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3827    
3828     !!!next-token;
3829     redo B;
3830 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
3831 wakaba 1.52 if ({
3832     caption => 1, col => 1, colgroup => 1, tbody => 1,
3833     td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
3834     }->{$token->{tag_name}}) {
3835 wakaba 1.54 if ($self->{insertion_mode} == IN_CELL_IM) {
3836 wakaba 1.52 ## have an element in table scope
3837     my $tn;
3838     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3839     my $node = $self->{open_elements}->[$_];
3840     if ($node->[1] eq 'td' or $node->[1] eq 'th') {
3841 wakaba 1.79 !!!cp ('t151');
3842 wakaba 1.52 $tn = $node->[1];
3843     last INSCOPE;
3844     } elsif ({
3845     table => 1, html => 1,
3846     }->{$node->[1]}) {
3847 wakaba 1.79 !!!cp ('t152');
3848 wakaba 1.52 last INSCOPE;
3849     }
3850     } # INSCOPE
3851     unless (defined $tn) {
3852 wakaba 1.79 !!!cp ('t153');
3853 wakaba 1.82 ## TODO: This error type is wrong.
3854 wakaba 1.52 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3855     ## Ignore the token
3856     !!!next-token;
3857     redo B;
3858     }
3859    
3860 wakaba 1.79 !!!cp ('t154');
3861 wakaba 1.52 ## Close the cell
3862     !!!back-token; # <?>
3863 wakaba 1.55 $token = {type => END_TAG_TOKEN, tag_name => $tn};
3864 wakaba 1.52 redo B;
3865 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_CAPTION_IM) {
3866 wakaba 1.52 !!!parse-error (type => 'not closed:caption');
3867    
3868     ## As if </caption>
3869     ## have a table element in table scope
3870     my $i;
3871     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3872     my $node = $self->{open_elements}->[$_];
3873     if ($node->[1] eq 'caption') {
3874 wakaba 1.79 !!!cp ('t155');
3875 wakaba 1.52 $i = $_;
3876     last INSCOPE;
3877     } elsif ({
3878     table => 1, html => 1,
3879     }->{$node->[1]}) {
3880 wakaba 1.79 !!!cp ('t156');
3881 wakaba 1.52 last INSCOPE;
3882     }
3883     } # INSCOPE
3884     unless (defined $i) {
3885 wakaba 1.79 !!!cp ('t157');
3886 wakaba 1.83 ## TODO: this type is wrong.
3887 wakaba 1.52 !!!parse-error (type => 'unmatched end tag:caption');
3888     ## Ignore the token
3889     !!!next-token;
3890     redo B;
3891     }
3892    
3893     ## generate implied end tags
3894 wakaba 1.86 while ({
3895     dd => 1, dt => 1, li => 1, p => 1,
3896     }->{$self->{open_elements}->[-1]->[1]}) {
3897 wakaba 1.79 !!!cp ('t158');
3898 wakaba 1.86 pop @{$self->{open_elements}};
3899 wakaba 1.52 }
3900    
3901     if ($self->{open_elements}->[-1]->[1] ne 'caption') {
3902 wakaba 1.79 !!!cp ('t159');
3903 wakaba 1.52 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3904 wakaba 1.79 } else {
3905     !!!cp ('t160');
3906 wakaba 1.52 }
3907    
3908     splice @{$self->{open_elements}}, $i;
3909    
3910     $clear_up_to_marker->();
3911    
3912 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
3913 wakaba 1.52
3914     ## reprocess
3915     redo B;
3916     } else {
3917 wakaba 1.79 !!!cp ('t161');
3918 wakaba 1.52 #
3919     }
3920     } else {
3921 wakaba 1.79 !!!cp ('t162');
3922 wakaba 1.52 #
3923     }
3924 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
3925 wakaba 1.52 if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
3926 wakaba 1.54 if ($self->{insertion_mode} == IN_CELL_IM) {
3927 wakaba 1.43 ## have an element in table scope
3928 wakaba 1.52 my $i;
3929 wakaba 1.43 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3930     my $node = $self->{open_elements}->[$_];
3931 wakaba 1.52 if ($node->[1] eq $token->{tag_name}) {
3932 wakaba 1.79 !!!cp ('t163');
3933 wakaba 1.52 $i = $_;
3934 wakaba 1.43 last INSCOPE;
3935     } elsif ({
3936     table => 1, html => 1,
3937     }->{$node->[1]}) {
3938 wakaba 1.79 !!!cp ('t164');
3939 wakaba 1.43 last INSCOPE;
3940     }
3941     } # INSCOPE
3942 wakaba 1.52 unless (defined $i) {
3943 wakaba 1.79 !!!cp ('t165');
3944 wakaba 1.43 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3945     ## Ignore the token
3946     !!!next-token;
3947     redo B;
3948     }
3949    
3950 wakaba 1.52 ## generate implied end tags
3951 wakaba 1.86 while ({
3952     dd => 1, dt => 1, li => 1, p => 1,
3953     }->{$self->{open_elements}->[-1]->[1]}) {
3954 wakaba 1.79 !!!cp ('t166');
3955 wakaba 1.86 pop @{$self->{open_elements}};
3956 wakaba 1.52 }
3957 wakaba 1.86
3958 wakaba 1.52 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
3959 wakaba 1.79 !!!cp ('t167');
3960 wakaba 1.52 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3961 wakaba 1.79 } else {
3962     !!!cp ('t168');
3963 wakaba 1.52 }
3964    
3965     splice @{$self->{open_elements}}, $i;
3966    
3967     $clear_up_to_marker->();
3968    
3969 wakaba 1.54 $self->{insertion_mode} = IN_ROW_IM;
3970 wakaba 1.52
3971     !!!next-token;
3972 wakaba 1.43 redo B;
3973 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_CAPTION_IM) {
3974 wakaba 1.79 !!!cp ('t169');
3975 wakaba 1.52 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3976     ## Ignore the token
3977     !!!next-token;
3978     redo B;
3979     } else {
3980 wakaba 1.79 !!!cp ('t170');
3981 wakaba 1.52 #
3982     }
3983     } elsif ($token->{tag_name} eq 'caption') {
3984 wakaba 1.54 if ($self->{insertion_mode} == IN_CAPTION_IM) {
3985 wakaba 1.43 ## have a table element in table scope
3986     my $i;
3987     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3988     my $node = $self->{open_elements}->[$_];
3989 wakaba 1.52 if ($node->[1] eq $token->{tag_name}) {
3990 wakaba 1.79 !!!cp ('t171');
3991 wakaba 1.43 $i = $_;
3992     last INSCOPE;
3993     } elsif ({
3994     table => 1, html => 1,
3995     }->{$node->[1]}) {
3996 wakaba 1.79 !!!cp ('t172');
3997 wakaba 1.43 last INSCOPE;
3998     }
3999     } # INSCOPE
4000     unless (defined $i) {
4001 wakaba 1.79 !!!cp ('t173');
4002 wakaba 1.52 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4003 wakaba 1.43 ## Ignore the token
4004     !!!next-token;
4005     redo B;
4006     }
4007    
4008     ## generate implied end tags
4009 wakaba 1.86 while ({
4010     dd => 1, dt => 1, li => 1, p => 1,
4011     }->{$self->{open_elements}->[-1]->[1]}) {
4012 wakaba 1.79 !!!cp ('t174');
4013 wakaba 1.86 pop @{$self->{open_elements}};
4014 wakaba 1.43 }
4015 wakaba 1.52
4016     if ($self->{open_elements}->[-1]->[1] ne 'caption') {
4017 wakaba 1.79 !!!cp ('t175');
4018 wakaba 1.52 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4019 wakaba 1.79 } else {
4020     !!!cp ('t176');
4021 wakaba 1.52 }
4022    
4023     splice @{$self->{open_elements}}, $i;
4024    
4025     $clear_up_to_marker->();
4026    
4027 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
4028 wakaba 1.52
4029     !!!next-token;
4030     redo B;
4031 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_CELL_IM) {
4032 wakaba 1.79 !!!cp ('t177');
4033 wakaba 1.52 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4034     ## Ignore the token
4035     !!!next-token;
4036     redo B;
4037     } else {
4038 wakaba 1.79 !!!cp ('t178');
4039 wakaba 1.52 #
4040     }
4041     } elsif ({
4042     table => 1, tbody => 1, tfoot => 1,
4043     thead => 1, tr => 1,
4044     }->{$token->{tag_name}} and
4045 wakaba 1.54 $self->{insertion_mode} == IN_CELL_IM) {
4046 wakaba 1.52 ## have an element in table scope
4047     my $i;
4048     my $tn;
4049     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4050     my $node = $self->{open_elements}->[$_];
4051     if ($node->[1] eq $token->{tag_name}) {
4052 wakaba 1.79 !!!cp ('t179');
4053 wakaba 1.52 $i = $_;
4054     last INSCOPE;
4055     } elsif ($node->[1] eq 'td' or $node->[1] eq 'th') {
4056 wakaba 1.79 !!!cp ('t180');
4057 wakaba 1.52 $tn = $node->[1];
4058     ## NOTE: There is exactly one |td| or |th| element
4059     ## in scope in the stack of open elements by definition.
4060     } elsif ({
4061     table => 1, html => 1,
4062     }->{$node->[1]}) {
4063 wakaba 1.79 !!!cp ('t181');
4064 wakaba 1.52 last INSCOPE;
4065     }
4066     } # INSCOPE
4067     unless (defined $i) {
4068 wakaba 1.79 !!!cp ('t182');
4069 wakaba 1.52 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4070     ## Ignore the token
4071     !!!next-token;
4072     redo B;
4073 wakaba 1.79 } else {
4074     !!!cp ('t183');
4075 wakaba 1.52 }
4076    
4077     ## Close the cell
4078     !!!back-token; # </?>
4079 wakaba 1.55 $token = {type => END_TAG_TOKEN, tag_name => $tn};
4080 wakaba 1.52 redo B;
4081     } elsif ($token->{tag_name} eq 'table' and
4082 wakaba 1.54 $self->{insertion_mode} == IN_CAPTION_IM) {
4083 wakaba 1.52 !!!parse-error (type => 'not closed:caption');
4084    
4085     ## As if </caption>
4086     ## have a table element in table scope
4087     my $i;
4088     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4089     my $node = $self->{open_elements}->[$_];
4090     if ($node->[1] eq 'caption') {
4091 wakaba 1.79 !!!cp ('t184');
4092 wakaba 1.52 $i = $_;
4093     last INSCOPE;
4094     } elsif ({
4095     table => 1, html => 1,
4096     }->{$node->[1]}) {
4097 wakaba 1.79 !!!cp ('t185');
4098 wakaba 1.52 last INSCOPE;
4099     }
4100     } # INSCOPE
4101     unless (defined $i) {
4102 wakaba 1.79 !!!cp ('t186');
4103 wakaba 1.52 !!!parse-error (type => 'unmatched end tag:caption');
4104     ## Ignore the token
4105     !!!next-token;
4106     redo B;
4107     }
4108    
4109     ## generate implied end tags
4110 wakaba 1.86 while ({
4111     dd => 1, dt => 1, li => 1, p => 1,
4112     }->{$self->{open_elements}->[-1]->[1]}) {
4113 wakaba 1.79 !!!cp ('t187');
4114 wakaba 1.86 pop @{$self->{open_elements}};
4115 wakaba 1.52 }
4116    
4117     if ($self->{open_elements}->[-1]->[1] ne 'caption') {
4118 wakaba 1.79 !!!cp ('t188');
4119 wakaba 1.52 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4120 wakaba 1.79 } else {
4121     !!!cp ('t189');
4122 wakaba 1.52 }
4123    
4124     splice @{$self->{open_elements}}, $i;
4125    
4126     $clear_up_to_marker->();
4127    
4128 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
4129 wakaba 1.52
4130     ## reprocess
4131     redo B;
4132     } elsif ({
4133     body => 1, col => 1, colgroup => 1, html => 1,
4134     }->{$token->{tag_name}}) {
4135 wakaba 1.56 if ($self->{insertion_mode} & BODY_TABLE_IMS) {
4136 wakaba 1.79 !!!cp ('t190');
4137 wakaba 1.52 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4138     ## Ignore the token
4139     !!!next-token;
4140     redo B;
4141     } else {
4142 wakaba 1.79 !!!cp ('t191');
4143 wakaba 1.52 #
4144     }
4145     } elsif ({
4146     tbody => 1, tfoot => 1,
4147     thead => 1, tr => 1,
4148     }->{$token->{tag_name}} and
4149 wakaba 1.54 $self->{insertion_mode} == IN_CAPTION_IM) {
4150 wakaba 1.79 !!!cp ('t192');
4151 wakaba 1.52 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4152     ## Ignore the token
4153     !!!next-token;
4154     redo B;
4155     } else {
4156 wakaba 1.79 !!!cp ('t193');
4157 wakaba 1.52 #
4158     }
4159     } else {
4160     die "$0: $token->{type}: Unknown token type";
4161     }
4162    
4163     $insert = $insert_to_current;
4164     #
4165 wakaba 1.56 } elsif ($self->{insertion_mode} & TABLE_IMS) {
4166 wakaba 1.58 if ($token->{type} == CHARACTER_TOKEN) {
4167 wakaba 1.95 if (not $open_tables->[-1]->[1] and # tainted
4168     $token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4169     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4170 wakaba 1.52
4171 wakaba 1.95 unless (length $token->{data}) {
4172     !!!cp ('t194');
4173     !!!next-token;
4174     redo B;
4175     } else {
4176     !!!cp ('t195');
4177     }
4178     }
4179 wakaba 1.52
4180     !!!parse-error (type => 'in table:#character');
4181    
4182     ## As if in body, but insert into foster parent element
4183     ## ISSUE: Spec says that "whenever a node would be inserted
4184     ## into the current node" while characters might not
4185     ## result in a new Text node.
4186     $reconstruct_active_formatting_elements->($insert_to_foster);
4187    
4188     if ({
4189     table => 1, tbody => 1, tfoot => 1,
4190     thead => 1, tr => 1,
4191     }->{$self->{open_elements}->[-1]->[1]}) {
4192     # MUST
4193     my $foster_parent_element;
4194     my $next_sibling;
4195     my $prev_sibling;
4196     OE: for (reverse 0..$#{$self->{open_elements}}) {
4197     if ($self->{open_elements}->[$_]->[1] eq 'table') {
4198     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
4199     if (defined $parent and $parent->node_type == 1) {
4200 wakaba 1.79 !!!cp ('t196');
4201 wakaba 1.52 $foster_parent_element = $parent;
4202     $next_sibling = $self->{open_elements}->[$_]->[0];
4203     $prev_sibling = $next_sibling->previous_sibling;
4204     } else {
4205 wakaba 1.79 !!!cp ('t197');
4206 wakaba 1.52 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
4207     $prev_sibling = $foster_parent_element->last_child;
4208     }
4209     last OE;
4210     }
4211     } # OE
4212     $foster_parent_element = $self->{open_elements}->[0]->[0] and
4213     $prev_sibling = $foster_parent_element->last_child
4214     unless defined $foster_parent_element;
4215     if (defined $prev_sibling and
4216     $prev_sibling->node_type == 3) {
4217 wakaba 1.79 !!!cp ('t198');
4218 wakaba 1.52 $prev_sibling->manakai_append_text ($token->{data});
4219     } else {
4220 wakaba 1.79 !!!cp ('t199');
4221 wakaba 1.52 $foster_parent_element->insert_before
4222     ($self->{document}->create_text_node ($token->{data}),
4223     $next_sibling);
4224     }
4225 wakaba 1.95 $open_tables->[-1]->[1] = 1; # tainted
4226     } else {
4227     !!!cp ('t200');
4228     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4229     }
4230 wakaba 1.52
4231 wakaba 1.95 !!!next-token;
4232     redo B;
4233 wakaba 1.58 } elsif ($token->{type} == START_TAG_TOKEN) {
4234 wakaba 1.52 if ({
4235 wakaba 1.54 tr => ($self->{insertion_mode} != IN_ROW_IM),
4236 wakaba 1.52 th => 1, td => 1,
4237     }->{$token->{tag_name}}) {
4238 wakaba 1.54 if ($self->{insertion_mode} == IN_TABLE_IM) {
4239 wakaba 1.52 ## Clear back to table context
4240     while ($self->{open_elements}->[-1]->[1] ne 'table' and
4241     $self->{open_elements}->[-1]->[1] ne 'html') {
4242 wakaba 1.79 !!!cp ('t201');
4243 wakaba 1.52 pop @{$self->{open_elements}};
4244 wakaba 1.43 }
4245    
4246 wakaba 1.52 !!!insert-element ('tbody');
4247 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
4248 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
4249     }
4250    
4251 wakaba 1.54 if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
4252 wakaba 1.52 unless ($token->{tag_name} eq 'tr') {
4253 wakaba 1.79 !!!cp ('t202');
4254 wakaba 1.52 !!!parse-error (type => 'missing start tag:tr');
4255     }
4256 wakaba 1.43
4257 wakaba 1.52 ## Clear back to table body context
4258     while (not {
4259     tbody => 1, tfoot => 1, thead => 1, html => 1,
4260     }->{$self->{open_elements}->[-1]->[1]}) {
4261 wakaba 1.79 !!!cp ('t203');
4262 wakaba 1.83 ## ISSUE: Can this case be reached?
4263 wakaba 1.52 pop @{$self->{open_elements}};
4264     }
4265 wakaba 1.43
4266 wakaba 1.54 $self->{insertion_mode} = IN_ROW_IM;
4267 wakaba 1.52 if ($token->{tag_name} eq 'tr') {
4268 wakaba 1.79 !!!cp ('t204');
4269 wakaba 1.52 !!!insert-element ($token->{tag_name}, $token->{attributes});
4270     !!!next-token;
4271     redo B;
4272     } else {
4273 wakaba 1.79 !!!cp ('t205');
4274 wakaba 1.52 !!!insert-element ('tr');
4275     ## reprocess in the "in row" insertion mode
4276     }
4277 wakaba 1.79 } else {
4278     !!!cp ('t206');
4279 wakaba 1.52 }
4280    
4281     ## Clear back to table row context
4282     while (not {
4283     tr => 1, html => 1,
4284     }->{$self->{open_elements}->[-1]->[1]}) {
4285 wakaba 1.79 !!!cp ('t207');
4286 wakaba 1.52 pop @{$self->{open_elements}};
4287 wakaba 1.43 }
4288 wakaba 1.52
4289     !!!insert-element ($token->{tag_name}, $token->{attributes});
4290 wakaba 1.54 $self->{insertion_mode} = IN_CELL_IM;
4291 wakaba 1.52
4292     push @$active_formatting_elements, ['#marker', ''];
4293    
4294     !!!next-token;
4295     redo B;
4296     } elsif ({
4297     caption => 1, col => 1, colgroup => 1,
4298     tbody => 1, tfoot => 1, thead => 1,
4299 wakaba 1.54 tr => 1, # $self->{insertion_mode} == IN_ROW_IM
4300 wakaba 1.52 }->{$token->{tag_name}}) {
4301 wakaba 1.54 if ($self->{insertion_mode} == IN_ROW_IM) {
4302 wakaba 1.52 ## As if </tr>
4303 wakaba 1.43 ## have an element in table scope
4304     my $i;
4305     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4306     my $node = $self->{open_elements}->[$_];
4307 wakaba 1.52 if ($node->[1] eq 'tr') {
4308 wakaba 1.79 !!!cp ('t208');
4309 wakaba 1.43 $i = $_;
4310     last INSCOPE;
4311     } elsif ({
4312 wakaba 1.83 html => 1,
4313    
4314     ## NOTE: This element does not appear here, maybe.
4315     table => 1,
4316 wakaba 1.43 }->{$node->[1]}) {
4317 wakaba 1.79 !!!cp ('t209');
4318 wakaba 1.43 last INSCOPE;
4319     }
4320     } # INSCOPE
4321 wakaba 1.79 unless (defined $i) {
4322     !!!cp ('t210');
4323 wakaba 1.83 ## TODO: This type is wrong.
4324 wakaba 1.79 !!!parse-error (type => 'unmacthed end tag:'.$token->{tag_name});
4325 wakaba 1.52 ## Ignore the token
4326     !!!next-token;
4327 wakaba 1.43 redo B;
4328     }
4329    
4330 wakaba 1.52 ## Clear back to table row context
4331     while (not {
4332     tr => 1, html => 1,
4333     }->{$self->{open_elements}->[-1]->[1]}) {
4334 wakaba 1.79 !!!cp ('t211');
4335 wakaba 1.83 ## ISSUE: Can this case be reached?
4336 wakaba 1.52 pop @{$self->{open_elements}};
4337 wakaba 1.1 }
4338 wakaba 1.43
4339 wakaba 1.52 pop @{$self->{open_elements}}; # tr
4340 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
4341 wakaba 1.52 if ($token->{tag_name} eq 'tr') {
4342 wakaba 1.79 !!!cp ('t212');
4343 wakaba 1.52 ## reprocess
4344     redo B;
4345     } else {
4346 wakaba 1.79 !!!cp ('t213');
4347 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
4348     }
4349 wakaba 1.1 }
4350 wakaba 1.52
4351 wakaba 1.54 if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
4352 wakaba 1.52 ## have an element in table scope
4353 wakaba 1.43 my $i;
4354     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4355     my $node = $self->{open_elements}->[$_];
4356 wakaba 1.52 if ({
4357     tbody => 1, thead => 1, tfoot => 1,
4358     }->{$node->[1]}) {
4359 wakaba 1.79 !!!cp ('t214');
4360 wakaba 1.43 $i = $_;
4361     last INSCOPE;
4362     } elsif ({
4363     table => 1, html => 1,
4364     }->{$node->[1]}) {
4365 wakaba 1.79 !!!cp ('t215');
4366 wakaba 1.43 last INSCOPE;
4367     }
4368     } # INSCOPE
4369 wakaba 1.52 unless (defined $i) {
4370 wakaba 1.79 !!!cp ('t216');
4371 wakaba 1.82 ## TODO: This error type is wrong.
4372 wakaba 1.52 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4373     ## Ignore the token
4374     !!!next-token;
4375 wakaba 1.43 redo B;
4376     }
4377 wakaba 1.52
4378     ## Clear back to table body context
4379     while (not {
4380     tbody => 1, tfoot => 1, thead => 1, html => 1,
4381     }->{$self->{open_elements}->[-1]->[1]}) {
4382 wakaba 1.79 !!!cp ('t217');
4383 wakaba 1.83 ## ISSUE: Can this state be reached?
4384 wakaba 1.52 pop @{$self->{open_elements}};
4385 wakaba 1.43 }
4386    
4387 wakaba 1.52 ## As if <{current node}>
4388     ## have an element in table scope
4389     ## true by definition
4390 wakaba 1.43
4391 wakaba 1.52 ## Clear back to table body context
4392     ## nop by definition
4393 wakaba 1.43
4394 wakaba 1.52 pop @{$self->{open_elements}};
4395 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
4396 wakaba 1.52 ## reprocess in "in table" insertion mode...
4397 wakaba 1.79 } else {
4398     !!!cp ('t218');
4399 wakaba 1.52 }
4400    
4401     if ($token->{tag_name} eq 'col') {
4402     ## Clear back to table context
4403     while ($self->{open_elements}->[-1]->[1] ne 'table' and
4404     $self->{open_elements}->[-1]->[1] ne 'html') {
4405 wakaba 1.79 !!!cp ('t219');
4406 wakaba 1.83 ## ISSUE: Can this state be reached?
4407 wakaba 1.52 pop @{$self->{open_elements}};
4408     }
4409 wakaba 1.43
4410 wakaba 1.52 !!!insert-element ('colgroup');
4411 wakaba 1.54 $self->{insertion_mode} = IN_COLUMN_GROUP_IM;
4412 wakaba 1.52 ## reprocess
4413 wakaba 1.43 redo B;
4414 wakaba 1.52 } elsif ({
4415     caption => 1,
4416     colgroup => 1,
4417     tbody => 1, tfoot => 1, thead => 1,
4418     }->{$token->{tag_name}}) {
4419     ## Clear back to table context
4420     while ($self->{open_elements}->[-1]->[1] ne 'table' and
4421     $self->{open_elements}->[-1]->[1] ne 'html') {
4422 wakaba 1.79 !!!cp ('t220');
4423 wakaba 1.83 ## ISSUE: Can this state be reached?
4424 wakaba 1.52 pop @{$self->{open_elements}};
4425 wakaba 1.1 }
4426 wakaba 1.52
4427     push @$active_formatting_elements, ['#marker', '']
4428     if $token->{tag_name} eq 'caption';
4429    
4430     !!!insert-element ($token->{tag_name}, $token->{attributes});
4431     $self->{insertion_mode} = {
4432 wakaba 1.54 caption => IN_CAPTION_IM,
4433     colgroup => IN_COLUMN_GROUP_IM,
4434     tbody => IN_TABLE_BODY_IM,
4435     tfoot => IN_TABLE_BODY_IM,
4436     thead => IN_TABLE_BODY_IM,
4437 wakaba 1.52 }->{$token->{tag_name}};
4438 wakaba 1.1 !!!next-token;
4439     redo B;
4440 wakaba 1.52 } else {
4441     die "$0: in table: <>: $token->{tag_name}";
4442 wakaba 1.1 }
4443 wakaba 1.52 } elsif ($token->{tag_name} eq 'table') {
4444     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4445 wakaba 1.1
4446 wakaba 1.52 ## As if </table>
4447 wakaba 1.1 ## have a table element in table scope
4448     my $i;
4449 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4450     my $node = $self->{open_elements}->[$_];
4451 wakaba 1.52 if ($node->[1] eq 'table') {
4452 wakaba 1.79 !!!cp ('t221');
4453 wakaba 1.1 $i = $_;
4454     last INSCOPE;
4455     } elsif ({
4456 wakaba 1.83 #table => 1,
4457     html => 1,
4458 wakaba 1.1 }->{$node->[1]}) {
4459 wakaba 1.79 !!!cp ('t222');
4460 wakaba 1.1 last INSCOPE;
4461     }
4462     } # INSCOPE
4463     unless (defined $i) {
4464 wakaba 1.79 !!!cp ('t223');
4465 wakaba 1.83 ## TODO: The following is wrong, maybe.
4466 wakaba 1.52 !!!parse-error (type => 'unmatched end tag:table');
4467     ## Ignore tokens </table><table>
4468 wakaba 1.1 !!!next-token;
4469     redo B;
4470     }
4471    
4472     ## generate implied end tags
4473 wakaba 1.86 while ({
4474     dd => 1, dt => 1, li => 1, p => 1,
4475     }->{$self->{open_elements}->[-1]->[1]}) {
4476 wakaba 1.79 !!!cp ('t224');
4477 wakaba 1.86 pop @{$self->{open_elements}};
4478 wakaba 1.1 }
4479    
4480 wakaba 1.52 if ($self->{open_elements}->[-1]->[1] ne 'table') {
4481 wakaba 1.79 !!!cp ('t225');
4482 wakaba 1.83 ## ISSUE: Can this case be reached?
4483 wakaba 1.3 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4484 wakaba 1.79 } else {
4485     !!!cp ('t226');
4486 wakaba 1.1 }
4487    
4488 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4489 wakaba 1.95 pop @{$open_tables};
4490 wakaba 1.1
4491 wakaba 1.52 $self->_reset_insertion_mode;
4492 wakaba 1.1
4493     ## reprocess
4494     redo B;
4495 wakaba 1.100 } elsif ($token->{tag_name} eq 'style') {
4496     if (not $open_tables->[-1]->[1]) { # tainted
4497     !!!cp ('t227.8');
4498     ## NOTE: This is an "as if in head" code clone.
4499     $parse_rcdata->(CDATA_CONTENT_MODEL);
4500     redo B;
4501     } else {
4502     !!!cp ('t227.7');
4503     #
4504     }
4505     } elsif ($token->{tag_name} eq 'script') {
4506     if (not $open_tables->[-1]->[1]) { # tainted
4507     !!!cp ('t227.6');
4508     ## NOTE: This is an "as if in head" code clone.
4509     $script_start_tag->();
4510     redo B;
4511     } else {
4512     !!!cp ('t227.5');
4513     #
4514     }
4515 wakaba 1.98 } elsif ($token->{tag_name} eq 'input') {
4516     if (not $open_tables->[-1]->[1]) { # tainted
4517     if ($token->{attributes}->{type}) { ## TODO: case
4518     my $type = lc $token->{attributes}->{type}->{value};
4519     if ($type eq 'hidden') {
4520     !!!cp ('t227.3');
4521     !!!parse-error (type => 'in table:'.$token->{tag_name});
4522    
4523     !!!insert-element ($token->{tag_name}, $token->{attributes});
4524    
4525     ## TODO: form element pointer
4526    
4527     pop @{$self->{open_elements}};
4528    
4529     !!!next-token;
4530     redo B;
4531     } else {
4532     !!!cp ('t227.2');
4533     #
4534     }
4535     } else {
4536     !!!cp ('t227.1');
4537     #
4538     }
4539     } else {
4540     !!!cp ('t227.4');
4541     #
4542     }
4543 wakaba 1.58 } else {
4544 wakaba 1.79 !!!cp ('t227');
4545 wakaba 1.58 #
4546     }
4547 wakaba 1.98
4548     !!!parse-error (type => 'in table:'.$token->{tag_name});
4549    
4550     $insert = $insert_to_foster;
4551     #
4552 wakaba 1.58 } elsif ($token->{type} == END_TAG_TOKEN) {
4553 wakaba 1.52 if ($token->{tag_name} eq 'tr' and
4554 wakaba 1.54 $self->{insertion_mode} == IN_ROW_IM) {
4555 wakaba 1.52 ## have an element in table scope
4556     my $i;
4557     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4558     my $node = $self->{open_elements}->[$_];
4559     if ($node->[1] eq $token->{tag_name}) {
4560 wakaba 1.79 !!!cp ('t228');
4561 wakaba 1.52 $i = $_;
4562     last INSCOPE;
4563     } elsif ({
4564     table => 1, html => 1,
4565     }->{$node->[1]}) {
4566 wakaba 1.79 !!!cp ('t229');
4567 wakaba 1.52 last INSCOPE;
4568     }
4569     } # INSCOPE
4570     unless (defined $i) {
4571 wakaba 1.79 !!!cp ('t230');
4572 wakaba 1.52 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4573     ## Ignore the token
4574 wakaba 1.42 !!!next-token;
4575     redo B;
4576 wakaba 1.79 } else {
4577     !!!cp ('t232');
4578 wakaba 1.42 }
4579    
4580 wakaba 1.52 ## Clear back to table row context
4581     while (not {
4582     tr => 1, html => 1,
4583     }->{$self->{open_elements}->[-1]->[1]}) {
4584 wakaba 1.79 !!!cp ('t231');
4585 wakaba 1.83 ## ISSUE: Can this state be reached?
4586 wakaba 1.52 pop @{$self->{open_elements}};
4587     }
4588 wakaba 1.42
4589 wakaba 1.52 pop @{$self->{open_elements}}; # tr
4590 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
4591 wakaba 1.52 !!!next-token;
4592     redo B;
4593     } elsif ($token->{tag_name} eq 'table') {
4594 wakaba 1.54 if ($self->{insertion_mode} == IN_ROW_IM) {
4595 wakaba 1.52 ## As if </tr>
4596     ## have an element in table scope
4597     my $i;
4598     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4599     my $node = $self->{open_elements}->[$_];
4600     if ($node->[1] eq 'tr') {
4601 wakaba 1.79 !!!cp ('t233');
4602 wakaba 1.52 $i = $_;
4603     last INSCOPE;
4604     } elsif ({
4605     table => 1, html => 1,
4606     }->{$node->[1]}) {
4607 wakaba 1.79 !!!cp ('t234');
4608 wakaba 1.52 last INSCOPE;
4609 wakaba 1.42 }
4610 wakaba 1.52 } # INSCOPE
4611     unless (defined $i) {
4612 wakaba 1.79 !!!cp ('t235');
4613 wakaba 1.83 ## TODO: The following is wrong.
4614 wakaba 1.52 !!!parse-error (type => 'unmatched end tag:'.$token->{type});
4615     ## Ignore the token
4616     !!!next-token;
4617     redo B;
4618 wakaba 1.42 }
4619 wakaba 1.52
4620     ## Clear back to table row context
4621     while (not {
4622     tr => 1, html => 1,
4623     }->{$self->{open_elements}->[-1]->[1]}) {
4624 wakaba 1.79 !!!cp ('t236');
4625 wakaba 1.83 ## ISSUE: Can this state be reached?
4626 wakaba 1.46 pop @{$self->{open_elements}};
4627 wakaba 1.1 }
4628 wakaba 1.46
4629 wakaba 1.52 pop @{$self->{open_elements}}; # tr
4630 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
4631 wakaba 1.46 ## reprocess in the "in table body" insertion mode...
4632 wakaba 1.1 }
4633    
4634 wakaba 1.54 if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
4635 wakaba 1.52 ## have an element in table scope
4636     my $i;
4637     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4638     my $node = $self->{open_elements}->[$_];
4639     if ({
4640     tbody => 1, thead => 1, tfoot => 1,
4641     }->{$node->[1]}) {
4642 wakaba 1.79 !!!cp ('t237');
4643 wakaba 1.52 $i = $_;
4644     last INSCOPE;
4645     } elsif ({
4646     table => 1, html => 1,
4647     }->{$node->[1]}) {
4648 wakaba 1.79 !!!cp ('t238');
4649 wakaba 1.52 last INSCOPE;
4650     }
4651     } # INSCOPE
4652     unless (defined $i) {
4653 wakaba 1.79 !!!cp ('t239');
4654 wakaba 1.52 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4655     ## Ignore the token
4656     !!!next-token;
4657     redo B;
4658 wakaba 1.47 }
4659    
4660     ## Clear back to table body context
4661     while (not {
4662     tbody => 1, tfoot => 1, thead => 1, html => 1,
4663     }->{$self->{open_elements}->[-1]->[1]}) {
4664 wakaba 1.79 !!!cp ('t240');
4665 wakaba 1.47 pop @{$self->{open_elements}};
4666     }
4667    
4668 wakaba 1.52 ## As if <{current node}>
4669     ## have an element in table scope
4670     ## true by definition
4671    
4672     ## Clear back to table body context
4673     ## nop by definition
4674    
4675     pop @{$self->{open_elements}};
4676 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
4677 wakaba 1.52 ## reprocess in the "in table" insertion mode...
4678     }
4679    
4680 wakaba 1.94 ## NOTE: </table> in the "in table" insertion mode.
4681     ## When you edit the code fragment below, please ensure that
4682     ## the code for <table> in the "in table" insertion mode
4683     ## is synced with it.
4684    
4685 wakaba 1.52 ## have a table element in table scope
4686     my $i;
4687     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4688     my $node = $self->{open_elements}->[$_];
4689     if ($node->[1] eq $token->{tag_name}) {
4690 wakaba 1.79 !!!cp ('t241');
4691 wakaba 1.52 $i = $_;
4692     last INSCOPE;
4693     } elsif ({
4694     table => 1, html => 1,
4695     }->{$node->[1]}) {
4696 wakaba 1.79 !!!cp ('t242');
4697 wakaba 1.52 last INSCOPE;
4698 wakaba 1.47 }
4699 wakaba 1.52 } # INSCOPE
4700     unless (defined $i) {
4701 wakaba 1.79 !!!cp ('t243');
4702 wakaba 1.52 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4703     ## Ignore the token
4704     !!!next-token;
4705     redo B;
4706 wakaba 1.3 }
4707 wakaba 1.52
4708     splice @{$self->{open_elements}}, $i;
4709 wakaba 1.95 pop @{$open_tables};
4710 wakaba 1.1
4711 wakaba 1.52 $self->_reset_insertion_mode;
4712 wakaba 1.47
4713     !!!next-token;
4714     redo B;
4715     } elsif ({
4716 wakaba 1.48 tbody => 1, tfoot => 1, thead => 1,
4717 wakaba 1.52 }->{$token->{tag_name}} and
4718 wakaba 1.56 $self->{insertion_mode} & ROW_IMS) {
4719 wakaba 1.54 if ($self->{insertion_mode} == IN_ROW_IM) {
4720 wakaba 1.52 ## have an element in table scope
4721     my $i;
4722     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4723     my $node = $self->{open_elements}->[$_];
4724     if ($node->[1] eq $token->{tag_name}) {
4725 wakaba 1.79 !!!cp ('t247');
4726 wakaba 1.52 $i = $_;
4727     last INSCOPE;
4728     } elsif ({
4729     table => 1, html => 1,
4730     }->{$node->[1]}) {
4731 wakaba 1.79 !!!cp ('t248');
4732 wakaba 1.52 last INSCOPE;
4733     }
4734     } # INSCOPE
4735     unless (defined $i) {
4736 wakaba 1.79 !!!cp ('t249');
4737 wakaba 1.52 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4738     ## Ignore the token
4739     !!!next-token;
4740     redo B;
4741     }
4742    
4743 wakaba 1.48 ## As if </tr>
4744     ## have an element in table scope
4745     my $i;
4746     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4747     my $node = $self->{open_elements}->[$_];
4748     if ($node->[1] eq 'tr') {
4749 wakaba 1.79 !!!cp ('t250');
4750 wakaba 1.48 $i = $_;
4751     last INSCOPE;
4752     } elsif ({
4753     table => 1, html => 1,
4754     }->{$node->[1]}) {
4755 wakaba 1.79 !!!cp ('t251');
4756 wakaba 1.48 last INSCOPE;
4757     }
4758     } # INSCOPE
4759 wakaba 1.52 unless (defined $i) {
4760 wakaba 1.79 !!!cp ('t252');
4761 wakaba 1.52 !!!parse-error (type => 'unmatched end tag:tr');
4762     ## Ignore the token
4763     !!!next-token;
4764     redo B;
4765     }
4766 wakaba 1.48
4767     ## Clear back to table row context
4768     while (not {
4769     tr => 1, html => 1,
4770     }->{$self->{open_elements}->[-1]->[1]}) {
4771 wakaba 1.79 !!!cp ('t253');
4772 wakaba 1.83 ## ISSUE: Can this case be reached?
4773 wakaba 1.48 pop @{$self->{open_elements}};
4774     }
4775    
4776     pop @{$self->{open_elements}}; # tr
4777 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
4778 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
4779     }
4780    
4781     ## have an element in table scope
4782     my $i;
4783     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4784     my $node = $self->{open_elements}->[$_];
4785     if ($node->[1] eq $token->{tag_name}) {
4786 wakaba 1.79 !!!cp ('t254');
4787 wakaba 1.52 $i = $_;
4788     last INSCOPE;
4789     } elsif ({
4790     table => 1, html => 1,
4791     }->{$node->[1]}) {
4792 wakaba 1.79 !!!cp ('t255');
4793 wakaba 1.52 last INSCOPE;
4794     }
4795     } # INSCOPE
4796     unless (defined $i) {
4797 wakaba 1.79 !!!cp ('t256');
4798 wakaba 1.52 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4799     ## Ignore the token
4800     !!!next-token;
4801     redo B;
4802     }
4803    
4804     ## Clear back to table body context
4805     while (not {
4806     tbody => 1, tfoot => 1, thead => 1, html => 1,
4807     }->{$self->{open_elements}->[-1]->[1]}) {
4808 wakaba 1.79 !!!cp ('t257');
4809 wakaba 1.83 ## ISSUE: Can this case be reached?
4810 wakaba 1.52 pop @{$self->{open_elements}};
4811     }
4812    
4813     pop @{$self->{open_elements}};
4814 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
4815 wakaba 1.52 !!!next-token;
4816     redo B;
4817     } elsif ({
4818     body => 1, caption => 1, col => 1, colgroup => 1,
4819     html => 1, td => 1, th => 1,
4820 wakaba 1.54 tr => 1, # $self->{insertion_mode} == IN_ROW_IM
4821     tbody => 1, tfoot => 1, thead => 1, # $self->{insertion_mode} == IN_TABLE_IM
4822 wakaba 1.52 }->{$token->{tag_name}}) {
4823 wakaba 1.79 !!!cp ('t258');
4824 wakaba 1.52 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4825     ## Ignore the token
4826     !!!next-token;
4827     redo B;
4828 wakaba 1.58 } else {
4829 wakaba 1.79 !!!cp ('t259');
4830 wakaba 1.58 !!!parse-error (type => 'in table:/'.$token->{tag_name});
4831 wakaba 1.52
4832 wakaba 1.58 $insert = $insert_to_foster;
4833     #
4834     }
4835     } else {
4836     die "$0: $token->{type}: Unknown token type";
4837     }
4838 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_COLUMN_GROUP_IM) {
4839 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
4840 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4841     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4842     unless (length $token->{data}) {
4843 wakaba 1.79 !!!cp ('t260');
4844 wakaba 1.52 !!!next-token;
4845     redo B;
4846     }
4847     }
4848    
4849 wakaba 1.79 !!!cp ('t261');
4850 wakaba 1.52 #
4851 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
4852 wakaba 1.52 if ($token->{tag_name} eq 'col') {
4853 wakaba 1.79 !!!cp ('t262');
4854 wakaba 1.52 !!!insert-element ($token->{tag_name}, $token->{attributes});
4855     pop @{$self->{open_elements}};
4856     !!!next-token;
4857     redo B;
4858     } else {
4859 wakaba 1.79 !!!cp ('t263');
4860 wakaba 1.52 #
4861     }
4862 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
4863 wakaba 1.52 if ($token->{tag_name} eq 'colgroup') {
4864     if ($self->{open_elements}->[-1]->[1] eq 'html') {
4865 wakaba 1.79 !!!cp ('t264');
4866 wakaba 1.52 !!!parse-error (type => 'unmatched end tag:colgroup');
4867     ## Ignore the token
4868     !!!next-token;
4869     redo B;
4870     } else {
4871 wakaba 1.79 !!!cp ('t265');
4872 wakaba 1.52 pop @{$self->{open_elements}}; # colgroup
4873 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
4874 wakaba 1.52 !!!next-token;
4875     redo B;
4876     }
4877     } elsif ($token->{tag_name} eq 'col') {
4878 wakaba 1.79 !!!cp ('t266');
4879 wakaba 1.52 !!!parse-error (type => 'unmatched end tag:col');
4880     ## Ignore the token
4881     !!!next-token;
4882     redo B;
4883     } else {
4884 wakaba 1.79 !!!cp ('t267');
4885 wakaba 1.52 #
4886     }
4887     } else {
4888 wakaba 1.83 die "$0: $token->{type}: Unknown token type";
4889 wakaba 1.52 }
4890    
4891     ## As if </colgroup>
4892     if ($self->{open_elements}->[-1]->[1] eq 'html') {
4893 wakaba 1.79 !!!cp ('t269');
4894 wakaba 1.52 !!!parse-error (type => 'unmatched end tag:colgroup');
4895     ## Ignore the token
4896     !!!next-token;
4897     redo B;
4898     } else {
4899 wakaba 1.79 !!!cp ('t270');
4900 wakaba 1.52 pop @{$self->{open_elements}}; # colgroup
4901 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
4902 wakaba 1.52 ## reprocess
4903     redo B;
4904     }
4905 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_SELECT_IM) {
4906 wakaba 1.58 if ($token->{type} == CHARACTER_TOKEN) {
4907 wakaba 1.79 !!!cp ('t271');
4908 wakaba 1.58 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4909     !!!next-token;
4910     redo B;
4911     } elsif ($token->{type} == START_TAG_TOKEN) {
4912 wakaba 1.52 if ($token->{tag_name} eq 'option') {
4913     if ($self->{open_elements}->[-1]->[1] eq 'option') {
4914 wakaba 1.79 !!!cp ('t272');
4915 wakaba 1.52 ## As if </option>
4916     pop @{$self->{open_elements}};
4917 wakaba 1.79 } else {
4918     !!!cp ('t273');
4919 wakaba 1.52 }
4920    
4921     !!!insert-element ($token->{tag_name}, $token->{attributes});
4922     !!!next-token;
4923     redo B;
4924     } elsif ($token->{tag_name} eq 'optgroup') {
4925     if ($self->{open_elements}->[-1]->[1] eq 'option') {
4926 wakaba 1.79 !!!cp ('t274');
4927 wakaba 1.52 ## As if </option>
4928     pop @{$self->{open_elements}};
4929 wakaba 1.79 } else {
4930     !!!cp ('t275');
4931 wakaba 1.52 }
4932    
4933     if ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
4934 wakaba 1.79 !!!cp ('t276');
4935 wakaba 1.52 ## As if </optgroup>
4936     pop @{$self->{open_elements}};
4937 wakaba 1.79 } else {
4938     !!!cp ('t277');
4939 wakaba 1.52 }
4940    
4941     !!!insert-element ($token->{tag_name}, $token->{attributes});
4942     !!!next-token;
4943     redo B;
4944     } elsif ($token->{tag_name} eq 'select') {
4945 wakaba 1.83 ## TODO: The type below is not good - <select> is replaced by </select>
4946 wakaba 1.52 !!!parse-error (type => 'not closed:select');
4947     ## As if </select> instead
4948     ## have an element in table scope
4949     my $i;
4950     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4951     my $node = $self->{open_elements}->[$_];
4952     if ($node->[1] eq $token->{tag_name}) {
4953 wakaba 1.79 !!!cp ('t278');
4954 wakaba 1.52 $i = $_;
4955     last INSCOPE;
4956     } elsif ({
4957     table => 1, html => 1,
4958     }->{$node->[1]}) {
4959 wakaba 1.79 !!!cp ('t279');
4960 wakaba 1.52 last INSCOPE;
4961 wakaba 1.47 }
4962 wakaba 1.52 } # INSCOPE
4963     unless (defined $i) {
4964 wakaba 1.79 !!!cp ('t280');
4965 wakaba 1.52 !!!parse-error (type => 'unmatched end tag:select');
4966     ## Ignore the token
4967     !!!next-token;
4968     redo B;
4969 wakaba 1.47 }
4970 wakaba 1.52
4971 wakaba 1.79 !!!cp ('t281');
4972 wakaba 1.52 splice @{$self->{open_elements}}, $i;
4973    
4974     $self->_reset_insertion_mode;
4975 wakaba 1.47
4976 wakaba 1.52 !!!next-token;
4977     redo B;
4978 wakaba 1.58 } else {
4979 wakaba 1.79 !!!cp ('t282');
4980 wakaba 1.58 !!!parse-error (type => 'in select:'.$token->{tag_name});
4981     ## Ignore the token
4982     !!!next-token;
4983     redo B;
4984     }
4985     } elsif ($token->{type} == END_TAG_TOKEN) {
4986 wakaba 1.52 if ($token->{tag_name} eq 'optgroup') {
4987     if ($self->{open_elements}->[-1]->[1] eq 'option' and
4988     $self->{open_elements}->[-2]->[1] eq 'optgroup') {
4989 wakaba 1.79 !!!cp ('t283');
4990 wakaba 1.52 ## As if </option>
4991     splice @{$self->{open_elements}}, -2;
4992     } elsif ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
4993 wakaba 1.79 !!!cp ('t284');
4994 wakaba 1.52 pop @{$self->{open_elements}};
4995     } else {
4996 wakaba 1.79 !!!cp ('t285');
4997 wakaba 1.52 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4998     ## Ignore the token
4999     }
5000     !!!next-token;
5001     redo B;
5002     } elsif ($token->{tag_name} eq 'option') {
5003     if ($self->{open_elements}->[-1]->[1] eq 'option') {
5004 wakaba 1.79 !!!cp ('t286');
5005 wakaba 1.47 pop @{$self->{open_elements}};
5006 wakaba 1.52 } else {
5007 wakaba 1.79 !!!cp ('t287');
5008 wakaba 1.52 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
5009     ## Ignore the token
5010 wakaba 1.1 }
5011 wakaba 1.52 !!!next-token;
5012     redo B;
5013     } elsif ($token->{tag_name} eq 'select') {
5014     ## have an element in table scope
5015     my $i;
5016     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5017     my $node = $self->{open_elements}->[$_];
5018     if ($node->[1] eq $token->{tag_name}) {
5019 wakaba 1.79 !!!cp ('t288');
5020 wakaba 1.52 $i = $_;
5021     last INSCOPE;
5022     } elsif ({
5023     table => 1, html => 1,
5024     }->{$node->[1]}) {
5025 wakaba 1.79 !!!cp ('t289');
5026 wakaba 1.52 last INSCOPE;
5027 wakaba 1.48 }
5028 wakaba 1.52 } # INSCOPE
5029     unless (defined $i) {
5030 wakaba 1.79 !!!cp ('t290');
5031 wakaba 1.52 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
5032     ## Ignore the token
5033     !!!next-token;
5034 wakaba 1.48 redo B;
5035 wakaba 1.52 }
5036    
5037 wakaba 1.79 !!!cp ('t291');
5038 wakaba 1.52 splice @{$self->{open_elements}}, $i;
5039    
5040     $self->_reset_insertion_mode;
5041    
5042     !!!next-token;
5043     redo B;
5044     } elsif ({
5045     caption => 1, table => 1, tbody => 1,
5046     tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
5047     }->{$token->{tag_name}}) {
5048 wakaba 1.83 ## TODO: The following is wrong?
5049 wakaba 1.52 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
5050    
5051     ## have an element in table scope
5052     my $i;
5053     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5054     my $node = $self->{open_elements}->[$_];
5055     if ($node->[1] eq $token->{tag_name}) {
5056 wakaba 1.79 !!!cp ('t292');
5057 wakaba 1.52 $i = $_;
5058     last INSCOPE;
5059     } elsif ({
5060     table => 1, html => 1,
5061     }->{$node->[1]}) {
5062 wakaba 1.79 !!!cp ('t293');
5063 wakaba 1.52 last INSCOPE;
5064 wakaba 1.1 }
5065 wakaba 1.52 } # INSCOPE
5066     unless (defined $i) {
5067 wakaba 1.79 !!!cp ('t294');
5068 wakaba 1.52 ## Ignore the token
5069 wakaba 1.1 !!!next-token;
5070     redo B;
5071     }
5072 wakaba 1.52
5073     ## As if </select>
5074     ## have an element in table scope
5075     undef $i;
5076 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5077     my $node = $self->{open_elements}->[$_];
5078 wakaba 1.52 if ($node->[1] eq 'select') {
5079 wakaba 1.79 !!!cp ('t295');
5080 wakaba 1.1 $i = $_;
5081     last INSCOPE;
5082     } elsif ({
5083     table => 1, html => 1,
5084 wakaba 1.52 }->{$node->[1]}) {
5085 wakaba 1.83 ## ISSUE: Can this state be reached?
5086 wakaba 1.79 !!!cp ('t296');
5087 wakaba 1.52 last INSCOPE;
5088     }
5089     } # INSCOPE
5090     unless (defined $i) {
5091 wakaba 1.79 !!!cp ('t297');
5092 wakaba 1.83 ## TODO: The following error type is correct?
5093 wakaba 1.52 !!!parse-error (type => 'unmatched end tag:select');
5094     ## Ignore the </select> token
5095     !!!next-token; ## TODO: ok?
5096     redo B;
5097     }
5098    
5099 wakaba 1.79 !!!cp ('t298');
5100 wakaba 1.52 splice @{$self->{open_elements}}, $i;
5101    
5102     $self->_reset_insertion_mode;
5103    
5104     ## reprocess
5105     redo B;
5106 wakaba 1.58 } else {
5107 wakaba 1.79 !!!cp ('t299');
5108 wakaba 1.58 !!!parse-error (type => 'in select:/'.$token->{tag_name});
5109 wakaba 1.52 ## Ignore the token
5110     !!!next-token;
5111     redo B;
5112 wakaba 1.58 }
5113     } else {
5114     die "$0: $token->{type}: Unknown token type";
5115     }
5116 wakaba 1.56 } elsif ($self->{insertion_mode} & BODY_AFTER_IMS) {
5117 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
5118 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5119     my $data = $1; ## Copy now; |$1| is a magical variable.
5120     ## As if in body
5121     $reconstruct_active_formatting_elements->($insert_to_current);
5122    
5123     $self->{open_elements}->[-1]->[0]->manakai_append_text ($data); ## Pass the copy: arguments are aliased, so |$1| could change inside the callee.
5124    
5125     unless (length $token->{data}) {
5126 wakaba 1.79 !!!cp ('t300');
5127 wakaba 1.52 !!!next-token;
5128     redo B;
5129     }
5130     }
5131    
5132 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
5133 wakaba 1.79 !!!cp ('t301');
5134 wakaba 1.52 !!!parse-error (type => 'after html:#character');
5135    
5136 wakaba 1.84 ## Reprocess in the "after body" insertion mode.
5137 wakaba 1.79 } else {
5138     !!!cp ('t302');
5139 wakaba 1.52 }
5140    
5141     ## "after body" insertion mode
5142     !!!parse-error (type => 'after body:#character');
5143    
5144 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
5145 wakaba 1.52 ## reprocess
5146     redo B;
5147 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
5148 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
5149 wakaba 1.79 !!!cp ('t303');
5150 wakaba 1.52 !!!parse-error (type => 'after html:'.$token->{tag_name});
5151    
5152 wakaba 1.84 ## Reprocess in the "after body" insertion mode.
5153 wakaba 1.79 } else {
5154     !!!cp ('t304');
5155 wakaba 1.52 }
5156    
5157     ## "after body" insertion mode
5158     !!!parse-error (type => 'after body:'.$token->{tag_name});
5159    
5160 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
5161 wakaba 1.52 ## reprocess
5162     redo B;
5163 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
5164 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
5165 wakaba 1.79 !!!cp ('t305');
5166 wakaba 1.52 !!!parse-error (type => 'after html:/'.$token->{tag_name});
5167    
5168 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
5169 wakaba 1.84 ## Reprocess in the "after body" insertion mode.
5170 wakaba 1.79 } else {
5171     !!!cp ('t306');
5172 wakaba 1.52 }
5173    
5174     ## "after body" insertion mode
5175     if ($token->{tag_name} eq 'html') {
5176     if (defined $self->{inner_html_node}) {
5177 wakaba 1.79 !!!cp ('t307');
5178 wakaba 1.52 !!!parse-error (type => 'unmatched end tag:html');
5179     ## Ignore the token
5180     !!!next-token;
5181     redo B;
5182     } else {
5183 wakaba 1.79 !!!cp ('t308');
5184 wakaba 1.54 $self->{insertion_mode} = AFTER_HTML_BODY_IM;
5185 wakaba 1.52 !!!next-token;
5186     redo B;
5187     }
5188     } else {
5189 wakaba 1.79 !!!cp ('t309');
5190 wakaba 1.52 !!!parse-error (type => 'after body:/'.$token->{tag_name});
5191    
5192 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
5193 wakaba 1.52 ## reprocess
5194     redo B;
5195     }
5196     } else {
5197     die "$0: $token->{type}: Unknown token type";
5198     }
5199 wakaba 1.56 } elsif ($self->{insertion_mode} & FRAME_IMS) {
5200 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
5201 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5202     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5203    
5204     unless (length $token->{data}) {
5205 wakaba 1.79 !!!cp ('t310');
5206 wakaba 1.52 !!!next-token;
5207     redo B;
5208     }
5209     }
5210    
5211     if ($token->{data} =~ s/^[^\x09\x0A\x0B\x0C\x20]+//) {
5212 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
5213 wakaba 1.79 !!!cp ('t311');
5214 wakaba 1.52 !!!parse-error (type => 'in frameset:#character');
5215 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
5216 wakaba 1.79 !!!cp ('t312');
5217 wakaba 1.52 !!!parse-error (type => 'after frameset:#character');
5218     } else { # "after html frameset"
5219 wakaba 1.79 !!!cp ('t313');
5220 wakaba 1.52 !!!parse-error (type => 'after html:#character');
5221    
5222 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
5223 wakaba 1.84 ## Reprocess in the "after frameset" insertion mode.
5224 wakaba 1.52 !!!parse-error (type => 'after frameset:#character');
5225     }
5226    
5227     ## Ignore the token.
5228     if (length $token->{data}) {
5229 wakaba 1.79 !!!cp ('t314');
5230 wakaba 1.52 ## reprocess the rest of characters
5231     } else {
5232 wakaba 1.79 !!!cp ('t315');
5233 wakaba 1.52 !!!next-token;
5234     }
5235     redo B;
5236     }
5237    
5238     die qq[$0: Character "$token->{data}"];
5239 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
5240 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
5241 wakaba 1.79 !!!cp ('t316');
5242 wakaba 1.52 !!!parse-error (type => 'after html:'.$token->{tag_name});
5243 wakaba 1.1
5244 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
5245 wakaba 1.84 ## Process in the "after frameset" insertion mode.
5246 wakaba 1.79 } else {
5247     !!!cp ('t317');
5248     }
5249 wakaba 1.1
5250 wakaba 1.52 if ($token->{tag_name} eq 'frameset' and
5251 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
5252 wakaba 1.79 !!!cp ('t318');
5253 wakaba 1.52 !!!insert-element ($token->{tag_name}, $token->{attributes});
5254     !!!next-token;
5255     redo B;
5256     } elsif ($token->{tag_name} eq 'frame' and
5257 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
5258 wakaba 1.79 !!!cp ('t319');
5259 wakaba 1.52 !!!insert-element ($token->{tag_name}, $token->{attributes});
5260     pop @{$self->{open_elements}};
5261     !!!next-token;
5262     redo B;
5263     } elsif ($token->{tag_name} eq 'noframes') {
5264 wakaba 1.79 !!!cp ('t320');
5265 wakaba 1.52 ## NOTE: As if in body.
5266 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
5267 wakaba 1.52 redo B;
5268     } else {
5269 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
5270 wakaba 1.79 !!!cp ('t321');
5271 wakaba 1.52 !!!parse-error (type => 'in frameset:'.$token->{tag_name});
5272     } else {
5273 wakaba 1.79 !!!cp ('t322');
5274 wakaba 1.52 !!!parse-error (type => 'after frameset:'.$token->{tag_name});
5275     }
5276     ## Ignore the token
5277     !!!next-token;
5278     redo B;
5279     }
5280 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
5281 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
5282 wakaba 1.79 !!!cp ('t323');
5283 wakaba 1.52 !!!parse-error (type => 'after html:/'.$token->{tag_name});
5284 wakaba 1.1
5285 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
5286 wakaba 1.84 ## Process in the "after frameset" insertion mode.
5287 wakaba 1.79 } else {
5288     !!!cp ('t324');
5289 wakaba 1.52 }
5290 wakaba 1.1
5291 wakaba 1.52 if ($token->{tag_name} eq 'frameset' and
5292 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
5293 wakaba 1.52 if ($self->{open_elements}->[-1]->[1] eq 'html' and
5294     @{$self->{open_elements}} == 1) {
5295 wakaba 1.79 !!!cp ('t325');
5296 wakaba 1.52 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
5297     ## Ignore the token
5298     !!!next-token;
5299     } else {
5300 wakaba 1.79 !!!cp ('t326');
5301 wakaba 1.52 pop @{$self->{open_elements}};
5302     !!!next-token;
5303     }
5304 wakaba 1.47
5305 wakaba 1.52 if (not defined $self->{inner_html_node} and
5306     $self->{open_elements}->[-1]->[1] ne 'frameset') {
5307 wakaba 1.79 !!!cp ('t327');
5308 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
5309 wakaba 1.79 } else {
5310     !!!cp ('t328');
5311 wakaba 1.52 }
5312     redo B;
5313     } elsif ($token->{tag_name} eq 'html' and
5314 wakaba 1.54 $self->{insertion_mode} == AFTER_FRAMESET_IM) {
5315 wakaba 1.79 !!!cp ('t329');
5316 wakaba 1.54 $self->{insertion_mode} = AFTER_HTML_FRAMESET_IM;
5317 wakaba 1.52 !!!next-token;
5318     redo B;
5319     } else {
5320 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
5321 wakaba 1.79 !!!cp ('t330');
5322 wakaba 1.52 !!!parse-error (type => 'in frameset:/'.$token->{tag_name});
5323     } else {
5324 wakaba 1.79 !!!cp ('t331');
5325 wakaba 1.52 !!!parse-error (type => 'after frameset:/'.$token->{tag_name});
5326     }
5327     ## Ignore the token
5328     !!!next-token;
5329     redo B;
5330     }
5331     } else {
5332     die "$0: $token->{type}: Unknown token type";
5333     }
5334 wakaba 1.47
5335 wakaba 1.52 ## ISSUE: An issue in spec here
5336     } else {
5337     die "$0: $self->{insertion_mode}: Unknown insertion mode";
5338     }
5339 wakaba 1.47
5340 wakaba 1.52 ## "in body" insertion mode
5341 wakaba 1.55 if ($token->{type} == START_TAG_TOKEN) {
5342 wakaba 1.52 if ($token->{tag_name} eq 'script') {
5343 wakaba 1.79 !!!cp ('t332');
5344 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
5345 wakaba 1.100 $script_start_tag->();
5346 wakaba 1.53 redo B;
5347 wakaba 1.52 } elsif ($token->{tag_name} eq 'style') {
5348 wakaba 1.79 !!!cp ('t333');
5349 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
5350 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
5351 wakaba 1.53 redo B;
5352 wakaba 1.52 } elsif ({
5353     base => 1, link => 1,
5354     }->{$token->{tag_name}}) {
5355 wakaba 1.79 !!!cp ('t334');
5356 wakaba 1.52 ## NOTE: This is an "as if in head" code clone, only "-t" differs
5357     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
5358     pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
5359     !!!next-token;
5360 wakaba 1.53 redo B;
5361 wakaba 1.52 } elsif ($token->{tag_name} eq 'meta') {
5362     ## NOTE: This is an "as if in head" code clone, only "-t" differs
5363     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
5364 wakaba 1.66 my $meta_el = pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
5365 wakaba 1.46
5366 wakaba 1.52 unless ($self->{confident}) {
5367     if ($token->{attributes}->{charset}) { ## TODO: And if supported
5368 wakaba 1.79 !!!cp ('t335');
5369 wakaba 1.63 $self->{change_encoding}
5370     ->($self, $token->{attributes}->{charset}->{value});
5371 wakaba 1.66
5372     $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
5373     ->set_user_data (manakai_has_reference =>
5374     $token->{attributes}->{charset}
5375     ->{has_reference});
5376 wakaba 1.63 } elsif ($token->{attributes}->{content}) {
5377 wakaba 1.52 ## ISSUE: Algorithm name in the spec was incorrect so that not linked to the definition.
5378 wakaba 1.63 if ($token->{attributes}->{content}->{value}
5379 wakaba 1.70 =~ /\A[^;]*;[\x09-\x0D\x20]*[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
5380     [\x09-\x0D\x20]*=
5381 wakaba 1.52 [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
5382     ([^"'\x09-\x0D\x20][^\x09-\x0D\x20]*))/x) {
5383 wakaba 1.79 !!!cp ('t336');
5384 wakaba 1.63 $self->{change_encoding}
5385     ->($self, defined $1 ? $1 : defined $2 ? $2 : $3);
5386 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
5387     ->set_user_data (manakai_has_reference =>
5388     $token->{attributes}->{content}
5389     ->{has_reference});
5390 wakaba 1.63 }
5391 wakaba 1.52 }
5392 wakaba 1.66 } else {
5393     if ($token->{attributes}->{charset}) {
5394 wakaba 1.79 !!!cp ('t337');
5395 wakaba 1.66 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
5396     ->set_user_data (manakai_has_reference =>
5397     $token->{attributes}->{charset}
5398     ->{has_reference});
5399     }
5400 wakaba 1.68 if ($token->{attributes}->{content}) {
5401 wakaba 1.79 !!!cp ('t338');
5402 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
5403     ->set_user_data (manakai_has_reference =>
5404     $token->{attributes}->{content}
5405     ->{has_reference});
5406     }
5407 wakaba 1.52 }
5408 wakaba 1.1
5409 wakaba 1.52 !!!next-token;
5410 wakaba 1.53 redo B;
5411 wakaba 1.52 } elsif ($token->{tag_name} eq 'title') {
5412 wakaba 1.79 !!!cp ('t341');
5413 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
5414 wakaba 1.96 $parse_rcdata->(RCDATA_CONTENT_MODEL);
5415 wakaba 1.53 redo B;
5416 wakaba 1.52 } elsif ($token->{tag_name} eq 'body') {
5417     !!!parse-error (type => 'in body:body');
5418 wakaba 1.46
5419 wakaba 1.52 if (@{$self->{open_elements}} == 1 or
5420     $self->{open_elements}->[1]->[1] ne 'body') {
5421 wakaba 1.79 !!!cp ('t342');
5422 wakaba 1.52 ## Ignore the token
5423     } else {
5424     my $body_el = $self->{open_elements}->[1]->[0];
5425     for my $attr_name (keys %{$token->{attributes}}) {
5426     unless ($body_el->has_attribute_ns (undef, $attr_name)) {
5427 wakaba 1.79 !!!cp ('t343');
5428 wakaba 1.52 $body_el->set_attribute_ns
5429     (undef, [undef, $attr_name],
5430     $token->{attributes}->{$attr_name}->{value});
5431     }
5432     }
5433     }
5434     !!!next-token;
5435 wakaba 1.53 redo B;
5436 wakaba 1.52 } elsif ({
5437     address => 1, blockquote => 1, center => 1, dir => 1,
5438 wakaba 1.85 div => 1, dl => 1, fieldset => 1,
5439     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
5440 wakaba 1.97 menu => 1, ol => 1, p => 1, ul => 1,
5441     pre => 1, listing => 1,
5442 wakaba 1.52 }->{$token->{tag_name}}) {
5443     ## has a p element in scope
5444     INSCOPE: for (reverse @{$self->{open_elements}}) {
5445     if ($_->[1] eq 'p') {
5446 wakaba 1.79 !!!cp ('t344');
5447 wakaba 1.52 !!!back-token;
5448 wakaba 1.55 $token = {type => END_TAG_TOKEN, tag_name => 'p'};
5449 wakaba 1.53 redo B;
5450 wakaba 1.52 } elsif ({
5451     table => 1, caption => 1, td => 1, th => 1,
5452     button => 1, marquee => 1, object => 1, html => 1,
5453     }->{$_->[1]}) {
5454 wakaba 1.79 !!!cp ('t345');
5455 wakaba 1.52 last INSCOPE;
5456     }
5457     } # INSCOPE
5458    
5459     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
5460 wakaba 1.97 if ($token->{tag_name} eq 'pre' or $token->{tag_name} eq 'listing') {
5461 wakaba 1.52 !!!next-token;
5462 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
5463 wakaba 1.52 $token->{data} =~ s/^\x0A//;
5464     unless (length $token->{data}) {
5465 wakaba 1.79 !!!cp ('t346');
5466 wakaba 1.1 !!!next-token;
5467 wakaba 1.79 } else {
5468     !!!cp ('t349');
5469 wakaba 1.52 }
5470 wakaba 1.79 } else {
5471     !!!cp ('t348');
5472 wakaba 1.52 }
5473     } else {
5474 wakaba 1.79 !!!cp ('t347');
5475 wakaba 1.52 !!!next-token;
5476     }
5477 wakaba 1.53 redo B;
5478 wakaba 1.52 } elsif ($token->{tag_name} eq 'form') {
5479     if (defined $self->{form_element}) {
5480 wakaba 1.79 !!!cp ('t350');
5481 wakaba 1.52 !!!parse-error (type => 'in form:form');
5482     ## Ignore the token
5483     !!!next-token;
5484 wakaba 1.53 redo B;
5485 wakaba 1.52 } else {
5486     ## has a p element in scope
5487     INSCOPE: for (reverse @{$self->{open_elements}}) {
5488     if ($_->[1] eq 'p') {
5489 wakaba 1.79 !!!cp ('t351');
5490 wakaba 1.52 !!!back-token;
5491 wakaba 1.55 $token = {type => END_TAG_TOKEN, tag_name => 'p'};
5492 wakaba 1.53 redo B;
5493 wakaba 1.46 } elsif ({
5494 wakaba 1.52 table => 1, caption => 1, td => 1, th => 1,
5495     button => 1, marquee => 1, object => 1, html => 1,
5496     }->{$_->[1]}) {
5497 wakaba 1.79 !!!cp ('t352');
5498 wakaba 1.52 last INSCOPE;
5499     }
5500     } # INSCOPE
5501    
5502     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
5503     $self->{form_element} = $self->{open_elements}->[-1]->[0];
5504     !!!next-token;
5505 wakaba 1.53 redo B;
5506 wakaba 1.52 }
5507     } elsif ($token->{tag_name} eq 'li') {
5508     ## has a p element in scope
5509     INSCOPE: for (reverse @{$self->{open_elements}}) {
5510     if ($_->[1] eq 'p') {
5511 wakaba 1.79 !!!cp ('t353');
5512 wakaba 1.52 !!!back-token;
5513 wakaba 1.55 $token = {type => END_TAG_TOKEN, tag_name => 'p'};
5514 wakaba 1.53 redo B;
5515 wakaba 1.52 } elsif ({
5516     table => 1, caption => 1, td => 1, th => 1,
5517     button => 1, marquee => 1, object => 1, html => 1,
5518     }->{$_->[1]}) {
5519 wakaba 1.79 !!!cp ('t354');
5520 wakaba 1.52 last INSCOPE;
5521     }
5522     } # INSCOPE
5523    
5524     ## Step 1
5525     my $i = -1;
5526     my $node = $self->{open_elements}->[$i];
5527     LI: {
5528     ## Step 2
5529     if ($node->[1] eq 'li') {
5530     if ($i != -1) {
5531 wakaba 1.79 !!!cp ('t355');
5532 wakaba 1.52 !!!parse-error (type => 'end tag missing:'.
5533     $self->{open_elements}->[-1]->[1]);
5534 wakaba 1.79 } else {
5535     !!!cp ('t356');
5536 wakaba 1.52 }
5537     splice @{$self->{open_elements}}, $i;
5538     last LI;
5539 wakaba 1.79 } else {
5540     !!!cp ('t357');
5541 wakaba 1.52 }
5542    
5543     ## Step 3
5544     if (not $formatting_category->{$node->[1]} and
5545     #not $phrasing_category->{$node->[1]} and
5546     ($special_category->{$node->[1]} or
5547     $scoping_category->{$node->[1]}) and
5548     $node->[1] ne 'address' and $node->[1] ne 'div') {
5549 wakaba 1.79 !!!cp ('t358');
5550 wakaba 1.52 last LI;
5551     }
5552    
5553 wakaba 1.79 !!!cp ('t359');
5554 wakaba 1.52 ## Step 4
5555     $i--;
5556     $node = $self->{open_elements}->[$i];
5557     redo LI;
5558     } # LI
5559    
5560     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
5561     !!!next-token;
5562 wakaba 1.53 redo B;
5563 wakaba 1.52 } elsif ($token->{tag_name} eq 'dd' or $token->{tag_name} eq 'dt') {
5564     ## has a p element in scope
5565     INSCOPE: for (reverse @{$self->{open_elements}}) {
5566     if ($_->[1] eq 'p') {
5567 wakaba 1.79 !!!cp ('t360');
5568 wakaba 1.52 !!!back-token;
5569 wakaba 1.55 $token = {type => END_TAG_TOKEN, tag_name => 'p'};
5570 wakaba 1.53 redo B;
5571 wakaba 1.52 } elsif ({
5572     table => 1, caption => 1, td => 1, th => 1,
5573     button => 1, marquee => 1, object => 1, html => 1,
5574     }->{$_->[1]}) {
5575 wakaba 1.79 !!!cp ('t361');
5576 wakaba 1.52 last INSCOPE;
5577     }
5578     } # INSCOPE
5579    
5580     ## Step 1
5581     my $i = -1;
5582     my $node = $self->{open_elements}->[$i];
5583     LI: {
5584     ## Step 2
5585     if ($node->[1] eq 'dt' or $node->[1] eq 'dd') {
5586     if ($i != -1) {
5587 wakaba 1.79 !!!cp ('t362');
5588 wakaba 1.52 !!!parse-error (type => 'end tag missing:'.
5589     $self->{open_elements}->[-1]->[1]);
5590 wakaba 1.79 } else {
5591     !!!cp ('t363');
5592 wakaba 1.1 }
5593 wakaba 1.52 splice @{$self->{open_elements}}, $i;
5594     last LI;
5595 wakaba 1.79 } else {
5596     !!!cp ('t364');
5597 wakaba 1.52 }
5598    
5599     ## Step 3
5600     if (not $formatting_category->{$node->[1]} and
5601     #not $phrasing_category->{$node->[1]} and
5602     ($special_category->{$node->[1]} or
5603     $scoping_category->{$node->[1]}) and
5604     $node->[1] ne 'address' and $node->[1] ne 'div') {
5605 wakaba 1.79 !!!cp ('t365');
5606 wakaba 1.52 last LI;
5607 wakaba 1.1 }
5608 wakaba 1.52
5609 wakaba 1.79 !!!cp ('t366');
5610 wakaba 1.52 ## Step 4
5611     $i--;
5612     $node = $self->{open_elements}->[$i];
5613     redo LI;
5614     } # LI
5615    
5616     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
5617     !!!next-token;
5618 wakaba 1.53 redo B;
5619 wakaba 1.52 } elsif ($token->{tag_name} eq 'plaintext') {
5620     ## has a p element in scope
5621     INSCOPE: for (reverse @{$self->{open_elements}}) {
5622     if ($_->[1] eq 'p') {
5623 wakaba 1.79 !!!cp ('t367');
5624 wakaba 1.52 !!!back-token;
5625 wakaba 1.55 $token = {type => END_TAG_TOKEN, tag_name => 'p'};
5626 wakaba 1.53 redo B;
5627 wakaba 1.52 } elsif ({
5628     table => 1, caption => 1, td => 1, th => 1,
5629     button => 1, marquee => 1, object => 1, html => 1,
5630     }->{$_->[1]}) {
5631 wakaba 1.79 !!!cp ('t368');
5632 wakaba 1.52 last INSCOPE;
5633 wakaba 1.46 }
5634 wakaba 1.52 } # INSCOPE
5635    
5636     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
5637    
5638     $self->{content_model} = PLAINTEXT_CONTENT_MODEL;
5639    
5640     !!!next-token;
5641 wakaba 1.53 redo B;
5642 wakaba 1.52 } elsif ($token->{tag_name} eq 'a') {
5643     AFE: for my $i (reverse 0..$#$active_formatting_elements) {
5644     my $node = $active_formatting_elements->[$i];
5645     if ($node->[1] eq 'a') {
5646 wakaba 1.79 !!!cp ('t371');
5647 wakaba 1.52 !!!parse-error (type => 'in a:a');
5648    
5649     !!!back-token;
5650 wakaba 1.55 $token = {type => END_TAG_TOKEN, tag_name => 'a'};
5651 wakaba 1.52 $formatting_end_tag->($token->{tag_name});
5652    
5653     AFE2: for (reverse 0..$#$active_formatting_elements) {
5654     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
5655 wakaba 1.79 !!!cp ('t372');
5656 wakaba 1.52 splice @$active_formatting_elements, $_, 1;
5657     last AFE2;
5658 wakaba 1.1 }
5659 wakaba 1.52 } # AFE2
5660     OE: for (reverse 0..$#{$self->{open_elements}}) {
5661     if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
5662 wakaba 1.79 !!!cp ('t373');
5663 wakaba 1.52 splice @{$self->{open_elements}}, $_, 1;
5664     last OE;
5665 wakaba 1.1 }
5666 wakaba 1.52 } # OE
5667     last AFE;
5668     } elsif ($node->[0] eq '#marker') {
5669 wakaba 1.79 !!!cp ('t374');
5670 wakaba 1.52 last AFE;
5671     }
5672     } # AFE
5673    
5674     $reconstruct_active_formatting_elements->($insert_to_current);
5675 wakaba 1.1
5676 wakaba 1.52 !!!insert-element-t ($token->{tag_name}, $token->{attributes});
5677     push @$active_formatting_elements, $self->{open_elements}->[-1];
5678 wakaba 1.1
5679 wakaba 1.52 !!!next-token;
5680 wakaba 1.53 redo B;
5681 wakaba 1.52 } elsif ({
5682     b => 1, big => 1, em => 1, font => 1, i => 1,
5683     s => 1, small => 1, strike => 1, ## NOTE: |strike|, not |strile|.
5684     strong => 1, tt => 1, u => 1,
5685     }->{$token->{tag_name}}) {
5686 wakaba 1.79 !!!cp ('t375');
5687 wakaba 1.52 $reconstruct_active_formatting_elements->($insert_to_current);
5688    
5689     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
5690     push @$active_formatting_elements, $self->{open_elements}->[-1]; ## Track the new element as an active formatting element.
5691    
5692     !!!next-token;
5693 wakaba 1.53 redo B;
5694 wakaba 1.52 } elsif ($token->{tag_name} eq 'nobr') {
5695     $reconstruct_active_formatting_elements->($insert_to_current);
5696 wakaba 1.1
5697 wakaba 1.52 ## has a |nobr| element in scope
5698     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5699     my $node = $self->{open_elements}->[$_];
5700     if ($node->[1] eq 'nobr') {
5701 wakaba 1.79 !!!cp ('t376');
5702 wakaba 1.58 !!!parse-error (type => 'in nobr:nobr');
5703 wakaba 1.52 !!!back-token;
5704 wakaba 1.55 $token = {type => END_TAG_TOKEN, tag_name => 'nobr'};
5705 wakaba 1.53 redo B;
5706 wakaba 1.52 } elsif ({
5707     table => 1, caption => 1, td => 1, th => 1,
5708     button => 1, marquee => 1, object => 1, html => 1,
5709     }->{$node->[1]}) {
5710 wakaba 1.79 !!!cp ('t377');
5711 wakaba 1.52 last INSCOPE;
5712     }
5713     } # INSCOPE
5714    
5715     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
5716     push @$active_formatting_elements, $self->{open_elements}->[-1];
5717    
5718     !!!next-token;
5719 wakaba 1.53 redo B;
5720 wakaba 1.52 } elsif ($token->{tag_name} eq 'button') {
5721     ## has a button element in scope
5722     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5723     my $node = $self->{open_elements}->[$_];
5724     if ($node->[1] eq 'button') {
5725 wakaba 1.79 !!!cp ('t378');
5726 wakaba 1.52 !!!parse-error (type => 'in button:button');
5727     !!!back-token;
5728 wakaba 1.55 $token = {type => END_TAG_TOKEN, tag_name => 'button'};
5729 wakaba 1.53 redo B;
5730 wakaba 1.52 } elsif ({
5731     table => 1, caption => 1, td => 1, th => 1,
5732     button => 1, marquee => 1, object => 1, html => 1,
5733     }->{$node->[1]}) {
5734 wakaba 1.79 !!!cp ('t379');
5735 wakaba 1.52 last INSCOPE;
5736     }
5737     } # INSCOPE
5738    
5739     $reconstruct_active_formatting_elements->($insert_to_current);
5740    
5741     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
5742 wakaba 1.85
5743     ## TODO: associate with $self->{form_element} if defined
5744    
5745 wakaba 1.52 push @$active_formatting_elements, ['#marker', ''];
5746 wakaba 1.1
5747 wakaba 1.52 !!!next-token;
5748 wakaba 1.53 redo B;
5749 wakaba 1.52 } elsif ($token->{tag_name} eq 'marquee' or
5750     $token->{tag_name} eq 'object') {
5751 wakaba 1.79 !!!cp ('t380');
5752 wakaba 1.52 $reconstruct_active_formatting_elements->($insert_to_current);
5753    
5754     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
5755     push @$active_formatting_elements, ['#marker', ''];
5756    
5757     !!!next-token;
5758 wakaba 1.53 redo B;
5759 wakaba 1.52 } elsif ($token->{tag_name} eq 'xmp') {
5760 wakaba 1.79 !!!cp ('t381');
5761 wakaba 1.52 $reconstruct_active_formatting_elements->($insert_to_current);
5762 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
5763 wakaba 1.53 redo B;
5764 wakaba 1.52 } elsif ($token->{tag_name} eq 'table') {
5765     ## has a p element in scope
5766     INSCOPE: for (reverse @{$self->{open_elements}}) {
5767     if ($_->[1] eq 'p') {
5768 wakaba 1.79 !!!cp ('t382');
5769 wakaba 1.52 !!!back-token;
5770 wakaba 1.55 $token = {type => END_TAG_TOKEN, tag_name => 'p'};
5771 wakaba 1.53 redo B;
5772 wakaba 1.52 } elsif ({
5773     table => 1, caption => 1, td => 1, th => 1,
5774     button => 1, marquee => 1, object => 1, html => 1,
5775     }->{$_->[1]}) {
5776 wakaba 1.79 !!!cp ('t383');
5777 wakaba 1.52 last INSCOPE;
5778     }
5779     } # INSCOPE
5780    
5781     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
5782 wakaba 1.95 push @{$open_tables}, [$self->{open_elements}->[-1]->[0]];
5783    
5784 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5785 wakaba 1.52
5786     !!!next-token;
5787 wakaba 1.53 redo B;
5788 wakaba 1.52 } elsif ({
5789     area => 1, basefont => 1, bgsound => 1, br => 1,
5790     embed => 1, img => 1, param => 1, spacer => 1, wbr => 1,
5791     image => 1,
5792     }->{$token->{tag_name}}) {
5793     if ($token->{tag_name} eq 'image') {
5794 wakaba 1.79 !!!cp ('t384');
5795 wakaba 1.52 !!!parse-error (type => 'image');
5796     $token->{tag_name} = 'img';
5797 wakaba 1.79 } else {
5798     !!!cp ('t385');
5799 wakaba 1.52 }
5800 wakaba 1.1
5801 wakaba 1.52 ## NOTE: There is an "as if <br>" code clone.
5802     $reconstruct_active_formatting_elements->($insert_to_current);
5803    
5804     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
5805     pop @{$self->{open_elements}};
5806    
5807     !!!next-token;
5808 wakaba 1.53 redo B;
5809 wakaba 1.52 } elsif ($token->{tag_name} eq 'hr') {
5810     ## has a p element in scope
5811     INSCOPE: for (reverse @{$self->{open_elements}}) {
5812     if ($_->[1] eq 'p') {
5813 wakaba 1.79 !!!cp ('t386');
5814 wakaba 1.52 !!!back-token;
5815 wakaba 1.55 $token = {type => END_TAG_TOKEN, tag_name => 'p'};
5816 wakaba 1.53 redo B;
5817 wakaba 1.52 } elsif ({
5818     table => 1, caption => 1, td => 1, th => 1,
5819     button => 1, marquee => 1, object => 1, html => 1,
5820     }->{$_->[1]}) {
5821 wakaba 1.79 !!!cp ('t387');
5822 wakaba 1.52 last INSCOPE;
5823     }
5824     } # INSCOPE
5825    
5826     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
5827     pop @{$self->{open_elements}};
5828    
5829     !!!next-token;
5830 wakaba 1.53 redo B;
5831 wakaba 1.52 } elsif ($token->{tag_name} eq 'input') {
5832 wakaba 1.79 !!!cp ('t388');
5833 wakaba 1.52 $reconstruct_active_formatting_elements->($insert_to_current);
5834    
5835     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
5836     ## TODO: associate with $self->{form_element} if defined
5837     pop @{$self->{open_elements}};
5838    
5839     !!!next-token;
5840 wakaba 1.53 redo B;
5841 wakaba 1.52 } elsif ($token->{tag_name} eq 'isindex') {
5842     !!!parse-error (type => 'isindex');
5843    
5844     if (defined $self->{form_element}) {
5845 wakaba 1.79 !!!cp ('t389');
5846 wakaba 1.52 ## Ignore the token
5847     !!!next-token;
5848 wakaba 1.53 redo B;
5849 wakaba 1.52 } else {
5850     my $at = $token->{attributes};
5851     my $form_attrs;
5852     $form_attrs->{action} = $at->{action} if $at->{action};
5853     my $prompt_attr = $at->{prompt};
5854     $at->{name} = {name => 'name', value => 'isindex'};
5855     delete $at->{action};
5856     delete $at->{prompt};
5857     my @tokens = (
5858 wakaba 1.55 {type => START_TAG_TOKEN, tag_name => 'form',
5859 wakaba 1.52 attributes => $form_attrs},
5860 wakaba 1.55 {type => START_TAG_TOKEN, tag_name => 'hr'},
5861     {type => START_TAG_TOKEN, tag_name => 'p'},
5862     {type => START_TAG_TOKEN, tag_name => 'label'},
5863 wakaba 1.52 );
5864     if ($prompt_attr) {
5865 wakaba 1.79 !!!cp ('t390');
5866 wakaba 1.55 push @tokens, {type => CHARACTER_TOKEN, data => $prompt_attr->{value}};
5867 wakaba 1.1 } else {
5868 wakaba 1.79 !!!cp ('t391');
5869 wakaba 1.55 push @tokens, {type => CHARACTER_TOKEN,
5870 wakaba 1.52 data => 'This is a searchable index. Insert your search keywords here: '}; # SHOULD
5871     ## TODO: make this configurable
5872 wakaba 1.1 }
5873 wakaba 1.52 push @tokens,
5874 wakaba 1.55 {type => START_TAG_TOKEN, tag_name => 'input', attributes => $at},
5875     #{type => CHARACTER_TOKEN, data => ''}, # SHOULD
5876     {type => END_TAG_TOKEN, tag_name => 'label'},
5877     {type => END_TAG_TOKEN, tag_name => 'p'},
5878     {type => START_TAG_TOKEN, tag_name => 'hr'},
5879     {type => END_TAG_TOKEN, tag_name => 'form'};
5880 wakaba 1.52 $token = shift @tokens;
5881     !!!back-token (@tokens);
5882 wakaba 1.53 redo B;
5883 wakaba 1.52 }
5884     } elsif ($token->{tag_name} eq 'textarea') {
5885     my $tag_name = $token->{tag_name};
5886     my $el;
5887     !!!create-element ($el, $token->{tag_name}, $token->{attributes});
5888    
5889     ## TODO: $self->{form_element} if defined
5890     $self->{content_model} = RCDATA_CONTENT_MODEL;
5891     delete $self->{escape}; # MUST
5892    
5893     $insert->($el);
5894    
5895     my $text = '';
5896     !!!next-token;
5897 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
5898 wakaba 1.52 $token->{data} =~ s/^\x0A//;
5899 wakaba 1.51 unless (length $token->{data}) {
5900 wakaba 1.79 !!!cp ('t392');
5901 wakaba 1.51 !!!next-token;
5902 wakaba 1.79 } else {
5903     !!!cp ('t393');
5904 wakaba 1.51 }
5905 wakaba 1.79 } else {
5906     !!!cp ('t394');
5907 wakaba 1.51 }
5908 wakaba 1.55 while ($token->{type} == CHARACTER_TOKEN) {
5909 wakaba 1.79 !!!cp ('t395');
5910 wakaba 1.52 $text .= $token->{data};
5911     !!!next-token;
5912     }
5913     if (length $text) {
5914 wakaba 1.79 !!!cp ('t396');
5915 wakaba 1.52 $el->manakai_append_text ($text);
5916     }
5917    
5918     $self->{content_model} = PCDATA_CONTENT_MODEL;
5919 wakaba 1.51
5920 wakaba 1.55 if ($token->{type} == END_TAG_TOKEN and
5921 wakaba 1.52 $token->{tag_name} eq $tag_name) {
5922 wakaba 1.79 !!!cp ('t397');
5923 wakaba 1.52 ## Ignore the token
5924     } else {
5925 wakaba 1.79 !!!cp ('t398');
5926 wakaba 1.52 !!!parse-error (type => 'in RCDATA:#'.$token->{type});
5927 wakaba 1.51 }
5928 wakaba 1.52 !!!next-token;
5929 wakaba 1.53 redo B;
5930 wakaba 1.52 } elsif ({
5931     iframe => 1,
5932     noembed => 1,
5933     noframes => 1,
5934     noscript => 0, ## TODO: 1 if scripting is enabled
5935     }->{$token->{tag_name}}) {
5936 wakaba 1.79 !!!cp ('t399');
5937 wakaba 1.58 ## NOTE: There is an "as if in body" code clone.
5938 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
5939 wakaba 1.53 redo B;
5940 wakaba 1.52 } elsif ($token->{tag_name} eq 'select') {
5941 wakaba 1.79 !!!cp ('t400');
5942 wakaba 1.52 $reconstruct_active_formatting_elements->($insert_to_current);
5943    
5944     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
5945 wakaba 1.85
5946     ## TODO: associate with $self->{form_element} if defined
5947 wakaba 1.52
5948 wakaba 1.54 $self->{insertion_mode} = IN_SELECT_IM;
5949 wakaba 1.52 !!!next-token;
5950 wakaba 1.53 redo B;
5951 wakaba 1.52 } elsif ({
5952     caption => 1, col => 1, colgroup => 1, frame => 1,
5953     frameset => 1, head => 1, option => 1, optgroup => 1,
5954     tbody => 1, td => 1, tfoot => 1, th => 1,
5955     thead => 1, tr => 1,
5956     }->{$token->{tag_name}}) {
5957 wakaba 1.79 !!!cp ('t401');
5958 wakaba 1.52 !!!parse-error (type => 'in body:'.$token->{tag_name});
5959     ## Ignore the token
5960     !!!next-token;
5961 wakaba 1.53 redo B;
5962 wakaba 1.52
5963     ## ISSUE: An issue on HTML5 new elements in the spec.
5964     } else {
5965 wakaba 1.79 !!!cp ('t402');
5966 wakaba 1.52 $reconstruct_active_formatting_elements->($insert_to_current);
5967    
5968     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
5969 wakaba 1.51
5970 wakaba 1.52 !!!next-token;
5971 wakaba 1.53 redo B;
5972 wakaba 1.52 }
5973 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
5974 wakaba 1.52 if ($token->{tag_name} eq 'body') {
5975     if (@{$self->{open_elements}} > 1 and
5976     $self->{open_elements}->[1]->[1] eq 'body') {
5977     for (@{$self->{open_elements}}) {
5978     unless ({
5979     dd => 1, dt => 1, li => 1, p => 1, td => 1,
5980     th => 1, tr => 1, body => 1, html => 1,
5981     tbody => 1, tfoot => 1, thead => 1,
5982     }->{$_->[1]}) {
5983 wakaba 1.79 !!!cp ('t403');
5984 wakaba 1.52 !!!parse-error (type => 'not closed:'.$_->[1]);
5985 wakaba 1.79 } else {
5986     !!!cp ('t404');
5987 wakaba 1.52 }
5988     }
5989 wakaba 1.51
5990 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
5991 wakaba 1.52 !!!next-token;
5992 wakaba 1.53 redo B;
5993 wakaba 1.52 } else {
5994 wakaba 1.79 !!!cp ('t405');
5995 wakaba 1.52 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
5996     ## Ignore the token
5997     !!!next-token;
5998 wakaba 1.53 redo B;
5999 wakaba 1.51 }
6000 wakaba 1.52 } elsif ($token->{tag_name} eq 'html') {
6001     if (@{$self->{open_elements}} > 1 and $self->{open_elements}->[1]->[1] eq 'body') {
6002     ## ISSUE: There is an issue in the spec.
6003     if ($self->{open_elements}->[-1]->[1] ne 'body') {
6004 wakaba 1.79 !!!cp ('t406');
6005 wakaba 1.52 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[1]->[1]);
6006 wakaba 1.79 } else {
6007     !!!cp ('t407');
6008 wakaba 1.1 }
6009 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
6010 wakaba 1.52 ## reprocess
6011 wakaba 1.53 redo B;
6012 wakaba 1.51 } else {
6013 wakaba 1.79 !!!cp ('t408');
6014 wakaba 1.52 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
6015     ## Ignore the token
6016     !!!next-token;
6017 wakaba 1.53 redo B;
6018 wakaba 1.51 }
6019 wakaba 1.52 } elsif ({
6020     address => 1, blockquote => 1, center => 1, dir => 1,
6021     div => 1, dl => 1, fieldset => 1, listing => 1,
6022     menu => 1, ol => 1, pre => 1, ul => 1,
6023     dd => 1, dt => 1, li => 1,
6024     button => 1, marquee => 1, object => 1,
6025     }->{$token->{tag_name}}) {
6026     ## has an element in scope
6027     my $i;
6028     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6029     my $node = $self->{open_elements}->[$_];
6030     if ($node->[1] eq $token->{tag_name}) {
6031 wakaba 1.79 !!!cp ('t410');
6032 wakaba 1.52 $i = $_;
6033 wakaba 1.87 last INSCOPE;
6034 wakaba 1.52 } elsif ({
6035     table => 1, caption => 1, td => 1, th => 1,
6036     button => 1, marquee => 1, object => 1, html => 1,
6037     }->{$node->[1]}) {
6038 wakaba 1.79 !!!cp ('t411');
6039 wakaba 1.52 last INSCOPE;
6040 wakaba 1.51 }
6041 wakaba 1.52 } # INSCOPE
6042 wakaba 1.89
6043     unless (defined $i) { # has an element in scope
6044     !!!cp ('t413');
6045     !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
6046     } else {
6047     ## Step 1. generate implied end tags
6048     while ({
6049     dd => ($token->{tag_name} ne 'dd'),
6050     dt => ($token->{tag_name} ne 'dt'),
6051     li => ($token->{tag_name} ne 'li'),
6052     p => 1,
6053     }->{$self->{open_elements}->[-1]->[1]}) {
6054     !!!cp ('t409');
6055     pop @{$self->{open_elements}};
6056     }
6057    
6058     ## Step 2.
6059     if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
6060 wakaba 1.79 !!!cp ('t412');
6061 wakaba 1.52 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
6062 wakaba 1.51 } else {
6063 wakaba 1.89 !!!cp ('t414');
6064 wakaba 1.51 }
6065 wakaba 1.89
6066     ## Step 3.
6067 wakaba 1.52 splice @{$self->{open_elements}}, $i;
6068 wakaba 1.89
6069     ## Step 4.
6070     $clear_up_to_marker->()
6071     if {
6072     button => 1, marquee => 1, object => 1,
6073     }->{$token->{tag_name}};
6074 wakaba 1.51 }
6075 wakaba 1.52 !!!next-token;
6076 wakaba 1.53 redo B;
6077 wakaba 1.52 } elsif ($token->{tag_name} eq 'form') {
6078 wakaba 1.92 undef $self->{form_element};
6079    
6080 wakaba 1.52 ## has an element in scope
6081 wakaba 1.92 my $i;
6082 wakaba 1.52 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6083     my $node = $self->{open_elements}->[$_];
6084     if ($node->[1] eq $token->{tag_name}) {
6085 wakaba 1.79 !!!cp ('t418');
6086 wakaba 1.92 $i = $_;
6087 wakaba 1.52 last INSCOPE;
6088     } elsif ({
6089     table => 1, caption => 1, td => 1, th => 1,
6090     button => 1, marquee => 1, object => 1, html => 1,
6091     }->{$node->[1]}) {
6092 wakaba 1.79 !!!cp ('t419');
6093 wakaba 1.52 last INSCOPE;
6094     }
6095     } # INSCOPE
6096 wakaba 1.92
6097     unless (defined $i) { # has an element in scope
6098 wakaba 1.79 !!!cp ('t421');
6099 wakaba 1.58 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
6100 wakaba 1.92 } else {
6101     ## Step 1. generate implied end tags
6102     while ({
6103     dd => 1, dt => 1, li => 1, p => 1,
6104     }->{$self->{open_elements}->[-1]->[1]}) {
6105     !!!cp ('t417');
6106     pop @{$self->{open_elements}};
6107     }
6108    
6109     ## Step 2.
6110     if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
6111     !!!cp ('t417.1');
6112     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
6113     } else {
6114     !!!cp ('t420');
6115     }
6116    
6117     ## Step 3.
6118     splice @{$self->{open_elements}}, $i;
6119 wakaba 1.52 }
6120    
6121     !!!next-token;
6122 wakaba 1.53 redo B;
6123 wakaba 1.52 } elsif ({
6124     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
6125     }->{$token->{tag_name}}) {
6126     ## has an element in scope
6127     my $i;
6128     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6129     my $node = $self->{open_elements}->[$_];
6130     if ({
6131     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
6132     }->{$node->[1]}) {
6133 wakaba 1.79 !!!cp ('t423');
6134 wakaba 1.52 $i = $_;
6135     last INSCOPE;
6136     } elsif ({
6137     table => 1, caption => 1, td => 1, th => 1,
6138     button => 1, marquee => 1, object => 1, html => 1,
6139     }->{$node->[1]}) {
6140 wakaba 1.79 !!!cp ('t424');
6141 wakaba 1.52 last INSCOPE;
6142 wakaba 1.51 }
6143 wakaba 1.52 } # INSCOPE
6144 wakaba 1.93
6145     unless (defined $i) { # has an element in scope
6146     !!!cp ('t425.1');
6147 wakaba 1.58 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
6148 wakaba 1.79 } else {
6149 wakaba 1.93 ## Step 1. generate implied end tags
6150     while ({
6151     dd => 1, dt => 1, li => 1, p => 1,
6152     }->{$self->{open_elements}->[-1]->[1]}) {
6153     !!!cp ('t422');
6154     pop @{$self->{open_elements}};
6155     }
6156    
6157     ## Step 2.
6158     if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
6159     !!!cp ('t425');
6160     !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
6161     } else {
6162     !!!cp ('t426');
6163     }
6164    
6165     ## Step 3.
6166     splice @{$self->{open_elements}}, $i;
6167 wakaba 1.36 }
6168 wakaba 1.52
6169     !!!next-token;
6170 wakaba 1.53 redo B;
6171 wakaba 1.87 } elsif ($token->{tag_name} eq 'p') {
6172     ## has an element in scope
6173     my $i;
6174     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6175     my $node = $self->{open_elements}->[$_];
6176     if ($node->[1] eq $token->{tag_name}) {
6177     !!!cp ('t410.1');
6178     $i = $_;
6179 wakaba 1.88 last INSCOPE;
6180 wakaba 1.87 } elsif ({
6181     table => 1, caption => 1, td => 1, th => 1,
6182     button => 1, marquee => 1, object => 1, html => 1,
6183     }->{$node->[1]}) {
6184     !!!cp ('t411.1');
6185     last INSCOPE;
6186     }
6187     } # INSCOPE
6188 wakaba 1.91
6189     if (defined $i) {
6190     if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
6191 wakaba 1.87 !!!cp ('t412.1');
6192     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
6193     } else {
6194 wakaba 1.91 !!!cp ('t414.1');
6195 wakaba 1.87 }
6196 wakaba 1.91
6197 wakaba 1.87 splice @{$self->{open_elements}}, $i;
6198     } else {
6199 wakaba 1.91 !!!cp ('t413.1');
6200     !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
6201    
6202 wakaba 1.87 !!!cp ('t415.1');
6203     ## As if <p>, then reprocess the current token
6204     my $el;
6205     !!!create-element ($el, 'p');
6206     $insert->($el);
6207 wakaba 1.91 ## NOTE: Not inserted into |$self->{open_elements}|.
6208 wakaba 1.87 }
6209 wakaba 1.91
6210 wakaba 1.87 !!!next-token;
6211     redo B;
6212 wakaba 1.52 } elsif ({
6213     a => 1,
6214     b => 1, big => 1, em => 1, font => 1, i => 1,
6215     nobr => 1, s => 1, small => 1, strile => 1,
6216     strong => 1, tt => 1, u => 1,
6217     }->{$token->{tag_name}}) {
6218 wakaba 1.79 !!!cp ('t427');
6219 wakaba 1.52 $formatting_end_tag->($token->{tag_name});
6220 wakaba 1.53 redo B;
6221 wakaba 1.52 } elsif ($token->{tag_name} eq 'br') {
6222 wakaba 1.79 !!!cp ('t428');
6223 wakaba 1.52 !!!parse-error (type => 'unmatched end tag:br');
6224    
6225     ## As if <br>
6226     $reconstruct_active_formatting_elements->($insert_to_current);
6227    
6228     my $el;
6229     !!!create-element ($el, 'br');
6230     $insert->($el);
6231    
6232     ## Ignore the token.
6233     !!!next-token;
6234 wakaba 1.53 redo B;
6235 wakaba 1.52 } elsif ({
6236     caption => 1, col => 1, colgroup => 1, frame => 1,
6237     frameset => 1, head => 1, option => 1, optgroup => 1,
6238     tbody => 1, td => 1, tfoot => 1, th => 1,
6239     thead => 1, tr => 1,
6240     area => 1, basefont => 1, bgsound => 1,
6241     embed => 1, hr => 1, iframe => 1, image => 1,
6242     img => 1, input => 1, isindex => 1, noembed => 1,
6243     noframes => 1, param => 1, select => 1, spacer => 1,
6244     table => 1, textarea => 1, wbr => 1,
6245     noscript => 0, ## TODO: if scripting is enabled
6246     }->{$token->{tag_name}}) {
6247 wakaba 1.79 !!!cp ('t429');
6248 wakaba 1.52 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
6249     ## Ignore the token
6250     !!!next-token;
6251 wakaba 1.53 redo B;
6252 wakaba 1.52
6253     ## ISSUE: Issue on HTML5 new elements in spec
6254    
6255     } else {
6256     ## Step 1
6257     my $node_i = -1;
6258     my $node = $self->{open_elements}->[$node_i];
6259 wakaba 1.51
6260 wakaba 1.52 ## Step 2
6261     S2: {
6262     if ($node->[1] eq $token->{tag_name}) {
6263     ## Step 1
6264     ## generate implied end tags
6265 wakaba 1.86 while ({
6266     dd => 1, dt => 1, li => 1, p => 1,
6267     }->{$self->{open_elements}->[-1]->[1]}) {
6268 wakaba 1.79 !!!cp ('t430');
6269 wakaba 1.83 ## ISSUE: Can this case be reached?
6270 wakaba 1.86 pop @{$self->{open_elements}};
6271 wakaba 1.52 }
6272    
6273     ## Step 2
6274     if ($token->{tag_name} ne $self->{open_elements}->[-1]->[1]) {
6275 wakaba 1.79 !!!cp ('t431');
6276 wakaba 1.58 ## NOTE: <x><y></x>
6277 wakaba 1.52 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
6278 wakaba 1.79 } else {
6279     !!!cp ('t432');
6280 wakaba 1.52 }
6281    
6282     ## Step 3
6283     splice @{$self->{open_elements}}, $node_i;
6284 wakaba 1.51
6285 wakaba 1.1 !!!next-token;
6286 wakaba 1.52 last S2;
6287 wakaba 1.1 } else {
6288 wakaba 1.52 ## Step 3
6289     if (not $formatting_category->{$node->[1]} and
6290     #not $phrasing_category->{$node->[1]} and
6291     ($special_category->{$node->[1]} or
6292     $scoping_category->{$node->[1]})) {
6293 wakaba 1.79 !!!cp ('t433');
6294 wakaba 1.52 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
6295     ## Ignore the token
6296     !!!next-token;
6297     last S2;
6298     }
6299 wakaba 1.79
6300     !!!cp ('t434');
6301 wakaba 1.1 }
6302 wakaba 1.52
6303     ## Step 4
6304     $node_i--;
6305     $node = $self->{open_elements}->[$node_i];
6306    
6307     ## Step 5;
6308     redo S2;
6309     } # S2
6310 wakaba 1.53 redo B;
6311 wakaba 1.1 }
6312     }
6313 wakaba 1.52 redo B;
6314 wakaba 1.1 } # B
6315    
6316     ## Stop parsing # MUST
6317    
6318     ## TODO: script stuffs
6319 wakaba 1.3 } # _tree_construct_main
6320    
## set_inner_html ($class, $node, $string, $onerror)
##
## Replaces the children of |$node| with the result of parsing |$string|
## as HTML.
##
##   $class   - Parser class; used for |parse_string| and |new|.
##   $node    - Target DOM node.  Only node types 9 (document) and
##              1 (element) are supported; anything else |die|s.
##   $string  - Markup to parse.  It is accessed through a reference
##              to the caller's argument, so it is not copied.
##   $onerror - Optional code reference invoked for each parse error
##              with key/value pairs; in the element case |line| and
##              |column| are appended to the pairs.  When omitted in
##              the element case, errors are reported with |warn|.
##
## Returns whatever |parse_string| (document case) or
## |_terminate_tree_constructor| (element case) returns.
sub set_inner_html ($$$) {
  my $class = shift;
  my $node = shift;
  my ($s, $onerror) = (\$_[0], $_[1]);

  ## ISSUE: Should {confident} be true?

  my $nt = $node->node_type;
  die "$0: |set_inner_html| is not defined for node of type $nt"
      unless $nt == 9 or $nt == 1;

  if ($nt == 9) {
    # MUST

    ## Step 1 # MUST
    ## TODO: If the document has an active parser, ...
    ## ISSUE: There is an issue in the spec.

    ## Step 2 # MUST
    ## Work on a snapshot of the child list while removing from it.
    my @old_children = @{$node->child_nodes};
    $node->remove_child($_) for @old_children;

    ## Step 3, 4, 5 # MUST
    return $class->parse_string($$s, $node, $onerror);
  }

  ## From here on |$node| is an element ($nt == 1).

  ## TODO: If non-html element

  ## NOTE: Most of this code is copied from |parse_string|

  ## Step 1 # MUST
  ## Parse into a scratch HTML document created by the implementation
  ## of the document that owns |$node|.
  my $this_doc = $node->owner_document;
  my $doc = $this_doc->implementation->create_document;
  $doc->manakai_is_html(1);
  my $p = $class->new;
  $p->{document} = $doc;

  ## Step 8 # MUST
  ## Input reader: |$pos| is the offset of the next unread character of
  ## |$$s|; |$line| / |$column| are what the error handler reports.
  my ($pos, $line, $column) = (0, 1, 0);
  $p->{set_next_char} = sub {
    my $self = shift;

    ## Drop the oldest remembered character and remember the current
    ## one at the front of |prev_char|.
    pop @{$self->{prev_char}};
    unshift @{$self->{prev_char}}, $self->{next_char};

    ## End of input is signalled by -1.
    if ($pos >= length $$s) {
      $self->{next_char} = -1;
      return;
    }

    my $code = $self->{next_char} = ord substr $$s, $pos++, 1;
    $column++;

    if ($code == 0x000A) { # LF
      $line++;
      $column = 0;
      !!!cp ('i1');
    } elsif ($code == 0x000D) { # CR
      ## A CR, or a CR LF pair, is delivered as a single LF.
      $pos++ if substr ($$s, $pos, 1) eq "\x0A";
      $self->{next_char} = 0x000A; # LF # MUST
      $line++;
      $column = 0;
      !!!cp ('i2');
    } elsif ($code > 0x10FFFF) {
      $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
      !!!cp ('i3');
    } elsif ($code == 0x0000) { # NULL
      !!!cp ('i4');
      !!!parse-error (type => 'NULL');
      $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    }
  };
  $p->{prev_char} = [-1, -1, -1];
  $p->{next_char} = -1;

  ## Error reporting: use the caller's handler if one was given, else
  ## |warn|; either way the current line and column are appended.
  my $report = $onerror;
  $report ||= sub {
    my (%opt) = @_;
    warn "Parse error ($opt{type}) at line $opt{line} column $opt{column}\n";
  };
  $p->{parse_error} = sub {
    $report->(@_, line => $line, column => $column);
  };

  $p->_initialize_tokenizer;
  $p->_initialize_tree_constructor;

  ## Step 2
  ## The tokenizer's initial content model is chosen by the local name
  ## of the context element; names not listed get PCDATA.
  my $node_ln = $node->manakai_local_name;
  my %content_model_of = (
    title => RCDATA_CONTENT_MODEL,
    textarea => RCDATA_CONTENT_MODEL,
    style => CDATA_CONTENT_MODEL,
    script => CDATA_CONTENT_MODEL,
    xmp => CDATA_CONTENT_MODEL,
    iframe => CDATA_CONTENT_MODEL,
    noembed => CDATA_CONTENT_MODEL,
    noframes => CDATA_CONTENT_MODEL,
    noscript => CDATA_CONTENT_MODEL,
    plaintext => PLAINTEXT_CONTENT_MODEL,
  );
  my $content_model = $content_model_of{$node_ln};
  $content_model = PCDATA_CONTENT_MODEL unless defined $content_model;
  $p->{content_model} = $content_model;
  ## ISSUE: What is "the name of the element"? local name?

  $p->{inner_html_node} = [$node, $node_ln];

  ## Step 3
  my $xhtml_ns = 'http://www.w3.org/1999/xhtml';
  my $root = $doc->create_element_ns($xhtml_ns, [undef, 'html']);

  ## Step 4 # MUST
  $doc->append_child($root);

  ## Step 5 # MUST
  push @{$p->{open_elements}}, [$root, 'html'];

  $p->{head_element} = undef;

  ## Step 6 # MUST
  $p->_reset_insertion_mode;

  ## Step 7 # MUST
  ## The form element pointer becomes the nearest XHTML |form| found by
  ## walking from |$node| itself up through its ancestors, if any.
  my $ancestor = $node;
  ANCESTOR: while (defined $ancestor) {
    next ANCESTOR unless $ancestor->node_type == 1;

    my $nsuri = $ancestor->namespace_uri;
    next ANCESTOR unless defined $nsuri and $nsuri eq $xhtml_ns;
    next ANCESTOR unless $ancestor->manakai_local_name eq 'form';

    !!!cp ('i5');
    $p->{form_element} = $ancestor;
    last ANCESTOR;
  } continue {
    $ancestor = $ancestor->parent_node;
  } # ANCESTOR

  ## Step 9 # MUST
  ## The |!!!next-token| macro operates on |$self|, so alias the new
  ## parser under that name while fetching the first token.
  {
    my $self = $p;
    !!!next-token;
  }
  $p->_tree_construction_main;

  ## Step 10 # MUST
  my @replaced = @{$node->child_nodes};
  for my $child (@replaced) {
    $node->remove_child($child);
  }
  ## ISSUE: mutation events? read-only?

  ## Step 11 # MUST
  ## Move the parsed nodes out of the scratch document into |$node|.
  my @parsed = @{$root->child_nodes};
  for my $child (@parsed) {
    $this_doc->adopt_node($child);
    $node->append_child($child);
  }
  ## ISSUE: mutation events?

  return $p->_terminate_tree_constructor;
} # set_inner_html
6482    
6483     } # tree construction stage
6484 wakaba 1.1
## Whatpm::HTML::RestartParser - a subclass of |Error| (the module
## loaded at the top of this file).  NOTE(review): presumably thrown to
## request that parsing be restarted; confirm against the code that
## throws and catches it.
package Whatpm::HTML::RestartParser;
our @ISA;
push @ISA, 'Error';

1;
6489 wakaba 1.100 # $Date: 2008/03/09 03:46:43 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24