/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Contents of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.117 - (hide annotations) (download) (as text)
Wed Mar 19 23:43:47 2008 UTC (16 years, 7 months ago) by wakaba
Branch: MAIN
Changes since 1.116: +2 -32 lines
File MIME type: application/x-wais-source
++ whatpm/Whatpm/ChangeLog	19 Mar 2008 23:42:08 -0000
2008-03-20  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm.src (_get_next_token): Remove |first_start_tag|
	flag, which is no longer used.

1 wakaba 1.2 package Whatpm::HTML;
2 wakaba 1.1 use strict;
3 wakaba 1.117 our $VERSION=do{my @r=(q$Revision: 1.116 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.63 use Error qw(:try);
5 wakaba 1.1
6 wakaba 1.18 ## ISSUE:
7     ## var doc = implementation.createDocument (null, null, null);
8     ## doc.write ('');
9     ## alert (doc.compatMode);
10 wakaba 1.1
11 wakaba 1.70 ## TODO: Control characters and noncharacters are not allowed (HTML5 revision 1263)
12     ## TODO: 1252 parse error (revision 1264)
13     ## TODO: 8859-11 = 874 (revision 1271)
14    
## Start tags that may be written with a trailing "/" (e.g. |<br/>|).
## The tokenizer consults this table when it sees "/" followed by ">"
## in a start tag; names not listed here get a |nestc| parse error.
my $permitted_slash_tag_name = {
  map { ($_ => 1) } qw(
    base link meta hr br img
    embed param area col input
  )
};
28    
## Replacement code points for the code points 0x80..0x9F, keyed by the
## original code point.  The replacements are listed in ascending order
## of the code point they replace; U+FFFD (REPLACEMENT CHARACTER) marks
## positions that have no better replacement.
## NOTE(review): the values look like the Windows-1252 assignments and
## the table is presumably used for numeric character references; the
## code that reads it is outside this part of the file -- confirm.
my $c1_entity_char = do {
  my @replacement = (
    0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, # 0x80-0x87
    0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD, # 0x88-0x8F
    0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, # 0x90-0x97
    0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178, # 0x98-0x9F
  );
  my %char_for;
  @char_for{0x80 .. 0x9F} = @replacement;
  \%char_for;
}; # $c1_entity_char
63 wakaba 1.1
## Element names that belong to the "special" category.
my $special_category = {
  map { ($_ => 1) } qw(
    address area base basefont bgsound blockquote body br center col
    colgroup dd dir div dl dt embed fieldset form frame frameset
    h1 h2 h3 h4 h5 h6 head hr iframe image img input isindex li link
    listing menu meta noembed noframes noscript ol optgroup option p
    param plaintext pre script select spacer style tbody textarea
    tfoot thead title tr ul wbr
  )
};
## Element names that belong to the "scoping" category.
my $scoping_category = {
  map { ($_ => 1) } qw(
    applet button caption html marquee object
    table td th
  )
};
## Element names that belong to the "formatting" category.
## The previous table spelled |strike| as |strile|, so |strike| elements
## were never recognized as formatting elements.
my $formatting_category = {
  map { ($_ => 1) } qw(
    a b big em font i nobr
    s small strike strong tt u
  )
};
84     # $phrasing_category: all other elements
85    
## Parse a string of bytes as an HTML document.
##
## Arguments, in the order they are read from |@_|:
##   - the parser object, or a class name (a parser is then created
##     with |new|);
##   - the charset label used to decode the bytes, or |undef| to have
##     the encoding guessed from the first 1024 bytes;
##   - the byte string, or a reference to it;
##   - any further arguments, which are passed unchanged to
##     |parse_char_string| after the decoded string.
##
## Returns the value returned by |parse_char_string|.
##
## If the |change_encoding| hook installed here decides that another
## encoding must be used, it throws |Whatpm::HTML::RestartParser|; the
## bytes are then decoded again with that encoding and parsed once more.
sub parse_byte_string ($$$$;$) {
  my $self = ref $_[0] ? shift : shift->new;
  my $charset = shift;
  my $bytes_s = ref $_[0] ? $_[0] : \($_[0]);
  my $s;

  ## |Encode::decode| is called on every path below (explicit charset,
  ## guessed charset, and the restart handler), so load the module
  ## unconditionally instead of only in the explicit-charset branch.
  require Encode; ## TODO: decode(utf8) don't delete BOM

  if (defined $charset) {
    $s = \ (Encode::decode ($charset, $$bytes_s));
    $self->{input_encoding} = lc $charset; ## TODO: normalize name
    $self->{confident} = 1;
  } else {
    ## TODO: Implement HTML5 detection algorithm
    require Whatpm::Charset::UniversalCharDet;
    $charset = Whatpm::Charset::UniversalCharDet->detect_byte_string
        (substr ($$bytes_s, 0, 1024));
    $charset ||= 'windows-1252';
    $s = \ (Encode::decode ($charset, $$bytes_s));
    $self->{input_encoding} = $charset;
    $self->{confident} = 0;
  }

  $self->{change_encoding} = sub {
    my $self = shift;
    my $charset = lc shift;
    my $token = shift;
    ## TODO: if $charset is supported
    ## TODO: normalize charset name

    ## "Change the encoding" algorithm:

    ## Step 1
    if ($charset eq 'utf-16') { ## ISSUE: UTF-16BE -> UTF-8? UTF-16LE -> UTF-8?
      $charset = 'utf-8';
    }

    ## Step 2
    if (defined $self->{input_encoding} and
        $self->{input_encoding} eq $charset) {
      $self->{confident} = 1;
      return;
    }

    !!!parse-error (type => 'charset label detected:'.$self->{input_encoding}.
        ':'.$charset, level => 'w', token => $token);

    ## Step 3
    # if (can) {
      ## change the encoding on the fly.
      #$self->{confident} = 1;
      #return;
    # }

    ## Step 4
    throw Whatpm::HTML::RestartParser (charset => $charset);
  }; # $self->{change_encoding}

  ## Everything after the byte string is forwarded to the character
  ## string parser.
  my @args = @_; shift @args; # $s
  my $return;
  try {
    $return = $self->parse_char_string ($s, @args);
  } catch Whatpm::HTML::RestartParser with {
    ## Decode again with the requested encoding and parse from scratch.
    my $charset = shift->{charset};
    $s = \ (Encode::decode ($charset, $$bytes_s));
    $self->{input_encoding} = $charset; ## TODO: normalize
    $self->{confident} = 1;
    $return = $self->parse_char_string ($s, @args);
  };
  return $return;
} # parse_byte_string
156    
157 wakaba 1.71 ## NOTE: HTML5 spec says that the encoding layer MUST NOT strip the BOM
158     ## and the HTML layer MUST ignore it. However, we do strip the BOM in
159     ## the encoding layer, and the HTML layer does not ignore any U+FEFF,
160     ## because the core part of our HTML parser expects a string of characters,
161     ## not a string of bytes or code units or anything which might contain a BOM.
162     ## Therefore, any parser interface that accepts a string of bytes,
163     ## such as |parse_byte_string| in this module, must ensure that it does
164     ## strip the BOM and never strips any ZWNBSP.
165    
## |parse_char_string| is another name for |parse_string|.
*parse_char_string = \&parse_string;

## Parse a string of characters into a document.
##
## Arguments, in the order they are read from |@_|:
##   - the parser object, or a class name (a parser is then created
##     with |new|);
##   - the character string, or a reference to it;
##   - the document object to fill; its child nodes are removed first;
##   - optionally an error handler; it is called with a |key => value|
##     list that includes |line|, |column| and the parse error's own
##     arguments.  The default handler prints a warning.
##
## Returns the document object.
sub parse_string ($$$;$) {
  my $self = ref $_[0] ? shift : shift->new;
  my $s = ref $_[0] ? $_[0] : \($_[0]);
  $self->{document} = $_[1];
  @{$self->{document}->child_nodes} = ();

  ## NOTE: |set_inner_html| copies most of this method's code

  $self->{confident} = 1 unless exists $self->{confident};
  if (defined $self->{input_encoding}) {
    $self->{document}->input_encoding ($self->{input_encoding});
  }

  ## Index in |$$s| of the next character to hand out.
  my $pos = 0;
  $self->{line} = 1;
  $self->{line_prev} = 1;
  $self->{column} = 0;
  $self->{column_prev} = 0;
  $self->{prev_char} = [-1, -1, -1];
  $self->{next_char} = -1;

  $self->{set_next_char} = sub {
    my $self = shift;

    ## Keep the three most recently handed-out characters, newest first.
    pop @{$self->{prev_char}};
    unshift @{$self->{prev_char}}, $self->{next_char};

    ## End of input is reported as -1; the position is left untouched.
    if ($pos >= length $$s) {
      $self->{next_char} = -1;
      return;
    }
    $self->{next_char} = ord substr $$s, $pos++, 1;

    $self->{line_prev} = $self->{line};
    $self->{column_prev} = $self->{column};
    $self->{column}++;

    my $code = $self->{next_char};
    if ($code == 0x000D) { # CR
      ## CR and CR LF are both handed out as one LF.
      $pos++ if substr ($$s, $pos, 1) eq "\x0A";
      $self->{next_char} = 0x000A; # LF # MUST
    }

    if ($self->{next_char} == 0x000A) { # LF
      $self->{line}++;
      $self->{column} = 0;
    } elsif ($code > 0x10FFFF) {
      $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    } elsif ($code == 0x0000) { # NULL
      !!!parse-error (type => 'NULL');
      $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    }
  };

  ## Default error handler: warn, preferring the position recorded in
  ## the token (if one was given) over the current position.
  my $onerror = $_[2] || sub {
    my %opt = @_;
    my ($line, $column) = $opt{token}
        ? @{$opt{token}}{qw(line column)}
        : @opt{qw(line column)};
    warn "Parse error ($opt{type}) at line $line column $column\n";
  };
  $self->{parse_error} = sub {
    $onerror->(line => $self->{line}, column => $self->{column}, @_);
  };

  $self->_initialize_tokenizer;
  $self->_initialize_tree_constructor;
  $self->_construct_tree;
  $self->_terminate_tree_constructor;

  ## The |parse_error| closure refers to |$self|, which refers to the
  ## closure; drop it to break the cycle.
  delete $self->{parse_error}; # remove loop

  return $self->{document};
} # parse_string
233    
## Create a new parser object.
##
## The object is a blessed hash holding four default hooks:
##   |set_next_char|               - sets |next_char| to -1;
##   |parse_error|                 - does nothing;
##   |change_encoding|             - does nothing;
##   |application_cache_selection| - does nothing.
##
## Returns the new object.
sub new ($) {
  my $class = shift;
  my $self = bless {}, $class;
  $self->{set_next_char} = sub {
    ## Take the parser from the argument list, as the |set_next_char|
    ## hook installed by |parse_string| does, rather than closing over
    ## the outer |$self|: a closure over |$self| stored inside |$self|
    ## is a reference cycle, and the object would never be freed.
    ## NOTE(review): assumes every caller passes the parser as the
    ## first argument -- the call sites are generated by the |!!!|
    ## macros and are not visible here.
    my $self = shift;
    $self->{next_char} = -1;
  };
  $self->{parse_error} = sub {
    #
  };
  $self->{change_encoding} = sub {
    # if ($_[0] is a supported encoding) {
    #   run "change the encoding" algorithm;
    #   throw Whatpm::HTML::RestartParser (charset => $new_encoding);
    # }
  };
  $self->{application_cache_selection} = sub {
    #
  };
  return $self;
} # new
254    
## Content model flags: which kinds of markup are recognized in data.
use constant {
  CM_ENTITY         => 0b001, # & markup in data
  CM_LIMITED_MARKUP => 0b010, # < markup in data (limited)
  CM_FULL_MARKUP    => 0b100, # < markup in data (any)
};

## Content models, expressed as combinations of the flags above.
use constant {
  PLAINTEXT_CONTENT_MODEL => 0,
  CDATA_CONTENT_MODEL     => CM_LIMITED_MARKUP,
  RCDATA_CONTENT_MODEL    => CM_ENTITY | CM_LIMITED_MARKUP,
  PCDATA_CONTENT_MODEL    => CM_ENTITY | CM_FULL_MARKUP,
};
263    
## Tokenizer states; |$self->{state}| holds one of these values.
use constant {
  DATA_STATE                                    => 0,
  ENTITY_DATA_STATE                             => 1,
  TAG_OPEN_STATE                                => 2,
  CLOSE_TAG_OPEN_STATE                          => 3,
  TAG_NAME_STATE                                => 4,
  BEFORE_ATTRIBUTE_NAME_STATE                   => 5,
  ATTRIBUTE_NAME_STATE                          => 6,
  AFTER_ATTRIBUTE_NAME_STATE                    => 7,
  BEFORE_ATTRIBUTE_VALUE_STATE                  => 8,
  ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE           => 9,
  ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE           => 10,
  ATTRIBUTE_VALUE_UNQUOTED_STATE                => 11,
  ENTITY_IN_ATTRIBUTE_VALUE_STATE               => 12,
  MARKUP_DECLARATION_OPEN_STATE                 => 13,
  COMMENT_START_STATE                           => 14,
  COMMENT_START_DASH_STATE                      => 15,
  COMMENT_STATE                                 => 16,
  COMMENT_END_STATE                             => 17,
  COMMENT_END_DASH_STATE                        => 18,
  BOGUS_COMMENT_STATE                           => 19,
  DOCTYPE_STATE                                 => 20,
  BEFORE_DOCTYPE_NAME_STATE                     => 21,
  DOCTYPE_NAME_STATE                            => 22,
  AFTER_DOCTYPE_NAME_STATE                      => 23,
  BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE        => 24,
  DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE => 25,
  DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE => 26,
  AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE         => 27,
  BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE        => 28,
  DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE => 29,
  DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE => 30,
  AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE         => 31,
  BOGUS_DOCTYPE_STATE                           => 32,
  AFTER_ATTRIBUTE_VALUE_QUOTED_STATE            => 33,
};
298 wakaba 1.57
## Token types; the |type| member of a token holds one of these values.
use constant {
  DOCTYPE_TOKEN     => 1,
  COMMENT_TOKEN     => 2,
  START_TAG_TOKEN   => 3,
  END_TAG_TOKEN     => 4,
  END_OF_FILE_TOKEN => 5,
  CHARACTER_TOKEN   => 6,
};
305    
## Bit flags from which the insertion mode values are composed.
use constant {
  AFTER_HTML_IMS => 0b100,
  HEAD_IMS       => 0b1000,
  BODY_IMS       => 0b10000,
  BODY_TABLE_IMS => 0b100000,
  TABLE_IMS      => 0b1000000,
  ROW_IMS        => 0b10000000,
  BODY_AFTER_IMS => 0b100000000,
  FRAME_IMS      => 0b1000000000,
  SELECT_IMS     => 0b10000000000,
};

## NOTE: "initial" and "before html" insertion modes have no constants.

## Insertion modes: a combination of the flags above, plus low bits
## that tell apart the modes sharing the same flags.
use constant {
  ## NOTE: "after after body" insertion mode.
  AFTER_HTML_BODY_IM     => AFTER_HTML_IMS | BODY_AFTER_IMS,

  ## NOTE: "after after frameset" insertion mode.
  AFTER_HTML_FRAMESET_IM => AFTER_HTML_IMS | FRAME_IMS,

  IN_HEAD_IM             => HEAD_IMS | 0b00,
  IN_HEAD_NOSCRIPT_IM    => HEAD_IMS | 0b01,
  AFTER_HEAD_IM          => HEAD_IMS | 0b10,
  BEFORE_HEAD_IM         => HEAD_IMS | 0b11,
  IN_BODY_IM             => BODY_IMS,
  IN_CELL_IM             => BODY_IMS | BODY_TABLE_IMS | 0b01,
  IN_CAPTION_IM          => BODY_IMS | BODY_TABLE_IMS | 0b10,
  IN_ROW_IM              => TABLE_IMS | ROW_IMS | 0b01,
  IN_TABLE_BODY_IM       => TABLE_IMS | ROW_IMS | 0b10,
  IN_TABLE_IM            => TABLE_IMS,
  AFTER_BODY_IM          => BODY_AFTER_IMS,
  IN_FRAMESET_IM         => FRAME_IMS | 0b01,
  AFTER_FRAMESET_IM      => FRAME_IMS | 0b10,
  IN_SELECT_IM           => SELECT_IMS | 0b01,
  IN_SELECT_IN_TABLE_IM  => SELECT_IMS | 0b10,
  IN_COLUMN_GROUP_IM     => 0b10,
};
340    
341 wakaba 1.1 ## Implementations MUST act as if they used the state machine in the spec
342    
## Put the tokenizer into its starting condition: data state, PCDATA
## content model, no token or attribute under construction, and the
## first input character loaded.
sub _initialize_tokenizer ($) {
  my $self = shift;

  $self->{state} = DATA_STATE;
  $self->{content_model} = PCDATA_CONTENT_MODEL;

  ## Nothing is being built and nothing has been emitted yet.
  ## |current_token| is a start tag, end tag, comment, or DOCTYPE.
  @$self{qw(
    current_token
    current_attribute
    last_emitted_start_tag_name
    last_attribute_value_state
  )} = ();

  ## |char| must be emptied before the first character is fetched.
  ## NOTE(review): presumably |char| holds characters pushed back by
  ## |!!!back-next-input-character| -- the macro is defined elsewhere.
  $self->{char} = [];
  # $self->{next_char}
  !!!next-input-character;

  ## Tokens waiting to be returned by |_get_next_token|.
  $self->{token} = [];
  # $self->{escape}
} # _initialize_tokenizer
357    
358     ## A token has:
359 wakaba 1.55 ## ->{type} == DOCTYPE_TOKEN, START_TAG_TOKEN, END_TAG_TOKEN, COMMENT_TOKEN,
360     ## CHARACTER_TOKEN, or END_OF_FILE_TOKEN
361     ## ->{name} (DOCTYPE_TOKEN)
362     ## ->{tag_name} (START_TAG_TOKEN, END_TAG_TOKEN)
363     ## ->{public_identifier} (DOCTYPE_TOKEN)
364     ## ->{system_identifier} (DOCTYPE_TOKEN)
365 wakaba 1.75 ## ->{quirks} == 1 or 0 (DOCTYPE_TOKEN): "force-quirks" flag
366 wakaba 1.55 ## ->{attributes} isa HASH (START_TAG_TOKEN, END_TAG_TOKEN)
367 wakaba 1.66 ## ->{name}
368     ## ->{value}
369     ## ->{has_reference} == 1 or 0
370 wakaba 1.55 ## ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN)
371 wakaba 1.1
372     ## Emitted token MUST immediately be handled by the tree construction state.
373    
374     ## Before each step, UA MAY check to see if either one of the scripts in
375     ## "list of scripts that will execute as soon as possible" or the first
376     ## script in the "list of scripts that will execute asynchronously",
377     ## has completed loading. If one has, then it MUST be executed
378     ## and removed from the list.
379    
380 wakaba 1.59 ## NOTE: HTML5 "Writing HTML documents" section, applied to
381     ## documents and not to user agents and conformance checkers,
382     ## contains some requirements that are not detected by the
383     ## parsing algorithm:
384     ## - Some requirements on character encoding declarations. ## TODO
385     ## - "Elements MUST NOT contain content that their content model disallows."
386     ## ... Some are parse error, some are not (will be reported by c.c.).
387     ## - Polytheistic slash SHOULD NOT be used. (Applied only to atheists.) ## TODO
388     ## - Text (in elements, attributes, and comments) SHOULD NOT contain
389     ## control characters other than space characters. ## TODO: (what is control character? C0, C1 and DEL? Unicode control character?)
390    
391     ## TODO: HTML5 poses authors two SHOULD-level requirements that cannot
392     ## be detected by the HTML5 parsing algorithm:
393     ## - Text,
394    
395 wakaba 1.1 sub _get_next_token ($) {
396     my $self = shift;
397     if (@{$self->{token}}) {
398     return shift @{$self->{token}};
399     }
400    
401     A: {
402 wakaba 1.57 if ($self->{state} == DATA_STATE) {
403 wakaba 1.76 if ($self->{next_char} == 0x0026) { # &
404 wakaba 1.72 if ($self->{content_model} & CM_ENTITY and # PCDATA | RCDATA
405     not $self->{escape}) {
406 wakaba 1.77 !!!cp (1);
407 wakaba 1.57 $self->{state} = ENTITY_DATA_STATE;
408 wakaba 1.1 !!!next-input-character;
409     redo A;
410     } else {
411 wakaba 1.77 !!!cp (2);
412 wakaba 1.1 #
413     }
414 wakaba 1.76 } elsif ($self->{next_char} == 0x002D) { # -
415 wakaba 1.40 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
416 wakaba 1.13 unless ($self->{escape}) {
417 wakaba 1.76 if ($self->{prev_char}->[0] == 0x002D and # -
418     $self->{prev_char}->[1] == 0x0021 and # !
419     $self->{prev_char}->[2] == 0x003C) { # <
420 wakaba 1.77 !!!cp (3);
421 wakaba 1.13 $self->{escape} = 1;
422 wakaba 1.77 } else {
423     !!!cp (4);
424 wakaba 1.13 }
425 wakaba 1.77 } else {
426     !!!cp (5);
427 wakaba 1.13 }
428     }
429    
430     #
431 wakaba 1.76 } elsif ($self->{next_char} == 0x003C) { # <
432 wakaba 1.40 if ($self->{content_model} & CM_FULL_MARKUP or # PCDATA
433     (($self->{content_model} & CM_LIMITED_MARKUP) and # CDATA | RCDATA
434 wakaba 1.13 not $self->{escape})) {
435 wakaba 1.77 !!!cp (6);
436 wakaba 1.57 $self->{state} = TAG_OPEN_STATE;
437 wakaba 1.1 !!!next-input-character;
438     redo A;
439     } else {
440 wakaba 1.77 !!!cp (7);
441 wakaba 1.1 #
442     }
443 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
444 wakaba 1.13 if ($self->{escape} and
445 wakaba 1.40 ($self->{content_model} & CM_LIMITED_MARKUP)) { # RCDATA | CDATA
446 wakaba 1.76 if ($self->{prev_char}->[0] == 0x002D and # -
447     $self->{prev_char}->[1] == 0x002D) { # -
448 wakaba 1.77 !!!cp (8);
449 wakaba 1.13 delete $self->{escape};
450 wakaba 1.77 } else {
451     !!!cp (9);
452 wakaba 1.13 }
453 wakaba 1.77 } else {
454     !!!cp (10);
455 wakaba 1.13 }
456    
457     #
458 wakaba 1.76 } elsif ($self->{next_char} == -1) {
459 wakaba 1.77 !!!cp (11);
460 wakaba 1.112 !!!emit ({type => END_OF_FILE_TOKEN,
461     line => $self->{line}, column => $self->{column}});
462 wakaba 1.1 last A; ## TODO: ok?
463 wakaba 1.77 } else {
464     !!!cp (12);
465 wakaba 1.1 }
466     # Anything else
467 wakaba 1.55 my $token = {type => CHARACTER_TOKEN,
468 wakaba 1.112 data => chr $self->{next_char},
469     line => $self->{line}, column => $self->{column}};
470 wakaba 1.1 ## Stay in the data state
471     !!!next-input-character;
472    
473     !!!emit ($token);
474    
475     redo A;
476 wakaba 1.57 } elsif ($self->{state} == ENTITY_DATA_STATE) {
477 wakaba 1.1 ## (cannot happen in CDATA state)
478 wakaba 1.112
479     my ($l, $c) = ($self->{line_prev}, $self->{column_prev});
480 wakaba 1.1
481 wakaba 1.72 my $token = $self->_tokenize_attempt_to_consume_an_entity (0, -1);
482 wakaba 1.1
483 wakaba 1.57 $self->{state} = DATA_STATE;
484 wakaba 1.1 # next-input-character is already done
485    
486     unless (defined $token) {
487 wakaba 1.77 !!!cp (13);
488 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '&',
489     line => $l, column => $c});
490 wakaba 1.1 } else {
491 wakaba 1.77 !!!cp (14);
492 wakaba 1.1 !!!emit ($token);
493     }
494    
495     redo A;
496 wakaba 1.57 } elsif ($self->{state} == TAG_OPEN_STATE) {
497 wakaba 1.40 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
498 wakaba 1.76 if ($self->{next_char} == 0x002F) { # /
499 wakaba 1.77 !!!cp (15);
500 wakaba 1.1 !!!next-input-character;
501 wakaba 1.57 $self->{state} = CLOSE_TAG_OPEN_STATE;
502 wakaba 1.1 redo A;
503     } else {
504 wakaba 1.77 !!!cp (16);
505 wakaba 1.1 ## reconsume
506 wakaba 1.57 $self->{state} = DATA_STATE;
507 wakaba 1.1
508 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '<',
509     line => $self->{line_prev},
510     column => $self->{column_prev}});
511 wakaba 1.1
512     redo A;
513     }
514 wakaba 1.40 } elsif ($self->{content_model} & CM_FULL_MARKUP) { # PCDATA
515 wakaba 1.76 if ($self->{next_char} == 0x0021) { # !
516 wakaba 1.77 !!!cp (17);
517 wakaba 1.57 $self->{state} = MARKUP_DECLARATION_OPEN_STATE;
518 wakaba 1.1 !!!next-input-character;
519     redo A;
520 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
521 wakaba 1.77 !!!cp (18);
522 wakaba 1.57 $self->{state} = CLOSE_TAG_OPEN_STATE;
523 wakaba 1.1 !!!next-input-character;
524     redo A;
525 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
526     $self->{next_char} <= 0x005A) { # A..Z
527 wakaba 1.77 !!!cp (19);
528 wakaba 1.1 $self->{current_token}
529 wakaba 1.55 = {type => START_TAG_TOKEN,
530 wakaba 1.112 tag_name => chr ($self->{next_char} + 0x0020),
531     line => $self->{line_prev},
532     column => $self->{column_prev}};
533 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
534 wakaba 1.1 !!!next-input-character;
535     redo A;
536 wakaba 1.76 } elsif (0x0061 <= $self->{next_char} and
537     $self->{next_char} <= 0x007A) { # a..z
538 wakaba 1.77 !!!cp (20);
539 wakaba 1.55 $self->{current_token} = {type => START_TAG_TOKEN,
540 wakaba 1.112 tag_name => chr ($self->{next_char}),
541     line => $self->{line_prev},
542     column => $self->{column_prev}};
543 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
544 wakaba 1.1 !!!next-input-character;
545     redo A;
546 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
547 wakaba 1.77 !!!cp (21);
548 wakaba 1.115 !!!parse-error (type => 'empty start tag',
549     line => $self->{line_prev},
550     column => $self->{column_prev});
551 wakaba 1.57 $self->{state} = DATA_STATE;
552 wakaba 1.1 !!!next-input-character;
553    
554 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '<>',
555     line => $self->{line_prev},
556     column => $self->{column_prev}});
557 wakaba 1.1
558     redo A;
559 wakaba 1.76 } elsif ($self->{next_char} == 0x003F) { # ?
560 wakaba 1.77 !!!cp (22);
561 wakaba 1.115 !!!parse-error (type => 'pio',
562     line => $self->{line_prev},
563     column => $self->{column_prev});
564 wakaba 1.57 $self->{state} = BOGUS_COMMENT_STATE;
565 wakaba 1.112 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
566     line => $self->{line_prev},
567     column => $self->{column_prev}};
568 wakaba 1.76 ## $self->{next_char} is intentionally left as is
569 wakaba 1.1 redo A;
570     } else {
571 wakaba 1.77 !!!cp (23);
572 wakaba 1.3 !!!parse-error (type => 'bare stago');
573 wakaba 1.57 $self->{state} = DATA_STATE;
574 wakaba 1.1 ## reconsume
575    
576 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '<',
577     line => $self->{line_prev},
578     column => $self->{column_prev}});
579 wakaba 1.1
580     redo A;
581     }
582     } else {
583 wakaba 1.40 die "$0: $self->{content_model} in tag open";
584 wakaba 1.1 }
585 wakaba 1.57 } elsif ($self->{state} == CLOSE_TAG_OPEN_STATE) {
586 wakaba 1.113 my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</"
587 wakaba 1.40 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
588 wakaba 1.23 if (defined $self->{last_emitted_start_tag_name}) {
589 wakaba 1.112
590 wakaba 1.30 ## NOTE: <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>
591 wakaba 1.23 my @next_char;
592     TAGNAME: for (my $i = 0; $i < length $self->{last_emitted_start_tag_name}; $i++) {
593 wakaba 1.76 push @next_char, $self->{next_char};
594 wakaba 1.23 my $c = ord substr ($self->{last_emitted_start_tag_name}, $i, 1);
595     my $C = 0x0061 <= $c && $c <= 0x007A ? $c - 0x0020 : $c;
596 wakaba 1.76 if ($self->{next_char} == $c or $self->{next_char} == $C) {
597 wakaba 1.77 !!!cp (24);
598 wakaba 1.23 !!!next-input-character;
599     next TAGNAME;
600     } else {
601 wakaba 1.77 !!!cp (25);
602 wakaba 1.76 $self->{next_char} = shift @next_char; # reconsume
603 wakaba 1.23 !!!back-next-input-character (@next_char);
604 wakaba 1.57 $self->{state} = DATA_STATE;
605 wakaba 1.23
606 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '</',
607     line => $l, column => $c});
608 wakaba 1.23
609     redo A;
610     }
611     }
612 wakaba 1.76 push @next_char, $self->{next_char};
613 wakaba 1.23
614 wakaba 1.76 unless ($self->{next_char} == 0x0009 or # HT
615     $self->{next_char} == 0x000A or # LF
616     $self->{next_char} == 0x000B or # VT
617     $self->{next_char} == 0x000C or # FF
618     $self->{next_char} == 0x0020 or # SP
619     $self->{next_char} == 0x003E or # >
620     $self->{next_char} == 0x002F or # /
621     $self->{next_char} == -1) {
622 wakaba 1.77 !!!cp (26);
623 wakaba 1.76 $self->{next_char} = shift @next_char; # reconsume
624 wakaba 1.1 !!!back-next-input-character (@next_char);
625 wakaba 1.57 $self->{state} = DATA_STATE;
626 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '</',
627     line => $l, column => $c});
628 wakaba 1.1 redo A;
629 wakaba 1.23 } else {
630 wakaba 1.77 !!!cp (27);
631 wakaba 1.76 $self->{next_char} = shift @next_char;
632 wakaba 1.23 !!!back-next-input-character (@next_char);
633     # and consume...
634 wakaba 1.1 }
635 wakaba 1.23 } else {
636     ## No start tag token has ever been emitted
637 wakaba 1.77 !!!cp (28);
638 wakaba 1.23 # next-input-character is already done
639 wakaba 1.57 $self->{state} = DATA_STATE;
640 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '</',
641     line => $l, column => $c});
642 wakaba 1.1 redo A;
643     }
644     }
645    
646 wakaba 1.76 if (0x0041 <= $self->{next_char} and
647     $self->{next_char} <= 0x005A) { # A..Z
648 wakaba 1.77 !!!cp (29);
649 wakaba 1.112 $self->{current_token}
650     = {type => END_TAG_TOKEN,
651     tag_name => chr ($self->{next_char} + 0x0020),
652     line => $l, column => $c};
653 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
654 wakaba 1.1 !!!next-input-character;
655     redo A;
656 wakaba 1.76 } elsif (0x0061 <= $self->{next_char} and
657     $self->{next_char} <= 0x007A) { # a..z
658 wakaba 1.77 !!!cp (30);
659 wakaba 1.55 $self->{current_token} = {type => END_TAG_TOKEN,
660 wakaba 1.112 tag_name => chr ($self->{next_char}),
661     line => $l, column => $c};
662 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
663 wakaba 1.1 !!!next-input-character;
664     redo A;
665 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
666 wakaba 1.77 !!!cp (31);
667 wakaba 1.115 !!!parse-error (type => 'empty end tag',
668     line => $self->{line_prev}, ## "<" in "</>"
669     column => $self->{column_prev} - 1);
670 wakaba 1.57 $self->{state} = DATA_STATE;
671 wakaba 1.1 !!!next-input-character;
672     redo A;
673 wakaba 1.76 } elsif ($self->{next_char} == -1) {
674 wakaba 1.77 !!!cp (32);
675 wakaba 1.3 !!!parse-error (type => 'bare etago');
676 wakaba 1.57 $self->{state} = DATA_STATE;
677 wakaba 1.1 # reconsume
678    
679 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '</',
680     line => $l, column => $c});
681 wakaba 1.1
682     redo A;
683     } else {
684 wakaba 1.77 !!!cp (33);
685 wakaba 1.3 !!!parse-error (type => 'bogus end tag');
686 wakaba 1.57 $self->{state} = BOGUS_COMMENT_STATE;
687 wakaba 1.112 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
688     line => $self->{line_prev}, # "<" of "</"
689     column => $self->{column_prev} - 1};
690 wakaba 1.76 ## $self->{next_char} is intentionally left as is
691 wakaba 1.1 redo A;
692     }
693 wakaba 1.57 } elsif ($self->{state} == TAG_NAME_STATE) {
694 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
695     $self->{next_char} == 0x000A or # LF
696     $self->{next_char} == 0x000B or # VT
697     $self->{next_char} == 0x000C or # FF
698     $self->{next_char} == 0x0020) { # SP
699 wakaba 1.77 !!!cp (34);
700 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
701 wakaba 1.1 !!!next-input-character;
702     redo A;
703 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
704 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
705 wakaba 1.77 !!!cp (35);
706 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
707 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
708 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
709 wakaba 1.78 #if ($self->{current_token}->{attributes}) {
710     # ## NOTE: This should never be reached.
711     # !!! cp (36);
712     # !!! parse-error (type => 'end tag attribute');
713     #} else {
714 wakaba 1.77 !!!cp (37);
715 wakaba 1.78 #}
716 wakaba 1.1 } else {
717     die "$0: $self->{current_token}->{type}: Unknown token type";
718     }
719 wakaba 1.57 $self->{state} = DATA_STATE;
720 wakaba 1.1 !!!next-input-character;
721    
722     !!!emit ($self->{current_token}); # start tag or end tag
723    
724     redo A;
725 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
726     $self->{next_char} <= 0x005A) { # A..Z
727 wakaba 1.77 !!!cp (38);
728 wakaba 1.76 $self->{current_token}->{tag_name} .= chr ($self->{next_char} + 0x0020);
729 wakaba 1.1 # start tag or end tag
730     ## Stay in this state
731     !!!next-input-character;
732     redo A;
733 wakaba 1.76 } elsif ($self->{next_char} == -1) {
734 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
735 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
736 wakaba 1.77 !!!cp (39);
737 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
738 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
739 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
740 wakaba 1.78 #if ($self->{current_token}->{attributes}) {
741     # ## NOTE: This state should never be reached.
742     # !!! cp (40);
743     # !!! parse-error (type => 'end tag attribute');
744     #} else {
745 wakaba 1.77 !!!cp (41);
746 wakaba 1.78 #}
747 wakaba 1.1 } else {
748     die "$0: $self->{current_token}->{type}: Unknown token type";
749     }
750 wakaba 1.57 $self->{state} = DATA_STATE;
751 wakaba 1.1 # reconsume
752    
753     !!!emit ($self->{current_token}); # start tag or end tag
754    
755     redo A;
756 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
757 wakaba 1.1 !!!next-input-character;
758 wakaba 1.76 if ($self->{next_char} == 0x003E and # >
759 wakaba 1.55 $self->{current_token}->{type} == START_TAG_TOKEN and
760 wakaba 1.1 $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
761     # permitted slash
762 wakaba 1.77 !!!cp (42);
763 wakaba 1.1 #
764     } else {
765 wakaba 1.77 !!!cp (43);
766 wakaba 1.3 !!!parse-error (type => 'nestc');
767 wakaba 1.1 }
768 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
769 wakaba 1.1 # next-input-character is already done
770     redo A;
771     } else {
772 wakaba 1.77 !!!cp (44);
773 wakaba 1.76 $self->{current_token}->{tag_name} .= chr $self->{next_char};
774 wakaba 1.1 # start tag or end tag
775     ## Stay in the state
776     !!!next-input-character;
777     redo A;
778     }
779 wakaba 1.57 } elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) {
780 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
781     $self->{next_char} == 0x000A or # LF
782     $self->{next_char} == 0x000B or # VT
783     $self->{next_char} == 0x000C or # FF
784     $self->{next_char} == 0x0020) { # SP
785 wakaba 1.77 !!!cp (45);
786 wakaba 1.1 ## Stay in the state
787     !!!next-input-character;
788     redo A;
789 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
790 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
791 wakaba 1.77 !!!cp (46);
792 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
793 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
794 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
795 wakaba 1.1 if ($self->{current_token}->{attributes}) {
796 wakaba 1.77 !!!cp (47);
797 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
798 wakaba 1.77 } else {
799     !!!cp (48);
800 wakaba 1.1 }
801     } else {
802     die "$0: $self->{current_token}->{type}: Unknown token type";
803     }
804 wakaba 1.57 $self->{state} = DATA_STATE;
805 wakaba 1.1 !!!next-input-character;
806    
807     !!!emit ($self->{current_token}); # start tag or end tag
808    
809     redo A;
810 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
811     $self->{next_char} <= 0x005A) { # A..Z
812 wakaba 1.77 !!!cp (49);
813 wakaba 1.76 $self->{current_attribute} = {name => chr ($self->{next_char} + 0x0020),
814 wakaba 1.1 value => ''};
815 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
816 wakaba 1.1 !!!next-input-character;
817     redo A;
818 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
819 wakaba 1.1 !!!next-input-character;
820 wakaba 1.76 if ($self->{next_char} == 0x003E and # >
821 wakaba 1.55 $self->{current_token}->{type} == START_TAG_TOKEN and
822 wakaba 1.1 $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
823     # permitted slash
824 wakaba 1.77 !!!cp (50);
825 wakaba 1.1 #
826     } else {
827 wakaba 1.77 !!!cp (51);
828 wakaba 1.3 !!!parse-error (type => 'nestc');
829 wakaba 1.1 }
830     ## Stay in the state
831     # next-input-character is already done
832     redo A;
833 wakaba 1.76 } elsif ($self->{next_char} == -1) {
834 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
835 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
836 wakaba 1.77 !!!cp (52);
837 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
838 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
839 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
840 wakaba 1.1 if ($self->{current_token}->{attributes}) {
841 wakaba 1.77 !!!cp (53);
842 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
843 wakaba 1.77 } else {
844     !!!cp (54);
845 wakaba 1.1 }
846     } else {
847     die "$0: $self->{current_token}->{type}: Unknown token type";
848     }
849 wakaba 1.57 $self->{state} = DATA_STATE;
850 wakaba 1.1 # reconsume
851    
852     !!!emit ($self->{current_token}); # start tag or end tag
853    
854     redo A;
855     } else {
856 wakaba 1.72 if ({
857     0x0022 => 1, # "
858     0x0027 => 1, # '
859     0x003D => 1, # =
860 wakaba 1.76 }->{$self->{next_char}}) {
861 wakaba 1.77 !!!cp (55);
862 wakaba 1.72 !!!parse-error (type => 'bad attribute name');
863 wakaba 1.77 } else {
864     !!!cp (56);
865 wakaba 1.72 }
866 wakaba 1.76 $self->{current_attribute} = {name => chr ($self->{next_char}),
867 wakaba 1.1 value => ''};
868 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
869 wakaba 1.1 !!!next-input-character;
870     redo A;
871     }
872 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_NAME_STATE) {
## $before_leave: closure invoked by the branches below just before the
## tokenizer leaves the attribute name state.
##
## If the current token already has an attribute whose name equals
## |$self->{current_attribute}->{name}|, the |!!!parse-error| macro is
## invoked with type 'duplicate attribute:NAME' and the new attribute is
## not stored.  Otherwise |$self->{current_attribute}| is stored in the
## current token's |attributes| hash under its name.
##
## NOTE: The two-level |exists| test autovivifies
## |$self->{current_token}->{attributes}| as an empty hash reference if it
## did not exist yet (standard Perl behavior for nested lookups), so the
## |attributes| member is always a true value after this closure has run.
873 wakaba 1.1 my $before_leave = sub {
874     if (exists $self->{current_token}->{attributes} # start tag or end tag
875     ->{$self->{current_attribute}->{name}}) { # MUST
876 wakaba 1.77 !!!cp (57);
877 wakaba 1.39 !!!parse-error (type => 'duplicate attribute:'.$self->{current_attribute}->{name});
878 wakaba 1.1 ## Discard $self->{current_attribute} # MUST
879     } else {
880 wakaba 1.77 !!!cp (58);
881 wakaba 1.1 $self->{current_token}->{attributes}->{$self->{current_attribute}->{name}}
882     = $self->{current_attribute};
883     }
884     }; # $before_leave
885    
886 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
887     $self->{next_char} == 0x000A or # LF
888     $self->{next_char} == 0x000B or # VT
889     $self->{next_char} == 0x000C or # FF
890     $self->{next_char} == 0x0020) { # SP
891 wakaba 1.77 !!!cp (59);
892 wakaba 1.1 $before_leave->();
893 wakaba 1.57 $self->{state} = AFTER_ATTRIBUTE_NAME_STATE;
894 wakaba 1.1 !!!next-input-character;
895     redo A;
896 wakaba 1.76 } elsif ($self->{next_char} == 0x003D) { # =
897 wakaba 1.77 !!!cp (60);
898 wakaba 1.1 $before_leave->();
899 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_VALUE_STATE;
900 wakaba 1.1 !!!next-input-character;
901     redo A;
902 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
903 wakaba 1.1 $before_leave->();
904 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
905 wakaba 1.77 !!!cp (61);
906 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
907 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
908 wakaba 1.77 !!!cp (62);
909 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
910 wakaba 1.1 if ($self->{current_token}->{attributes}) {
911 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
912 wakaba 1.1 }
913     } else {
914     die "$0: $self->{current_token}->{type}: Unknown token type";
915     }
916 wakaba 1.57 $self->{state} = DATA_STATE;
917 wakaba 1.1 !!!next-input-character;
918    
919     !!!emit ($self->{current_token}); # start tag or end tag
920    
921     redo A;
922 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
923     $self->{next_char} <= 0x005A) { # A..Z
924 wakaba 1.77 !!!cp (63);
925 wakaba 1.76 $self->{current_attribute}->{name} .= chr ($self->{next_char} + 0x0020);
926 wakaba 1.1 ## Stay in the state
927     !!!next-input-character;
928     redo A;
929 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
930 wakaba 1.1 $before_leave->();
931     !!!next-input-character;
932 wakaba 1.76 if ($self->{next_char} == 0x003E and # >
933 wakaba 1.55 $self->{current_token}->{type} == START_TAG_TOKEN and
934 wakaba 1.1 $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
935     # permitted slash
936 wakaba 1.77 !!!cp (64);
937 wakaba 1.1 #
938     } else {
939 wakaba 1.77 !!!cp (65);
940 wakaba 1.3 !!!parse-error (type => 'nestc');
941 wakaba 1.1 }
942 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
943 wakaba 1.1 # next-input-character is already done
944     redo A;
945 wakaba 1.76 } elsif ($self->{next_char} == -1) {
946 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
947 wakaba 1.1 $before_leave->();
948 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
949 wakaba 1.77 !!!cp (66);
950 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
951 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
952 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
953 wakaba 1.1 if ($self->{current_token}->{attributes}) {
954 wakaba 1.77 !!!cp (67);
955 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
956 wakaba 1.77 } else {
957 wakaba 1.78 ## NOTE: This state should never be reached.
958 wakaba 1.77 !!!cp (68);
959 wakaba 1.1 }
960     } else {
961     die "$0: $self->{current_token}->{type}: Unknown token type";
962     }
963 wakaba 1.57 $self->{state} = DATA_STATE;
964 wakaba 1.1 # reconsume
965    
966     !!!emit ($self->{current_token}); # start tag or end tag
967    
968     redo A;
969     } else {
970 wakaba 1.76 if ($self->{next_char} == 0x0022 or # "
971     $self->{next_char} == 0x0027) { # '
972 wakaba 1.77 !!!cp (69);
973 wakaba 1.72 !!!parse-error (type => 'bad attribute name');
974 wakaba 1.77 } else {
975     !!!cp (70);
976 wakaba 1.72 }
977 wakaba 1.76 $self->{current_attribute}->{name} .= chr ($self->{next_char});
978 wakaba 1.1 ## Stay in the state
979     !!!next-input-character;
980     redo A;
981     }
982 wakaba 1.57 } elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) {
983 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
984     $self->{next_char} == 0x000A or # LF
985     $self->{next_char} == 0x000B or # VT
986     $self->{next_char} == 0x000C or # FF
987     $self->{next_char} == 0x0020) { # SP
988 wakaba 1.77 !!!cp (71);
989 wakaba 1.1 ## Stay in the state
990     !!!next-input-character;
991     redo A;
992 wakaba 1.76 } elsif ($self->{next_char} == 0x003D) { # =
993 wakaba 1.77 !!!cp (72);
994 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_VALUE_STATE;
995 wakaba 1.1 !!!next-input-character;
996     redo A;
997 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
998 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
999 wakaba 1.77 !!!cp (73);
1000 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1001 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1002 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1003 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1004 wakaba 1.77 !!!cp (74);
1005 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1006 wakaba 1.77 } else {
1007 wakaba 1.78 ## NOTE: This state should never be reached.
1008 wakaba 1.77 !!!cp (75);
1009 wakaba 1.1 }
1010     } else {
1011     die "$0: $self->{current_token}->{type}: Unknown token type";
1012     }
1013 wakaba 1.57 $self->{state} = DATA_STATE;
1014 wakaba 1.1 !!!next-input-character;
1015    
1016     !!!emit ($self->{current_token}); # start tag or end tag
1017    
1018     redo A;
1019 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
1020     $self->{next_char} <= 0x005A) { # A..Z
1021 wakaba 1.77 !!!cp (76);
1022 wakaba 1.76 $self->{current_attribute} = {name => chr ($self->{next_char} + 0x0020),
1023 wakaba 1.1 value => ''};
1024 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
1025 wakaba 1.1 !!!next-input-character;
1026     redo A;
1027 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1028 wakaba 1.1 !!!next-input-character;
1029 wakaba 1.76 if ($self->{next_char} == 0x003E and # >
1030 wakaba 1.55 $self->{current_token}->{type} == START_TAG_TOKEN and
1031 wakaba 1.1 $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
1032     # permitted slash
1033 wakaba 1.77 !!!cp (77);
1034 wakaba 1.1 #
1035     } else {
1036 wakaba 1.77 !!!cp (78);
1037 wakaba 1.3 !!!parse-error (type => 'nestc');
1038 wakaba 1.33 ## TODO: Different error type for <aa / bb> than <aa/>
1039 wakaba 1.1 }
1040 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1041 wakaba 1.1 # next-input-character is already done
1042     redo A;
1043 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1044 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1045 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1046 wakaba 1.77 !!!cp (79);
1047 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1048 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1049 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1050 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1051 wakaba 1.77 !!!cp (80);
1052 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1053 wakaba 1.77 } else {
1054 wakaba 1.78 ## NOTE: This state should never be reached.
1055 wakaba 1.77 !!!cp (81);
1056 wakaba 1.1 }
1057     } else {
1058     die "$0: $self->{current_token}->{type}: Unknown token type";
1059     }
1060 wakaba 1.57 $self->{state} = DATA_STATE;
1061 wakaba 1.1 # reconsume
1062    
1063     !!!emit ($self->{current_token}); # start tag or end tag
1064    
1065     redo A;
1066     } else {
1067 wakaba 1.77 !!!cp (82);
1068 wakaba 1.76 $self->{current_attribute} = {name => chr ($self->{next_char}),
1069 wakaba 1.1 value => ''};
1070 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
1071 wakaba 1.1 !!!next-input-character;
1072     redo A;
1073     }
1074 wakaba 1.57 } elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) {
1075 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1076     $self->{next_char} == 0x000A or # LF
1077     $self->{next_char} == 0x000B or # VT
1078     $self->{next_char} == 0x000C or # FF
1079     $self->{next_char} == 0x0020) { # SP
1080 wakaba 1.77 !!!cp (83);
1081 wakaba 1.1 ## Stay in the state
1082     !!!next-input-character;
1083     redo A;
1084 wakaba 1.76 } elsif ($self->{next_char} == 0x0022) { # "
1085 wakaba 1.77 !!!cp (84);
1086 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
1087 wakaba 1.1 !!!next-input-character;
1088     redo A;
1089 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1090 wakaba 1.77 !!!cp (85);
1091 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
1092 wakaba 1.1 ## reconsume
1093     redo A;
1094 wakaba 1.76 } elsif ($self->{next_char} == 0x0027) { # '
1095 wakaba 1.77 !!!cp (86);
1096 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
1097 wakaba 1.1 !!!next-input-character;
1098     redo A;
1099 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1100 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1101 wakaba 1.77 !!!cp (87);
1102 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1103 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1104 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1105 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1106 wakaba 1.77 !!!cp (88);
1107 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1108 wakaba 1.77 } else {
1109 wakaba 1.78 ## NOTE: This state should never be reached.
1110 wakaba 1.77 !!!cp (89);
1111 wakaba 1.1 }
1112     } else {
1113     die "$0: $self->{current_token}->{type}: Unknown token type";
1114     }
1115 wakaba 1.57 $self->{state} = DATA_STATE;
1116 wakaba 1.1 !!!next-input-character;
1117    
1118     !!!emit ($self->{current_token}); # start tag or end tag
1119    
1120     redo A;
1121 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1122 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1123 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1124 wakaba 1.77 !!!cp (90);
1125 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1126 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1127 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1128 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1129 wakaba 1.77 !!!cp (91);
1130 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1131 wakaba 1.77 } else {
1132 wakaba 1.78 ## NOTE: This state should never be reached.
1133 wakaba 1.77 !!!cp (92);
1134 wakaba 1.1 }
1135     } else {
1136     die "$0: $self->{current_token}->{type}: Unknown token type";
1137     }
1138 wakaba 1.57 $self->{state} = DATA_STATE;
1139 wakaba 1.1 ## reconsume
1140    
1141     !!!emit ($self->{current_token}); # start tag or end tag
1142    
1143     redo A;
1144     } else {
1145 wakaba 1.76 if ($self->{next_char} == 0x003D) { # =
1146 wakaba 1.77 !!!cp (93);
1147 wakaba 1.72 !!!parse-error (type => 'bad attribute value');
1148 wakaba 1.77 } else {
1149     !!!cp (94);
1150 wakaba 1.72 }
1151 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1152 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
1153 wakaba 1.1 !!!next-input-character;
1154     redo A;
1155     }
1156 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {
1157 wakaba 1.76 if ($self->{next_char} == 0x0022) { # "
1158 wakaba 1.77 !!!cp (95);
1159 wakaba 1.72 $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1160 wakaba 1.1 !!!next-input-character;
1161     redo A;
1162 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1163 wakaba 1.77 !!!cp (96);
1164 wakaba 1.57 $self->{last_attribute_value_state} = $self->{state};
1165     $self->{state} = ENTITY_IN_ATTRIBUTE_VALUE_STATE;
1166 wakaba 1.1 !!!next-input-character;
1167     redo A;
1168 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1169 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
1170 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1171 wakaba 1.77 !!!cp (97);
1172 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1173 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1174 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1175 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1176 wakaba 1.77 !!!cp (98);
1177 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1178 wakaba 1.77 } else {
1179 wakaba 1.78 ## NOTE: This state should never be reached.
1180 wakaba 1.77 !!!cp (99);
1181 wakaba 1.1 }
1182     } else {
1183     die "$0: $self->{current_token}->{type}: Unknown token type";
1184     }
1185 wakaba 1.57 $self->{state} = DATA_STATE;
1186 wakaba 1.1 ## reconsume
1187    
1188     !!!emit ($self->{current_token}); # start tag or end tag
1189    
1190     redo A;
1191     } else {
1192 wakaba 1.77 !!!cp (100);
1193 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1194 wakaba 1.1 ## Stay in the state
1195     !!!next-input-character;
1196     redo A;
1197     }
1198 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {
1199 wakaba 1.76 if ($self->{next_char} == 0x0027) { # '
1200 wakaba 1.77 !!!cp (101);
1201 wakaba 1.72 $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1202 wakaba 1.1 !!!next-input-character;
1203     redo A;
1204 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1205 wakaba 1.77 !!!cp (102);
1206 wakaba 1.57 $self->{last_attribute_value_state} = $self->{state};
1207     $self->{state} = ENTITY_IN_ATTRIBUTE_VALUE_STATE;
1208 wakaba 1.1 !!!next-input-character;
1209     redo A;
1210 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1211 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
1212 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1213 wakaba 1.77 !!!cp (103);
1214 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1215 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1216 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1217 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1218 wakaba 1.77 !!!cp (104);
1219 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1220 wakaba 1.77 } else {
1221 wakaba 1.78 ## NOTE: This state should never be reached.
1222 wakaba 1.77 !!!cp (105);
1223 wakaba 1.1 }
1224     } else {
1225     die "$0: $self->{current_token}->{type}: Unknown token type";
1226     }
1227 wakaba 1.57 $self->{state} = DATA_STATE;
1228 wakaba 1.1 ## reconsume
1229    
1230     !!!emit ($self->{current_token}); # start tag or end tag
1231    
1232     redo A;
1233     } else {
1234 wakaba 1.77 !!!cp (106);
1235 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1236 wakaba 1.1 ## Stay in the state
1237     !!!next-input-character;
1238     redo A;
1239     }
1240 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) {
    ## Unquoted attribute value.  Summary of the branches below:
    ##   - HT/LF/VT/FF/SP: switch to BEFORE_ATTRIBUTE_NAME_STATE.
    ##   - "&": remember this state in |last_attribute_value_state| and
    ##     switch to ENTITY_IN_ATTRIBUTE_VALUE_STATE.
    ##   - ">": emit the current tag token and switch to DATA_STATE.
    ##   - -1 (presumably the end-of-input marker; confirm against the
    ##     input reader): 'unclosed tag' parse error, emit the current
    ##     tag token and switch to DATA_STATE without consuming.
    ##   - anything else: append the character to the attribute value;
    ##     |"|, |'| and |=| additionally raise 'bad attribute value'.
1241 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1242     $self->{next_char} == 0x000A or # LF
1243     $self->{next_char} == 0x000B or # VT
1244     $self->{next_char} == 0x000C or # FF
1245     $self->{next_char} == 0x0020) { # SP
1246 wakaba 1.77 !!!cp (107);
1247 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1248 wakaba 1.1 !!!next-input-character;
1249     redo A;
1250 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1251 wakaba 1.77 !!!cp (108);
1252 wakaba 1.57 $self->{last_attribute_value_state} = $self->{state};
1253     $self->{state} = ENTITY_IN_ATTRIBUTE_VALUE_STATE;
1254 wakaba 1.1 !!!next-input-character;
1255     redo A;
1256 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1257 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1258 wakaba 1.77 !!!cp (109);
1259 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1260 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1261 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1262 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1263 wakaba 1.77 !!!cp (110);
1264 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1265 wakaba 1.77 } else {
1266 wakaba 1.78 ## NOTE: This state should never be reached.
1267 wakaba 1.77 !!!cp (111);
1268 wakaba 1.1 }
1269     } else {
1270     die "$0: $self->{current_token}->{type}: Unknown token type";
1271     }
1272 wakaba 1.57 $self->{state} = DATA_STATE;
1273 wakaba 1.1 !!!next-input-character;
1274    
1275     !!!emit ($self->{current_token}); # start tag or end tag
1276    
1277     redo A;
1278 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1279 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1280 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1281 wakaba 1.77 !!!cp (112);
1282 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1283 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1284 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1285 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1286 wakaba 1.77 !!!cp (113);
1287 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1288 wakaba 1.77 } else {
1289 wakaba 1.78 ## NOTE: This state should never be reached.
1290 wakaba 1.77 !!!cp (114);
1291 wakaba 1.1 }
1292     } else {
1293     die "$0: $self->{current_token}->{type}: Unknown token type";
1294     }
1295 wakaba 1.57 $self->{state} = DATA_STATE;
1296 wakaba 1.1 ## reconsume
1297    
1298     !!!emit ($self->{current_token}); # start tag or end tag
1299    
1300     redo A;
1301     } else {
1302 wakaba 1.72 if ({
1303     0x0022 => 1, # "
1304     0x0027 => 1, # '
1305     0x003D => 1, # =
1306 wakaba 1.76 }->{$self->{next_char}}) {
1307 wakaba 1.77 !!!cp (115);
1308 wakaba 1.72 !!!parse-error (type => 'bad attribute value');
1309 wakaba 1.77 } else {
1310     !!!cp (116);
1311 wakaba 1.72 }
1312 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1313 wakaba 1.1 ## Stay in the state
1314     !!!next-input-character;
1315     redo A;
1316     }
1317 wakaba 1.57 } elsif ($self->{state} == ENTITY_IN_ATTRIBUTE_VALUE_STATE) {
1318 wakaba 1.72 my $token = $self->_tokenize_attempt_to_consume_an_entity
1319     (1,
1320     $self->{last_attribute_value_state}
1321     == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE ? 0x0022 : # "
1322     $self->{last_attribute_value_state}
1323     == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE ? 0x0027 : # '
1324     -1);
1325 wakaba 1.1
1326     unless (defined $token) {
1327 wakaba 1.77 !!!cp (117);
1328 wakaba 1.1 $self->{current_attribute}->{value} .= '&';
1329     } else {
1330 wakaba 1.77 !!!cp (118);
1331 wakaba 1.1 $self->{current_attribute}->{value} .= $token->{data};
1332 wakaba 1.66 $self->{current_attribute}->{has_reference} = $token->{has_reference};
1333 wakaba 1.1 ## ISSUE: spec says "append the returned character token to the current attribute's value"
1334     }
1335    
1336     $self->{state} = $self->{last_attribute_value_state};
1337     # next-input-character is already done
1338     redo A;
1339 wakaba 1.72 } elsif ($self->{state} == AFTER_ATTRIBUTE_VALUE_QUOTED_STATE) {
    ## Entered from the double- and single-quoted attribute value states
    ## once the closing quote has been seen.  Whitespace or "/" leads to
    ## BEFORE_ATTRIBUTE_NAME_STATE, ">" emits the current tag token, and
    ## any other character raises 'no space between attributes' and is
    ## reprocessed in BEFORE_ATTRIBUTE_NAME_STATE.
1340 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1341     $self->{next_char} == 0x000A or # LF
1342     $self->{next_char} == 0x000B or # VT
1343     $self->{next_char} == 0x000C or # FF
1344     $self->{next_char} == 0x0020) { # SP
    ## NOTE(review): Checkpoint number 118 is also used in the
    ## ENTITY_IN_ATTRIBUTE_VALUE_STATE branch (the "defined $token"
    ## case).  Confirm whether checkpoint numbers are meant to be
    ## unique; if so, one of the two needs a fresh number.
1345 wakaba 1.77 !!!cp (118);
1346 wakaba 1.72 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1347     !!!next-input-character;
1348     redo A;
1349 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1350 wakaba 1.72 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1351 wakaba 1.77 !!!cp (119);
1352 wakaba 1.72 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1353     } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1354     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1355     if ($self->{current_token}->{attributes}) {
1356 wakaba 1.77 !!!cp (120);
1357 wakaba 1.72 !!!parse-error (type => 'end tag attribute');
1358 wakaba 1.77 } else {
1359 wakaba 1.78 ## NOTE: This state should never be reached.
1360 wakaba 1.77 !!!cp (121);
1361 wakaba 1.72 }
1362     } else {
1363     die "$0: $self->{current_token}->{type}: Unknown token type";
1364     }
1365     $self->{state} = DATA_STATE;
1366     !!!next-input-character;
1367    
1368     !!!emit ($self->{current_token}); # start tag or end tag
1369    
1370     redo A;
1371 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1372 wakaba 1.72 !!!next-input-character;
1373 wakaba 1.76 if ($self->{next_char} == 0x003E and # >
1374 wakaba 1.72 $self->{current_token}->{type} == START_TAG_TOKEN and
1375     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
1376     # permitted slash
1377 wakaba 1.77 !!!cp (122);
1378 wakaba 1.72 #
1379     } else {
1380 wakaba 1.77 !!!cp (123);
1381 wakaba 1.72 !!!parse-error (type => 'nestc');
1382     }
1383     $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1384     # next-input-character is already done
1385     redo A;
1386     } else {
    ## NOTE(review): Checkpoint number 124 is also used in the
    ## BOGUS_COMMENT_STATE branch (the ">" case).  Confirm whether
    ## checkpoint numbers are meant to be unique; if so, one of the
    ## two needs a fresh number.
1387 wakaba 1.77 !!!cp (124);
1388 wakaba 1.72 !!!parse-error (type => 'no space between attributes');
1389     $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1390     ## reconsume
1391     redo A;
1392     }
1393 wakaba 1.57 } elsif ($self->{state} == BOGUS_COMMENT_STATE) {
1394 wakaba 1.1 ## (only happen if PCDATA state)
1395    
1396 wakaba 1.112 ## NOTE: Set by the previous state
1397     #my $token = {type => COMMENT_TOKEN, data => ''};
1398 wakaba 1.1
1399     BC: {
1400 wakaba 1.76 if ($self->{next_char} == 0x003E) { # >
1401 wakaba 1.77 !!!cp (124);
1402 wakaba 1.57 $self->{state} = DATA_STATE;
1403 wakaba 1.1 !!!next-input-character;
1404    
1405 wakaba 1.112 !!!emit ($self->{current_token}); # comment
1406 wakaba 1.1
1407     redo A;
1408 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1409 wakaba 1.77 !!!cp (125);
1410 wakaba 1.57 $self->{state} = DATA_STATE;
1411 wakaba 1.1 ## reconsume
1412    
1413 wakaba 1.112 !!!emit ($self->{current_token}); # comment
1414 wakaba 1.1
1415     redo A;
1416     } else {
1417 wakaba 1.77 !!!cp (126);
1418 wakaba 1.112 $self->{current_token}->{data} .= chr ($self->{next_char}); # comment
1419 wakaba 1.1 !!!next-input-character;
1420     redo BC;
1421     }
1422     } # BC
1423 wakaba 1.77
1424     die "$0: _get_next_token: unexpected case [BC]";
1425 wakaba 1.57 } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {
1426 wakaba 1.1 ## (only happen if PCDATA state)
1427    
1428 wakaba 1.112 my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1);
1429    
1430 wakaba 1.1 my @next_char;
1431 wakaba 1.76 push @next_char, $self->{next_char};
1432 wakaba 1.1
1433 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
1434 wakaba 1.1 !!!next-input-character;
1435 wakaba 1.76 push @next_char, $self->{next_char};
1436     if ($self->{next_char} == 0x002D) { # -
1437 wakaba 1.77 !!!cp (127);
1438 wakaba 1.112 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
1439     line => $l, column => $c};
1440 wakaba 1.57 $self->{state} = COMMENT_START_STATE;
1441 wakaba 1.1 !!!next-input-character;
1442     redo A;
1443 wakaba 1.77 } else {
1444     !!!cp (128);
1445 wakaba 1.1 }
1446 wakaba 1.76 } elsif ($self->{next_char} == 0x0044 or # D
1447     $self->{next_char} == 0x0064) { # d
1448 wakaba 1.1 !!!next-input-character;
1449 wakaba 1.76 push @next_char, $self->{next_char};
1450     if ($self->{next_char} == 0x004F or # O
1451     $self->{next_char} == 0x006F) { # o
1452 wakaba 1.1 !!!next-input-character;
1453 wakaba 1.76 push @next_char, $self->{next_char};
1454     if ($self->{next_char} == 0x0043 or # C
1455     $self->{next_char} == 0x0063) { # c
1456 wakaba 1.1 !!!next-input-character;
1457 wakaba 1.76 push @next_char, $self->{next_char};
1458     if ($self->{next_char} == 0x0054 or # T
1459     $self->{next_char} == 0x0074) { # t
1460 wakaba 1.1 !!!next-input-character;
1461 wakaba 1.76 push @next_char, $self->{next_char};
1462     if ($self->{next_char} == 0x0059 or # Y
1463     $self->{next_char} == 0x0079) { # y
1464 wakaba 1.1 !!!next-input-character;
1465 wakaba 1.76 push @next_char, $self->{next_char};
1466     if ($self->{next_char} == 0x0050 or # P
1467     $self->{next_char} == 0x0070) { # p
1468 wakaba 1.1 !!!next-input-character;
1469 wakaba 1.76 push @next_char, $self->{next_char};
1470     if ($self->{next_char} == 0x0045 or # E
1471     $self->{next_char} == 0x0065) { # e
1472 wakaba 1.77 !!!cp (129);
1473     ## TODO: What a stupid code this is!
1474 wakaba 1.57 $self->{state} = DOCTYPE_STATE;
1475 wakaba 1.112 $self->{current_token} = {type => DOCTYPE_TOKEN,
1476     quirks => 1,
1477     line => $l, column => $c};
1478 wakaba 1.1 !!!next-input-character;
1479     redo A;
1480 wakaba 1.77 } else {
1481     !!!cp (130);
1482 wakaba 1.1 }
1483 wakaba 1.77 } else {
1484     !!!cp (131);
1485 wakaba 1.1 }
1486 wakaba 1.77 } else {
1487     !!!cp (132);
1488 wakaba 1.1 }
1489 wakaba 1.77 } else {
1490     !!!cp (133);
1491 wakaba 1.1 }
1492 wakaba 1.77 } else {
1493     !!!cp (134);
1494 wakaba 1.1 }
1495 wakaba 1.77 } else {
1496     !!!cp (135);
1497 wakaba 1.1 }
1498 wakaba 1.77 } else {
1499     !!!cp (136);
1500 wakaba 1.1 }
1501    
1502 wakaba 1.30 !!!parse-error (type => 'bogus comment');
1503 wakaba 1.76 $self->{next_char} = shift @next_char;
1504 wakaba 1.1 !!!back-next-input-character (@next_char);
1505 wakaba 1.57 $self->{state} = BOGUS_COMMENT_STATE;
1506 wakaba 1.112 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
1507     line => $l, column => $c};
1508 wakaba 1.1 redo A;
1509    
1510     ## ISSUE: typos in spec: chacacters, is is a parse error
1511     ## ISSUE: spec is somewhat unclear on "is the first character that will be in the comment"; what is "that will be in the comment" is what the algorithm defines, isn't it?
1512 wakaba 1.57 } elsif ($self->{state} == COMMENT_START_STATE) {
1513 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
1514 wakaba 1.77 !!!cp (137);
1515 wakaba 1.57 $self->{state} = COMMENT_START_DASH_STATE;
1516 wakaba 1.23 !!!next-input-character;
1517     redo A;
1518 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1519 wakaba 1.77 !!!cp (138);
1520 wakaba 1.23 !!!parse-error (type => 'bogus comment');
1521 wakaba 1.57 $self->{state} = DATA_STATE;
1522 wakaba 1.23 !!!next-input-character;
1523    
1524     !!!emit ($self->{current_token}); # comment
1525    
1526     redo A;
1527 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1528 wakaba 1.77 !!!cp (139);
1529 wakaba 1.23 !!!parse-error (type => 'unclosed comment');
1530 wakaba 1.57 $self->{state} = DATA_STATE;
1531 wakaba 1.23 ## reconsume
1532    
1533     !!!emit ($self->{current_token}); # comment
1534    
1535     redo A;
1536     } else {
1537 wakaba 1.77 !!!cp (140);
1538 wakaba 1.23 $self->{current_token}->{data} # comment
1539 wakaba 1.76 .= chr ($self->{next_char});
1540 wakaba 1.57 $self->{state} = COMMENT_STATE;
1541 wakaba 1.23 !!!next-input-character;
1542     redo A;
1543     }
1544 wakaba 1.57 } elsif ($self->{state} == COMMENT_START_DASH_STATE) {
1545 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
1546 wakaba 1.77 !!!cp (141);
1547 wakaba 1.57 $self->{state} = COMMENT_END_STATE;
1548 wakaba 1.23 !!!next-input-character;
1549     redo A;
1550 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1551 wakaba 1.77 !!!cp (142);
1552 wakaba 1.23 !!!parse-error (type => 'bogus comment');
1553 wakaba 1.57 $self->{state} = DATA_STATE;
1554 wakaba 1.23 !!!next-input-character;
1555    
1556     !!!emit ($self->{current_token}); # comment
1557    
1558     redo A;
1559 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1560 wakaba 1.77 !!!cp (143);
1561 wakaba 1.23 !!!parse-error (type => 'unclosed comment');
1562 wakaba 1.57 $self->{state} = DATA_STATE;
1563 wakaba 1.23 ## reconsume
1564    
1565     !!!emit ($self->{current_token}); # comment
1566    
1567     redo A;
1568     } else {
1569 wakaba 1.77 !!!cp (144);
1570 wakaba 1.23 $self->{current_token}->{data} # comment
1571 wakaba 1.76 .= '-' . chr ($self->{next_char});
1572 wakaba 1.57 $self->{state} = COMMENT_STATE;
1573 wakaba 1.23 !!!next-input-character;
1574     redo A;
1575     }
1576 wakaba 1.57 } elsif ($self->{state} == COMMENT_STATE) {
1577 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
1578 wakaba 1.77 !!!cp (145);
1579 wakaba 1.57 $self->{state} = COMMENT_END_DASH_STATE;
1580 wakaba 1.1 !!!next-input-character;
1581     redo A;
1582 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1583 wakaba 1.77 !!!cp (146);
1584 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1585 wakaba 1.57 $self->{state} = DATA_STATE;
1586 wakaba 1.1 ## reconsume
1587    
1588     !!!emit ($self->{current_token}); # comment
1589    
1590     redo A;
1591     } else {
1592 wakaba 1.77 !!!cp (147);
1593 wakaba 1.76 $self->{current_token}->{data} .= chr ($self->{next_char}); # comment
1594 wakaba 1.1 ## Stay in the state
1595     !!!next-input-character;
1596     redo A;
1597     }
1598 wakaba 1.57 } elsif ($self->{state} == COMMENT_END_DASH_STATE) {
1599 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
1600 wakaba 1.77 !!!cp (148);
1601 wakaba 1.57 $self->{state} = COMMENT_END_STATE;
1602 wakaba 1.1 !!!next-input-character;
1603     redo A;
1604 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1605 wakaba 1.77 !!!cp (149);
1606 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1607 wakaba 1.57 $self->{state} = DATA_STATE;
1608 wakaba 1.1 ## reconsume
1609    
1610     !!!emit ($self->{current_token}); # comment
1611    
1612     redo A;
1613     } else {
1614 wakaba 1.77 !!!cp (150);
1615 wakaba 1.76 $self->{current_token}->{data} .= '-' . chr ($self->{next_char}); # comment
1616 wakaba 1.57 $self->{state} = COMMENT_STATE;
1617 wakaba 1.1 !!!next-input-character;
1618     redo A;
1619     }
1620 wakaba 1.57 } elsif ($self->{state} == COMMENT_END_STATE) {
1621 wakaba 1.76 if ($self->{next_char} == 0x003E) { # >
1622 wakaba 1.77 !!!cp (151);
1623 wakaba 1.57 $self->{state} = DATA_STATE;
1624 wakaba 1.1 !!!next-input-character;
1625    
1626     !!!emit ($self->{current_token}); # comment
1627    
1628     redo A;
1629 wakaba 1.76 } elsif ($self->{next_char} == 0x002D) { # -
1630 wakaba 1.77 !!!cp (152);
1631 wakaba 1.114 !!!parse-error (type => 'dash in comment',
1632     line => $self->{line_prev},
1633     column => $self->{column_prev});
1634 wakaba 1.1 $self->{current_token}->{data} .= '-'; # comment
1635     ## Stay in the state
1636     !!!next-input-character;
1637     redo A;
1638 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1639 wakaba 1.77 !!!cp (153);
1640 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1641 wakaba 1.57 $self->{state} = DATA_STATE;
1642 wakaba 1.1 ## reconsume
1643    
1644     !!!emit ($self->{current_token}); # comment
1645    
1646     redo A;
1647     } else {
1648 wakaba 1.77 !!!cp (154);
1649 wakaba 1.114 !!!parse-error (type => 'dash in comment',
1650     line => $self->{line_prev},
1651     column => $self->{column_prev});
1652 wakaba 1.76 $self->{current_token}->{data} .= '--' . chr ($self->{next_char}); # comment
1653 wakaba 1.57 $self->{state} = COMMENT_STATE;
1654 wakaba 1.1 !!!next-input-character;
1655     redo A;
1656     }
1657 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_STATE) {
1658 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1659     $self->{next_char} == 0x000A or # LF
1660     $self->{next_char} == 0x000B or # VT
1661     $self->{next_char} == 0x000C or # FF
1662     $self->{next_char} == 0x0020) { # SP
1663 wakaba 1.77 !!!cp (155);
1664 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
1665 wakaba 1.1 !!!next-input-character;
1666     redo A;
1667     } else {
1668 wakaba 1.77 !!!cp (156);
1669 wakaba 1.3 !!!parse-error (type => 'no space before DOCTYPE name');
1670 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
1671 wakaba 1.1 ## reconsume
1672     redo A;
1673     }
1674 wakaba 1.57 } elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) {
1675 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1676     $self->{next_char} == 0x000A or # LF
1677     $self->{next_char} == 0x000B or # VT
1678     $self->{next_char} == 0x000C or # FF
1679     $self->{next_char} == 0x0020) { # SP
1680 wakaba 1.77 !!!cp (157);
1681 wakaba 1.1 ## Stay in the state
1682     !!!next-input-character;
1683     redo A;
1684 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1685 wakaba 1.77 !!!cp (158);
1686 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
1687 wakaba 1.57 $self->{state} = DATA_STATE;
1688 wakaba 1.1 !!!next-input-character;
1689    
1690 wakaba 1.112 !!!emit ($self->{current_token}); # DOCTYPE (quirks)
1691 wakaba 1.1
1692     redo A;
1693 wakaba 1.77 } elsif ($self->{next_char} == -1) {
1694     !!!cp (159);
1695 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
1696 wakaba 1.57 $self->{state} = DATA_STATE;
1697 wakaba 1.1 ## reconsume
1698    
1699 wakaba 1.112 !!!emit ($self->{current_token}); # DOCTYPE (quirks)
1700 wakaba 1.1
1701     redo A;
1702     } else {
1703 wakaba 1.77 !!!cp (160);
1704 wakaba 1.112 $self->{current_token}->{name} = chr $self->{next_char};
1705     delete $self->{current_token}->{quirks};
1706 wakaba 1.4 ## ISSUE: "Set the token's name name to the" in the spec
1707 wakaba 1.57 $self->{state} = DOCTYPE_NAME_STATE;
1708 wakaba 1.1 !!!next-input-character;
1709     redo A;
1710     }
1711 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_NAME_STATE) {
1712 wakaba 1.18 ## ISSUE: Redundant "First," in the spec.
1713 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1714     $self->{next_char} == 0x000A or # LF
1715     $self->{next_char} == 0x000B or # VT
1716     $self->{next_char} == 0x000C or # FF
1717     $self->{next_char} == 0x0020) { # SP
1718 wakaba 1.77 !!!cp (161);
1719 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_NAME_STATE;
1720 wakaba 1.1 !!!next-input-character;
1721     redo A;
1722 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1723 wakaba 1.77 !!!cp (162);
1724 wakaba 1.57 $self->{state} = DATA_STATE;
1725 wakaba 1.1 !!!next-input-character;
1726    
1727     !!!emit ($self->{current_token}); # DOCTYPE
1728    
1729     redo A;
1730 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1731 wakaba 1.77 !!!cp (163);
1732 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1733 wakaba 1.57 $self->{state} = DATA_STATE;
1734 wakaba 1.1 ## reconsume
1735    
1736 wakaba 1.75 $self->{current_token}->{quirks} = 1;
1737 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
1738 wakaba 1.1
1739     redo A;
1740     } else {
1741 wakaba 1.77 !!!cp (164);
1742 wakaba 1.1 $self->{current_token}->{name}
1743 wakaba 1.76 .= chr ($self->{next_char}); # DOCTYPE
1744 wakaba 1.1 ## Stay in the state
1745     !!!next-input-character;
1746     redo A;
1747     }
1748 wakaba 1.57 } elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) {
1749 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1750     $self->{next_char} == 0x000A or # LF
1751     $self->{next_char} == 0x000B or # VT
1752     $self->{next_char} == 0x000C or # FF
1753     $self->{next_char} == 0x0020) { # SP
1754 wakaba 1.77 !!!cp (165);
1755 wakaba 1.1 ## Stay in the state
1756     !!!next-input-character;
1757     redo A;
1758 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1759 wakaba 1.77 !!!cp (166);
1760 wakaba 1.57 $self->{state} = DATA_STATE;
1761 wakaba 1.1 !!!next-input-character;
1762    
1763     !!!emit ($self->{current_token}); # DOCTYPE
1764    
1765     redo A;
1766 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1767 wakaba 1.77 !!!cp (167);
1768 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1769 wakaba 1.57 $self->{state} = DATA_STATE;
1770 wakaba 1.1 ## reconsume
1771    
1772 wakaba 1.75 $self->{current_token}->{quirks} = 1;
1773 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
1774    
1775     redo A;
1776 wakaba 1.76 } elsif ($self->{next_char} == 0x0050 or # P
1777     $self->{next_char} == 0x0070) { # p
1778 wakaba 1.18 !!!next-input-character;
1779 wakaba 1.76 if ($self->{next_char} == 0x0055 or # U
1780     $self->{next_char} == 0x0075) { # u
1781 wakaba 1.18 !!!next-input-character;
1782 wakaba 1.76 if ($self->{next_char} == 0x0042 or # B
1783     $self->{next_char} == 0x0062) { # b
1784 wakaba 1.18 !!!next-input-character;
1785 wakaba 1.76 if ($self->{next_char} == 0x004C or # L
1786     $self->{next_char} == 0x006C) { # l
1787 wakaba 1.18 !!!next-input-character;
1788 wakaba 1.76 if ($self->{next_char} == 0x0049 or # I
1789     $self->{next_char} == 0x0069) { # i
1790 wakaba 1.18 !!!next-input-character;
1791 wakaba 1.76 if ($self->{next_char} == 0x0043 or # C
1792     $self->{next_char} == 0x0063) { # c
1793 wakaba 1.77 !!!cp (168);
1794 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
1795 wakaba 1.18 !!!next-input-character;
1796     redo A;
1797 wakaba 1.77 } else {
1798     !!!cp (169);
1799 wakaba 1.18 }
1800 wakaba 1.77 } else {
1801     !!!cp (170);
1802 wakaba 1.18 }
1803 wakaba 1.77 } else {
1804     !!!cp (171);
1805 wakaba 1.18 }
1806 wakaba 1.77 } else {
1807     !!!cp (172);
1808 wakaba 1.18 }
1809 wakaba 1.77 } else {
1810     !!!cp (173);
1811 wakaba 1.18 }
1812    
1813     #
1814 wakaba 1.76 } elsif ($self->{next_char} == 0x0053 or # S
1815     $self->{next_char} == 0x0073) { # s
1816 wakaba 1.18 !!!next-input-character;
1817 wakaba 1.76 if ($self->{next_char} == 0x0059 or # Y
1818     $self->{next_char} == 0x0079) { # y
1819 wakaba 1.18 !!!next-input-character;
1820 wakaba 1.76 if ($self->{next_char} == 0x0053 or # S
1821     $self->{next_char} == 0x0073) { # s
1822 wakaba 1.18 !!!next-input-character;
1823 wakaba 1.76 if ($self->{next_char} == 0x0054 or # T
1824     $self->{next_char} == 0x0074) { # t
1825 wakaba 1.18 !!!next-input-character;
1826 wakaba 1.76 if ($self->{next_char} == 0x0045 or # E
1827     $self->{next_char} == 0x0065) { # e
1828 wakaba 1.18 !!!next-input-character;
1829 wakaba 1.76 if ($self->{next_char} == 0x004D or # M
1830     $self->{next_char} == 0x006D) { # m
1831 wakaba 1.77 !!!cp (174);
1832 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
1833 wakaba 1.18 !!!next-input-character;
1834     redo A;
1835 wakaba 1.77 } else {
1836     !!!cp (175);
1837 wakaba 1.18 }
1838 wakaba 1.77 } else {
1839     !!!cp (176);
1840 wakaba 1.18 }
1841 wakaba 1.77 } else {
1842     !!!cp (177);
1843 wakaba 1.18 }
1844 wakaba 1.77 } else {
1845     !!!cp (178);
1846 wakaba 1.18 }
1847 wakaba 1.77 } else {
1848     !!!cp (179);
1849 wakaba 1.18 }
1850    
1851     #
1852     } else {
1853 wakaba 1.77 !!!cp (180);
1854 wakaba 1.18 !!!next-input-character;
1855     #
1856     }
1857    
1858     !!!parse-error (type => 'string after DOCTYPE name');
1859 wakaba 1.75 $self->{current_token}->{quirks} = 1;
1860 wakaba 1.73
1861 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
1862 wakaba 1.18 # next-input-character is already done
1863     redo A;
1864 wakaba 1.57 } elsif ($self->{state} == BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {
1865 wakaba 1.18 if ({
1866     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
1867     #0x000D => 1, # HT, LF, VT, FF, SP, CR
1868 wakaba 1.76 }->{$self->{next_char}}) {
1869 wakaba 1.77 !!!cp (181);
1870 wakaba 1.18 ## Stay in the state
1871     !!!next-input-character;
1872     redo A;
1873 wakaba 1.76 } elsif ($self->{next_char} eq 0x0022) { # "
1874 wakaba 1.77 !!!cp (182);
1875 wakaba 1.18 $self->{current_token}->{public_identifier} = ''; # DOCTYPE
1876 wakaba 1.57 $self->{state} = DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE;
1877 wakaba 1.18 !!!next-input-character;
1878     redo A;
1879 wakaba 1.76 } elsif ($self->{next_char} eq 0x0027) { # '
1880 wakaba 1.77 !!!cp (183);
1881 wakaba 1.18 $self->{current_token}->{public_identifier} = ''; # DOCTYPE
1882 wakaba 1.57 $self->{state} = DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE;
1883 wakaba 1.18 !!!next-input-character;
1884     redo A;
1885 wakaba 1.76 } elsif ($self->{next_char} eq 0x003E) { # >
1886 wakaba 1.77 !!!cp (184);
1887 wakaba 1.18 !!!parse-error (type => 'no PUBLIC literal');
1888    
1889 wakaba 1.57 $self->{state} = DATA_STATE;
1890 wakaba 1.18 !!!next-input-character;
1891    
1892 wakaba 1.75 $self->{current_token}->{quirks} = 1;
1893 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
1894    
1895     redo A;
1896 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1897 wakaba 1.77 !!!cp (185);
1898 wakaba 1.18 !!!parse-error (type => 'unclosed DOCTYPE');
1899    
1900 wakaba 1.57 $self->{state} = DATA_STATE;
1901 wakaba 1.18 ## reconsume
1902    
1903 wakaba 1.75 $self->{current_token}->{quirks} = 1;
1904 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
1905    
1906     redo A;
1907     } else {
1908 wakaba 1.77 !!!cp (186);
1909 wakaba 1.18 !!!parse-error (type => 'string after PUBLIC');
1910 wakaba 1.75 $self->{current_token}->{quirks} = 1;
1911 wakaba 1.73
1912 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
1913 wakaba 1.18 !!!next-input-character;
1914     redo A;
1915     }
1916 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE) {
1917 wakaba 1.76 if ($self->{next_char} == 0x0022) { # "
1918 wakaba 1.77 !!!cp (187);
1919 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
1920 wakaba 1.18 !!!next-input-character;
1921     redo A;
1922 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1923 wakaba 1.77 !!!cp (188);
1924 wakaba 1.69 !!!parse-error (type => 'unclosed PUBLIC literal');
1925    
1926     $self->{state} = DATA_STATE;
1927     !!!next-input-character;
1928    
1929 wakaba 1.75 $self->{current_token}->{quirks} = 1;
1930 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
1931    
1932     redo A;
1933 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1934 wakaba 1.77 !!!cp (189);
1935 wakaba 1.18 !!!parse-error (type => 'unclosed PUBLIC literal');
1936    
1937 wakaba 1.57 $self->{state} = DATA_STATE;
1938 wakaba 1.18 ## reconsume
1939    
1940 wakaba 1.75 $self->{current_token}->{quirks} = 1;
1941 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
1942    
1943     redo A;
1944     } else {
1945 wakaba 1.77 !!!cp (190);
1946 wakaba 1.18 $self->{current_token}->{public_identifier} # DOCTYPE
1947 wakaba 1.76 .= chr $self->{next_char};
1948 wakaba 1.18 ## Stay in the state
1949     !!!next-input-character;
1950     redo A;
1951     }
1952 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE) {
1953 wakaba 1.76 if ($self->{next_char} == 0x0027) { # '
1954 wakaba 1.77 !!!cp (191);
1955 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
1956 wakaba 1.18 !!!next-input-character;
1957     redo A;
1958 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1959 wakaba 1.77 !!!cp (192);
1960 wakaba 1.69 !!!parse-error (type => 'unclosed PUBLIC literal');
1961    
1962     $self->{state} = DATA_STATE;
1963     !!!next-input-character;
1964    
1965 wakaba 1.75 $self->{current_token}->{quirks} = 1;
1966 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
1967    
1968     redo A;
1969 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1970 wakaba 1.77 !!!cp (193);
1971 wakaba 1.18 !!!parse-error (type => 'unclosed PUBLIC literal');
1972    
1973 wakaba 1.57 $self->{state} = DATA_STATE;
1974 wakaba 1.18 ## reconsume
1975    
1976 wakaba 1.75 $self->{current_token}->{quirks} = 1;
1977 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
1978    
1979     redo A;
1980     } else {
1981 wakaba 1.77 !!!cp (194);
1982 wakaba 1.18 $self->{current_token}->{public_identifier} # DOCTYPE
1983 wakaba 1.76 .= chr $self->{next_char};
1984 wakaba 1.18 ## Stay in the state
1985     !!!next-input-character;
1986     redo A;
1987     }
1988 wakaba 1.57 } elsif ($self->{state} == AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {
1989 wakaba 1.18 if ({
1990     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
1991     #0x000D => 1, # HT, LF, VT, FF, SP, CR
1992 wakaba 1.76 }->{$self->{next_char}}) {
1993 wakaba 1.77 !!!cp (195);
1994 wakaba 1.18 ## Stay in the state
1995     !!!next-input-character;
1996     redo A;
1997 wakaba 1.76 } elsif ($self->{next_char} == 0x0022) { # "
1998 wakaba 1.77 !!!cp (196);
1999 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2000 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
2001 wakaba 1.18 !!!next-input-character;
2002     redo A;
2003 wakaba 1.76 } elsif ($self->{next_char} == 0x0027) { # '
2004 wakaba 1.77 !!!cp (197);
2005 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2006 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
2007 wakaba 1.18 !!!next-input-character;
2008     redo A;
2009 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2010 wakaba 1.77 !!!cp (198);
2011 wakaba 1.57 $self->{state} = DATA_STATE;
2012 wakaba 1.18 !!!next-input-character;
2013    
2014     !!!emit ($self->{current_token}); # DOCTYPE
2015    
2016     redo A;
2017 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2018 wakaba 1.77 !!!cp (199);
2019 wakaba 1.18 !!!parse-error (type => 'unclosed DOCTYPE');
2020    
2021 wakaba 1.57 $self->{state} = DATA_STATE;
2022 wakaba 1.26 ## reconsume
2023 wakaba 1.18
2024 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2025 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2026    
2027     redo A;
2028     } else {
2029 wakaba 1.77 !!!cp (200);
2030 wakaba 1.18 !!!parse-error (type => 'string after PUBLIC literal');
2031 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2032 wakaba 1.73
2033 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2034 wakaba 1.18 !!!next-input-character;
2035     redo A;
2036     }
2037 wakaba 1.57 } elsif ($self->{state} == BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
2038 wakaba 1.18 if ({
2039     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2040     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2041 wakaba 1.76 }->{$self->{next_char}}) {
2042 wakaba 1.77 !!!cp (201);
2043 wakaba 1.18 ## Stay in the state
2044     !!!next-input-character;
2045     redo A;
2046 wakaba 1.76 } elsif ($self->{next_char} == 0x0022) { # "
2047 wakaba 1.77 !!!cp (202);
2048 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2049 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
2050 wakaba 1.18 !!!next-input-character;
2051     redo A;
2052 wakaba 1.76 } elsif ($self->{next_char} == 0x0027) { # '
2053 wakaba 1.77 !!!cp (203);
2054 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2055 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
2056 wakaba 1.18 !!!next-input-character;
2057     redo A;
2058 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2059 wakaba 1.77 !!!cp (204);
2060 wakaba 1.18 !!!parse-error (type => 'no SYSTEM literal');
2061 wakaba 1.57 $self->{state} = DATA_STATE;
2062 wakaba 1.18 !!!next-input-character;
2063    
2064 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2065 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2066    
2067     redo A;
2068 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2069 wakaba 1.77 !!!cp (205);
2070 wakaba 1.18 !!!parse-error (type => 'unclosed DOCTYPE');
2071    
2072 wakaba 1.57 $self->{state} = DATA_STATE;
2073 wakaba 1.26 ## reconsume
2074 wakaba 1.18
2075 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2076 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2077    
2078     redo A;
2079     } else {
2080 wakaba 1.77 !!!cp (206);
2081 wakaba 1.30 !!!parse-error (type => 'string after SYSTEM');
2082 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2083 wakaba 1.73
2084 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2085 wakaba 1.18 !!!next-input-character;
2086     redo A;
2087     }
2088 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE) {
2089 wakaba 1.76 if ($self->{next_char} == 0x0022) { # "
2090 wakaba 1.77 !!!cp (207);
2091 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
2092 wakaba 1.18 !!!next-input-character;
2093     redo A;
2094 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2095 wakaba 1.77 !!!cp (208);
2096 wakaba 1.69 !!!parse-error (type => 'unclosed PUBLIC literal');
2097    
2098     $self->{state} = DATA_STATE;
2099     !!!next-input-character;
2100    
2101 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2102 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
2103    
2104     redo A;
2105 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2106 wakaba 1.77 !!!cp (209);
2107 wakaba 1.18 !!!parse-error (type => 'unclosed SYSTEM literal');
2108    
2109 wakaba 1.57 $self->{state} = DATA_STATE;
2110 wakaba 1.18 ## reconsume
2111    
2112 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2113 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2114    
2115     redo A;
2116     } else {
2117 wakaba 1.77 !!!cp (210);
2118 wakaba 1.18 $self->{current_token}->{system_identifier} # DOCTYPE
2119 wakaba 1.76 .= chr $self->{next_char};
2120 wakaba 1.18 ## Stay in the state
2121     !!!next-input-character;
2122     redo A;
2123     }
2124 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE) {
2125 wakaba 1.76 if ($self->{next_char} == 0x0027) { # '
2126 wakaba 1.77 !!!cp (211);
2127 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
2128 wakaba 1.18 !!!next-input-character;
2129     redo A;
2130 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2131 wakaba 1.77 !!!cp (212);
2132 wakaba 1.69 !!!parse-error (type => 'unclosed PUBLIC literal');
2133    
2134     $self->{state} = DATA_STATE;
2135     !!!next-input-character;
2136    
2137 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2138 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
2139    
2140     redo A;
2141 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2142 wakaba 1.77 !!!cp (213);
2143 wakaba 1.18 !!!parse-error (type => 'unclosed SYSTEM literal');
2144    
2145 wakaba 1.57 $self->{state} = DATA_STATE;
2146 wakaba 1.18 ## reconsume
2147    
2148 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2149 wakaba 1.1 !!!emit ($self->{current_token}); # DOCTYPE
2150    
2151     redo A;
2152     } else {
2153 wakaba 1.77 !!!cp (214);
2154 wakaba 1.18 $self->{current_token}->{system_identifier} # DOCTYPE
2155 wakaba 1.76 .= chr $self->{next_char};
2156 wakaba 1.18 ## Stay in the state
2157     !!!next-input-character;
2158     redo A;
2159     }
2160 wakaba 1.57 } elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
2161 wakaba 1.18 if ({
2162     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2163     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2164 wakaba 1.76 }->{$self->{next_char}}) {
2165 wakaba 1.77 !!!cp (215);
2166 wakaba 1.18 ## Stay in the state
2167     !!!next-input-character;
2168     redo A;
2169 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2170 wakaba 1.77 !!!cp (216);
2171 wakaba 1.57 $self->{state} = DATA_STATE;
2172 wakaba 1.18 !!!next-input-character;
2173    
2174     !!!emit ($self->{current_token}); # DOCTYPE
2175    
2176     redo A;
2177 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2178 wakaba 1.77 !!!cp (217);
2179 wakaba 1.18 !!!parse-error (type => 'unclosed DOCTYPE');
2180    
2181 wakaba 1.57 $self->{state} = DATA_STATE;
2182 wakaba 1.26 ## reconsume
2183 wakaba 1.18
2184 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2185 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2186    
2187     redo A;
2188     } else {
2189 wakaba 1.77 !!!cp (218);
2190 wakaba 1.18 !!!parse-error (type => 'string after SYSTEM literal');
2191 wakaba 1.75 #$self->{current_token}->{quirks} = 1;
2192 wakaba 1.73
2193 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2194 wakaba 1.1 !!!next-input-character;
2195     redo A;
2196     }
2197 wakaba 1.57 } elsif ($self->{state} == BOGUS_DOCTYPE_STATE) {
2198 wakaba 1.76 if ($self->{next_char} == 0x003E) { # >
2199 wakaba 1.77 !!!cp (219);
2200 wakaba 1.57 $self->{state} = DATA_STATE;
2201 wakaba 1.1 !!!next-input-character;
2202    
2203     !!!emit ($self->{current_token}); # DOCTYPE
2204    
2205     redo A;
2206 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2207 wakaba 1.77 !!!cp (220);
2208 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
2209 wakaba 1.57 $self->{state} = DATA_STATE;
2210 wakaba 1.1 ## reconsume
2211    
2212     !!!emit ($self->{current_token}); # DOCTYPE
2213    
2214     redo A;
2215     } else {
2216 wakaba 1.77 !!!cp (221);
2217 wakaba 1.1 ## Stay in the state
2218     !!!next-input-character;
2219     redo A;
2220     }
2221     } else {
2222     die "$0: $self->{state}: Unknown state";
2223     }
2224     } # A
2225    
2226     die "$0: _get_next_token: unexpected case";
2227     } # _get_next_token
2228    
2229 wakaba 1.72 sub _tokenize_attempt_to_consume_an_entity ($$$) {
2230     my ($self, $in_attr, $additional) = @_;
2231 wakaba 1.20
2232 wakaba 1.112 my ($l, $c) = ($self->{line_prev}, $self->{column_prev});
2233    
2234 wakaba 1.20 if ({
2235     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, # HT, LF, VT, FF,
2236     0x0020 => 1, 0x003C => 1, 0x0026 => 1, -1 => 1, # SP, <, & # 0x000D # CR
2237 wakaba 1.72 $additional => 1,
2238 wakaba 1.76 }->{$self->{next_char}}) {
2239 wakaba 1.78 !!!cp (1001);
2240 wakaba 1.20 ## Don't consume
2241     ## No error
2242     return undef;
2243 wakaba 1.76 } elsif ($self->{next_char} == 0x0023) { # #
2244 wakaba 1.1 !!!next-input-character;
2245 wakaba 1.76 if ($self->{next_char} == 0x0078 or # x
2246     $self->{next_char} == 0x0058) { # X
2247 wakaba 1.26 my $code;
2248 wakaba 1.1 X: {
2249 wakaba 1.76 my $x_char = $self->{next_char};
2250 wakaba 1.1 !!!next-input-character;
2251 wakaba 1.76 if (0x0030 <= $self->{next_char} and
2252     $self->{next_char} <= 0x0039) { # 0..9
2253 wakaba 1.78 !!!cp (1002);
2254 wakaba 1.26 $code ||= 0;
2255     $code *= 0x10;
2256 wakaba 1.76 $code += $self->{next_char} - 0x0030;
2257 wakaba 1.1 redo X;
2258 wakaba 1.76 } elsif (0x0061 <= $self->{next_char} and
2259     $self->{next_char} <= 0x0066) { # a..f
2260 wakaba 1.78 !!!cp (1003);
2261 wakaba 1.26 $code ||= 0;
2262     $code *= 0x10;
2263 wakaba 1.76 $code += $self->{next_char} - 0x0060 + 9;
2264 wakaba 1.1 redo X;
2265 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
2266     $self->{next_char} <= 0x0046) { # A..F
2267 wakaba 1.78 !!!cp (1004);
2268 wakaba 1.26 $code ||= 0;
2269     $code *= 0x10;
2270 wakaba 1.76 $code += $self->{next_char} - 0x0040 + 9;
2271 wakaba 1.1 redo X;
2272 wakaba 1.26 } elsif (not defined $code) { # no hexadecimal digit
2273 wakaba 1.78 !!!cp (1005);
2274 wakaba 1.112 !!!parse-error (type => 'bare hcro', line => $l, column => $c);
2275 wakaba 1.76 !!!back-next-input-character ($x_char, $self->{next_char});
2276     $self->{next_char} = 0x0023; # #
2277 wakaba 1.1 return undef;
2278 wakaba 1.76 } elsif ($self->{next_char} == 0x003B) { # ;
2279 wakaba 1.78 !!!cp (1006);
2280 wakaba 1.1 !!!next-input-character;
2281     } else {
2282 wakaba 1.78 !!!cp (1007);
2283 wakaba 1.112 !!!parse-error (type => 'no refc', line => $l, column => $c);
2284 wakaba 1.1 }
2285    
2286 wakaba 1.26 if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) {
2287 wakaba 1.78 !!!cp (1008);
2288 wakaba 1.112 !!!parse-error (type => (sprintf 'invalid character reference:U+%04X', $code), line => $l, column => $c);
2289 wakaba 1.26 $code = 0xFFFD;
2290     } elsif ($code > 0x10FFFF) {
2291 wakaba 1.78 !!!cp (1009);
2292 wakaba 1.112 !!!parse-error (type => (sprintf 'invalid character reference:U-%08X', $code), line => $l, column => $c);
2293 wakaba 1.26 $code = 0xFFFD;
2294     } elsif ($code == 0x000D) {
2295 wakaba 1.78 !!!cp (1010);
2296 wakaba 1.112 !!!parse-error (type => 'CR character reference', line => $l, column => $c);
2297 wakaba 1.26 $code = 0x000A;
2298     } elsif (0x80 <= $code and $code <= 0x9F) {
2299 wakaba 1.78 !!!cp (1011);
2300 wakaba 1.112 !!!parse-error (type => (sprintf 'C1 character reference:U+%04X', $code), line => $l, column => $c);
2301 wakaba 1.26 $code = $c1_entity_char->{$code};
2302 wakaba 1.1 }
2303    
2304 wakaba 1.66 return {type => CHARACTER_TOKEN, data => chr $code,
2305 wakaba 1.112 has_reference => 1, line => $l, column => $c};
2306 wakaba 1.1 } # X
2307 wakaba 1.76 } elsif (0x0030 <= $self->{next_char} and
2308     $self->{next_char} <= 0x0039) { # 0..9
2309     my $code = $self->{next_char} - 0x0030;
2310 wakaba 1.1 !!!next-input-character;
2311    
2312 wakaba 1.76 while (0x0030 <= $self->{next_char} and
2313     $self->{next_char} <= 0x0039) { # 0..9
2314 wakaba 1.78 !!!cp (1012);
2315 wakaba 1.1 $code *= 10;
2316 wakaba 1.76 $code += $self->{next_char} - 0x0030;
2317 wakaba 1.1
2318     !!!next-input-character;
2319     }
2320    
2321 wakaba 1.76 if ($self->{next_char} == 0x003B) { # ;
2322 wakaba 1.78 !!!cp (1013);
2323 wakaba 1.1 !!!next-input-character;
2324     } else {
2325 wakaba 1.78 !!!cp (1014);
2326 wakaba 1.112 !!!parse-error (type => 'no refc', line => $l, column => $c);
2327 wakaba 1.1 }
2328    
2329 wakaba 1.26 if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) {
2330 wakaba 1.78 !!!cp (1015);
2331 wakaba 1.112 !!!parse-error (type => (sprintf 'invalid character reference:U+%04X', $code), line => $l, column => $c);
2332 wakaba 1.26 $code = 0xFFFD;
2333     } elsif ($code > 0x10FFFF) {
2334 wakaba 1.78 !!!cp (1016);
2335 wakaba 1.112 !!!parse-error (type => (sprintf 'invalid character reference:U-%08X', $code), line => $l, column => $c);
2336 wakaba 1.26 $code = 0xFFFD;
2337     } elsif ($code == 0x000D) {
2338 wakaba 1.78 !!!cp (1017);
2339 wakaba 1.112 !!!parse-error (type => 'CR character reference', line => $l, column => $c);
2340 wakaba 1.26 $code = 0x000A;
2341 wakaba 1.4 } elsif (0x80 <= $code and $code <= 0x9F) {
2342 wakaba 1.78 !!!cp (1018);
2343 wakaba 1.112 !!!parse-error (type => (sprintf 'C1 character reference:U+%04X', $code), line => $l, column => $c);
2344 wakaba 1.4 $code = $c1_entity_char->{$code};
2345 wakaba 1.1 }
2346    
2347 wakaba 1.112 return {type => CHARACTER_TOKEN, data => chr $code, has_reference => 1,
2348     line => $l, column => $c};
2349 wakaba 1.1 } else {
2350 wakaba 1.78 !!!cp (1019);
2351 wakaba 1.112 !!!parse-error (type => 'bare nero', line => $l, column => $c);
2352 wakaba 1.76 !!!back-next-input-character ($self->{next_char});
2353     $self->{next_char} = 0x0023; # #
2354 wakaba 1.1 return undef;
2355     }
2356 wakaba 1.76 } elsif ((0x0041 <= $self->{next_char} and
2357     $self->{next_char} <= 0x005A) or
2358     (0x0061 <= $self->{next_char} and
2359     $self->{next_char} <= 0x007A)) {
2360     my $entity_name = chr $self->{next_char};
2361 wakaba 1.1 !!!next-input-character;
2362    
2363     my $value = $entity_name;
2364 wakaba 1.37 my $match = 0;
2365 wakaba 1.16 require Whatpm::_NamedEntityList;
2366     our $EntityChar;
2367 wakaba 1.1
2368     while (length $entity_name < 10 and
2369     ## NOTE: Some number greater than the maximum length of entity name
2370 wakaba 1.76 ((0x0041 <= $self->{next_char} and # A
2371     $self->{next_char} <= 0x005A) or # Z
2372     (0x0061 <= $self->{next_char} and # a
2373     $self->{next_char} <= 0x007A) or # z
2374     (0x0030 <= $self->{next_char} and # 0
2375     $self->{next_char} <= 0x0039) or # 9
2376     $self->{next_char} == 0x003B)) { # ;
2377     $entity_name .= chr $self->{next_char};
2378 wakaba 1.16 if (defined $EntityChar->{$entity_name}) {
2379 wakaba 1.76 if ($self->{next_char} == 0x003B) { # ;
2380 wakaba 1.78 !!!cp (1020);
2381 wakaba 1.26 $value = $EntityChar->{$entity_name};
2382 wakaba 1.16 $match = 1;
2383     !!!next-input-character;
2384     last;
2385 wakaba 1.37 } else {
2386 wakaba 1.78 !!!cp (1021);
2387 wakaba 1.26 $value = $EntityChar->{$entity_name};
2388     $match = -1;
2389 wakaba 1.37 !!!next-input-character;
2390 wakaba 1.16 }
2391 wakaba 1.1 } else {
2392 wakaba 1.78 !!!cp (1022);
2393 wakaba 1.76 $value .= chr $self->{next_char};
2394 wakaba 1.37 $match *= 2;
2395     !!!next-input-character;
2396 wakaba 1.1 }
2397     }
2398    
2399 wakaba 1.16 if ($match > 0) {
2400 wakaba 1.78 !!!cp (1023);
2401 wakaba 1.112 return {type => CHARACTER_TOKEN, data => $value, has_reference => 1,
2402     line => $l, column => $c};
2403 wakaba 1.16 } elsif ($match < 0) {
2404 wakaba 1.112 !!!parse-error (type => 'no refc', line => $l, column => $c);
2405 wakaba 1.37 if ($in_attr and $match < -1) {
2406 wakaba 1.78 !!!cp (1024);
2407 wakaba 1.112 return {type => CHARACTER_TOKEN, data => '&'.$entity_name,
2408     line => $l, column => $c};
2409 wakaba 1.37 } else {
2410 wakaba 1.78 !!!cp (1025);
2411 wakaba 1.112 return {type => CHARACTER_TOKEN, data => $value, has_reference => 1,
2412     line => $l, column => $c};
2413 wakaba 1.37 }
2414 wakaba 1.1 } else {
2415 wakaba 1.78 !!!cp (1026);
2416 wakaba 1.112 !!!parse-error (type => 'bare ero', line => $l, column => $c);
2417 wakaba 1.66 ## NOTE: "No characters are consumed" in the spec.
2418 wakaba 1.112 return {type => CHARACTER_TOKEN, data => '&'.$value,
2419     line => $l, column => $c};
2420 wakaba 1.1 }
2421     } else {
2422 wakaba 1.78 !!!cp (1027);
2423 wakaba 1.1 ## no characters are consumed
2424 wakaba 1.112 !!!parse-error (type => 'bare ero', line => $l, column => $c);
2425 wakaba 1.1 return undef;
2426     }
2427     } # _tokenize_attempt_to_consume_an_entity
2428    
2429     sub _initialize_tree_constructor ($) {
         ## Prepares $self->{document} for the tree construction stage:
         ## calls strict_error_checking (0) on it and sets its
         ## manakai_is_html flag to 1.  Takes only the invocant.
2430     my $self = shift;
2431     ## NOTE: $self->{document} MUST be specified before this method is called
2432     $self->{document}->strict_error_checking (0);
2433     ## TODO: Turn mutation events off # MUST
2434     ## TODO: Turn loose Document option (manakai extension) on
2435 wakaba 1.18 $self->{document}->manakai_is_html (1); # MUST
2436 wakaba 1.1 } # _initialize_tree_constructor
2437    
2438     sub _terminate_tree_constructor ($) {
         ## Counterpart of _initialize_tree_constructor: calls
         ## strict_error_checking (1) on $self->{document}, reversing the
         ## (0) passed at initialization.  Takes only the invocant.
2439     my $self = shift;
2440     $self->{document}->strict_error_checking (1);
2441     ## TODO: Turn mutation events on
2442     } # _terminate_tree_constructor
2443    
2444     ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
2445    
2446 wakaba 1.3 { # tree construction stage
2447     my $token;
2448    
2449 wakaba 1.1 sub _construct_tree ($) {
         ## Entry point of the tree construction stage.  Fetches the first
         ## token, resets the per-parse state kept on $self, then runs the
         ## three phases in order: "initial", "before html", and the main
         ## loop (which starts in the "before head" insertion mode).
2450     my ($self) = @_;
2451    
2452     ## When an interactive UA renders the $self->{document} available
2453     ## to the user, or when it begins accepting user input, is
2454     ## not defined.
2455    
2456     ## Append a character: collect it and all subsequent consecutive
2457     ## characters and insert one Text node whose data is the concatenation
2458     ## of all those characters. # MUST
2459    
2460     !!!next-token;
2461    
         ## Reset the parser state used by the phases below.
2462 wakaba 1.3 undef $self->{form_element};
2463     undef $self->{head_element};
2464     $self->{open_elements} = [];
2465     undef $self->{inner_html_node};
2466    
2467 wakaba 1.84 ## NOTE: The "initial" insertion mode.
2468 wakaba 1.3 $self->_tree_construction_initial; # MUST
2469 wakaba 1.84
2470     ## NOTE: The "before html" insertion mode.
2471 wakaba 1.3 $self->_tree_construction_root_element;
2472 wakaba 1.84 $self->{insertion_mode} = BEFORE_HEAD_IM;
2473    
2474     ## NOTE: The "before head" insertion mode and so on.
2475 wakaba 1.3 $self->_tree_construction_main;
2476     } # _construct_tree
2477    
2478     sub _tree_construction_initial ($) {
2479     my $self = shift;
2480 wakaba 1.84
2481     ## NOTE: "initial" insertion mode
         ##
         ## Processes tokens (read from the enclosing lexical $token) until
         ## the "initial" insertion mode is left:
         ##   - DOCTYPE token: report "not HTML5" where applicable, append
         ##     a document type node, choose the compat mode from the
         ##     name / public identifier / system identifier, fetch the
         ##     next token and return.
         ##   - start tag, end tag, end-of-file, or a character token with
         ##     non-whitespace data: report "no DOCTYPE", set compat mode
         ##     to 'quirks' and return WITHOUT consuming the token (it is
         ##     reprocessed by the caller's next phase).
         ##   - whitespace-only character token or comment: handled here
         ##     and the loop continues with the next token.
         ## Dies on an unknown token type.
2482    
2483 wakaba 1.18 INITIAL: {
2484 wakaba 1.55 if ($token->{type} == DOCTYPE_TOKEN) {
2485 wakaba 1.18 ## NOTE: Conformance checkers MAY, instead of reporting "not HTML5"
2486     ## error, switch to a conformance checking mode for another
2487     ## language.
2488     my $doctype_name = $token->{name};
2489     $doctype_name = '' unless defined $doctype_name;
2490     $doctype_name =~ tr/a-z/A-Z/;
2491     if (not defined $token->{name} or # <!DOCTYPE>
2492     defined $token->{public_identifier} or
2493     defined $token->{system_identifier}) {
2494 wakaba 1.79 !!!cp ('t1');
2495 wakaba 1.113 !!!parse-error (type => 'not HTML5', token => $token);
2496 wakaba 1.18 } elsif ($doctype_name ne 'HTML') {
2497 wakaba 1.79 !!!cp ('t2');
2498 wakaba 1.18 ## ISSUE: ASCII case-insensitive? (in fact it does not matter)
2499 wakaba 1.113 !!!parse-error (type => 'not HTML5', token => $token);
2500 wakaba 1.79 } else {
2501     !!!cp ('t3');
2502 wakaba 1.18 }
2503    
2504     my $doctype = $self->{document}->create_document_type_definition
2505     ($token->{name}); ## ISSUE: If name is missing (e.g. <!DOCTYPE>)?
2506     $doctype->public_id ($token->{public_identifier})
2507     if defined $token->{public_identifier};
2508     $doctype->system_id ($token->{system_identifier})
2509     if defined $token->{system_identifier};
2510     ## NOTE: Other DocumentType attributes are null or empty lists.
2511     ## ISSUE: internalSubset = null??
2512     $self->{document}->append_child ($doctype);
2513    
2514 wakaba 1.75 if ($token->{quirks} or $doctype_name ne 'HTML') {
2515 wakaba 1.79 !!!cp ('t4');
2516 wakaba 1.18 $self->{document}->manakai_compat_mode ('quirks');
2517     } elsif (defined $token->{public_identifier}) {
2518     my $pubid = $token->{public_identifier};
         ## Upper-case ASCII letters so the lookup below is
         ## case-insensitive.  (This range used to be spelled "A-z";
         ## tr/// maps positionally, so the result was the same, but
         ## "A-Z" is what is meant.)
2519     $pubid =~ tr/a-z/A-Z/;
         ## Public identifiers that select quirks mode.
2520     if ({
2521     "+//SILMARIL//DTD HTML PRO V0R11 19970101//EN" => 1,
2522     "-//ADVASOFT LTD//DTD HTML 3.0 ASWEDIT + EXTENSIONS//EN" => 1,
2523     "-//AS//DTD HTML 3.0 ASWEDIT + EXTENSIONS//EN" => 1,
2524     "-//IETF//DTD HTML 2.0 LEVEL 1//EN" => 1,
2525     "-//IETF//DTD HTML 2.0 LEVEL 2//EN" => 1,
2526     "-//IETF//DTD HTML 2.0 STRICT LEVEL 1//EN" => 1,
2527     "-//IETF//DTD HTML 2.0 STRICT LEVEL 2//EN" => 1,
2528     "-//IETF//DTD HTML 2.0 STRICT//EN" => 1,
2529     "-//IETF//DTD HTML 2.0//EN" => 1,
2530     "-//IETF//DTD HTML 2.1E//EN" => 1,
2531     "-//IETF//DTD HTML 3.0//EN" => 1,
2532     "-//IETF//DTD HTML 3.0//EN//" => 1,
2533     "-//IETF//DTD HTML 3.2 FINAL//EN" => 1,
2534     "-//IETF//DTD HTML 3.2//EN" => 1,
2535     "-//IETF//DTD HTML 3//EN" => 1,
2536     "-//IETF//DTD HTML LEVEL 0//EN" => 1,
2537     "-//IETF//DTD HTML LEVEL 0//EN//2.0" => 1,
2538     "-//IETF//DTD HTML LEVEL 1//EN" => 1,
2539     "-//IETF//DTD HTML LEVEL 1//EN//2.0" => 1,
2540     "-//IETF//DTD HTML LEVEL 2//EN" => 1,
2541     "-//IETF//DTD HTML LEVEL 2//EN//2.0" => 1,
2542     "-//IETF//DTD HTML LEVEL 3//EN" => 1,
2543     "-//IETF//DTD HTML LEVEL 3//EN//3.0" => 1,
2544     "-//IETF//DTD HTML STRICT LEVEL 0//EN" => 1,
2545     "-//IETF//DTD HTML STRICT LEVEL 0//EN//2.0" => 1,
2546     "-//IETF//DTD HTML STRICT LEVEL 1//EN" => 1,
2547     "-//IETF//DTD HTML STRICT LEVEL 1//EN//2.0" => 1,
2548     "-//IETF//DTD HTML STRICT LEVEL 2//EN" => 1,
2549     "-//IETF//DTD HTML STRICT LEVEL 2//EN//2.0" => 1,
2550     "-//IETF//DTD HTML STRICT LEVEL 3//EN" => 1,
2551     "-//IETF//DTD HTML STRICT LEVEL 3//EN//3.0" => 1,
2552     "-//IETF//DTD HTML STRICT//EN" => 1,
2553     "-//IETF//DTD HTML STRICT//EN//2.0" => 1,
2554     "-//IETF//DTD HTML STRICT//EN//3.0" => 1,
2555     "-//IETF//DTD HTML//EN" => 1,
2556     "-//IETF//DTD HTML//EN//2.0" => 1,
2557     "-//IETF//DTD HTML//EN//3.0" => 1,
2558     "-//METRIUS//DTD METRIUS PRESENTATIONAL//EN" => 1,
2559     "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML STRICT//EN" => 1,
2560     "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML//EN" => 1,
2561     "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 TABLES//EN" => 1,
2562     "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML STRICT//EN" => 1,
2563     "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML//EN" => 1,
2564     "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 TABLES//EN" => 1,
2565     "-//NETSCAPE COMM. CORP.//DTD HTML//EN" => 1,
2566     "-//NETSCAPE COMM. CORP.//DTD STRICT HTML//EN" => 1,
2567     "-//O'REILLY AND ASSOCIATES//DTD HTML 2.0//EN" => 1,
2568     "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED 1.0//EN" => 1,
2569 wakaba 1.72 "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED RELAXED 1.0//EN" => 1,
2570     "-//SOFTQUAD SOFTWARE//DTD HOTMETAL PRO 6.0::19990601::EXTENSIONS TO HTML 4.0//EN" => 1,
2571     "-//SOFTQUAD//DTD HOTMETAL PRO 4.0::19971010::EXTENSIONS TO HTML 4.0//EN" => 1,
2572 wakaba 1.18 "-//SPYGLASS//DTD HTML 2.0 EXTENDED//EN" => 1,
2573     "-//SQ//DTD HTML 2.0 HOTMETAL + EXTENSIONS//EN" => 1,
2574     "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA HTML//EN" => 1,
2575     "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA STRICT HTML//EN" => 1,
2576     "-//W3C//DTD HTML 3 1995-03-24//EN" => 1,
2577     "-//W3C//DTD HTML 3.2 DRAFT//EN" => 1,
2578     "-//W3C//DTD HTML 3.2 FINAL//EN" => 1,
2579     "-//W3C//DTD HTML 3.2//EN" => 1,
2580     "-//W3C//DTD HTML 3.2S DRAFT//EN" => 1,
2581     "-//W3C//DTD HTML 4.0 FRAMESET//EN" => 1,
2582     "-//W3C//DTD HTML 4.0 TRANSITIONAL//EN" => 1,
         ## NOTE(review): "EXPERIMETNAL" looks like a typo but presumably
         ## mirrors the identifier as listed in the HTML5 quirks list;
         ## confirm against the spec before "correcting" it.
2583     "-//W3C//DTD HTML EXPERIMETNAL 19960712//EN" => 1,
2584     "-//W3C//DTD HTML EXPERIMENTAL 970421//EN" => 1,
2585     "-//W3C//DTD W3 HTML//EN" => 1,
2586     "-//W3O//DTD W3 HTML 3.0//EN" => 1,
2587     "-//W3O//DTD W3 HTML 3.0//EN//" => 1,
2588     "-//W3O//DTD W3 HTML STRICT 3.0//EN//" => 1,
2589     "-//WEBTECHS//DTD MOZILLA HTML 2.0//EN" => 1,
2590     "-//WEBTECHS//DTD MOZILLA HTML//EN" => 1,
2591     "-/W3C/DTD HTML 4.0 TRANSITIONAL/EN" => 1,
2592     "HTML" => 1,
2593     }->{$pubid}) {
2594 wakaba 1.79 !!!cp ('t5');
2595 wakaba 1.18 $self->{document}->manakai_compat_mode ('quirks');
2596     } elsif ($pubid eq "-//W3C//DTD HTML 4.01 FRAMESET//EN" or
2597     $pubid eq "-//W3C//DTD HTML 4.01 TRANSITIONAL//EN") {
2598     if (defined $token->{system_identifier}) {
2599 wakaba 1.79 !!!cp ('t6');
2600 wakaba 1.18 $self->{document}->manakai_compat_mode ('quirks');
2601     } else {
2602 wakaba 1.79 !!!cp ('t7');
2603 wakaba 1.18 $self->{document}->manakai_compat_mode ('limited quirks');
2604 wakaba 1.3 }
2605 wakaba 1.80 } elsif ($pubid eq "-//W3C//DTD XHTML 1.0 FRAMESET//EN" or
2606     $pubid eq "-//W3C//DTD XHTML 1.0 TRANSITIONAL//EN") {
2607 wakaba 1.79 !!!cp ('t8');
2608 wakaba 1.18 $self->{document}->manakai_compat_mode ('limited quirks');
2609 wakaba 1.79 } else {
2610     !!!cp ('t9');
2611 wakaba 1.18 }
2612 wakaba 1.79 } else {
2613     !!!cp ('t10');
2614 wakaba 1.18 }
         ## The system identifier check runs regardless of the branch
         ## taken above and can override the compat mode chosen there.
2615     if (defined $token->{system_identifier}) {
2616     my $sysid = $token->{system_identifier};
2617     $sysid =~ tr/A-Z/a-z/;
2618     if ($sysid eq "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
2619 wakaba 1.80 ## TODO: Check the spec: PUBLIC "(limited quirks)" "(quirks)"
2620 wakaba 1.18 $self->{document}->manakai_compat_mode ('quirks');
2621 wakaba 1.79 !!!cp ('t11');
2622     } else {
2623     !!!cp ('t12');
2624 wakaba 1.18 }
2625 wakaba 1.79 } else {
2626     !!!cp ('t13');
2627 wakaba 1.18 }
2628    
2629 wakaba 1.84 ## Go to the "before html" insertion mode.
2630 wakaba 1.18 !!!next-token;
2631     return;
2632     } elsif ({
2633 wakaba 1.55 START_TAG_TOKEN, 1,
2634     END_TAG_TOKEN, 1,
2635     END_OF_FILE_TOKEN, 1,
2636 wakaba 1.18 }->{$token->{type}}) {
2637 wakaba 1.79 !!!cp ('t14');
2638 wakaba 1.113 !!!parse-error (type => 'no DOCTYPE', token => $token);
2639 wakaba 1.18 $self->{document}->manakai_compat_mode ('quirks');
2640 wakaba 1.84 ## Go to the "before html" insertion mode.
2641 wakaba 1.18 ## reprocess
2642     return;
2643 wakaba 1.55 } elsif ($token->{type} == CHARACTER_TOKEN) {
         ## Leading whitespace is stripped from the token in place; only
         ## a non-empty remainder counts as content without a DOCTYPE.
2644 wakaba 1.18 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D
2645     ## Ignore the token
2646 wakaba 1.26
2647 wakaba 1.18 unless (length $token->{data}) {
2648 wakaba 1.79 !!!cp ('t15');
2649 wakaba 1.84 ## Stay in the insertion mode.
2650 wakaba 1.18 !!!next-token;
2651     redo INITIAL;
2652 wakaba 1.79 } else {
2653     !!!cp ('t16');
2654 wakaba 1.3 }
2655 wakaba 1.79 } else {
2656     !!!cp ('t17');
2657 wakaba 1.3 }
2658 wakaba 1.18
2659 wakaba 1.113 !!!parse-error (type => 'no DOCTYPE', token => $token);
2660 wakaba 1.18 $self->{document}->manakai_compat_mode ('quirks');
2661 wakaba 1.84 ## Go to the "before html" insertion mode.
2662 wakaba 1.18 ## reprocess
2663     return;
2664 wakaba 1.55 } elsif ($token->{type} == COMMENT_TOKEN) {
2665 wakaba 1.79 !!!cp ('t18');
2666 wakaba 1.18 my $comment = $self->{document}->create_comment ($token->{data});
2667     $self->{document}->append_child ($comment);
2668    
2669 wakaba 1.84 ## Stay in the insertion mode.
2670 wakaba 1.18 !!!next-token;
2671     redo INITIAL;
2672     } else {
2673 wakaba 1.55 die "$0: $token->{type}: Unknown token type";
2674 wakaba 1.18 }
2675     } # INITIAL
2676 wakaba 1.79
2677     die "$0: _tree_construction_initial: This should be never reached";
2678 wakaba 1.3 } # _tree_construction_initial
2679    
2680     sub _tree_construction_root_element ($) {
2681     my $self = shift;
2682 wakaba 1.84
2683     ## NOTE: "before html" insertion mode.
         ##
         ## Processes tokens (read from the enclosing lexical $token) until
         ## a root |html| element exists:
         ##   - DOCTYPE: parse error, ignored.  Comment: appended to the
         ##     document.  Whitespace-only character token: ignored.  In
         ##     all three cases the next token is fetched and the loop
         ##     continues.
         ##   - <html> start tag: the element is created from the token,
         ##     the application cache selection callback is invoked with
         ##     the |manifest| attribute value (or undef), the next token
         ##     is fetched and the sub returns.
         ##   - anything else: an |html| element is created without
         ##     attributes, the callback is invoked with undef, and the
         ##     sub returns WITHOUT consuming the token.
         ## In both returning cases the new element is appended to the
         ## document and pushed onto $self->{open_elements}.
         ## Dies on an unknown token type.
2684 wakaba 1.3
2685     B: {
2686 wakaba 1.55 if ($token->{type} == DOCTYPE_TOKEN) {
2687 wakaba 1.79 !!!cp ('t19');
2688 wakaba 1.113 !!!parse-error (type => 'in html:#DOCTYPE', token => $token);
2689 wakaba 1.3 ## Ignore the token
2690 wakaba 1.84 ## Stay in the insertion mode.
2691 wakaba 1.3 !!!next-token;
2692     redo B;
2693 wakaba 1.55 } elsif ($token->{type} == COMMENT_TOKEN) {
2694 wakaba 1.79 !!!cp ('t20');
2695 wakaba 1.3 my $comment = $self->{document}->create_comment ($token->{data});
2696     $self->{document}->append_child ($comment);
2697 wakaba 1.84 ## Stay in the insertion mode.
2698 wakaba 1.3 !!!next-token;
2699     redo B;
2700 wakaba 1.55 } elsif ($token->{type} == CHARACTER_TOKEN) {
2701 wakaba 1.26 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D
2702     ## Ignore the token.
2703    
2704 wakaba 1.3 unless (length $token->{data}) {
2705 wakaba 1.79 !!!cp ('t21');
2706 wakaba 1.84 ## Stay in the insertion mode.
2707 wakaba 1.3 !!!next-token;
2708     redo B;
2709 wakaba 1.79 } else {
2710     !!!cp ('t22');
2711 wakaba 1.3 }
2712 wakaba 1.79 } else {
2713     !!!cp ('t23');
2714 wakaba 1.3 }
2715 wakaba 1.61
         ## NOTE: The application cache selection callback is deliberately
         ## NOT invoked here.  This branch falls through to the shared
         ## code after the if-chain, which invokes it exactly once after
         ## the implied |html| element has been created; invoking it here
         ## as well made it run twice for character tokens.
2717    
2718     #
2719     } elsif ($token->{type} == START_TAG_TOKEN) {
2720 wakaba 1.84 if ($token->{tag_name} eq 'html') {
2721     my $root_element;
2722 wakaba 1.116 !!!create-element ($root_element, $token->{tag_name}, $token->{attributes}, $token);
2723 wakaba 1.84 $self->{document}->append_child ($root_element);
2724     push @{$self->{open_elements}}, [$root_element, 'html'];
2725    
2726     if ($token->{attributes}->{manifest}) {
2727     !!!cp ('t24');
2728     $self->{application_cache_selection}
2729     ->($token->{attributes}->{manifest}->{value});
2730     ## ISSUE: No relative reference resolution?
2731     } else {
2732     !!!cp ('t25');
2733     $self->{application_cache_selection}->(undef);
2734     }
2735    
2736     !!!next-token;
2737     return; ## Go to the "before head" insertion mode.
2738 wakaba 1.61 } else {
2739 wakaba 1.84 !!!cp ('t25.1');
2740     #
2741 wakaba 1.61 }
2742 wakaba 1.3 } elsif ({
2743 wakaba 1.55 END_TAG_TOKEN, 1,
2744     END_OF_FILE_TOKEN, 1,
2745 wakaba 1.3 }->{$token->{type}}) {
2746 wakaba 1.79 !!!cp ('t26');
2747 wakaba 1.3 #
2748     } else {
2749 wakaba 1.55 die "$0: $token->{type}: Unknown token type";
2750 wakaba 1.3 }
2751 wakaba 1.61
         ## Shared fall-through: every branch above that ends in a bare
         ## "#" arrives here to create the implied root element.
2752 wakaba 1.116 my $root_element; !!!create-element ($root_element, 'html',, $token);
2753 wakaba 1.84 $self->{document}->append_child ($root_element);
2754     push @{$self->{open_elements}}, [$root_element, 'html'];
2755    
2756     $self->{application_cache_selection}->(undef);
2757    
2758     ## NOTE: Reprocess the token.
2759     return; ## Go to the "before head" insertion mode.
2760    
2761     ## ISSUE: There is an issue in the spec
2762 wakaba 1.3 } # B
2763 wakaba 1.79
2764     die "$0: _tree_construction_root_element: This should never be reached";
2765 wakaba 1.3 } # _tree_construction_root_element
2766    
2767     sub _reset_insertion_mode ($) {
         ## Sets $self->{insertion_mode} from the stack of open elements.
         ## Walks $self->{open_elements} from the last entry towards the
         ## first; each entry is a [node, element name] pair.  The first
         ## entry whose name has a mapping below decides the mode.  At
         ## the bottom of the stack, $self->{inner_html_node} (when
         ## defined and not |td| / |th|) is used in place of the stack
         ## entry.  Returns nothing.
2768     my $self = shift;
2769    
2770     ## Step 1
2771     my $last;
2772    
2773     ## Step 2
2774     my $i = -1;
2775     my $node = $self->{open_elements}->[$i];
2776    
2777     ## Step 3
2778     S3: {
2779 wakaba 1.29 if ($self->{open_elements}->[0]->[0] eq $node->[0]) {
2780     $last = 1;
2781     if (defined $self->{inner_html_node}) {
2782     if ($self->{inner_html_node}->[1] eq 'td' or
2783     $self->{inner_html_node}->[1] eq 'th') {
2784 wakaba 1.79 !!!cp ('t27');
2785 wakaba 1.29 #
2786     } else {
2787 wakaba 1.79 !!!cp ('t28');
2788 wakaba 1.29 $node = $self->{inner_html_node};
2789     }
2790 wakaba 1.3 }
2791     }
2792    
2793     ## Step 4..13
2794     my $new_mode = {
2795 wakaba 1.54 select => IN_SELECT_IM,
2796 wakaba 1.83 ## NOTE: |option| and |optgroup| do not set
2797     ## insertion mode to "in select" by themselves.
2798 wakaba 1.54 td => IN_CELL_IM,
2799     th => IN_CELL_IM,
2800     tr => IN_ROW_IM,
2801     tbody => IN_TABLE_BODY_IM,
2802     thead => IN_TABLE_BODY_IM,
2803     tfoot => IN_TABLE_BODY_IM,
2804     caption => IN_CAPTION_IM,
2805     colgroup => IN_COLUMN_GROUP_IM,
2806     table => IN_TABLE_IM,
2807     head => IN_BODY_IM, # not in head!
2808     body => IN_BODY_IM,
2809     frameset => IN_FRAMESET_IM,
2810 wakaba 1.3 }->{$node->[1]};
         ## Explicit block instead of "ASSIGN and return if ...": the old
         ## form only returned when the assigned constant was true, so a
         ## mode constant of 0 would have fallen through silently.
2811     if (defined $new_mode) { $self->{insertion_mode} = $new_mode; return }
2812    
2813     ## Step 14
2814     if ($node->[1] eq 'html') {
2815     unless (defined $self->{head_element}) {
2816 wakaba 1.79 !!!cp ('t29');
2817 wakaba 1.54 $self->{insertion_mode} = BEFORE_HEAD_IM;
2818 wakaba 1.3 } else {
2819 wakaba 1.81 ## ISSUE: Can this state be reached?
2820 wakaba 1.79 !!!cp ('t30');
2821 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
2822 wakaba 1.3 }
2823     return;
2824 wakaba 1.79 } else {
2825     !!!cp ('t31');
2826 wakaba 1.3 }
2827    
2828     ## Step 15
         ## Same reasoning as above: do not make the return depend on
         ## the truth value of IN_BODY_IM.
2829 wakaba 1.54 if ($last) { $self->{insertion_mode} = IN_BODY_IM; return }
2830 wakaba 1.3
2831     ## Step 16
2832     $i--;
2833     $node = $self->{open_elements}->[$i];
2834    
2835     ## Step 17
2836     redo S3;
2837     } # S3
2838 wakaba 1.79
2839     die "$0: _reset_insertion_mode: This line should never be reached";
2840 wakaba 1.3 } # _reset_insertion_mode
2841    
2842     sub _tree_construction_main ($) {
2843     my $self = shift;
2844    
2845 wakaba 1.1 my $active_formatting_elements = [];
2846    
2847     my $reconstruct_active_formatting_elements = sub { # MUST
         ## Closure over $active_formatting_elements and $self.
         ## Argument: $insert, a code reference that is called with each
         ## newly cloned element node.
         ## Entries of both lists are [node, name] pairs; a marker entry
         ## has the string '#marker' as its first member.
         ## Does nothing when the list is empty, or when its last entry is
         ## a marker or is still in $self->{open_elements}.  Otherwise it
         ## rewinds to the entry after the nearest marker / still-open
         ## entry (or to the first entry), then clones each entry from
         ## there to the end, passes the clone to $insert, pushes it onto
         ## the open elements, and replaces the list entry with it.
2848     my $insert = shift;
2849    
2850     ## Step 1
2851     return unless @$active_formatting_elements;
2852    
2853     ## Step 3
2854     my $i = -1;
2855     my $entry = $active_formatting_elements->[$i];
2856    
2857     ## Step 2
2858     return if $entry->[0] eq '#marker';
2859 wakaba 1.3 for (@{$self->{open_elements}}) {
2860 wakaba 1.1 if ($entry->[0] eq $_->[0]) {
2861 wakaba 1.79 !!!cp ('t32');
2862 wakaba 1.1 return;
2863     }
2864     }
2865    
         ## Rewind phase: $i is a negative index counted from the end.
2866     S4: {
2867     ## Step 4
2868     last S4 if $active_formatting_elements->[0]->[0] eq $entry->[0];
2869    
2870     ## Step 5
2871     $i--;
2872     $entry = $active_formatting_elements->[$i];
2873    
2874     ## Step 6
2875     if ($entry->[0] eq '#marker') {
2876 wakaba 1.81 !!!cp ('t33_1');
2877 wakaba 1.1 #
2878     } else {
2879     my $in_open_elements;
2880 wakaba 1.3 OE: for (@{$self->{open_elements}}) {
2881 wakaba 1.1 if ($entry->[0] eq $_->[0]) {
2882 wakaba 1.79 !!!cp ('t33');
2883 wakaba 1.1 $in_open_elements = 1;
2884     last OE;
2885     }
2886     }
2887     if ($in_open_elements) {
2888 wakaba 1.79 !!!cp ('t34');
2889 wakaba 1.1 #
2890     } else {
2891 wakaba 1.81 ## NOTE: <!DOCTYPE HTML><p><b><i><u></p> <p>X
2892 wakaba 1.79 !!!cp ('t35');
2893 wakaba 1.1 redo S4;
2894     }
2895     }
2896    
2897     ## Step 7
2898     $i++;
2899     $entry = $active_formatting_elements->[$i];
2900     } # S4
2901    
         ## Re-open phase: repeats until the clone just stored is the last
         ## entry of the list.
2902     S7: {
2903     ## Step 8
2904     my $clone = [$entry->[0]->clone_node (0), $entry->[1]];
2905    
2906     ## Step 9
2907     $insert->($clone->[0]);
2908 wakaba 1.3 push @{$self->{open_elements}}, $clone;
2909 wakaba 1.1
2910     ## Step 10
2911 wakaba 1.3 $active_formatting_elements->[$i] = $self->{open_elements}->[-1];
2912 wakaba 1.1
2913     ## Step 11
2914     unless ($clone->[0] eq $active_formatting_elements->[-1]->[0]) {
2915 wakaba 1.79 !!!cp ('t36');
2916 wakaba 1.1 ## Step 7'
2917     $i++;
2918     $entry = $active_formatting_elements->[$i];
2919    
2920     redo S7;
2921     }
2922 wakaba 1.79
2923     !!!cp ('t37');
2924 wakaba 1.1 } # S7
2925     }; # $reconstruct_active_formatting_elements
2926    
2927     my $clear_up_to_marker = sub {
         ## Closure over $active_formatting_elements.  Scans the list from
         ## the end; at the first '#marker' entry found, removes that entry
         ## and everything after it, then returns.  If the list holds no
         ## marker, it is left unchanged.
2928     for (reverse 0..$#$active_formatting_elements) {
2929     if ($active_formatting_elements->[$_]->[0] eq '#marker') {
2930 wakaba 1.79 !!!cp ('t38');
         ## Two-argument splice: drops index $_ through the end.
2931 wakaba 1.1 splice @$active_formatting_elements, $_;
2932     return;
2933     }
2934     }
2935 wakaba 1.79
2936     !!!cp ('t39');
2937 wakaba 1.1 }; # $clear_up_to_marker
2938    
2939 wakaba 1.96 my $insert;
2940    
2941     my $parse_rcdata = sub ($) {
         ## Closure over $self, $token and $insert.  Handles an element
         ## whose content is CDATA or RCDATA, starting from the start tag
         ## held in $token.
         ## Argument: $content_model_flag, expected to be
         ## CDATA_CONTENT_MODEL or RCDATA_CONTENT_MODEL; any other value
         ## dies if the element is not closed by its matching end tag.
         ## Leaves $token at the token following the one that ended the
         ## character run.
2942     my ($content_model_flag) = @_;
2943 wakaba 1.25
2944     ## Step 1
2945     my $start_tag_name = $token->{tag_name};
2946     my $el;
2947 wakaba 1.116 !!!create-element ($el, $start_tag_name, $token->{attributes}, $token);
2948 wakaba 1.25
2949     ## Step 2
2950 wakaba 1.96 $insert->($el);
2951 wakaba 1.25
2952     ## Step 3
2953 wakaba 1.40 $self->{content_model} = $content_model_flag; # CDATA or RCDATA
2954 wakaba 1.13 delete $self->{escape}; # MUST
2955 wakaba 1.25
2956     ## Step 4
         ## Concatenate the data of all consecutive character tokens.
2957 wakaba 1.1 my $text = '';
2958     !!!next-token;
2959 wakaba 1.55 while ($token->{type} == CHARACTER_TOKEN) { # or until stop tokenizing
2960 wakaba 1.79 !!!cp ('t40');
2961 wakaba 1.1 $text .= $token->{data};
2962     !!!next-token;
2963 wakaba 1.25 }
2964    
2965     ## Step 5
2966 wakaba 1.1 if (length $text) {
2967 wakaba 1.79 !!!cp ('t41');
         ## The inner |my $text| shadows the string collected above; the
         ## right-hand side still refers to the outer string.
2968 wakaba 1.25 my $text = $self->{document}->create_text_node ($text);
2969     $el->append_child ($text);
2970 wakaba 1.1 }
2971 wakaba 1.25
2972     ## Step 6
2973 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL;
2974 wakaba 1.25
2975     ## Step 7
2976 wakaba 1.79 if ($token->{type} == END_TAG_TOKEN and
2977     $token->{tag_name} eq $start_tag_name) {
2978     !!!cp ('t42');
2979 wakaba 1.1 ## Ignore the token
2980     } else {
2981 wakaba 1.96 ## NOTE: An end-of-file token.
2982     if ($content_model_flag == CDATA_CONTENT_MODEL) {
2983     !!!cp ('t43');
2984 wakaba 1.113 !!!parse-error (type => 'in CDATA:#'.$token->{type}, token => $token);
2985 wakaba 1.96 } elsif ($content_model_flag == RCDATA_CONTENT_MODEL) {
2986     !!!cp ('t44');
2987 wakaba 1.113 !!!parse-error (type => 'in RCDATA:#'.$token->{type}, token => $token);
2988 wakaba 1.96 } else {
2989     die "$0: $content_model_flag in parse_rcdata";
2990     }
2991 wakaba 1.1 }
2992     !!!next-token;
2993 wakaba 1.25 }; # $parse_rcdata
2994 wakaba 1.1
2995 wakaba 1.96 my $script_start_tag = sub () {
         ## Closure over $self, $token and $insert.  Handles a |script|
         ## start tag held in $token: creates the element, switches the
         ## content model to CDATA, collects the following character
         ## tokens as the element's text, switches back to PCDATA, and
         ## reports a parse error unless the next token is the matching
         ## </script> end tag.  The element is passed to $insert only
         ## when $self->{inner_html_node} is not defined.  Takes no
         ## arguments; leaves $token at the token after the one that
         ## ended the character run.
2996 wakaba 1.1 my $script_el;
2997 wakaba 1.116 !!!create-element ($script_el, 'script', $token->{attributes}, $token);
2998 wakaba 1.1 ## TODO: mark as "parser-inserted"
2999    
3000 wakaba 1.40 $self->{content_model} = CDATA_CONTENT_MODEL;
3001 wakaba 1.13 delete $self->{escape}; # MUST
3002 wakaba 1.1
3003     my $text = '';
3004     !!!next-token;
3005 wakaba 1.55 while ($token->{type} == CHARACTER_TOKEN) {
3006 wakaba 1.79 !!!cp ('t45');
3007 wakaba 1.1 $text .= $token->{data};
3008     !!!next-token;
3009     } # stop if non-character token or tokenizer stops tokenising
3010     if (length $text) {
3011 wakaba 1.79 !!!cp ('t46');
3012 wakaba 1.1 $script_el->manakai_append_text ($text);
3013     }
3014    
3015 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL;
3016 wakaba 1.1
3017 wakaba 1.55 if ($token->{type} == END_TAG_TOKEN and
3018 wakaba 1.1 $token->{tag_name} eq 'script') {
3019 wakaba 1.79 !!!cp ('t47');
3020 wakaba 1.1 ## Ignore the token
3021     } else {
3022 wakaba 1.79 !!!cp ('t48');
3023 wakaba 1.113 !!!parse-error (type => 'in CDATA:#'.$token->{type}, token => $token);
3024 wakaba 1.1 ## ISSUE: And ignore?
3025     ## TODO: mark as "already executed"
3026     }
3027    
3028 wakaba 1.3 if (defined $self->{inner_html_node}) {
3029 wakaba 1.79 !!!cp ('t49');
3030 wakaba 1.3 ## TODO: mark as "already executed"
3031     } else {
3032 wakaba 1.79 !!!cp ('t50');
3033 wakaba 1.1 ## TODO: $old_insertion_point = current insertion point
3034     ## TODO: insertion point = just before the next input character
3035 wakaba 1.25
3036     $insert->($script_el);
3037 wakaba 1.1
3038     ## TODO: insertion point = $old_insertion_point (might be "undefined")
3039    
3040     ## TODO: if there is a script that will execute as soon as the parser resumes, then...
3041     }
3042    
3043     !!!next-token;
3044     }; # $script_start_tag
3045    
3046 wakaba 1.102 ## NOTE: $open_tables->[-1]->[0] is the "current table" element node.
3047     ## NOTE: $open_tables->[-1]->[1] is the "tainted" flag.
3048     my $open_tables = [[$self->{open_elements}->[0]->[0]]];
3049    
3050 wakaba 1.1 my $formatting_end_tag = sub {
3051 wakaba 1.113 my $end_tag_token = shift;
3052     my $tag_name = $end_tag_token->{tag_name};
3053 wakaba 1.1
3054 wakaba 1.103 ## NOTE: The adoption agency algorithm (AAA).
3055 wakaba 1.102
3056 wakaba 1.1 FET: {
3057     ## Step 1
3058     my $formatting_element;
3059     my $formatting_element_i_in_active;
3060     AFE: for (reverse 0..$#$active_formatting_elements) {
3061     if ($active_formatting_elements->[$_]->[1] eq $tag_name) {
3062 wakaba 1.79 !!!cp ('t51');
3063 wakaba 1.1 $formatting_element = $active_formatting_elements->[$_];
3064     $formatting_element_i_in_active = $_;
3065     last AFE;
3066     } elsif ($active_formatting_elements->[$_]->[0] eq '#marker') {
3067 wakaba 1.79 !!!cp ('t52');
3068 wakaba 1.1 last AFE;
3069     }
3070     } # AFE
3071     unless (defined $formatting_element) {
3072 wakaba 1.79 !!!cp ('t53');
3073 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$tag_name, token => $end_tag_token);
3074 wakaba 1.1 ## Ignore the token
3075     !!!next-token;
3076     return;
3077     }
3078     ## has an element in scope
3079     my $in_scope = 1;
3080     my $formatting_element_i_in_open;
3081 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3082     my $node = $self->{open_elements}->[$_];
3083 wakaba 1.1 if ($node->[0] eq $formatting_element->[0]) {
3084     if ($in_scope) {
3085 wakaba 1.79 !!!cp ('t54');
3086 wakaba 1.1 $formatting_element_i_in_open = $_;
3087     last INSCOPE;
3088     } else { # in open elements but not in scope
3089 wakaba 1.79 !!!cp ('t55');
3090 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name},
3091     token => $end_tag_token);
3092 wakaba 1.1 ## Ignore the token
3093     !!!next-token;
3094     return;
3095     }
3096     } elsif ({
3097 wakaba 1.103 applet => 1, table => 1, caption => 1, td => 1, th => 1,
3098 wakaba 1.1 button => 1, marquee => 1, object => 1, html => 1,
3099     }->{$node->[1]}) {
3100 wakaba 1.79 !!!cp ('t56');
3101 wakaba 1.1 $in_scope = 0;
3102     }
3103     } # INSCOPE
3104     unless (defined $formatting_element_i_in_open) {
3105 wakaba 1.79 !!!cp ('t57');
3106 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name},
3107     token => $end_tag_token);
3108 wakaba 1.1 pop @$active_formatting_elements; # $formatting_element
3109     !!!next-token; ## TODO: ok?
3110     return;
3111     }
3112 wakaba 1.3 if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
3113 wakaba 1.79 !!!cp ('t58');
3114 wakaba 1.113 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1],
3115     token => $end_tag_token);
3116 wakaba 1.1 }
3117    
3118     ## Step 2
3119     my $furthest_block;
3120     my $furthest_block_i_in_open;
3121 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3122     my $node = $self->{open_elements}->[$_];
3123 wakaba 1.1 if (not $formatting_category->{$node->[1]} and
3124     #not $phrasing_category->{$node->[1]} and
3125     ($special_category->{$node->[1]} or
3126 wakaba 1.103 $scoping_category->{$node->[1]})) { ## Scoping is redundant, maybe
3127 wakaba 1.79 !!!cp ('t59');
3128 wakaba 1.1 $furthest_block = $node;
3129     $furthest_block_i_in_open = $_;
3130     } elsif ($node->[0] eq $formatting_element->[0]) {
3131 wakaba 1.79 !!!cp ('t60');
3132 wakaba 1.1 last OE;
3133     }
3134     } # OE
3135    
3136     ## Step 3
3137     unless (defined $furthest_block) { # MUST
3138 wakaba 1.79 !!!cp ('t61');
3139 wakaba 1.3 splice @{$self->{open_elements}}, $formatting_element_i_in_open;
3140 wakaba 1.1 splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
3141     !!!next-token;
3142     return;
3143     }
3144    
3145     ## Step 4
3146 wakaba 1.3 my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1];
3147 wakaba 1.1
3148     ## Step 5
3149     my $furthest_block_parent = $furthest_block->[0]->parent_node;
3150     if (defined $furthest_block_parent) {
3151 wakaba 1.79 !!!cp ('t62');
3152 wakaba 1.1 $furthest_block_parent->remove_child ($furthest_block->[0]);
3153     }
3154    
3155     ## Step 6
3156     my $bookmark_prev_el
3157     = $active_formatting_elements->[$formatting_element_i_in_active - 1]
3158     ->[0];
3159    
3160     ## Step 7
3161     my $node = $furthest_block;
3162     my $node_i_in_open = $furthest_block_i_in_open;
3163     my $last_node = $furthest_block;
3164     S7: {
3165     ## Step 1
3166     $node_i_in_open--;
3167 wakaba 1.3 $node = $self->{open_elements}->[$node_i_in_open];
3168 wakaba 1.1
3169     ## Step 2
3170     my $node_i_in_active;
3171     S7S2: {
3172     for (reverse 0..$#$active_formatting_elements) {
3173     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
3174 wakaba 1.79 !!!cp ('t63');
3175 wakaba 1.1 $node_i_in_active = $_;
3176     last S7S2;
3177     }
3178     }
3179 wakaba 1.3 splice @{$self->{open_elements}}, $node_i_in_open, 1;
3180 wakaba 1.1 redo S7;
3181     } # S7S2
3182    
3183     ## Step 3
3184     last S7 if $node->[0] eq $formatting_element->[0];
3185    
3186     ## Step 4
3187     if ($last_node->[0] eq $furthest_block->[0]) {
3188 wakaba 1.79 !!!cp ('t64');
3189 wakaba 1.1 $bookmark_prev_el = $node->[0];
3190     }
3191    
3192     ## Step 5
3193     if ($node->[0]->has_child_nodes ()) {
3194 wakaba 1.79 !!!cp ('t65');
3195 wakaba 1.1 my $clone = [$node->[0]->clone_node (0), $node->[1]];
3196     $active_formatting_elements->[$node_i_in_active] = $clone;
3197 wakaba 1.3 $self->{open_elements}->[$node_i_in_open] = $clone;
3198 wakaba 1.1 $node = $clone;
3199     }
3200    
3201     ## Step 6
3202     $node->[0]->append_child ($last_node->[0]);
3203    
3204     ## Step 7
3205     $last_node = $node;
3206    
3207     ## Step 8
3208     redo S7;
3209     } # S7
3210    
3211     ## Step 8
3212 wakaba 1.102 if ({
3213     table => 1, tbody => 1, tfoot => 1, thead => 1, tr => 1,
3214     }->{$common_ancestor_node->[1]}) {
3215     my $foster_parent_element;
3216     my $next_sibling;
3217     OE: for (reverse 0..$#{$self->{open_elements}}) {
3218     if ($self->{open_elements}->[$_]->[1] eq 'table') {
3219     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
3220     if (defined $parent and $parent->node_type == 1) {
3221     !!!cp ('t65.1');
3222     $foster_parent_element = $parent;
3223     $next_sibling = $self->{open_elements}->[$_]->[0];
3224     } else {
3225     !!!cp ('t65.2');
3226     $foster_parent_element
3227     = $self->{open_elements}->[$_ - 1]->[0];
3228     }
3229     last OE;
3230     }
3231     } # OE
3232     $foster_parent_element = $self->{open_elements}->[0]->[0]
3233     unless defined $foster_parent_element;
3234     $foster_parent_element->insert_before ($last_node->[0], $next_sibling);
3235     $open_tables->[-1]->[1] = 1; # tainted
3236     } else {
3237     !!!cp ('t65.3');
3238     $common_ancestor_node->[0]->append_child ($last_node->[0]);
3239     }
3240 wakaba 1.1
3241     ## Step 9
3242     my $clone = [$formatting_element->[0]->clone_node (0),
3243     $formatting_element->[1]];
3244    
3245     ## Step 10
3246     my @cn = @{$furthest_block->[0]->child_nodes};
3247     $clone->[0]->append_child ($_) for @cn;
3248    
3249     ## Step 11
3250     $furthest_block->[0]->append_child ($clone->[0]);
3251    
3252     ## Step 12
3253     my $i;
3254     AFE: for (reverse 0..$#$active_formatting_elements) {
3255     if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
3256 wakaba 1.79 !!!cp ('t66');
3257 wakaba 1.1 splice @$active_formatting_elements, $_, 1;
3258     $i-- and last AFE if defined $i;
3259     } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
3260 wakaba 1.79 !!!cp ('t67');
3261 wakaba 1.1 $i = $_;
3262     }
3263     } # AFE
3264     splice @$active_formatting_elements, $i + 1, 0, $clone;
3265    
3266     ## Step 13
3267     undef $i;
3268 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3269     if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
3270 wakaba 1.79 !!!cp ('t68');
3271 wakaba 1.3 splice @{$self->{open_elements}}, $_, 1;
3272 wakaba 1.1 $i-- and last OE if defined $i;
3273 wakaba 1.3 } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
3274 wakaba 1.79 !!!cp ('t69');
3275 wakaba 1.1 $i = $_;
3276     }
3277     } # OE
3278 wakaba 1.3 splice @{$self->{open_elements}}, $i + 1, 1, $clone;
3279 wakaba 1.1
3280     ## Step 14
3281     redo FET;
3282     } # FET
3283     }; # $formatting_end_tag
3284    
## Plain insertion routine: appends the node passed as the first argument
## to the node of the last entry in |$self->{open_elements}|.
## The same code reference is stored both in the new lexical
## |$insert_to_current| and in |$insert|, which is declared outside this
## block.
3285 wakaba 1.96 $insert = my $insert_to_current = sub {
3286 wakaba 1.25 $self->{open_elements}->[-1]->[0]->append_child ($_[0]);
3287 wakaba 1.1 }; # $insert_to_current
3288    
## Insertion routine with foster parenting: takes the node to insert as
## its only argument.  Entries of |$self->{open_elements}| are read here as
## [node, tag name] pairs.
3289     my $insert_to_foster = sub {
3290 wakaba 1.95 my $child = shift;
## Foster parenting applies only when the last open element is one of the
## table structure elements listed below; otherwise the node is simply
## appended to the last open element (checkpoint t72).
3291     if ({
3292     table => 1, tbody => 1, tfoot => 1, thead => 1, tr => 1,
3293     }->{$self->{open_elements}->[-1]->[1]}) {
3294     # MUST
3295     my $foster_parent_element;
3296     my $next_sibling;
## Search the stack of open elements from the last entry towards the
## first for the nearest |table| entry.
3297 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3298     if ($self->{open_elements}->[$_]->[1] eq 'table') {
3299     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
## If the table has a parent whose node type is 1 (presumably the DOM
## element node type -- confirm), insert into that parent, before the
## table itself.
3300 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
3301 wakaba 1.79 !!!cp ('t70');
3302 wakaba 1.1 $foster_parent_element = $parent;
3303 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
3304 wakaba 1.1 } else {
3305 wakaba 1.79 !!!cp ('t71');
## Otherwise use the entry just before the table in the stack of open
## elements; |$next_sibling| stays undefined in this case.
3306 wakaba 1.1 $foster_parent_element
3307 wakaba 1.3 = $self->{open_elements}->[$_ - 1]->[0];
3308 wakaba 1.1 }
3309     last OE;
3310     }
3311     } # OE
## No |table| entry was found: fall back to the first entry of the stack
## of open elements.
3312 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0]
3313 wakaba 1.1 unless defined $foster_parent_element;
## NOTE(review): when |$next_sibling| is undefined this presumably behaves
## as an append to the foster parent -- confirm against the DOM
## implementation in use.
3314     $foster_parent_element->insert_before
3315     ($child, $next_sibling);
## Set the flag in slot 1 of the last |$open_tables| entry
## (|$open_tables| is declared outside this block).
3316 wakaba 1.95 $open_tables->[-1]->[1] = 1; # tainted
3317     } else {
3318     !!!cp ('t72');
3319     $self->{open_elements}->[-1]->[0]->append_child ($child);
3320     }
3321 wakaba 1.1 }; # $insert_to_foster
3322    
3323 wakaba 1.52 B: {
3324 wakaba 1.55 if ($token->{type} == DOCTYPE_TOKEN) {
3325 wakaba 1.79 !!!cp ('t73');
3326 wakaba 1.113 !!!parse-error (type => 'DOCTYPE in the middle', token => $token);
3327 wakaba 1.52 ## Ignore the token
3328     ## Stay in the phase
3329     !!!next-token;
3330     redo B;
3331 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN and
3332 wakaba 1.52 $token->{tag_name} eq 'html') {
3333 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
3334 wakaba 1.79 !!!cp ('t79');
3335 wakaba 1.113 !!!parse-error (type => 'after html:html', token => $token);
3336 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
3337     } elsif ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
3338 wakaba 1.79 !!!cp ('t80');
3339 wakaba 1.113 !!!parse-error (type => 'after html:html', token => $token);
3340 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
3341 wakaba 1.79 } else {
3342     !!!cp ('t81');
3343 wakaba 1.52 }
3344    
3345 wakaba 1.84 !!!cp ('t82');
3346 wakaba 1.113 !!!parse-error (type => 'not first start tag', token => $token);
3347 wakaba 1.52 my $top_el = $self->{open_elements}->[0]->[0];
3348     for my $attr_name (keys %{$token->{attributes}}) {
3349     unless ($top_el->has_attribute_ns (undef, $attr_name)) {
3350 wakaba 1.79 !!!cp ('t84');
3351 wakaba 1.52 $top_el->set_attribute_ns
3352     (undef, [undef, $attr_name],
3353     $token->{attributes}->{$attr_name}->{value});
3354     }
3355     }
3356     !!!next-token;
3357     redo B;
3358 wakaba 1.55 } elsif ($token->{type} == COMMENT_TOKEN) {
3359 wakaba 1.52 my $comment = $self->{document}->create_comment ($token->{data});
3360 wakaba 1.56 if ($self->{insertion_mode} & AFTER_HTML_IMS) {
3361 wakaba 1.79 !!!cp ('t85');
3362 wakaba 1.52 $self->{document}->append_child ($comment);
3363 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_BODY_IM) {
3364 wakaba 1.79 !!!cp ('t86');
3365 wakaba 1.52 $self->{open_elements}->[0]->[0]->append_child ($comment);
3366     } else {
3367 wakaba 1.79 !!!cp ('t87');
3368 wakaba 1.52 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3369     }
3370     !!!next-token;
3371     redo B;
3372 wakaba 1.56 } elsif ($self->{insertion_mode} & HEAD_IMS) {
3373 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
3374 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3375 wakaba 1.99 unless ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3376     !!!cp ('t88.2');
3377     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3378     } else {
3379     !!!cp ('t88.1');
3380     ## Ignore the token.
3381     !!!next-token;
3382     redo B;
3383     }
3384 wakaba 1.52 unless (length $token->{data}) {
3385 wakaba 1.79 !!!cp ('t88');
3386 wakaba 1.52 !!!next-token;
3387     redo B;
3388 wakaba 1.1 }
3389     }
3390 wakaba 1.52
3391 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3392 wakaba 1.79 !!!cp ('t89');
3393 wakaba 1.52 ## As if <head>
3394 wakaba 1.116 !!!create-element ($self->{head_element}, 'head',, $token);
3395 wakaba 1.52 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3396     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3397    
3398     ## Reprocess in the "in head" insertion mode...
3399     pop @{$self->{open_elements}};
3400    
3401     ## Reprocess in the "after head" insertion mode...
3402 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3403 wakaba 1.79 !!!cp ('t90');
3404 wakaba 1.52 ## As if </noscript>
3405     pop @{$self->{open_elements}};
3406 wakaba 1.113 !!!parse-error (type => 'in noscript:#character', token => $token);
3407 wakaba 1.1
3408 wakaba 1.52 ## Reprocess in the "in head" insertion mode...
3409     ## As if </head>
3410     pop @{$self->{open_elements}};
3411    
3412     ## Reprocess in the "after head" insertion mode...
3413 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
3414 wakaba 1.79 !!!cp ('t91');
3415 wakaba 1.52 pop @{$self->{open_elements}};
3416    
3417     ## Reprocess in the "after head" insertion mode...
3418 wakaba 1.79 } else {
3419     !!!cp ('t92');
3420 wakaba 1.1 }
3421 wakaba 1.52
3422     ## "after head" insertion mode
3423     ## As if <body>
3424 wakaba 1.116 !!!insert-element ('body',, $token);
3425 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
3426 wakaba 1.52 ## reprocess
3427     redo B;
3428 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
3429 wakaba 1.52 if ($token->{tag_name} eq 'head') {
3430 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3431 wakaba 1.79 !!!cp ('t93');
3432 wakaba 1.116 !!!create-element ($self->{head_element}, $token->{tag_name}, $token->{attributes}, $token);
3433 wakaba 1.52 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3434     push @{$self->{open_elements}}, [$self->{head_element}, $token->{tag_name}];
3435 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
3436 wakaba 1.52 !!!next-token;
3437     redo B;
3438 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
3439 wakaba 1.79 !!!cp ('t94');
3440 wakaba 1.54 #
3441     } else {
3442 wakaba 1.79 !!!cp ('t95');
3443 wakaba 1.113 !!!parse-error (type => 'in head:head', token => $token); # or in head noscript
3444 wakaba 1.52 ## Ignore the token
3445     !!!next-token;
3446     redo B;
3447     }
3448 wakaba 1.54 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3449 wakaba 1.79 !!!cp ('t96');
3450 wakaba 1.52 ## As if <head>
3451 wakaba 1.116 !!!create-element ($self->{head_element}, 'head',, $token);
3452 wakaba 1.52 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3453     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3454    
3455 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
3456 wakaba 1.52 ## Reprocess in the "in head" insertion mode...
3457 wakaba 1.79 } else {
3458     !!!cp ('t97');
3459 wakaba 1.1 }
3460 wakaba 1.52
3461 wakaba 1.49 if ($token->{tag_name} eq 'base') {
3462 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3463 wakaba 1.79 !!!cp ('t98');
3464 wakaba 1.49 ## As if </noscript>
3465     pop @{$self->{open_elements}};
3466 wakaba 1.113 !!!parse-error (type => 'in noscript:base', token => $token);
3467 wakaba 1.49
3468 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
3469 wakaba 1.49 ## Reprocess in the "in head" insertion mode...
3470 wakaba 1.79 } else {
3471     !!!cp ('t99');
3472 wakaba 1.49 }
3473    
3474     ## NOTE: There is a "as if in head" code clone.
3475 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
3476 wakaba 1.79 !!!cp ('t100');
3477 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
3478 wakaba 1.49 push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3479 wakaba 1.79 } else {
3480     !!!cp ('t101');
3481 wakaba 1.49 }
3482 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3483 wakaba 1.49 pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
3484 wakaba 1.100 pop @{$self->{open_elements}} # <head>
3485 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
3486 wakaba 1.49 !!!next-token;
3487     redo B;
3488     } elsif ($token->{tag_name} eq 'link') {
3489 wakaba 1.25 ## NOTE: There is a "as if in head" code clone.
3490 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
3491 wakaba 1.79 !!!cp ('t102');
3492 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
3493 wakaba 1.25 push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3494 wakaba 1.79 } else {
3495     !!!cp ('t103');
3496 wakaba 1.25 }
3497 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3498 wakaba 1.25 pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
3499 wakaba 1.100 pop @{$self->{open_elements}} # <head>
3500 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
3501 wakaba 1.1 !!!next-token;
3502 wakaba 1.25 redo B;
3503 wakaba 1.34 } elsif ($token->{tag_name} eq 'meta') {
3504     ## NOTE: There is a "as if in head" code clone.
3505 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
3506 wakaba 1.79 !!!cp ('t104');
3507 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
3508 wakaba 1.34 push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3509 wakaba 1.79 } else {
3510     !!!cp ('t105');
3511 wakaba 1.34 }
3512 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3513 wakaba 1.66 my $meta_el = pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
3514 wakaba 1.34
3515     unless ($self->{confident}) {
3516     if ($token->{attributes}->{charset}) { ## TODO: And if supported
3517 wakaba 1.79 !!!cp ('t106');
3518 wakaba 1.63 $self->{change_encoding}
3519 wakaba 1.114 ->($self, $token->{attributes}->{charset}->{value},
3520     $token);
3521 wakaba 1.66
3522     $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
3523     ->set_user_data (manakai_has_reference =>
3524     $token->{attributes}->{charset}
3525     ->{has_reference});
3526 wakaba 1.63 } elsif ($token->{attributes}->{content}) {
3527 wakaba 1.35 ## ISSUE: Algorithm name in the spec was incorrect so that not linked to the definition.
3528 wakaba 1.63 if ($token->{attributes}->{content}->{value}
3529 wakaba 1.70 =~ /\A[^;]*;[\x09-\x0D\x20]*[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
3530     [\x09-\x0D\x20]*=
3531 wakaba 1.34 [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
3532     ([^"'\x09-\x0D\x20][^\x09-\x0D\x20]*))/x) {
3533 wakaba 1.79 !!!cp ('t107');
3534 wakaba 1.63 $self->{change_encoding}
3535 wakaba 1.114 ->($self, defined $1 ? $1 : defined $2 ? $2 : $3,
3536     $token);
3537 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
3538     ->set_user_data (manakai_has_reference =>
3539     $token->{attributes}->{content}
3540     ->{has_reference});
3541 wakaba 1.79 } else {
3542     !!!cp ('t108');
3543 wakaba 1.63 }
3544 wakaba 1.34 }
3545 wakaba 1.66 } else {
3546     if ($token->{attributes}->{charset}) {
3547 wakaba 1.79 !!!cp ('t109');
3548 wakaba 1.66 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
3549     ->set_user_data (manakai_has_reference =>
3550     $token->{attributes}->{charset}
3551     ->{has_reference});
3552     }
3553 wakaba 1.68 if ($token->{attributes}->{content}) {
3554 wakaba 1.79 !!!cp ('t110');
3555 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
3556     ->set_user_data (manakai_has_reference =>
3557     $token->{attributes}->{content}
3558     ->{has_reference});
3559     }
3560 wakaba 1.34 }
3561    
3562 wakaba 1.100 pop @{$self->{open_elements}} # <head>
3563 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
3564 wakaba 1.34 !!!next-token;
3565     redo B;
3566 wakaba 1.49 } elsif ($token->{tag_name} eq 'title') {
3567 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3568 wakaba 1.79 !!!cp ('t111');
3569 wakaba 1.49 ## As if </noscript>
3570     pop @{$self->{open_elements}};
3571 wakaba 1.113 !!!parse-error (type => 'in noscript:title', token => $token);
3572 wakaba 1.49
3573 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
3574 wakaba 1.49 ## Reprocess in the "in head" insertion mode...
3575 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
3576 wakaba 1.79 !!!cp ('t112');
3577 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
3578 wakaba 1.25 push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3579 wakaba 1.79 } else {
3580     !!!cp ('t113');
3581 wakaba 1.25 }
3582 wakaba 1.49
3583     ## NOTE: There is a "as if in head" code clone.
3584 wakaba 1.31 my $parent = defined $self->{head_element} ? $self->{head_element}
3585     : $self->{open_elements}->[-1]->[0];
3586 wakaba 1.96 $parse_rcdata->(RCDATA_CONTENT_MODEL);
3587 wakaba 1.100 pop @{$self->{open_elements}} # <head>
3588 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
3589 wakaba 1.25 redo B;
3590     } elsif ($token->{tag_name} eq 'style') {
3591     ## NOTE: Or (scripting is enabled and tag_name eq 'noscript' and
3592 wakaba 1.54 ## insertion mode IN_HEAD_IM)
3593 wakaba 1.25 ## NOTE: There is a "as if in head" code clone.
3594 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
3595 wakaba 1.79 !!!cp ('t114');
3596 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
3597 wakaba 1.25 push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3598 wakaba 1.79 } else {
3599     !!!cp ('t115');
3600 wakaba 1.25 }
3601 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
3602 wakaba 1.100 pop @{$self->{open_elements}} # <head>
3603 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
3604 wakaba 1.25 redo B;
3605     } elsif ($token->{tag_name} eq 'noscript') {
3606 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_IM) {
3607 wakaba 1.79 !!!cp ('t116');
3608 wakaba 1.25 ## NOTE: and scripting is disabled
3609 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3610 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_NOSCRIPT_IM;
3611 wakaba 1.1 !!!next-token;
3612 wakaba 1.25 redo B;
3613 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3614 wakaba 1.79 !!!cp ('t117');
3615 wakaba 1.113 !!!parse-error (type => 'in noscript:noscript', token => $token);
3616 wakaba 1.1 ## Ignore the token
3617 wakaba 1.41 !!!next-token;
3618 wakaba 1.25 redo B;
3619 wakaba 1.1 } else {
3620 wakaba 1.79 !!!cp ('t118');
3621 wakaba 1.25 #
3622 wakaba 1.1 }
3623 wakaba 1.49 } elsif ($token->{tag_name} eq 'script') {
3624 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3625 wakaba 1.79 !!!cp ('t119');
3626 wakaba 1.49 ## As if </noscript>
3627     pop @{$self->{open_elements}};
3628 wakaba 1.113 !!!parse-error (type => 'in noscript:script', token => $token);
3629 wakaba 1.49
3630 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
3631 wakaba 1.49 ## Reprocess in the "in head" insertion mode...
3632 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
3633 wakaba 1.79 !!!cp ('t120');
3634 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
3635 wakaba 1.25 push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3636 wakaba 1.79 } else {
3637     !!!cp ('t121');
3638 wakaba 1.25 }
3639 wakaba 1.49
3640 wakaba 1.25 ## NOTE: There is a "as if in head" code clone.
3641 wakaba 1.100 $script_start_tag->();
3642     pop @{$self->{open_elements}} # <head>
3643 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
3644 wakaba 1.1 redo B;
3645 wakaba 1.49 } elsif ($token->{tag_name} eq 'body' or
3646 wakaba 1.25 $token->{tag_name} eq 'frameset') {
3647 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3648 wakaba 1.79 !!!cp ('t122');
3649 wakaba 1.49 ## As if </noscript>
3650     pop @{$self->{open_elements}};
3651 wakaba 1.113 !!!parse-error (type => 'in noscript:'.$token->{tag_name}, token => $token);
3652 wakaba 1.49
3653     ## Reprocess in the "in head" insertion mode...
3654     ## As if </head>
3655     pop @{$self->{open_elements}};
3656    
3657     ## Reprocess in the "after head" insertion mode...
3658 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
3659 wakaba 1.79 !!!cp ('t124');
3660 wakaba 1.49 pop @{$self->{open_elements}};
3661    
3662     ## Reprocess in the "after head" insertion mode...
3663 wakaba 1.79 } else {
3664     !!!cp ('t125');
3665 wakaba 1.49 }
3666    
3667     ## "after head" insertion mode
3668 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3669 wakaba 1.54 if ($token->{tag_name} eq 'body') {
3670 wakaba 1.79 !!!cp ('t126');
3671 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
3672     } elsif ($token->{tag_name} eq 'frameset') {
3673 wakaba 1.79 !!!cp ('t127');
3674 wakaba 1.54 $self->{insertion_mode} = IN_FRAMESET_IM;
3675     } else {
3676     die "$0: tag name: $self->{tag_name}";
3677     }
3678 wakaba 1.1 !!!next-token;
3679     redo B;
3680     } else {
3681 wakaba 1.79 !!!cp ('t128');
3682 wakaba 1.1 #
3683     }
3684 wakaba 1.49
3685 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3686 wakaba 1.79 !!!cp ('t129');
3687 wakaba 1.49 ## As if </noscript>
3688     pop @{$self->{open_elements}};
3689 wakaba 1.113 !!!parse-error (type => 'in noscript:/'.$token->{tag_name}, token => $token);
3690 wakaba 1.49
3691     ## Reprocess in the "in head" insertion mode...
3692     ## As if </head>
3693 wakaba 1.25 pop @{$self->{open_elements}};
3694 wakaba 1.49
3695     ## Reprocess in the "after head" insertion mode...
3696 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
3697 wakaba 1.79 !!!cp ('t130');
3698 wakaba 1.49 ## As if </head>
3699 wakaba 1.25 pop @{$self->{open_elements}};
3700 wakaba 1.49
3701     ## Reprocess in the "after head" insertion mode...
3702 wakaba 1.79 } else {
3703     !!!cp ('t131');
3704 wakaba 1.49 }
3705    
3706     ## "after head" insertion mode
3707     ## As if <body>
3708 wakaba 1.116 !!!insert-element ('body',, $token);
3709 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
3710 wakaba 1.49 ## reprocess
3711     redo B;
3712 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
3713 wakaba 1.49 if ($token->{tag_name} eq 'head') {
3714 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3715 wakaba 1.79 !!!cp ('t132');
3716 wakaba 1.50 ## As if <head>
3717 wakaba 1.116 !!!create-element ($self->{head_element}, 'head',, $token);
3718 wakaba 1.50 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3719     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3720    
3721     ## Reprocess in the "in head" insertion mode...
3722     pop @{$self->{open_elements}};
3723 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
3724 wakaba 1.50 !!!next-token;
3725     redo B;
3726 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3727 wakaba 1.79 !!!cp ('t133');
3728 wakaba 1.49 ## As if </noscript>
3729     pop @{$self->{open_elements}};
3730 wakaba 1.113 !!!parse-error (type => 'in noscript:/head', token => $token);
3731 wakaba 1.49
3732     ## Reprocess in the "in head" insertion mode...
3733 wakaba 1.50 pop @{$self->{open_elements}};
3734 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
3735 wakaba 1.50 !!!next-token;
3736     redo B;
3737 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
3738 wakaba 1.79 !!!cp ('t134');
3739 wakaba 1.49 pop @{$self->{open_elements}};
3740 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
3741 wakaba 1.49 !!!next-token;
3742     redo B;
3743     } else {
3744 wakaba 1.79 !!!cp ('t135');
3745 wakaba 1.49 #
3746     }
3747     } elsif ($token->{tag_name} eq 'noscript') {
3748 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3749 wakaba 1.79 !!!cp ('t136');
3750 wakaba 1.49 pop @{$self->{open_elements}};
3751 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
3752 wakaba 1.49 !!!next-token;
3753     redo B;
3754 wakaba 1.54 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3755 wakaba 1.79 !!!cp ('t137');
3756 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:noscript', token => $token);
3757 wakaba 1.50 ## Ignore the token ## ISSUE: An issue in the spec.
3758     !!!next-token;
3759     redo B;
3760 wakaba 1.49 } else {
3761 wakaba 1.79 !!!cp ('t138');
3762 wakaba 1.49 #
3763     }
3764     } elsif ({
3765 wakaba 1.31 body => 1, html => 1,
3766     }->{$token->{tag_name}}) {
3767 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3768 wakaba 1.79 !!!cp ('t139');
3769 wakaba 1.50 ## As if <head>
3770 wakaba 1.116 !!!create-element ($self->{head_element}, 'head',, $token);
3771 wakaba 1.50 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3772     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3773    
3774 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
3775 wakaba 1.50 ## Reprocess in the "in head" insertion mode...
3776 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3777 wakaba 1.79 !!!cp ('t140');
3778 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
3779 wakaba 1.49 ## Ignore the token
3780     !!!next-token;
3781     redo B;
3782 wakaba 1.79 } else {
3783     !!!cp ('t141');
3784 wakaba 1.49 }
3785 wakaba 1.50
3786     #
3787 wakaba 1.49 } elsif ({
3788 wakaba 1.31 p => 1, br => 1,
3789     }->{$token->{tag_name}}) {
3790 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3791 wakaba 1.79 !!!cp ('t142');
3792 wakaba 1.50 ## As if <head>
3793 wakaba 1.116 !!!create-element ($self->{head_element}, 'head',, $token);
3794 wakaba 1.50 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3795     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3796    
3797 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
3798 wakaba 1.50 ## Reprocess in the "in head" insertion mode...
3799 wakaba 1.79 } else {
3800     !!!cp ('t143');
3801 wakaba 1.50 }
3802    
3803 wakaba 1.1 #
3804 wakaba 1.25 } else {
3805 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
3806 wakaba 1.79 !!!cp ('t144');
3807 wakaba 1.54 #
3808     } else {
3809 wakaba 1.79 !!!cp ('t145');
3810 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
3811 wakaba 1.49 ## Ignore the token
3812     !!!next-token;
3813     redo B;
3814     }
3815     }
3816    
3817 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3818 wakaba 1.79 !!!cp ('t146');
3819 wakaba 1.49 ## As if </noscript>
3820     pop @{$self->{open_elements}};
3821 wakaba 1.113 !!!parse-error (type => 'in noscript:/'.$token->{tag_name}, token => $token);
3822 wakaba 1.49
3823     ## Reprocess in the "in head" insertion mode...
3824     ## As if </head>
3825     pop @{$self->{open_elements}};
3826    
3827     ## Reprocess in the "after head" insertion mode...
3828 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
3829 wakaba 1.79 !!!cp ('t147');
3830 wakaba 1.49 ## As if </head>
3831     pop @{$self->{open_elements}};
3832    
3833     ## Reprocess in the "after head" insertion mode...
3834 wakaba 1.54 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3835 wakaba 1.82 ## ISSUE: This case cannot be reached?
3836 wakaba 1.79 !!!cp ('t148');
3837 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
3838 wakaba 1.50 ## Ignore the token ## ISSUE: An issue in the spec.
3839     !!!next-token;
3840     redo B;
3841 wakaba 1.79 } else {
3842     !!!cp ('t149');
3843 wakaba 1.1 }
3844    
3845 wakaba 1.49 ## "after head" insertion mode
3846     ## As if <body>
3847 wakaba 1.116 !!!insert-element ('body',, $token);
3848 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
3849 wakaba 1.52 ## reprocess
3850     redo B;
3851 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
3852     if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3853     !!!cp ('t149.1');
3854    
3855     ## NOTE: As if <head>
3856 wakaba 1.116 !!!create-element ($self->{head_element}, 'head',, $token);
3857 wakaba 1.104 $self->{open_elements}->[-1]->[0]->append_child
3858     ($self->{head_element});
3859     #push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3860     #$self->{insertion_mode} = IN_HEAD_IM;
3861     ## NOTE: Reprocess.
3862    
3863     ## NOTE: As if </head>
3864     #pop @{$self->{open_elements}};
3865     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
3866     ## NOTE: Reprocess.
3867    
3868     #
3869     } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
3870     !!!cp ('t149.2');
3871    
3872     ## NOTE: As if </head>
3873     pop @{$self->{open_elements}};
3874     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
3875     ## NOTE: Reprocess.
3876    
3877     #
3878     } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3879     !!!cp ('t149.3');
3880    
3881 wakaba 1.113 !!!parse-error (type => 'in noscript:#eof', token => $token);
3882 wakaba 1.104
3883     ## As if </noscript>
3884     pop @{$self->{open_elements}};
3885     #$self->{insertion_mode} = IN_HEAD_IM;
3886     ## NOTE: Reprocess.
3887    
3888     ## NOTE: As if </head>
3889     pop @{$self->{open_elements}};
3890     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
3891     ## NOTE: Reprocess.
3892    
3893     #
3894     } else {
3895     !!!cp ('t149.4');
3896     #
3897     }
3898    
3899     ## NOTE: As if <body>
3900 wakaba 1.116 !!!insert-element ('body',, $token);
3901 wakaba 1.104 $self->{insertion_mode} = IN_BODY_IM;
3902     ## NOTE: Reprocess.
3903     redo B;
3904     } else {
3905     die "$0: $token->{type}: Unknown token type";
3906     }
3907 wakaba 1.52
3908     ## ISSUE: An issue in the spec.
3909 wakaba 1.56 } elsif ($self->{insertion_mode} & BODY_IMS) {
3910 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
3911 wakaba 1.79 !!!cp ('t150');
3912 wakaba 1.52 ## NOTE: There is a code clone of "character in body".
3913     $reconstruct_active_formatting_elements->($insert_to_current);
3914    
3915     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3916    
3917     !!!next-token;
3918     redo B;
3919 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
3920 wakaba 1.52 if ({
3921     caption => 1, col => 1, colgroup => 1, tbody => 1,
3922     td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
3923     }->{$token->{tag_name}}) {
3924 wakaba 1.54 if ($self->{insertion_mode} == IN_CELL_IM) {
3925 wakaba 1.52 ## have an element in table scope
3926 wakaba 1.108 for (reverse 0..$#{$self->{open_elements}}) {
3927 wakaba 1.52 my $node = $self->{open_elements}->[$_];
3928     if ($node->[1] eq 'td' or $node->[1] eq 'th') {
3929 wakaba 1.79 !!!cp ('t151');
3930 wakaba 1.108
3931     ## Close the cell
3932     !!!back-token; # <?>
3933 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => $node->[1],
3934     line => $token->{line},
3935     column => $token->{column}};
3936 wakaba 1.108 redo B;
3937 wakaba 1.52 } elsif ({
3938     table => 1, html => 1,
3939     }->{$node->[1]}) {
3940 wakaba 1.79 !!!cp ('t152');
3941 wakaba 1.108 ## ISSUE: This case can never be reached, maybe.
3942     last;
3943 wakaba 1.52 }
3944 wakaba 1.108 }
3945    
3946     !!!cp ('t153');
3947     !!!parse-error (type => 'start tag not allowed',
3948 wakaba 1.113 value => $token->{tag_name}, token => $token);
3949 wakaba 1.108 ## Ignore the token
3950     !!!next-token;
3951 wakaba 1.52 redo B;
3952 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_CAPTION_IM) {
3953 wakaba 1.113 !!!parse-error (type => 'not closed:caption', token => $token);
3954 wakaba 1.52
3955 wakaba 1.108 ## NOTE: As if </caption>.
3956 wakaba 1.52 ## have a table element in table scope
3957     my $i;
3958 wakaba 1.108 INSCOPE: {
3959     for (reverse 0..$#{$self->{open_elements}}) {
3960     my $node = $self->{open_elements}->[$_];
3961     if ($node->[1] eq 'caption') {
3962     !!!cp ('t155');
3963     $i = $_;
3964     last INSCOPE;
3965     } elsif ({
3966     table => 1, html => 1,
3967     }->{$node->[1]}) {
3968     !!!cp ('t156');
3969     last;
3970     }
3971 wakaba 1.52 }
3972 wakaba 1.108
3973     !!!cp ('t157');
3974     !!!parse-error (type => 'start tag not allowed',
3975 wakaba 1.113 value => $token->{tag_name}, token => $token);
3976 wakaba 1.108 ## Ignore the token
3977     !!!next-token;
3978     redo B;
3979 wakaba 1.52 } # INSCOPE
3980    
3981     ## generate implied end tags
3982 wakaba 1.86 while ({
3983     dd => 1, dt => 1, li => 1, p => 1,
3984     }->{$self->{open_elements}->[-1]->[1]}) {
3985 wakaba 1.79 !!!cp ('t158');
3986 wakaba 1.86 pop @{$self->{open_elements}};
3987 wakaba 1.52 }
3988    
3989     if ($self->{open_elements}->[-1]->[1] ne 'caption') {
3990 wakaba 1.79 !!!cp ('t159');
3991 wakaba 1.113 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1], token => $token);
3992 wakaba 1.79 } else {
3993     !!!cp ('t160');
3994 wakaba 1.52 }
3995    
3996     splice @{$self->{open_elements}}, $i;
3997    
3998     $clear_up_to_marker->();
3999    
4000 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
4001 wakaba 1.52
4002     ## reprocess
4003     redo B;
4004     } else {
4005 wakaba 1.79 !!!cp ('t161');
4006 wakaba 1.52 #
4007     }
4008     } else {
4009 wakaba 1.79 !!!cp ('t162');
4010 wakaba 1.52 #
4011     }
4012 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
4013 wakaba 1.52 if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
4014 wakaba 1.54 if ($self->{insertion_mode} == IN_CELL_IM) {
4015 wakaba 1.43 ## have an element in table scope
4016 wakaba 1.52 my $i;
4017 wakaba 1.43 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4018     my $node = $self->{open_elements}->[$_];
4019 wakaba 1.52 if ($node->[1] eq $token->{tag_name}) {
4020 wakaba 1.79 !!!cp ('t163');
4021 wakaba 1.52 $i = $_;
4022 wakaba 1.43 last INSCOPE;
4023     } elsif ({
4024     table => 1, html => 1,
4025     }->{$node->[1]}) {
4026 wakaba 1.79 !!!cp ('t164');
4027 wakaba 1.43 last INSCOPE;
4028     }
4029     } # INSCOPE
4030 wakaba 1.52 unless (defined $i) {
4031 wakaba 1.79 !!!cp ('t165');
4032 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4033 wakaba 1.43 ## Ignore the token
4034     !!!next-token;
4035     redo B;
4036     }
4037    
4038 wakaba 1.52 ## generate implied end tags
4039 wakaba 1.86 while ({
4040     dd => 1, dt => 1, li => 1, p => 1,
4041     }->{$self->{open_elements}->[-1]->[1]}) {
4042 wakaba 1.79 !!!cp ('t166');
4043 wakaba 1.86 pop @{$self->{open_elements}};
4044 wakaba 1.52 }
4045 wakaba 1.86
4046 wakaba 1.52 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
4047 wakaba 1.79 !!!cp ('t167');
4048 wakaba 1.113 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1], token => $token);
4049 wakaba 1.79 } else {
4050     !!!cp ('t168');
4051 wakaba 1.52 }
4052    
4053     splice @{$self->{open_elements}}, $i;
4054    
4055     $clear_up_to_marker->();
4056    
4057 wakaba 1.54 $self->{insertion_mode} = IN_ROW_IM;
4058 wakaba 1.52
4059     !!!next-token;
4060 wakaba 1.43 redo B;
4061 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_CAPTION_IM) {
4062 wakaba 1.79 !!!cp ('t169');
4063 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4064 wakaba 1.52 ## Ignore the token
4065     !!!next-token;
4066     redo B;
4067     } else {
4068 wakaba 1.79 !!!cp ('t170');
4069 wakaba 1.52 #
4070     }
4071     } elsif ($token->{tag_name} eq 'caption') {
4072 wakaba 1.54 if ($self->{insertion_mode} == IN_CAPTION_IM) {
4073 wakaba 1.43 ## have a table element in table scope
4074     my $i;
4075 wakaba 1.108 INSCOPE: {
4076     for (reverse 0..$#{$self->{open_elements}}) {
4077     my $node = $self->{open_elements}->[$_];
4078     if ($node->[1] eq $token->{tag_name}) {
4079     !!!cp ('t171');
4080     $i = $_;
4081     last INSCOPE;
4082     } elsif ({
4083     table => 1, html => 1,
4084     }->{$node->[1]}) {
4085     !!!cp ('t172');
4086     last;
4087     }
4088 wakaba 1.43 }
4089 wakaba 1.108
4090     !!!cp ('t173');
4091     !!!parse-error (type => 'unmatched end tag',
4092 wakaba 1.113 value => $token->{tag_name}, token => $token);
4093 wakaba 1.108 ## Ignore the token
4094     !!!next-token;
4095     redo B;
4096 wakaba 1.43 } # INSCOPE
4097    
4098     ## generate implied end tags
4099 wakaba 1.86 while ({
4100     dd => 1, dt => 1, li => 1, p => 1,
4101     }->{$self->{open_elements}->[-1]->[1]}) {
4102 wakaba 1.79 !!!cp ('t174');
4103 wakaba 1.86 pop @{$self->{open_elements}};
4104 wakaba 1.43 }
4105 wakaba 1.52
4106     if ($self->{open_elements}->[-1]->[1] ne 'caption') {
4107 wakaba 1.79 !!!cp ('t175');
4108 wakaba 1.113 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1], token => $token);
4109 wakaba 1.79 } else {
4110     !!!cp ('t176');
4111 wakaba 1.52 }
4112    
4113     splice @{$self->{open_elements}}, $i;
4114    
4115     $clear_up_to_marker->();
4116    
4117 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
4118 wakaba 1.52
4119     !!!next-token;
4120     redo B;
4121 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_CELL_IM) {
4122 wakaba 1.79 !!!cp ('t177');
4123 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4124 wakaba 1.52 ## Ignore the token
4125     !!!next-token;
4126     redo B;
4127     } else {
4128 wakaba 1.79 !!!cp ('t178');
4129 wakaba 1.52 #
4130     }
4131     } elsif ({
4132     table => 1, tbody => 1, tfoot => 1,
4133     thead => 1, tr => 1,
4134     }->{$token->{tag_name}} and
4135 wakaba 1.54 $self->{insertion_mode} == IN_CELL_IM) {
4136 wakaba 1.52 ## have an element in table scope
4137     my $i;
4138     my $tn;
4139 wakaba 1.108 INSCOPE: {
4140     for (reverse 0..$#{$self->{open_elements}}) {
4141     my $node = $self->{open_elements}->[$_];
4142     if ($node->[1] eq $token->{tag_name}) {
4143     !!!cp ('t179');
4144     $i = $_;
4145    
4146     ## Close the cell
4147     !!!back-token; # </?>
4148 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => $tn,
4149     line => $token->{line},
4150     column => $token->{column}};
4151 wakaba 1.108 redo B;
4152     } elsif ($node->[1] eq 'td' or $node->[1] eq 'th') {
4153     !!!cp ('t180');
4154     $tn = $node->[1];
4155     ## NOTE: There is exactly one |td| or |th| element
4156     ## in scope in the stack of open elements by definition.
4157     } elsif ({
4158     table => 1, html => 1,
4159     }->{$node->[1]}) {
4160     ## ISSUE: Can this be reached?
4161     !!!cp ('t181');
4162     last;
4163     }
4164 wakaba 1.52 }
4165 wakaba 1.108
4166 wakaba 1.79 !!!cp ('t182');
4167 wakaba 1.108 !!!parse-error (type => 'unmatched end tag',
4168 wakaba 1.113 value => $token->{tag_name}, token => $token);
4169 wakaba 1.52 ## Ignore the token
4170     !!!next-token;
4171     redo B;
4172 wakaba 1.108 } # INSCOPE
4173 wakaba 1.52 } elsif ($token->{tag_name} eq 'table' and
4174 wakaba 1.54 $self->{insertion_mode} == IN_CAPTION_IM) {
4175 wakaba 1.113 !!!parse-error (type => 'not closed:caption', token => $token);
4176 wakaba 1.52
4177     ## As if </caption>
4178     ## have a table element in table scope
4179     my $i;
4180     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4181     my $node = $self->{open_elements}->[$_];
4182     if ($node->[1] eq 'caption') {
4183 wakaba 1.79 !!!cp ('t184');
4184 wakaba 1.52 $i = $_;
4185     last INSCOPE;
4186     } elsif ({
4187     table => 1, html => 1,
4188     }->{$node->[1]}) {
4189 wakaba 1.79 !!!cp ('t185');
4190 wakaba 1.52 last INSCOPE;
4191     }
4192     } # INSCOPE
4193     unless (defined $i) {
4194 wakaba 1.79 !!!cp ('t186');
4195 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:caption', token => $token);
4196 wakaba 1.52 ## Ignore the token
4197     !!!next-token;
4198     redo B;
4199     }
4200    
4201     ## generate implied end tags
4202 wakaba 1.86 while ({
4203     dd => 1, dt => 1, li => 1, p => 1,
4204     }->{$self->{open_elements}->[-1]->[1]}) {
4205 wakaba 1.79 !!!cp ('t187');
4206 wakaba 1.86 pop @{$self->{open_elements}};
4207 wakaba 1.52 }
4208    
4209     if ($self->{open_elements}->[-1]->[1] ne 'caption') {
4210 wakaba 1.79 !!!cp ('t188');
4211 wakaba 1.113 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1], token => $token);
4212 wakaba 1.79 } else {
4213     !!!cp ('t189');
4214 wakaba 1.52 }
4215    
4216     splice @{$self->{open_elements}}, $i;
4217    
4218     $clear_up_to_marker->();
4219    
4220 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
4221 wakaba 1.52
4222     ## reprocess
4223     redo B;
4224     } elsif ({
4225     body => 1, col => 1, colgroup => 1, html => 1,
4226     }->{$token->{tag_name}}) {
4227 wakaba 1.56 if ($self->{insertion_mode} & BODY_TABLE_IMS) {
4228 wakaba 1.79 !!!cp ('t190');
4229 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4230 wakaba 1.52 ## Ignore the token
4231     !!!next-token;
4232     redo B;
4233     } else {
4234 wakaba 1.79 !!!cp ('t191');
4235 wakaba 1.52 #
4236     }
4237     } elsif ({
4238     tbody => 1, tfoot => 1,
4239     thead => 1, tr => 1,
4240     }->{$token->{tag_name}} and
4241 wakaba 1.54 $self->{insertion_mode} == IN_CAPTION_IM) {
4242 wakaba 1.79 !!!cp ('t192');
4243 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4244 wakaba 1.52 ## Ignore the token
4245     !!!next-token;
4246     redo B;
4247     } else {
4248 wakaba 1.79 !!!cp ('t193');
4249 wakaba 1.52 #
4250     }
4251 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4252     for my $entry (@{$self->{open_elements}}) {
4253     if (not {
4254     dd => 1, dt => 1, li => 1, p => 1, tbody => 1, td => 1, tfoot => 1,
4255     th => 1, thead => 1, tr => 1, body => 1, html => 1,
4256     }->{$entry->[1]}) {
4257     !!!cp ('t75');
4258 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
4259 wakaba 1.104 last;
4260     }
4261     }
4262    
4263     ## Stop parsing.
4264     last B;
4265 wakaba 1.52 } else {
4266     die "$0: $token->{type}: Unknown token type";
4267     }
4268    
4269     $insert = $insert_to_current;
4270     #
4271 wakaba 1.56 } elsif ($self->{insertion_mode} & TABLE_IMS) {
4272 wakaba 1.58 if ($token->{type} == CHARACTER_TOKEN) {
4273 wakaba 1.95 if (not $open_tables->[-1]->[1] and # tainted
4274     $token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4275     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4276 wakaba 1.52
4277 wakaba 1.95 unless (length $token->{data}) {
4278     !!!cp ('t194');
4279     !!!next-token;
4280     redo B;
4281     } else {
4282     !!!cp ('t195');
4283     }
4284     }
4285 wakaba 1.52
4286 wakaba 1.113 !!!parse-error (type => 'in table:#character', token => $token);
4287 wakaba 1.52
4288     ## As if in body, but insert into foster parent element
4289     ## ISSUE: Spec says that "whenever a node would be inserted
4290     ## into the current node" while characters might not be
4291     ## result in a new Text node.
4292     $reconstruct_active_formatting_elements->($insert_to_foster);
4293    
4294     if ({
4295     table => 1, tbody => 1, tfoot => 1,
4296     thead => 1, tr => 1,
4297     }->{$self->{open_elements}->[-1]->[1]}) {
4298     # MUST
4299     my $foster_parent_element;
4300     my $next_sibling;
4301     my $prev_sibling;
4302     OE: for (reverse 0..$#{$self->{open_elements}}) {
4303     if ($self->{open_elements}->[$_]->[1] eq 'table') {
4304     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
4305     if (defined $parent and $parent->node_type == 1) {
4306 wakaba 1.79 !!!cp ('t196');
4307 wakaba 1.52 $foster_parent_element = $parent;
4308     $next_sibling = $self->{open_elements}->[$_]->[0];
4309     $prev_sibling = $next_sibling->previous_sibling;
4310     } else {
4311 wakaba 1.79 !!!cp ('t197');
4312 wakaba 1.52 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
4313     $prev_sibling = $foster_parent_element->last_child;
4314     }
4315     last OE;
4316     }
4317     } # OE
4318     $foster_parent_element = $self->{open_elements}->[0]->[0] and
4319     $prev_sibling = $foster_parent_element->last_child
4320     unless defined $foster_parent_element;
4321     if (defined $prev_sibling and
4322     $prev_sibling->node_type == 3) {
4323 wakaba 1.79 !!!cp ('t198');
4324 wakaba 1.52 $prev_sibling->manakai_append_text ($token->{data});
4325     } else {
4326 wakaba 1.79 !!!cp ('t199');
4327 wakaba 1.52 $foster_parent_element->insert_before
4328     ($self->{document}->create_text_node ($token->{data}),
4329     $next_sibling);
4330     }
4331 wakaba 1.95 $open_tables->[-1]->[1] = 1; # tainted
4332     } else {
4333     !!!cp ('t200');
4334     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4335     }
4336 wakaba 1.52
4337 wakaba 1.95 !!!next-token;
4338     redo B;
4339 wakaba 1.58 } elsif ($token->{type} == START_TAG_TOKEN) {
4340 wakaba 1.52 if ({
4341 wakaba 1.54 tr => ($self->{insertion_mode} != IN_ROW_IM),
4342 wakaba 1.52 th => 1, td => 1,
4343     }->{$token->{tag_name}}) {
4344 wakaba 1.54 if ($self->{insertion_mode} == IN_TABLE_IM) {
4345 wakaba 1.52 ## Clear back to table context
4346     while ($self->{open_elements}->[-1]->[1] ne 'table' and
4347     $self->{open_elements}->[-1]->[1] ne 'html') {
4348 wakaba 1.79 !!!cp ('t201');
4349 wakaba 1.52 pop @{$self->{open_elements}};
4350 wakaba 1.43 }
4351    
4352 wakaba 1.116 !!!insert-element ('tbody',, $token);
4353 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
4354 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
4355     }
4356    
4357 wakaba 1.54 if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
4358 wakaba 1.52 unless ($token->{tag_name} eq 'tr') {
4359 wakaba 1.79 !!!cp ('t202');
4360 wakaba 1.113 !!!parse-error (type => 'missing start tag:tr', token => $token);
4361 wakaba 1.52 }
4362 wakaba 1.43
4363 wakaba 1.52 ## Clear back to table body context
4364     while (not {
4365     tbody => 1, tfoot => 1, thead => 1, html => 1,
4366     }->{$self->{open_elements}->[-1]->[1]}) {
4367 wakaba 1.79 !!!cp ('t203');
4368 wakaba 1.83 ## ISSUE: Can this case be reached?
4369 wakaba 1.52 pop @{$self->{open_elements}};
4370     }
4371 wakaba 1.43
4372 wakaba 1.54 $self->{insertion_mode} = IN_ROW_IM;
4373 wakaba 1.52 if ($token->{tag_name} eq 'tr') {
4374 wakaba 1.79 !!!cp ('t204');
4375 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4376 wakaba 1.52 !!!next-token;
4377     redo B;
4378     } else {
4379 wakaba 1.79 !!!cp ('t205');
4380 wakaba 1.116 !!!insert-element ('tr',, $token);
4381 wakaba 1.52 ## reprocess in the "in row" insertion mode
4382     }
4383 wakaba 1.79 } else {
4384     !!!cp ('t206');
4385 wakaba 1.52 }
4386    
4387     ## Clear back to table row context
4388     while (not {
4389     tr => 1, html => 1,
4390     }->{$self->{open_elements}->[-1]->[1]}) {
4391 wakaba 1.79 !!!cp ('t207');
4392 wakaba 1.52 pop @{$self->{open_elements}};
4393 wakaba 1.43 }
4394 wakaba 1.52
4395 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4396 wakaba 1.54 $self->{insertion_mode} = IN_CELL_IM;
4397 wakaba 1.52
4398     push @$active_formatting_elements, ['#marker', ''];
4399    
4400     !!!next-token;
4401     redo B;
4402     } elsif ({
4403     caption => 1, col => 1, colgroup => 1,
4404     tbody => 1, tfoot => 1, thead => 1,
4405 wakaba 1.54 tr => 1, # $self->{insertion_mode} == IN_ROW_IM
4406 wakaba 1.52 }->{$token->{tag_name}}) {
4407 wakaba 1.54 if ($self->{insertion_mode} == IN_ROW_IM) {
4408 wakaba 1.52 ## As if </tr>
4409 wakaba 1.43 ## have an element in table scope
4410     my $i;
4411     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4412     my $node = $self->{open_elements}->[$_];
4413 wakaba 1.52 if ($node->[1] eq 'tr') {
4414 wakaba 1.79 !!!cp ('t208');
4415 wakaba 1.43 $i = $_;
4416     last INSCOPE;
4417     } elsif ({
4418 wakaba 1.83 html => 1,
4419    
4420     ## NOTE: This element does not appear here, maybe.
4421     table => 1,
4422 wakaba 1.43 }->{$node->[1]}) {
4423 wakaba 1.79 !!!cp ('t209');
4424 wakaba 1.43 last INSCOPE;
4425     }
4426     } # INSCOPE
4427 wakaba 1.79 unless (defined $i) {
4428     !!!cp ('t210');
4429 wakaba 1.83 ## TODO: This type is wrong.
4430 wakaba 1.113 !!!parse-error (type => 'unmacthed end tag:'.$token->{tag_name}, token => $token);
4431 wakaba 1.52 ## Ignore the token
4432     !!!next-token;
4433 wakaba 1.43 redo B;
4434     }
4435    
4436 wakaba 1.52 ## Clear back to table row context
4437     while (not {
4438     tr => 1, html => 1,
4439     }->{$self->{open_elements}->[-1]->[1]}) {
4440 wakaba 1.79 !!!cp ('t211');
4441 wakaba 1.83 ## ISSUE: Can this case be reached?
4442 wakaba 1.52 pop @{$self->{open_elements}};
4443 wakaba 1.1 }
4444 wakaba 1.43
4445 wakaba 1.52 pop @{$self->{open_elements}}; # tr
4446 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
4447 wakaba 1.52 if ($token->{tag_name} eq 'tr') {
4448 wakaba 1.79 !!!cp ('t212');
4449 wakaba 1.52 ## reprocess
4450     redo B;
4451     } else {
4452 wakaba 1.79 !!!cp ('t213');
4453 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
4454     }
4455 wakaba 1.1 }
4456 wakaba 1.52
4457 wakaba 1.54 if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
4458 wakaba 1.52 ## have an element in table scope
4459 wakaba 1.43 my $i;
4460     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4461     my $node = $self->{open_elements}->[$_];
4462 wakaba 1.52 if ({
4463     tbody => 1, thead => 1, tfoot => 1,
4464     }->{$node->[1]}) {
4465 wakaba 1.79 !!!cp ('t214');
4466 wakaba 1.43 $i = $_;
4467     last INSCOPE;
4468     } elsif ({
4469     table => 1, html => 1,
4470     }->{$node->[1]}) {
4471 wakaba 1.79 !!!cp ('t215');
4472 wakaba 1.43 last INSCOPE;
4473     }
4474     } # INSCOPE
4475 wakaba 1.52 unless (defined $i) {
4476 wakaba 1.79 !!!cp ('t216');
4477 wakaba 1.82 ## TODO: This error type is wrong.
4478 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4479 wakaba 1.52 ## Ignore the token
4480     !!!next-token;
4481 wakaba 1.43 redo B;
4482     }
4483 wakaba 1.52
4484     ## Clear back to table body context
4485     while (not {
4486     tbody => 1, tfoot => 1, thead => 1, html => 1,
4487     }->{$self->{open_elements}->[-1]->[1]}) {
4488 wakaba 1.79 !!!cp ('t217');
4489 wakaba 1.83 ## ISSUE: Can this state be reached?
4490 wakaba 1.52 pop @{$self->{open_elements}};
4491 wakaba 1.43 }
4492    
4493 wakaba 1.52 ## As if <{current node}>
4494     ## have an element in table scope
4495     ## true by definition
4496 wakaba 1.43
4497 wakaba 1.52 ## Clear back to table body context
4498     ## nop by definition
4499 wakaba 1.43
4500 wakaba 1.52 pop @{$self->{open_elements}};
4501 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
4502 wakaba 1.52 ## reprocess in "in table" insertion mode...
4503 wakaba 1.79 } else {
4504     !!!cp ('t218');
4505 wakaba 1.52 }
4506    
4507     if ($token->{tag_name} eq 'col') {
4508     ## Clear back to table context
4509     while ($self->{open_elements}->[-1]->[1] ne 'table' and
4510     $self->{open_elements}->[-1]->[1] ne 'html') {
4511 wakaba 1.79 !!!cp ('t219');
4512 wakaba 1.83 ## ISSUE: Can this state be reached?
4513 wakaba 1.52 pop @{$self->{open_elements}};
4514     }
4515 wakaba 1.43
4516 wakaba 1.116 !!!insert-element ('colgroup',, $token);
4517 wakaba 1.54 $self->{insertion_mode} = IN_COLUMN_GROUP_IM;
4518 wakaba 1.52 ## reprocess
4519 wakaba 1.43 redo B;
4520 wakaba 1.52 } elsif ({
4521     caption => 1,
4522     colgroup => 1,
4523     tbody => 1, tfoot => 1, thead => 1,
4524     }->{$token->{tag_name}}) {
4525     ## Clear back to table context
4526     while ($self->{open_elements}->[-1]->[1] ne 'table' and
4527     $self->{open_elements}->[-1]->[1] ne 'html') {
4528 wakaba 1.79 !!!cp ('t220');
4529 wakaba 1.83 ## ISSUE: Can this state be reached?
4530 wakaba 1.52 pop @{$self->{open_elements}};
4531 wakaba 1.1 }
4532 wakaba 1.52
4533     push @$active_formatting_elements, ['#marker', '']
4534     if $token->{tag_name} eq 'caption';
4535    
4536 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4537 wakaba 1.52 $self->{insertion_mode} = {
4538 wakaba 1.54 caption => IN_CAPTION_IM,
4539     colgroup => IN_COLUMN_GROUP_IM,
4540     tbody => IN_TABLE_BODY_IM,
4541     tfoot => IN_TABLE_BODY_IM,
4542     thead => IN_TABLE_BODY_IM,
4543 wakaba 1.52 }->{$token->{tag_name}};
4544 wakaba 1.1 !!!next-token;
4545     redo B;
4546 wakaba 1.52 } else {
4547     die "$0: in table: <>: $token->{tag_name}";
4548 wakaba 1.1 }
4549 wakaba 1.52 } elsif ($token->{tag_name} eq 'table') {
4550 wakaba 1.113 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1], token => $token);
4551 wakaba 1.1
4552 wakaba 1.52 ## As if </table>
4553 wakaba 1.1 ## have a table element in table scope
4554     my $i;
4555 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4556     my $node = $self->{open_elements}->[$_];
4557 wakaba 1.52 if ($node->[1] eq 'table') {
4558 wakaba 1.79 !!!cp ('t221');
4559 wakaba 1.1 $i = $_;
4560     last INSCOPE;
4561     } elsif ({
4562 wakaba 1.83 #table => 1,
4563     html => 1,
4564 wakaba 1.1 }->{$node->[1]}) {
4565 wakaba 1.79 !!!cp ('t222');
4566 wakaba 1.1 last INSCOPE;
4567     }
4568     } # INSCOPE
4569     unless (defined $i) {
4570 wakaba 1.79 !!!cp ('t223');
4571 wakaba 1.83 ## TODO: The following is wrong, maybe.
4572 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:table', token => $token);
4573 wakaba 1.52 ## Ignore tokens </table><table>
4574 wakaba 1.1 !!!next-token;
4575     redo B;
4576     }
4577    
4578 wakaba 1.106 ## TODO: The following has been removed from the latest spec.
4579 wakaba 1.1 ## generate implied end tags
4580 wakaba 1.86 while ({
4581     dd => 1, dt => 1, li => 1, p => 1,
4582     }->{$self->{open_elements}->[-1]->[1]}) {
4583 wakaba 1.79 !!!cp ('t224');
4584 wakaba 1.86 pop @{$self->{open_elements}};
4585 wakaba 1.1 }
4586    
4587 wakaba 1.52 if ($self->{open_elements}->[-1]->[1] ne 'table') {
4588 wakaba 1.79 !!!cp ('t225');
4589 wakaba 1.83 ## ISSUE: Can this case be reached?
4590 wakaba 1.113 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1], token => $token);
4591 wakaba 1.79 } else {
4592     !!!cp ('t226');
4593 wakaba 1.1 }
4594    
4595 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4596 wakaba 1.95 pop @{$open_tables};
4597 wakaba 1.1
4598 wakaba 1.52 $self->_reset_insertion_mode;
4599 wakaba 1.1
4600     ## reprocess
4601     redo B;
4602 wakaba 1.100 } elsif ($token->{tag_name} eq 'style') {
4603     if (not $open_tables->[-1]->[1]) { # tainted
4604     !!!cp ('t227.8');
4605     ## NOTE: This is a "as if in head" code clone.
4606     $parse_rcdata->(CDATA_CONTENT_MODEL);
4607     redo B;
4608     } else {
4609     !!!cp ('t227.7');
4610     #
4611     }
4612     } elsif ($token->{tag_name} eq 'script') {
4613     if (not $open_tables->[-1]->[1]) { # tainted
4614     !!!cp ('t227.6');
4615     ## NOTE: This is a "as if in head" code clone.
4616     $script_start_tag->();
4617     redo B;
4618     } else {
4619     !!!cp ('t227.5');
4620     #
4621     }
4622 wakaba 1.98 } elsif ($token->{tag_name} eq 'input') {
4623     if (not $open_tables->[-1]->[1]) { # tainted
4624     if ($token->{attributes}->{type}) { ## TODO: case
4625     my $type = lc $token->{attributes}->{type}->{value};
4626     if ($type eq 'hidden') {
4627     !!!cp ('t227.3');
4628 wakaba 1.113 !!!parse-error (type => 'in table:'.$token->{tag_name}, token => $token);
4629 wakaba 1.98
4630 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4631 wakaba 1.98
4632     ## TODO: form element pointer
4633    
4634     pop @{$self->{open_elements}};
4635    
4636     !!!next-token;
4637     redo B;
4638     } else {
4639     !!!cp ('t227.2');
4640     #
4641     }
4642     } else {
4643     !!!cp ('t227.1');
4644     #
4645     }
4646     } else {
4647     !!!cp ('t227.4');
4648     #
4649     }
4650 wakaba 1.58 } else {
4651 wakaba 1.79 !!!cp ('t227');
4652 wakaba 1.58 #
4653     }
4654 wakaba 1.98
4655 wakaba 1.113 !!!parse-error (type => 'in table:'.$token->{tag_name}, token => $token);
4656 wakaba 1.98
4657     $insert = $insert_to_foster;
4658     #
4659 wakaba 1.58 } elsif ($token->{type} == END_TAG_TOKEN) {
4660 wakaba 1.52 if ($token->{tag_name} eq 'tr' and
4661 wakaba 1.54 $self->{insertion_mode} == IN_ROW_IM) {
4662 wakaba 1.52 ## have an element in table scope
4663     my $i;
4664     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4665     my $node = $self->{open_elements}->[$_];
4666     if ($node->[1] eq $token->{tag_name}) {
4667 wakaba 1.79 !!!cp ('t228');
4668 wakaba 1.52 $i = $_;
4669     last INSCOPE;
4670     } elsif ({
4671     table => 1, html => 1,
4672     }->{$node->[1]}) {
4673 wakaba 1.79 !!!cp ('t229');
4674 wakaba 1.52 last INSCOPE;
4675     }
4676     } # INSCOPE
4677     unless (defined $i) {
4678 wakaba 1.79 !!!cp ('t230');
4679 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4680 wakaba 1.52 ## Ignore the token
4681 wakaba 1.42 !!!next-token;
4682     redo B;
4683 wakaba 1.79 } else {
4684     !!!cp ('t232');
4685 wakaba 1.42 }
4686    
4687 wakaba 1.52 ## Clear back to table row context
4688     while (not {
4689     tr => 1, html => 1,
4690     }->{$self->{open_elements}->[-1]->[1]}) {
4691 wakaba 1.79 !!!cp ('t231');
4692 wakaba 1.83 ## ISSUE: Can this state be reached?
4693 wakaba 1.52 pop @{$self->{open_elements}};
4694     }
4695 wakaba 1.42
4696 wakaba 1.52 pop @{$self->{open_elements}}; # tr
4697 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
4698 wakaba 1.52 !!!next-token;
4699     redo B;
4700     } elsif ($token->{tag_name} eq 'table') {
4701 wakaba 1.54 if ($self->{insertion_mode} == IN_ROW_IM) {
4702 wakaba 1.52 ## As if </tr>
4703     ## have an element in table scope
4704     my $i;
4705     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4706     my $node = $self->{open_elements}->[$_];
4707     if ($node->[1] eq 'tr') {
4708 wakaba 1.79 !!!cp ('t233');
4709 wakaba 1.52 $i = $_;
4710     last INSCOPE;
4711     } elsif ({
4712     table => 1, html => 1,
4713     }->{$node->[1]}) {
4714 wakaba 1.79 !!!cp ('t234');
4715 wakaba 1.52 last INSCOPE;
4716 wakaba 1.42 }
4717 wakaba 1.52 } # INSCOPE
4718     unless (defined $i) {
4719 wakaba 1.79 !!!cp ('t235');
4720 wakaba 1.83 ## TODO: The following is wrong.
4721 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{type}, token => $token);
4722 wakaba 1.52 ## Ignore the token
4723     !!!next-token;
4724     redo B;
4725 wakaba 1.42 }
4726 wakaba 1.52
4727     ## Clear back to table row context
4728     while (not {
4729     tr => 1, html => 1,
4730     }->{$self->{open_elements}->[-1]->[1]}) {
4731 wakaba 1.79 !!!cp ('t236');
4732 wakaba 1.83 ## ISSUE: Can this state be reached?
4733 wakaba 1.46 pop @{$self->{open_elements}};
4734 wakaba 1.1 }
4735 wakaba 1.46
4736 wakaba 1.52 pop @{$self->{open_elements}}; # tr
4737 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
4738 wakaba 1.46 ## reprocess in the "in table body" insertion mode...
4739 wakaba 1.1 }
4740    
4741 wakaba 1.54 if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
4742 wakaba 1.52 ## have an element in table scope
4743     my $i;
4744     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4745     my $node = $self->{open_elements}->[$_];
4746     if ({
4747     tbody => 1, thead => 1, tfoot => 1,
4748     }->{$node->[1]}) {
4749 wakaba 1.79 !!!cp ('t237');
4750 wakaba 1.52 $i = $_;
4751     last INSCOPE;
4752     } elsif ({
4753     table => 1, html => 1,
4754     }->{$node->[1]}) {
4755 wakaba 1.79 !!!cp ('t238');
4756 wakaba 1.52 last INSCOPE;
4757     }
4758     } # INSCOPE
4759     unless (defined $i) {
4760 wakaba 1.79 !!!cp ('t239');
4761 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4762 wakaba 1.52 ## Ignore the token
4763     !!!next-token;
4764     redo B;
4765 wakaba 1.47 }
4766    
4767     ## Clear back to table body context
4768     while (not {
4769     tbody => 1, tfoot => 1, thead => 1, html => 1,
4770     }->{$self->{open_elements}->[-1]->[1]}) {
4771 wakaba 1.79 !!!cp ('t240');
4772 wakaba 1.47 pop @{$self->{open_elements}};
4773     }
4774    
4775 wakaba 1.52 ## As if <{current node}>
4776     ## have an element in table scope
4777     ## true by definition
4778    
4779     ## Clear back to table body context
4780     ## nop by definition
4781    
4782     pop @{$self->{open_elements}};
4783 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
4784 wakaba 1.52 ## reprocess in the "in table" insertion mode...
4785     }
4786    
4787 wakaba 1.94 ## NOTE: </table> in the "in table" insertion mode.
4788     ## When you edit the code fragment below, please ensure that
4789     ## the code for <table> in the "in table" insertion mode
4790     ## is synced with it.
4791    
4792 wakaba 1.52 ## have a table element in table scope
4793     my $i;
4794     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4795     my $node = $self->{open_elements}->[$_];
4796     if ($node->[1] eq $token->{tag_name}) {
4797 wakaba 1.79 !!!cp ('t241');
4798 wakaba 1.52 $i = $_;
4799     last INSCOPE;
4800     } elsif ({
4801     table => 1, html => 1,
4802     }->{$node->[1]}) {
4803 wakaba 1.79 !!!cp ('t242');
4804 wakaba 1.52 last INSCOPE;
4805 wakaba 1.47 }
4806 wakaba 1.52 } # INSCOPE
4807     unless (defined $i) {
4808 wakaba 1.79 !!!cp ('t243');
4809 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4810 wakaba 1.52 ## Ignore the token
4811     !!!next-token;
4812     redo B;
4813 wakaba 1.3 }
4814 wakaba 1.52
4815     splice @{$self->{open_elements}}, $i;
4816 wakaba 1.95 pop @{$open_tables};
4817 wakaba 1.1
4818 wakaba 1.52 $self->_reset_insertion_mode;
4819 wakaba 1.47
4820     !!!next-token;
4821     redo B;
4822     } elsif ({
4823 wakaba 1.48 tbody => 1, tfoot => 1, thead => 1,
4824 wakaba 1.52 }->{$token->{tag_name}} and
4825 wakaba 1.56 $self->{insertion_mode} & ROW_IMS) {
4826 wakaba 1.54 if ($self->{insertion_mode} == IN_ROW_IM) {
4827 wakaba 1.52 ## have an element in table scope
4828     my $i;
4829     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4830     my $node = $self->{open_elements}->[$_];
4831     if ($node->[1] eq $token->{tag_name}) {
4832 wakaba 1.79 !!!cp ('t247');
4833 wakaba 1.52 $i = $_;
4834     last INSCOPE;
4835     } elsif ({
4836     table => 1, html => 1,
4837     }->{$node->[1]}) {
4838 wakaba 1.79 !!!cp ('t248');
4839 wakaba 1.52 last INSCOPE;
4840     }
4841     } # INSCOPE
4842     unless (defined $i) {
4843 wakaba 1.79 !!!cp ('t249');
4844 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4845 wakaba 1.52 ## Ignore the token
4846     !!!next-token;
4847     redo B;
4848     }
4849    
4850 wakaba 1.48 ## As if </tr>
4851     ## have an element in table scope
4852     my $i;
4853     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4854     my $node = $self->{open_elements}->[$_];
4855     if ($node->[1] eq 'tr') {
4856 wakaba 1.79 !!!cp ('t250');
4857 wakaba 1.48 $i = $_;
4858     last INSCOPE;
4859     } elsif ({
4860     table => 1, html => 1,
4861     }->{$node->[1]}) {
4862 wakaba 1.79 !!!cp ('t251');
4863 wakaba 1.48 last INSCOPE;
4864     }
4865     } # INSCOPE
4866 wakaba 1.52 unless (defined $i) {
4867 wakaba 1.79 !!!cp ('t252');
4868 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:tr', token => $token);
4869 wakaba 1.52 ## Ignore the token
4870     !!!next-token;
4871     redo B;
4872     }
4873 wakaba 1.48
4874     ## Clear back to table row context
4875     while (not {
4876     tr => 1, html => 1,
4877     }->{$self->{open_elements}->[-1]->[1]}) {
4878 wakaba 1.79 !!!cp ('t253');
4879 wakaba 1.83 ## ISSUE: Can this case be reached?
4880 wakaba 1.48 pop @{$self->{open_elements}};
4881     }
4882    
4883     pop @{$self->{open_elements}}; # tr
4884 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
4885 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
4886     }
4887    
4888     ## have an element in table scope
4889     my $i;
4890     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4891     my $node = $self->{open_elements}->[$_];
4892     if ($node->[1] eq $token->{tag_name}) {
4893 wakaba 1.79 !!!cp ('t254');
4894 wakaba 1.52 $i = $_;
4895     last INSCOPE;
4896     } elsif ({
4897     table => 1, html => 1,
4898     }->{$node->[1]}) {
4899 wakaba 1.79 !!!cp ('t255');
4900 wakaba 1.52 last INSCOPE;
4901     }
4902     } # INSCOPE
4903     unless (defined $i) {
4904 wakaba 1.79 !!!cp ('t256');
4905 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4906 wakaba 1.52 ## Ignore the token
4907     !!!next-token;
4908     redo B;
4909     }
4910    
4911     ## Clear back to table body context
4912     while (not {
4913     tbody => 1, tfoot => 1, thead => 1, html => 1,
4914     }->{$self->{open_elements}->[-1]->[1]}) {
4915 wakaba 1.79 !!!cp ('t257');
4916 wakaba 1.83 ## ISSUE: Can this case be reached?
4917 wakaba 1.52 pop @{$self->{open_elements}};
4918     }
4919    
4920     pop @{$self->{open_elements}};
4921 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
4922 wakaba 1.52 !!!next-token;
4923     redo B;
4924     } elsif ({
4925     body => 1, caption => 1, col => 1, colgroup => 1,
4926     html => 1, td => 1, th => 1,
4927 wakaba 1.54 tr => 1, # $self->{insertion_mode} == IN_ROW_IM
4928     tbody => 1, tfoot => 1, thead => 1, # $self->{insertion_mode} == IN_TABLE_IM
4929 wakaba 1.52 }->{$token->{tag_name}}) {
4930 wakaba 1.79 !!!cp ('t258');
4931 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4932 wakaba 1.52 ## Ignore the token
4933     !!!next-token;
4934     redo B;
4935 wakaba 1.58 } else {
4936 wakaba 1.79 !!!cp ('t259');
4937 wakaba 1.113 !!!parse-error (type => 'in table:/'.$token->{tag_name}, token => $token);
4938 wakaba 1.52
4939 wakaba 1.58 $insert = $insert_to_foster;
4940     #
4941     }
4942 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4943     unless ($self->{open_elements}->[-1]->[1] eq 'html' and
4944     @{$self->{open_elements}} == 1) { # redundant, maybe
4945 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
4946 wakaba 1.104 !!!cp ('t259.1');
4947 wakaba 1.105 #
4948 wakaba 1.104 } else {
4949     !!!cp ('t259.2');
4950 wakaba 1.105 #
4951 wakaba 1.104 }
4952    
4953     ## Stop parsing
4954     last B;
4955 wakaba 1.58 } else {
4956     die "$0: $token->{type}: Unknown token type";
4957     }
4958 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_COLUMN_GROUP_IM) {
4959 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
4960 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4961     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4962     unless (length $token->{data}) {
4963 wakaba 1.79 !!!cp ('t260');
4964 wakaba 1.52 !!!next-token;
4965     redo B;
4966     }
4967     }
4968    
4969 wakaba 1.79 !!!cp ('t261');
4970 wakaba 1.52 #
4971 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
4972 wakaba 1.52 if ($token->{tag_name} eq 'col') {
4973 wakaba 1.79 !!!cp ('t262');
4974 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4975 wakaba 1.52 pop @{$self->{open_elements}};
4976     !!!next-token;
4977     redo B;
4978     } else {
4979 wakaba 1.79 !!!cp ('t263');
4980 wakaba 1.52 #
4981     }
4982 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
4983 wakaba 1.52 if ($token->{tag_name} eq 'colgroup') {
4984     if ($self->{open_elements}->[-1]->[1] eq 'html') {
4985 wakaba 1.79 !!!cp ('t264');
4986 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:colgroup', token => $token);
4987 wakaba 1.52 ## Ignore the token
4988     !!!next-token;
4989     redo B;
4990     } else {
4991 wakaba 1.79 !!!cp ('t265');
4992 wakaba 1.52 pop @{$self->{open_elements}}; # colgroup
4993 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
4994 wakaba 1.52 !!!next-token;
4995     redo B;
4996     }
4997     } elsif ($token->{tag_name} eq 'col') {
4998 wakaba 1.79 !!!cp ('t266');
4999 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:col', token => $token);
5000 wakaba 1.52 ## Ignore the token
5001     !!!next-token;
5002     redo B;
5003     } else {
5004 wakaba 1.79 !!!cp ('t267');
5005 wakaba 1.52 #
5006     }
5007 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
5008     if ($self->{open_elements}->[-1]->[1] eq 'html' or
5009     @{$self->{open_elements}} == 1) { # redundant, maybe
5010     !!!cp ('t270.2');
5011     ## Stop parsing.
5012     last B;
5013     } else {
5014     ## NOTE: As if </colgroup>.
5015     !!!cp ('t270.1');
5016     pop @{$self->{open_elements}}; # colgroup
5017     $self->{insertion_mode} = IN_TABLE_IM;
5018     ## Reprocess.
5019     redo B;
5020     }
5021     } else {
5022     die "$0: $token->{type}: Unknown token type";
5023     }
5024 wakaba 1.52
5025     ## As if </colgroup>
5026     if ($self->{open_elements}->[-1]->[1] eq 'html') {
5027 wakaba 1.79 !!!cp ('t269');
5028 wakaba 1.104 ## TODO: Wrong error type?
5029 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:colgroup', token => $token);
5030 wakaba 1.52 ## Ignore the token
5031     !!!next-token;
5032     redo B;
5033     } else {
5034 wakaba 1.79 !!!cp ('t270');
5035 wakaba 1.52 pop @{$self->{open_elements}}; # colgroup
5036 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5037 wakaba 1.52 ## reprocess
5038     redo B;
5039     }
5040 wakaba 1.101 } elsif ($self->{insertion_mode} & SELECT_IMS) {
5041 wakaba 1.58 if ($token->{type} == CHARACTER_TOKEN) {
5042 wakaba 1.79 !!!cp ('t271');
5043 wakaba 1.58 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
5044     !!!next-token;
5045     redo B;
5046     } elsif ($token->{type} == START_TAG_TOKEN) {
5047 wakaba 1.52 if ($token->{tag_name} eq 'option') {
5048     if ($self->{open_elements}->[-1]->[1] eq 'option') {
5049 wakaba 1.79 !!!cp ('t272');
5050 wakaba 1.52 ## As if </option>
5051     pop @{$self->{open_elements}};
5052 wakaba 1.79 } else {
5053     !!!cp ('t273');
5054 wakaba 1.52 }
5055    
5056 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5057 wakaba 1.52 !!!next-token;
5058     redo B;
5059     } elsif ($token->{tag_name} eq 'optgroup') {
5060     if ($self->{open_elements}->[-1]->[1] eq 'option') {
5061 wakaba 1.79 !!!cp ('t274');
5062 wakaba 1.52 ## As if </option>
5063     pop @{$self->{open_elements}};
5064 wakaba 1.79 } else {
5065     !!!cp ('t275');
5066 wakaba 1.52 }
5067    
5068     if ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
5069 wakaba 1.79 !!!cp ('t276');
5070 wakaba 1.52 ## As if </optgroup>
5071     pop @{$self->{open_elements}};
5072 wakaba 1.79 } else {
5073     !!!cp ('t277');
5074 wakaba 1.52 }
5075    
5076 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5077 wakaba 1.52 !!!next-token;
5078     redo B;
5079 wakaba 1.101 } elsif ($token->{tag_name} eq 'select' or
5080     $token->{tag_name} eq 'input' or
5081     ($self->{insertion_mode} == IN_SELECT_IN_TABLE_IM and
5082     {
5083     caption => 1, table => 1,
5084     tbody => 1, tfoot => 1, thead => 1,
5085     tr => 1, td => 1, th => 1,
5086     }->{$token->{tag_name}})) {
5087     ## TODO: The type below is not good - <select> is replaced by </select>
5088 wakaba 1.113 !!!parse-error (type => 'not closed:select', token => $token);
5089 wakaba 1.101 ## NOTE: As if the token were </select> (<select> case) or
5090     ## as if there were </select> (otherwise).
5091 wakaba 1.52 ## have an element in table scope
5092     my $i;
5093     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5094     my $node = $self->{open_elements}->[$_];
5095 wakaba 1.101 if ($node->[1] eq 'select') {
5096 wakaba 1.79 !!!cp ('t278');
5097 wakaba 1.52 $i = $_;
5098     last INSCOPE;
5099     } elsif ({
5100     table => 1, html => 1,
5101     }->{$node->[1]}) {
5102 wakaba 1.79 !!!cp ('t279');
5103 wakaba 1.52 last INSCOPE;
5104 wakaba 1.47 }
5105 wakaba 1.52 } # INSCOPE
5106     unless (defined $i) {
5107 wakaba 1.79 !!!cp ('t280');
5108 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:select', token => $token);
5109 wakaba 1.52 ## Ignore the token
5110     !!!next-token;
5111     redo B;
5112 wakaba 1.47 }
5113 wakaba 1.52
5114 wakaba 1.79 !!!cp ('t281');
5115 wakaba 1.52 splice @{$self->{open_elements}}, $i;
5116    
5117     $self->_reset_insertion_mode;
5118 wakaba 1.47
5119 wakaba 1.101 if ($token->{tag_name} eq 'select') {
5120     !!!cp ('t281.2');
5121     !!!next-token;
5122     redo B;
5123     } else {
5124     !!!cp ('t281.1');
5125     ## Reprocess the token.
5126     redo B;
5127     }
5128 wakaba 1.58 } else {
5129 wakaba 1.79 !!!cp ('t282');
5130 wakaba 1.113 !!!parse-error (type => 'in select:'.$token->{tag_name}, token => $token);
5131 wakaba 1.58 ## Ignore the token
5132     !!!next-token;
5133     redo B;
5134     }
5135     } elsif ($token->{type} == END_TAG_TOKEN) {
5136 wakaba 1.52 if ($token->{tag_name} eq 'optgroup') {
5137     if ($self->{open_elements}->[-1]->[1] eq 'option' and
5138     $self->{open_elements}->[-2]->[1] eq 'optgroup') {
5139 wakaba 1.79 !!!cp ('t283');
5140 wakaba 1.52 ## As if </option>
5141     splice @{$self->{open_elements}}, -2;
5142     } elsif ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
5143 wakaba 1.79 !!!cp ('t284');
5144 wakaba 1.52 pop @{$self->{open_elements}};
5145     } else {
5146 wakaba 1.79 !!!cp ('t285');
5147 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5148 wakaba 1.52 ## Ignore the token
5149     }
5150     !!!next-token;
5151     redo B;
5152     } elsif ($token->{tag_name} eq 'option') {
5153     if ($self->{open_elements}->[-1]->[1] eq 'option') {
5154 wakaba 1.79 !!!cp ('t286');
5155 wakaba 1.47 pop @{$self->{open_elements}};
5156 wakaba 1.52 } else {
5157 wakaba 1.79 !!!cp ('t287');
5158 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5159 wakaba 1.52 ## Ignore the token
5160 wakaba 1.1 }
5161 wakaba 1.52 !!!next-token;
5162     redo B;
5163     } elsif ($token->{tag_name} eq 'select') {
5164     ## have an element in table scope
5165     my $i;
5166     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5167     my $node = $self->{open_elements}->[$_];
5168     if ($node->[1] eq $token->{tag_name}) {
5169 wakaba 1.79 !!!cp ('t288');
5170 wakaba 1.52 $i = $_;
5171     last INSCOPE;
5172     } elsif ({
5173     table => 1, html => 1,
5174     }->{$node->[1]}) {
5175 wakaba 1.79 !!!cp ('t289');
5176 wakaba 1.52 last INSCOPE;
5177 wakaba 1.48 }
5178 wakaba 1.52 } # INSCOPE
5179     unless (defined $i) {
5180 wakaba 1.79 !!!cp ('t290');
5181 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5182 wakaba 1.52 ## Ignore the token
5183     !!!next-token;
5184 wakaba 1.48 redo B;
5185 wakaba 1.52 }
5186    
5187 wakaba 1.79 !!!cp ('t291');
5188 wakaba 1.52 splice @{$self->{open_elements}}, $i;
5189    
5190     $self->_reset_insertion_mode;
5191    
5192     !!!next-token;
5193     redo B;
5194 wakaba 1.101 } elsif ($self->{insertion_mode} == IN_SELECT_IN_TABLE_IM and
5195     {
5196     caption => 1, table => 1, tbody => 1,
5197     tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
5198     }->{$token->{tag_name}}) {
5199 wakaba 1.83 ## TODO: The following is wrong?
5200 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5201 wakaba 1.52
5202     ## have an element in table scope
5203     my $i;
5204     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5205     my $node = $self->{open_elements}->[$_];
5206     if ($node->[1] eq $token->{tag_name}) {
5207 wakaba 1.79 !!!cp ('t292');
5208 wakaba 1.52 $i = $_;
5209     last INSCOPE;
5210     } elsif ({
5211     table => 1, html => 1,
5212     }->{$node->[1]}) {
5213 wakaba 1.79 !!!cp ('t293');
5214 wakaba 1.52 last INSCOPE;
5215 wakaba 1.1 }
5216 wakaba 1.52 } # INSCOPE
5217     unless (defined $i) {
5218 wakaba 1.79 !!!cp ('t294');
5219 wakaba 1.52 ## Ignore the token
5220 wakaba 1.1 !!!next-token;
5221     redo B;
5222     }
5223 wakaba 1.52
5224     ## As if </select>
5225     ## have an element in table scope
5226     undef $i;
5227 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5228     my $node = $self->{open_elements}->[$_];
5229 wakaba 1.52 if ($node->[1] eq 'select') {
5230 wakaba 1.79 !!!cp ('t295');
5231 wakaba 1.1 $i = $_;
5232     last INSCOPE;
5233     } elsif ({
5234     table => 1, html => 1,
5235 wakaba 1.52 }->{$node->[1]}) {
5236 wakaba 1.83 ## ISSUE: Can this state be reached?
5237 wakaba 1.79 !!!cp ('t296');
5238 wakaba 1.52 last INSCOPE;
5239     }
5240     } # INSCOPE
5241     unless (defined $i) {
5242 wakaba 1.79 !!!cp ('t297');
5243 wakaba 1.83 ## TODO: The following error type is correct?
5244 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:select', token => $token);
5245 wakaba 1.52 ## Ignore the </select> token
5246     !!!next-token; ## TODO: ok?
5247     redo B;
5248     }
5249    
5250 wakaba 1.79 !!!cp ('t298');
5251 wakaba 1.52 splice @{$self->{open_elements}}, $i;
5252    
5253     $self->_reset_insertion_mode;
5254    
5255     ## reprocess
5256     redo B;
5257 wakaba 1.58 } else {
5258 wakaba 1.79 !!!cp ('t299');
5259 wakaba 1.113 !!!parse-error (type => 'in select:/'.$token->{tag_name}, token => $token);
5260 wakaba 1.52 ## Ignore the token
5261     !!!next-token;
5262     redo B;
5263 wakaba 1.58 }
5264 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
5265     unless ($self->{open_elements}->[-1]->[1] eq 'html' and
5266     @{$self->{open_elements}} == 1) { # redundant, maybe
5267     !!!cp ('t299.1');
5268 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
5269 wakaba 1.104 } else {
5270     !!!cp ('t299.2');
5271     }
5272    
5273     ## Stop parsing.
5274     last B;
5275 wakaba 1.58 } else {
5276     die "$0: $token->{type}: Unknown token type";
5277     }
5278 wakaba 1.56 } elsif ($self->{insertion_mode} & BODY_AFTER_IMS) {
5279 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
5280 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5281     my $data = $1;
5282     ## As if in body
5283     $reconstruct_active_formatting_elements->($insert_to_current);
5284    
5285     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5286    
5287     unless (length $token->{data}) {
5288 wakaba 1.79 !!!cp ('t300');
5289 wakaba 1.52 !!!next-token;
5290     redo B;
5291     }
5292     }
5293    
5294 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
5295 wakaba 1.79 !!!cp ('t301');
5296 wakaba 1.113 !!!parse-error (type => 'after html:#character', token => $token);
5297 wakaba 1.52
5298 wakaba 1.84 ## Reprocess in the "after body" insertion mode.
5299 wakaba 1.79 } else {
5300     !!!cp ('t302');
5301 wakaba 1.52 }
5302    
5303     ## "after body" insertion mode
5304 wakaba 1.113 !!!parse-error (type => 'after body:#character', token => $token);
5305 wakaba 1.52
5306 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
5307 wakaba 1.52 ## reprocess
5308     redo B;
5309 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
5310 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
5311 wakaba 1.79 !!!cp ('t303');
5312 wakaba 1.113 !!!parse-error (type => 'after html:'.$token->{tag_name}, token => $token);
5313 wakaba 1.52
5314 wakaba 1.84 ## Reprocess in the "after body" insertion mode.
5315 wakaba 1.79 } else {
5316     !!!cp ('t304');
5317 wakaba 1.52 }
5318    
5319     ## "after body" insertion mode
5320 wakaba 1.113 !!!parse-error (type => 'after body:'.$token->{tag_name}, token => $token);
5321 wakaba 1.52
5322 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
5323 wakaba 1.52 ## reprocess
5324     redo B;
5325 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
5326 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
5327 wakaba 1.79 !!!cp ('t305');
5328 wakaba 1.113 !!!parse-error (type => 'after html:/'.$token->{tag_name}, token => $token);
5329 wakaba 1.52
5330 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
5331 wakaba 1.84 ## Reprocess in the "after body" insertion mode.
5332 wakaba 1.79 } else {
5333     !!!cp ('t306');
5334 wakaba 1.52 }
5335    
5336     ## "after body" insertion mode
5337     if ($token->{tag_name} eq 'html') {
5338     if (defined $self->{inner_html_node}) {
5339 wakaba 1.79 !!!cp ('t307');
5340 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:html', token => $token);
5341 wakaba 1.52 ## Ignore the token
5342     !!!next-token;
5343     redo B;
5344     } else {
5345 wakaba 1.79 !!!cp ('t308');
5346 wakaba 1.54 $self->{insertion_mode} = AFTER_HTML_BODY_IM;
5347 wakaba 1.52 !!!next-token;
5348     redo B;
5349     }
5350     } else {
5351 wakaba 1.79 !!!cp ('t309');
5352 wakaba 1.113 !!!parse-error (type => 'after body:/'.$token->{tag_name}, token => $token);
5353 wakaba 1.52
5354 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
5355 wakaba 1.52 ## reprocess
5356     redo B;
5357     }
5358 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
5359     !!!cp ('t309.2');
5360     ## Stop parsing
5361     last B;
5362 wakaba 1.52 } else {
5363     die "$0: $token->{type}: Unknown token type";
5364     }
5365 wakaba 1.56 } elsif ($self->{insertion_mode} & FRAME_IMS) {
5366 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
5367 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5368     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5369    
5370     unless (length $token->{data}) {
5371 wakaba 1.79 !!!cp ('t310');
5372 wakaba 1.52 !!!next-token;
5373     redo B;
5374     }
5375     }
5376    
5377     if ($token->{data} =~ s/^[^\x09\x0A\x0B\x0C\x20]+//) {
5378 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
5379 wakaba 1.79 !!!cp ('t311');
5380 wakaba 1.113 !!!parse-error (type => 'in frameset:#character', token => $token);
5381 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
5382 wakaba 1.79 !!!cp ('t312');
5383 wakaba 1.113 !!!parse-error (type => 'after frameset:#character', token => $token);
5384 wakaba 1.52 } else { # "after html frameset"
5385 wakaba 1.79 !!!cp ('t313');
5386 wakaba 1.113 !!!parse-error (type => 'after html:#character', token => $token);
5387 wakaba 1.52
5388 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
5389 wakaba 1.84 ## Reprocess in the "after frameset" insertion mode.
5390 wakaba 1.113 !!!parse-error (type => 'after frameset:#character', token => $token);
5391 wakaba 1.52 }
5392    
5393     ## Ignore the token.
5394     if (length $token->{data}) {
5395 wakaba 1.79 !!!cp ('t314');
5396 wakaba 1.52 ## reprocess the rest of characters
5397     } else {
5398 wakaba 1.79 !!!cp ('t315');
5399 wakaba 1.52 !!!next-token;
5400     }
5401     redo B;
5402     }
5403    
5404     die qq[$0: Character "$token->{data}"];
5405 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
5406 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
5407 wakaba 1.79 !!!cp ('t316');
5408 wakaba 1.113 !!!parse-error (type => 'after html:'.$token->{tag_name}, token => $token);
5409 wakaba 1.1
5410 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
5411 wakaba 1.84 ## Process in the "after frameset" insertion mode.
5412 wakaba 1.79 } else {
5413     !!!cp ('t317');
5414     }
5415 wakaba 1.1
5416 wakaba 1.52 if ($token->{tag_name} eq 'frameset' and
5417 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
5418 wakaba 1.79 !!!cp ('t318');
5419 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5420 wakaba 1.52 !!!next-token;
5421     redo B;
5422     } elsif ($token->{tag_name} eq 'frame' and
5423 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
5424 wakaba 1.79 !!!cp ('t319');
5425 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5426 wakaba 1.52 pop @{$self->{open_elements}};
5427     !!!next-token;
5428     redo B;
5429     } elsif ($token->{tag_name} eq 'noframes') {
5430 wakaba 1.79 !!!cp ('t320');
5431 wakaba 1.52 ## NOTE: As if in body.
5432 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
5433 wakaba 1.52 redo B;
5434     } else {
5435 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
5436 wakaba 1.79 !!!cp ('t321');
5437 wakaba 1.113 !!!parse-error (type => 'in frameset:'.$token->{tag_name}, token => $token);
5438 wakaba 1.52 } else {
5439 wakaba 1.79 !!!cp ('t322');
5440 wakaba 1.113 !!!parse-error (type => 'after frameset:'.$token->{tag_name}, token => $token);
5441 wakaba 1.52 }
5442     ## Ignore the token
5443     !!!next-token;
5444     redo B;
5445     }
5446 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
5447 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
5448 wakaba 1.79 !!!cp ('t323');
5449 wakaba 1.113 !!!parse-error (type => 'after html:/'.$token->{tag_name}, token => $token);
5450 wakaba 1.1
5451 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
5452 wakaba 1.84 ## Process in the "after frameset" insertion mode.
5453 wakaba 1.79 } else {
5454     !!!cp ('t324');
5455 wakaba 1.52 }
5456 wakaba 1.1
5457 wakaba 1.52 if ($token->{tag_name} eq 'frameset' and
5458 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
5459 wakaba 1.52 if ($self->{open_elements}->[-1]->[1] eq 'html' and
5460     @{$self->{open_elements}} == 1) {
5461 wakaba 1.79 !!!cp ('t325');
5462 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5463 wakaba 1.52 ## Ignore the token
5464     !!!next-token;
5465     } else {
5466 wakaba 1.79 !!!cp ('t326');
5467 wakaba 1.52 pop @{$self->{open_elements}};
5468     !!!next-token;
5469     }
5470 wakaba 1.47
5471 wakaba 1.52 if (not defined $self->{inner_html_node} and
5472     $self->{open_elements}->[-1]->[1] ne 'frameset') {
5473 wakaba 1.79 !!!cp ('t327');
5474 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
5475 wakaba 1.79 } else {
5476     !!!cp ('t328');
5477 wakaba 1.52 }
5478     redo B;
5479     } elsif ($token->{tag_name} eq 'html' and
5480 wakaba 1.54 $self->{insertion_mode} == AFTER_FRAMESET_IM) {
5481 wakaba 1.79 !!!cp ('t329');
5482 wakaba 1.54 $self->{insertion_mode} = AFTER_HTML_FRAMESET_IM;
5483 wakaba 1.52 !!!next-token;
5484     redo B;
5485     } else {
5486 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
5487 wakaba 1.79 !!!cp ('t330');
5488 wakaba 1.113 !!!parse-error (type => 'in frameset:/'.$token->{tag_name}, token => $token);
5489 wakaba 1.52 } else {
5490 wakaba 1.79 !!!cp ('t331');
5491 wakaba 1.113 !!!parse-error (type => 'after frameset:/'.$token->{tag_name}, token => $token);
5492 wakaba 1.52 }
5493     ## Ignore the token
5494     !!!next-token;
5495     redo B;
5496     }
5497 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
5498     unless ($self->{open_elements}->[-1]->[1] eq 'html' and
5499     @{$self->{open_elements}} == 1) { # redundant, maybe
5500     !!!cp ('t331.1');
5501 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
5502 wakaba 1.104 } else {
5503     !!!cp ('t331.2');
5504     }
5505    
5506     ## Stop parsing
5507     last B;
5508 wakaba 1.52 } else {
5509     die "$0: $token->{type}: Unknown token type";
5510     }
5511 wakaba 1.47
5512 wakaba 1.52 ## ISSUE: An issue in spec here
5513     } else {
5514     die "$0: $self->{insertion_mode}: Unknown insertion mode";
5515     }
5516 wakaba 1.47
5517 wakaba 1.52 ## "in body" insertion mode
5518 wakaba 1.55 if ($token->{type} == START_TAG_TOKEN) {
5519 wakaba 1.52 if ($token->{tag_name} eq 'script') {
5520 wakaba 1.79 !!!cp ('t332');
5521 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
5522 wakaba 1.100 $script_start_tag->();
5523 wakaba 1.53 redo B;
5524 wakaba 1.52 } elsif ($token->{tag_name} eq 'style') {
5525 wakaba 1.79 !!!cp ('t333');
5526 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
5527 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
5528 wakaba 1.53 redo B;
5529 wakaba 1.52 } elsif ({
5530     base => 1, link => 1,
5531     }->{$token->{tag_name}}) {
5532 wakaba 1.79 !!!cp ('t334');
5533 wakaba 1.52 ## NOTE: This is an "as if in head" code clone, only "-t" differs
5534 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
5535 wakaba 1.52 pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
5536     !!!next-token;
5537 wakaba 1.53 redo B;
5538 wakaba 1.52 } elsif ($token->{tag_name} eq 'meta') {
5539     ## NOTE: This is an "as if in head" code clone, only "-t" differs
5540 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
5541 wakaba 1.66 my $meta_el = pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
5542 wakaba 1.46
5543 wakaba 1.52 unless ($self->{confident}) {
5544     if ($token->{attributes}->{charset}) { ## TODO: And if supported
5545 wakaba 1.79 !!!cp ('t335');
5546 wakaba 1.63 $self->{change_encoding}
5547 wakaba 1.114 ->($self, $token->{attributes}->{charset}->{value}, $token);
5548 wakaba 1.66
5549     $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
5550     ->set_user_data (manakai_has_reference =>
5551     $token->{attributes}->{charset}
5552     ->{has_reference});
5553 wakaba 1.63 } elsif ($token->{attributes}->{content}) {
5554 wakaba 1.52 ## ISSUE: Algorithm name in the spec was incorrect so that not linked to the definition.
5555 wakaba 1.63 if ($token->{attributes}->{content}->{value}
5556 wakaba 1.70 =~ /\A[^;]*;[\x09-\x0D\x20]*[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
5557     [\x09-\x0D\x20]*=
5558 wakaba 1.52 [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
5559     ([^"'\x09-\x0D\x20][^\x09-\x0D\x20]*))/x) {
5560 wakaba 1.79 !!!cp ('t336');
5561 wakaba 1.63 $self->{change_encoding}
5562 wakaba 1.114 ->($self, defined $1 ? $1 : defined $2 ? $2 : $3, $token);
5563 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
5564     ->set_user_data (manakai_has_reference =>
5565     $token->{attributes}->{content}
5566     ->{has_reference});
5567 wakaba 1.63 }
5568 wakaba 1.52 }
5569 wakaba 1.66 } else {
5570     if ($token->{attributes}->{charset}) {
5571 wakaba 1.79 !!!cp ('t337');
5572 wakaba 1.66 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
5573     ->set_user_data (manakai_has_reference =>
5574     $token->{attributes}->{charset}
5575     ->{has_reference});
5576     }
5577 wakaba 1.68 if ($token->{attributes}->{content}) {
5578 wakaba 1.79 !!!cp ('t338');
5579 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
5580     ->set_user_data (manakai_has_reference =>
5581     $token->{attributes}->{content}
5582     ->{has_reference});
5583     }
5584 wakaba 1.52 }
5585 wakaba 1.1
5586 wakaba 1.52 !!!next-token;
5587 wakaba 1.53 redo B;
5588 wakaba 1.52 } elsif ($token->{tag_name} eq 'title') {
5589 wakaba 1.79 !!!cp ('t341');
5590 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
5591 wakaba 1.96 $parse_rcdata->(RCDATA_CONTENT_MODEL);
5592 wakaba 1.53 redo B;
5593 wakaba 1.52 } elsif ($token->{tag_name} eq 'body') {
5594 wakaba 1.113 !!!parse-error (type => 'in body:body', token => $token);
5595 wakaba 1.46
5596 wakaba 1.52 if (@{$self->{open_elements}} == 1 or
5597     $self->{open_elements}->[1]->[1] ne 'body') {
5598 wakaba 1.79 !!!cp ('t342');
5599 wakaba 1.52 ## Ignore the token
5600     } else {
5601     my $body_el = $self->{open_elements}->[1]->[0];
5602     for my $attr_name (keys %{$token->{attributes}}) {
5603     unless ($body_el->has_attribute_ns (undef, $attr_name)) {
5604 wakaba 1.79 !!!cp ('t343');
5605 wakaba 1.52 $body_el->set_attribute_ns
5606     (undef, [undef, $attr_name],
5607     $token->{attributes}->{$attr_name}->{value});
5608     }
5609     }
5610     }
5611     !!!next-token;
5612 wakaba 1.53 redo B;
5613 wakaba 1.52 } elsif ({
5614     address => 1, blockquote => 1, center => 1, dir => 1,
5615 wakaba 1.85 div => 1, dl => 1, fieldset => 1,
5616     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
5617 wakaba 1.97 menu => 1, ol => 1, p => 1, ul => 1,
5618     pre => 1, listing => 1,
5619 wakaba 1.109 form => 1,
5620     table => 1,
5621     hr => 1,
5622 wakaba 1.52 }->{$token->{tag_name}}) {
5623 wakaba 1.109 if ($token->{tag_name} eq 'form' and defined $self->{form_element}) {
5624     !!!cp ('t350');
5625 wakaba 1.113 !!!parse-error (type => 'in form:form', token => $token);
5626 wakaba 1.109 ## Ignore the token
5627     !!!next-token;
5628     redo B;
5629     }
5630    
5631 wakaba 1.52 ## has a p element in scope
5632     INSCOPE: for (reverse @{$self->{open_elements}}) {
5633     if ($_->[1] eq 'p') {
5634 wakaba 1.79 !!!cp ('t344');
5635 wakaba 1.52 !!!back-token;
5636 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'p',
5637     line => $token->{line}, column => $token->{column}};
5638 wakaba 1.53 redo B;
5639 wakaba 1.52 } elsif ({
5640 wakaba 1.103 applet => 1, table => 1, caption => 1, td => 1, th => 1,
5641 wakaba 1.52 button => 1, marquee => 1, object => 1, html => 1,
5642     }->{$_->[1]}) {
5643 wakaba 1.79 !!!cp ('t345');
5644 wakaba 1.52 last INSCOPE;
5645     }
5646     } # INSCOPE
5647    
5648 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
5649 wakaba 1.97 if ($token->{tag_name} eq 'pre' or $token->{tag_name} eq 'listing') {
5650 wakaba 1.52 !!!next-token;
5651 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
5652 wakaba 1.52 $token->{data} =~ s/^\x0A//;
5653     unless (length $token->{data}) {
5654 wakaba 1.79 !!!cp ('t346');
5655 wakaba 1.1 !!!next-token;
5656 wakaba 1.79 } else {
5657     !!!cp ('t349');
5658 wakaba 1.52 }
5659 wakaba 1.79 } else {
5660     !!!cp ('t348');
5661 wakaba 1.52 }
5662 wakaba 1.109 } elsif ($token->{tag_name} eq 'form') {
5663     !!!cp ('t347.1');
5664     $self->{form_element} = $self->{open_elements}->[-1]->[0];
5665    
5666     !!!next-token;
5667     } elsif ($token->{tag_name} eq 'table') {
5668     !!!cp ('t382');
5669     push @{$open_tables}, [$self->{open_elements}->[-1]->[0]];
5670    
5671     $self->{insertion_mode} = IN_TABLE_IM;
5672    
5673     !!!next-token;
5674     } elsif ($token->{tag_name} eq 'hr') {
5675     !!!cp ('t386');
5676     pop @{$self->{open_elements}};
5677    
5678     !!!next-token;
5679 wakaba 1.52 } else {
5680 wakaba 1.79 !!!cp ('t347');
5681 wakaba 1.52 !!!next-token;
5682     }
5683 wakaba 1.53 redo B;
5684 wakaba 1.109 } elsif ({li => 1, dt => 1, dd => 1}->{$token->{tag_name}}) {
5685 wakaba 1.52 ## has a p element in scope
5686     INSCOPE: for (reverse @{$self->{open_elements}}) {
5687     if ($_->[1] eq 'p') {
5688 wakaba 1.79 !!!cp ('t353');
5689 wakaba 1.52 !!!back-token;
5690 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'p',
5691     line => $token->{line}, column => $token->{column}};
5692 wakaba 1.53 redo B;
5693 wakaba 1.52 } elsif ({
5694 wakaba 1.103 applet => 1, table => 1, caption => 1, td => 1, th => 1,
5695 wakaba 1.52 button => 1, marquee => 1, object => 1, html => 1,
5696     }->{$_->[1]}) {
5697 wakaba 1.79 !!!cp ('t354');
5698 wakaba 1.52 last INSCOPE;
5699     }
5700     } # INSCOPE
5701    
5702     ## Step 1
5703     my $i = -1;
5704     my $node = $self->{open_elements}->[$i];
5705 wakaba 1.109 my $li_or_dtdd = {li => {li => 1},
5706     dt => {dt => 1, dd => 1},
5707     dd => {dt => 1, dd => 1}}->{$token->{tag_name}};
5708 wakaba 1.52 LI: {
5709     ## Step 2
5710 wakaba 1.109 if ($li_or_dtdd->{$node->[1]}) {
5711 wakaba 1.52 if ($i != -1) {
5712 wakaba 1.79 !!!cp ('t355');
5713 wakaba 1.52 !!!parse-error (type => 'end tag missing:'.
5714 wakaba 1.113 $self->{open_elements}->[-1]->[1], token => $token);
5715 wakaba 1.79 } else {
5716     !!!cp ('t356');
5717 wakaba 1.52 }
5718     splice @{$self->{open_elements}}, $i;
5719     last LI;
5720 wakaba 1.79 } else {
5721     !!!cp ('t357');
5722 wakaba 1.52 }
5723    
5724     ## Step 3
5725     if (not $formatting_category->{$node->[1]} and
5726     #not $phrasing_category->{$node->[1]} and
5727     ($special_category->{$node->[1]} or
5728     $scoping_category->{$node->[1]}) and
5729     $node->[1] ne 'address' and $node->[1] ne 'div') {
5730 wakaba 1.79 !!!cp ('t358');
5731 wakaba 1.52 last LI;
5732     }
5733    
5734 wakaba 1.79 !!!cp ('t359');
5735 wakaba 1.52 ## Step 4
5736     $i--;
5737     $node = $self->{open_elements}->[$i];
5738     redo LI;
5739     } # LI
5740    
5741 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
5742 wakaba 1.52 !!!next-token;
5743 wakaba 1.53 redo B;
5744 wakaba 1.52 } elsif ($token->{tag_name} eq 'plaintext') {
5745     ## has a p element in scope
5746     INSCOPE: for (reverse @{$self->{open_elements}}) {
5747     if ($_->[1] eq 'p') {
5748 wakaba 1.79 !!!cp ('t367');
5749 wakaba 1.52 !!!back-token;
5750 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'p',
5751     line => $token->{line}, column => $token->{column}};
5752 wakaba 1.53 redo B;
5753 wakaba 1.52 } elsif ({
5754 wakaba 1.103 applet => 1, table => 1, caption => 1, td => 1, th => 1,
5755 wakaba 1.52 button => 1, marquee => 1, object => 1, html => 1,
5756     }->{$_->[1]}) {
5757 wakaba 1.79 !!!cp ('t368');
5758 wakaba 1.52 last INSCOPE;
5759 wakaba 1.46 }
5760 wakaba 1.52 } # INSCOPE
5761    
5762 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
5763 wakaba 1.52
5764     $self->{content_model} = PLAINTEXT_CONTENT_MODEL;
5765    
5766     !!!next-token;
5767 wakaba 1.53 redo B;
5768 wakaba 1.52 } elsif ($token->{tag_name} eq 'a') {
5769     AFE: for my $i (reverse 0..$#$active_formatting_elements) {
5770     my $node = $active_formatting_elements->[$i];
5771     if ($node->[1] eq 'a') {
5772 wakaba 1.79 !!!cp ('t371');
5773 wakaba 1.113 !!!parse-error (type => 'in a:a', token => $token);
5774 wakaba 1.52
5775     !!!back-token;
5776 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'a',
5777     line => $token->{line}, column => $token->{column}};
5778 wakaba 1.113 $formatting_end_tag->($token);
5779 wakaba 1.52
5780     AFE2: for (reverse 0..$#$active_formatting_elements) {
5781     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
5782 wakaba 1.79 !!!cp ('t372');
5783 wakaba 1.52 splice @$active_formatting_elements, $_, 1;
5784     last AFE2;
5785 wakaba 1.1 }
5786 wakaba 1.52 } # AFE2
5787     OE: for (reverse 0..$#{$self->{open_elements}}) {
5788     if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
5789 wakaba 1.79 !!!cp ('t373');
5790 wakaba 1.52 splice @{$self->{open_elements}}, $_, 1;
5791     last OE;
5792 wakaba 1.1 }
5793 wakaba 1.52 } # OE
5794     last AFE;
5795     } elsif ($node->[0] eq '#marker') {
5796 wakaba 1.79 !!!cp ('t374');
5797 wakaba 1.52 last AFE;
5798     }
5799     } # AFE
5800    
5801     $reconstruct_active_formatting_elements->($insert_to_current);
5802 wakaba 1.1
5803 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
5804 wakaba 1.52 push @$active_formatting_elements, $self->{open_elements}->[-1];
5805 wakaba 1.1
5806 wakaba 1.52 !!!next-token;
5807 wakaba 1.53 redo B;
5808 wakaba 1.52 } elsif ($token->{tag_name} eq 'nobr') {
5809     $reconstruct_active_formatting_elements->($insert_to_current);
5810 wakaba 1.1
5811 wakaba 1.52 ## has a |nobr| element in scope
5812     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5813     my $node = $self->{open_elements}->[$_];
5814     if ($node->[1] eq 'nobr') {
5815 wakaba 1.79 !!!cp ('t376');
5816 wakaba 1.113 !!!parse-error (type => 'in nobr:nobr', token => $token);
5817 wakaba 1.52 !!!back-token;
5818 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'nobr',
5819     line => $token->{line}, column => $token->{column}};
5820 wakaba 1.53 redo B;
5821 wakaba 1.52 } elsif ({
5822 wakaba 1.103 applet => 1, table => 1, caption => 1, td => 1, th => 1,
5823 wakaba 1.52 button => 1, marquee => 1, object => 1, html => 1,
5824     }->{$node->[1]}) {
5825 wakaba 1.79 !!!cp ('t377');
5826 wakaba 1.52 last INSCOPE;
5827     }
5828     } # INSCOPE
5829    
5830 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
5831 wakaba 1.52 push @$active_formatting_elements, $self->{open_elements}->[-1];
5832    
5833     !!!next-token;
5834 wakaba 1.53 redo B;
5835 wakaba 1.52 } elsif ($token->{tag_name} eq 'button') {
5836     ## has a button element in scope
5837     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5838     my $node = $self->{open_elements}->[$_];
5839     if ($node->[1] eq 'button') {
5840 wakaba 1.79 !!!cp ('t378');
5841 wakaba 1.113 !!!parse-error (type => 'in button:button', token => $token);
5842 wakaba 1.52 !!!back-token;
5843 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'button',
5844     line => $token->{line}, column => $token->{column}};
5845 wakaba 1.53 redo B;
5846 wakaba 1.52 } elsif ({
5847 wakaba 1.103 applet => 1, table => 1, caption => 1, td => 1, th => 1,
5848 wakaba 1.52 button => 1, marquee => 1, object => 1, html => 1,
5849     }->{$node->[1]}) {
5850 wakaba 1.79 !!!cp ('t379');
5851 wakaba 1.52 last INSCOPE;
5852     }
5853     } # INSCOPE
5854    
5855     $reconstruct_active_formatting_elements->($insert_to_current);
5856    
5857 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
5858 wakaba 1.85
5859     ## TODO: associate with $self->{form_element} if defined
5860    
5861 wakaba 1.52 push @$active_formatting_elements, ['#marker', ''];
5862 wakaba 1.1
5863 wakaba 1.52 !!!next-token;
5864 wakaba 1.53 redo B;
5865 wakaba 1.103 } elsif ({
5866 wakaba 1.109 xmp => 1,
5867     iframe => 1,
5868     noembed => 1,
5869     noframes => 1,
5870     noscript => 0, ## TODO: 1 if scripting is enabled
5871 wakaba 1.103 }->{$token->{tag_name}}) {
5872 wakaba 1.109 if ($token->{tag_name} eq 'xmp') {
5873     !!!cp ('t381');
5874     $reconstruct_active_formatting_elements->($insert_to_current);
5875     } else {
5876     !!!cp ('t399');
5877     }
5878     ## NOTE: There is an "as if in body" code clone.
5879 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
5880 wakaba 1.53 redo B;
5881 wakaba 1.52 } elsif ($token->{tag_name} eq 'isindex') {
5882 wakaba 1.113 !!!parse-error (type => 'isindex', token => $token);
5883 wakaba 1.52
5884     if (defined $self->{form_element}) {
5885 wakaba 1.79 !!!cp ('t389');
5886 wakaba 1.52 ## Ignore the token
5887     !!!next-token;
5888 wakaba 1.53 redo B;
5889 wakaba 1.52 } else {
5890     my $at = $token->{attributes};
5891     my $form_attrs;
5892     $form_attrs->{action} = $at->{action} if $at->{action};
5893     my $prompt_attr = $at->{prompt};
5894     $at->{name} = {name => 'name', value => 'isindex'};
5895     delete $at->{action};
5896     delete $at->{prompt};
5897     my @tokens = (
5898 wakaba 1.55 {type => START_TAG_TOKEN, tag_name => 'form',
5899 wakaba 1.114 attributes => $form_attrs,
5900     line => $token->{line}, column => $token->{column}},
5901     {type => START_TAG_TOKEN, tag_name => 'hr',
5902     line => $token->{line}, column => $token->{column}},
5903     {type => START_TAG_TOKEN, tag_name => 'p',
5904     line => $token->{line}, column => $token->{column}},
5905     {type => START_TAG_TOKEN, tag_name => 'label',
5906     line => $token->{line}, column => $token->{column}},
5907 wakaba 1.52 );
5908     if ($prompt_attr) {
5909 wakaba 1.79 !!!cp ('t390');
5910 wakaba 1.114 push @tokens, {type => CHARACTER_TOKEN, data => $prompt_attr->{value},
5911     line => $token->{line}, column => $token->{column}};
5912 wakaba 1.1 } else {
5913 wakaba 1.79 !!!cp ('t391');
5914 wakaba 1.55 push @tokens, {type => CHARACTER_TOKEN,
5915 wakaba 1.114 data => 'This is a searchable index. Insert your search keywords here: ',
5916     line => $token->{line}, column => $token->{column}}; # SHOULD
5917 wakaba 1.52 ## TODO: make this configurable
5918 wakaba 1.1 }
5919 wakaba 1.52 push @tokens,
5920 wakaba 1.114 {type => START_TAG_TOKEN, tag_name => 'input', attributes => $at,
5921     line => $token->{line}, column => $token->{column}},
5922 wakaba 1.55 #{type => CHARACTER_TOKEN, data => ''}, # SHOULD
5923 wakaba 1.114 {type => END_TAG_TOKEN, tag_name => 'label',
5924     line => $token->{line}, column => $token->{column}},
5925     {type => END_TAG_TOKEN, tag_name => 'p',
5926     line => $token->{line}, column => $token->{column}},
5927     {type => START_TAG_TOKEN, tag_name => 'hr',
5928     line => $token->{line}, column => $token->{column}},
5929     {type => END_TAG_TOKEN, tag_name => 'form',
5930     line => $token->{line}, column => $token->{column}};
5931 wakaba 1.52 $token = shift @tokens;
5932     !!!back-token (@tokens);
5933 wakaba 1.53 redo B;
5934 wakaba 1.52 }
5935     } elsif ($token->{tag_name} eq 'textarea') {
5936     my $tag_name = $token->{tag_name};
5937     my $el;
5938 wakaba 1.116 !!!create-element ($el, $token->{tag_name}, $token->{attributes}, $token);
5939 wakaba 1.52
5940     ## TODO: $self->{form_element} if defined
5941     $self->{content_model} = RCDATA_CONTENT_MODEL;
5942     delete $self->{escape}; # MUST
5943    
5944     $insert->($el);
5945    
5946     my $text = '';
5947     !!!next-token;
5948 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
5949 wakaba 1.52 $token->{data} =~ s/^\x0A//;
5950 wakaba 1.51 unless (length $token->{data}) {
5951 wakaba 1.79 !!!cp ('t392');
5952 wakaba 1.51 !!!next-token;
5953 wakaba 1.79 } else {
5954     !!!cp ('t393');
5955 wakaba 1.51 }
5956 wakaba 1.79 } else {
5957     !!!cp ('t394');
5958 wakaba 1.51 }
5959 wakaba 1.55 while ($token->{type} == CHARACTER_TOKEN) {
5960 wakaba 1.79 !!!cp ('t395');
5961 wakaba 1.52 $text .= $token->{data};
5962     !!!next-token;
5963     }
5964     if (length $text) {
5965 wakaba 1.79 !!!cp ('t396');
5966 wakaba 1.52 $el->manakai_append_text ($text);
5967     }
5968    
5969     $self->{content_model} = PCDATA_CONTENT_MODEL;
5970 wakaba 1.51
5971 wakaba 1.55 if ($token->{type} == END_TAG_TOKEN and
5972 wakaba 1.52 $token->{tag_name} eq $tag_name) {
5973 wakaba 1.79 !!!cp ('t397');
5974 wakaba 1.52 ## Ignore the token
5975     } else {
5976 wakaba 1.79 !!!cp ('t398');
5977 wakaba 1.113 !!!parse-error (type => 'in RCDATA:#'.$token->{type}, token => $token);
5978 wakaba 1.51 }
5979 wakaba 1.52 !!!next-token;
5980 wakaba 1.53 redo B;
5981 wakaba 1.52 } elsif ({
5982     caption => 1, col => 1, colgroup => 1, frame => 1,
5983     frameset => 1, head => 1, option => 1, optgroup => 1,
5984     tbody => 1, td => 1, tfoot => 1, th => 1,
5985     thead => 1, tr => 1,
5986     }->{$token->{tag_name}}) {
5987 wakaba 1.79 !!!cp ('t401');
5988 wakaba 1.113 !!!parse-error (type => 'in body:'.$token->{tag_name}, token => $token);
5989 wakaba 1.52 ## Ignore the token
5990     !!!next-token;
5991 wakaba 1.53 redo B;
5992 wakaba 1.52
5993     ## ISSUE: An issue on HTML5 new elements in the spec.
5994     } else {
5995 wakaba 1.110 if ($token->{tag_name} eq 'image') {
5996     !!!cp ('t384');
5997 wakaba 1.113 !!!parse-error (type => 'image', token => $token);
5998 wakaba 1.110 $token->{tag_name} = 'img';
5999     } else {
6000     !!!cp ('t385');
6001     }
6002    
6003     ## NOTE: There is an "as if <br>" code clone.
6004 wakaba 1.52 $reconstruct_active_formatting_elements->($insert_to_current);
6005    
6006 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6007 wakaba 1.109
6008 wakaba 1.110 if ({
6009     applet => 1, marquee => 1, object => 1,
6010     }->{$token->{tag_name}}) {
6011     !!!cp ('t380');
6012     push @$active_formatting_elements, ['#marker', ''];
6013     } elsif ({
6014     b => 1, big => 1, em => 1, font => 1, i => 1,
6015     s => 1, small => 1, strile => 1,
6016     strong => 1, tt => 1, u => 1,
6017     }->{$token->{tag_name}}) {
6018     !!!cp ('t375');
6019     push @$active_formatting_elements, $self->{open_elements}->[-1];
6020     } elsif ($token->{tag_name} eq 'input') {
6021     !!!cp ('t388');
6022     ## TODO: associate with $self->{form_element} if defined
6023     pop @{$self->{open_elements}};
6024     } elsif ({
6025     area => 1, basefont => 1, bgsound => 1, br => 1,
6026     embed => 1, img => 1, param => 1, spacer => 1, wbr => 1,
6027     #image => 1,
6028     }->{$token->{tag_name}}) {
6029     !!!cp ('t388.1');
6030     pop @{$self->{open_elements}};
6031     } elsif ($token->{tag_name} eq 'select') {
6032 wakaba 1.109 ## TODO: associate with $self->{form_element} if defined
6033    
6034     if ($self->{insertion_mode} & TABLE_IMS or
6035     $self->{insertion_mode} & BODY_TABLE_IMS or
6036     $self->{insertion_mode} == IN_COLUMN_GROUP_IM) {
6037     !!!cp ('t400.1');
6038     $self->{insertion_mode} = IN_SELECT_IN_TABLE_IM;
6039     } else {
6040     !!!cp ('t400.2');
6041     $self->{insertion_mode} = IN_SELECT_IM;
6042     }
6043 wakaba 1.110 } else {
6044     !!!cp ('t402');
6045 wakaba 1.109 }
6046 wakaba 1.51
6047 wakaba 1.52 !!!next-token;
6048 wakaba 1.53 redo B;
6049 wakaba 1.52 }
6050 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
6051 wakaba 1.52 if ($token->{tag_name} eq 'body') {
6052 wakaba 1.107 ## has a |body| element in scope
6053     my $i;
6054 wakaba 1.111 INSCOPE: {
6055     for (reverse @{$self->{open_elements}}) {
6056     if ($_->[1] eq 'body') {
6057     !!!cp ('t405');
6058     $i = $_;
6059     last INSCOPE;
6060     } elsif ({
6061     applet => 1, table => 1, caption => 1, td => 1, th => 1,
6062     button => 1, marquee => 1, object => 1, html => 1,
6063     }->{$_->[1]}) {
6064     !!!cp ('t405.1');
6065     last;
6066     }
6067 wakaba 1.52 }
6068 wakaba 1.111
6069     !!!parse-error (type => 'start tag not allowed',
6070 wakaba 1.113 value => $token->{tag_name}, token => $token);
6071 wakaba 1.107 ## NOTE: Ignore the token.
6072 wakaba 1.52 !!!next-token;
6073 wakaba 1.53 redo B;
6074 wakaba 1.111 } # INSCOPE
6075 wakaba 1.107
6076     for (@{$self->{open_elements}}) {
6077     unless ({
6078     dd => 1, dt => 1, li => 1, p => 1, td => 1,
6079     th => 1, tr => 1, body => 1, html => 1,
6080     tbody => 1, tfoot => 1, thead => 1,
6081     }->{$_->[1]}) {
6082     !!!cp ('t403');
6083 wakaba 1.113 !!!parse-error (type => 'not closed:'.$_->[1], token => $token);
6084 wakaba 1.107 last;
6085     } else {
6086     !!!cp ('t404');
6087     }
6088     }
6089    
6090     $self->{insertion_mode} = AFTER_BODY_IM;
6091     !!!next-token;
6092     redo B;
6093 wakaba 1.52 } elsif ($token->{tag_name} eq 'html') {
6094     if (@{$self->{open_elements}} > 1 and $self->{open_elements}->[1]->[1] eq 'body') {
6095     ## ISSUE: There is an issue in the spec.
6096     if ($self->{open_elements}->[-1]->[1] ne 'body') {
6097 wakaba 1.79 !!!cp ('t406');
6098 wakaba 1.113 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[1]->[1], token => $token);
6099 wakaba 1.79 } else {
6100     !!!cp ('t407');
6101 wakaba 1.1 }
6102 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
6103 wakaba 1.52 ## reprocess
6104 wakaba 1.53 redo B;
6105 wakaba 1.51 } else {
6106 wakaba 1.79 !!!cp ('t408');
6107 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6108 wakaba 1.52 ## Ignore the token
6109     !!!next-token;
6110 wakaba 1.53 redo B;
6111 wakaba 1.51 }
6112 wakaba 1.52 } elsif ({
6113     address => 1, blockquote => 1, center => 1, dir => 1,
6114     div => 1, dl => 1, fieldset => 1, listing => 1,
6115     menu => 1, ol => 1, pre => 1, ul => 1,
6116     dd => 1, dt => 1, li => 1,
6117 wakaba 1.103 applet => 1, button => 1, marquee => 1, object => 1,
6118 wakaba 1.52 }->{$token->{tag_name}}) {
6119     ## has an element in scope
6120     my $i;
6121     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6122     my $node = $self->{open_elements}->[$_];
6123     if ($node->[1] eq $token->{tag_name}) {
6124 wakaba 1.79 !!!cp ('t410');
6125 wakaba 1.52 $i = $_;
6126 wakaba 1.87 last INSCOPE;
6127 wakaba 1.52 } elsif ({
6128 wakaba 1.103 applet => 1, table => 1, caption => 1, td => 1, th => 1,
6129 wakaba 1.52 button => 1, marquee => 1, object => 1, html => 1,
6130     }->{$node->[1]}) {
6131 wakaba 1.79 !!!cp ('t411');
6132 wakaba 1.52 last INSCOPE;
6133 wakaba 1.51 }
6134 wakaba 1.52 } # INSCOPE
6135 wakaba 1.89
6136     unless (defined $i) { # has an element in scope
6137     !!!cp ('t413');
6138 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6139 wakaba 1.89 } else {
6140     ## Step 1. generate implied end tags
6141     while ({
6142     dd => ($token->{tag_name} ne 'dd'),
6143     dt => ($token->{tag_name} ne 'dt'),
6144     li => ($token->{tag_name} ne 'li'),
6145     p => 1,
6146     }->{$self->{open_elements}->[-1]->[1]}) {
6147     !!!cp ('t409');
6148     pop @{$self->{open_elements}};
6149     }
6150    
6151     ## Step 2.
6152     if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
6153 wakaba 1.79 !!!cp ('t412');
6154 wakaba 1.113 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1], token => $token);
6155 wakaba 1.51 } else {
6156 wakaba 1.89 !!!cp ('t414');
6157 wakaba 1.51 }
6158 wakaba 1.89
6159     ## Step 3.
6160 wakaba 1.52 splice @{$self->{open_elements}}, $i;
6161 wakaba 1.89
6162     ## Step 4.
6163     $clear_up_to_marker->()
6164     if {
6165 wakaba 1.103 applet => 1, button => 1, marquee => 1, object => 1,
6166 wakaba 1.89 }->{$token->{tag_name}};
6167 wakaba 1.51 }
6168 wakaba 1.52 !!!next-token;
6169 wakaba 1.53 redo B;
6170 wakaba 1.52 } elsif ($token->{tag_name} eq 'form') {
6171 wakaba 1.92 undef $self->{form_element};
6172    
6173 wakaba 1.52 ## has an element in scope
6174 wakaba 1.92 my $i;
6175 wakaba 1.52 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6176     my $node = $self->{open_elements}->[$_];
6177     if ($node->[1] eq $token->{tag_name}) {
6178 wakaba 1.79 !!!cp ('t418');
6179 wakaba 1.92 $i = $_;
6180 wakaba 1.52 last INSCOPE;
6181     } elsif ({
6182 wakaba 1.103 applet => 1, table => 1, caption => 1, td => 1, th => 1,
6183 wakaba 1.52 button => 1, marquee => 1, object => 1, html => 1,
6184     }->{$node->[1]}) {
6185 wakaba 1.79 !!!cp ('t419');
6186 wakaba 1.52 last INSCOPE;
6187     }
6188     } # INSCOPE
6189 wakaba 1.92
6190     unless (defined $i) { # has an element in scope
6191 wakaba 1.79 !!!cp ('t421');
6192 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6193 wakaba 1.92 } else {
6194     ## Step 1. generate implied end tags
6195     while ({
6196     dd => 1, dt => 1, li => 1, p => 1,
6197     }->{$self->{open_elements}->[-1]->[1]}) {
6198     !!!cp ('t417');
6199     pop @{$self->{open_elements}};
6200     }
6201    
6202     ## Step 2.
6203     if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
6204     !!!cp ('t417.1');
6205 wakaba 1.113 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1], token => $token);
6206 wakaba 1.92 } else {
6207     !!!cp ('t420');
6208     }
6209    
6210     ## Step 3.
6211     splice @{$self->{open_elements}}, $i;
6212 wakaba 1.52 }
6213    
6214     !!!next-token;
6215 wakaba 1.53 redo B;
6216 wakaba 1.52 } elsif ({
6217     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
6218     }->{$token->{tag_name}}) {
6219     ## has an element in scope
6220     my $i;
6221     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6222     my $node = $self->{open_elements}->[$_];
6223     if ({
6224     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
6225     }->{$node->[1]}) {
6226 wakaba 1.79 !!!cp ('t423');
6227 wakaba 1.52 $i = $_;
6228     last INSCOPE;
6229     } elsif ({
6230 wakaba 1.103 applet => 1, table => 1, caption => 1, td => 1, th => 1,
6231 wakaba 1.52 button => 1, marquee => 1, object => 1, html => 1,
6232     }->{$node->[1]}) {
6233 wakaba 1.79 !!!cp ('t424');
6234 wakaba 1.52 last INSCOPE;
6235 wakaba 1.51 }
6236 wakaba 1.52 } # INSCOPE
6237 wakaba 1.93
6238     unless (defined $i) { # has an element in scope
6239     !!!cp ('t425.1');
6240 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6241 wakaba 1.79 } else {
6242 wakaba 1.93 ## Step 1. generate implied end tags
6243     while ({
6244     dd => 1, dt => 1, li => 1, p => 1,
6245     }->{$self->{open_elements}->[-1]->[1]}) {
6246     !!!cp ('t422');
6247     pop @{$self->{open_elements}};
6248     }
6249    
6250     ## Step 2.
6251     if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
6252     !!!cp ('t425');
6253 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6254 wakaba 1.93 } else {
6255     !!!cp ('t426');
6256     }
6257    
6258     ## Step 3.
6259     splice @{$self->{open_elements}}, $i;
6260 wakaba 1.36 }
6261 wakaba 1.52
6262     !!!next-token;
6263 wakaba 1.53 redo B;
6264 wakaba 1.87 } elsif ($token->{tag_name} eq 'p') {
6265     ## has an element in scope
6266     my $i;
6267     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6268     my $node = $self->{open_elements}->[$_];
6269     if ($node->[1] eq $token->{tag_name}) {
6270     !!!cp ('t410.1');
6271     $i = $_;
6272 wakaba 1.88 last INSCOPE;
6273 wakaba 1.87 } elsif ({
6274 wakaba 1.103 applet => 1, table => 1, caption => 1, td => 1, th => 1,
6275 wakaba 1.87 button => 1, marquee => 1, object => 1, html => 1,
6276     }->{$node->[1]}) {
6277     !!!cp ('t411.1');
6278     last INSCOPE;
6279     }
6280     } # INSCOPE
6281 wakaba 1.91
6282     if (defined $i) {
6283     if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
6284 wakaba 1.87 !!!cp ('t412.1');
6285 wakaba 1.113 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1], token => $token);
6286 wakaba 1.87 } else {
6287 wakaba 1.91 !!!cp ('t414.1');
6288 wakaba 1.87 }
6289 wakaba 1.91
6290 wakaba 1.87 splice @{$self->{open_elements}}, $i;
6291     } else {
6292 wakaba 1.91 !!!cp ('t413.1');
6293 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6294 wakaba 1.91
6295 wakaba 1.87 !!!cp ('t415.1');
6296     ## As if <p>, then reprocess the current token
6297     my $el;
6298 wakaba 1.116 !!!create-element ($el, 'p',, $token);
6299 wakaba 1.87 $insert->($el);
6300 wakaba 1.91 ## NOTE: Not inserted into |$self->{open_elements}|.
6301 wakaba 1.87 }
6302 wakaba 1.91
6303 wakaba 1.87 !!!next-token;
6304     redo B;
6305 wakaba 1.52 } elsif ({
6306     a => 1,
6307     b => 1, big => 1, em => 1, font => 1, i => 1,
6308     nobr => 1, s => 1, small => 1, strile => 1,
6309     strong => 1, tt => 1, u => 1,
6310     }->{$token->{tag_name}}) {
6311 wakaba 1.79 !!!cp ('t427');
6312 wakaba 1.113 $formatting_end_tag->($token);
6313 wakaba 1.53 redo B;
6314 wakaba 1.52 } elsif ($token->{tag_name} eq 'br') {
6315 wakaba 1.79 !!!cp ('t428');
6316 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:br', token => $token);
6317 wakaba 1.52
6318     ## As if <br>
6319     $reconstruct_active_formatting_elements->($insert_to_current);
6320    
6321     my $el;
6322 wakaba 1.116 !!!create-element ($el, 'br',, $token);
6323 wakaba 1.52 $insert->($el);
6324    
6325     ## Ignore the token.
6326     !!!next-token;
6327 wakaba 1.53 redo B;
6328 wakaba 1.52 } elsif ({
6329     caption => 1, col => 1, colgroup => 1, frame => 1,
6330     frameset => 1, head => 1, option => 1, optgroup => 1,
6331     tbody => 1, td => 1, tfoot => 1, th => 1,
6332     thead => 1, tr => 1,
6333     area => 1, basefont => 1, bgsound => 1,
6334     embed => 1, hr => 1, iframe => 1, image => 1,
6335     img => 1, input => 1, isindex => 1, noembed => 1,
6336     noframes => 1, param => 1, select => 1, spacer => 1,
6337     table => 1, textarea => 1, wbr => 1,
6338     noscript => 0, ## TODO: if scripting is enabled
6339     }->{$token->{tag_name}}) {
6340 wakaba 1.79 !!!cp ('t429');
6341 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6342 wakaba 1.52 ## Ignore the token
6343     !!!next-token;
6344 wakaba 1.53 redo B;
6345 wakaba 1.52
6346     ## ISSUE: Issue on HTML5 new elements in spec
6347    
6348     } else {
6349     ## Step 1
6350     my $node_i = -1;
6351     my $node = $self->{open_elements}->[$node_i];
6352 wakaba 1.51
6353 wakaba 1.52 ## Step 2
6354     S2: {
6355     if ($node->[1] eq $token->{tag_name}) {
6356     ## Step 1
6357     ## generate implied end tags
6358 wakaba 1.86 while ({
6359     dd => 1, dt => 1, li => 1, p => 1,
6360     }->{$self->{open_elements}->[-1]->[1]}) {
6361 wakaba 1.79 !!!cp ('t430');
6362 wakaba 1.83 ## ISSUE: Can this case be reached?
6363 wakaba 1.86 pop @{$self->{open_elements}};
6364 wakaba 1.52 }
6365    
6366     ## Step 2
6367     if ($token->{tag_name} ne $self->{open_elements}->[-1]->[1]) {
6368 wakaba 1.79 !!!cp ('t431');
6369 wakaba 1.58 ## NOTE: <x><y></x>
6370 wakaba 1.113 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1], token => $token);
6371 wakaba 1.79 } else {
6372     !!!cp ('t432');
6373 wakaba 1.52 }
6374    
6375     ## Step 3
6376     splice @{$self->{open_elements}}, $node_i;
6377 wakaba 1.51
6378 wakaba 1.1 !!!next-token;
6379 wakaba 1.52 last S2;
6380 wakaba 1.1 } else {
6381 wakaba 1.52 ## Step 3
6382     if (not $formatting_category->{$node->[1]} and
6383     #not $phrasing_category->{$node->[1]} and
6384     ($special_category->{$node->[1]} or
6385     $scoping_category->{$node->[1]})) {
6386 wakaba 1.79 !!!cp ('t433');
6387 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6388 wakaba 1.52 ## Ignore the token
6389     !!!next-token;
6390     last S2;
6391     }
6392 wakaba 1.79
6393     !!!cp ('t434');
6394 wakaba 1.1 }
6395 wakaba 1.52
6396     ## Step 4
6397     $node_i--;
6398     $node = $self->{open_elements}->[$node_i];
6399    
6400     ## Step 5;
6401     redo S2;
6402     } # S2
6403 wakaba 1.53 redo B;
6404 wakaba 1.1 }
6405     }
6406 wakaba 1.52 redo B;
6407 wakaba 1.1 } # B
6408    
6409     ## Stop parsing # MUST
6410    
6411     ## TODO: script stuffs
6412 wakaba 1.3 } # _tree_construction_main
6413    
## Implementation of the HTML5 |innerHTML| setter: the children of
## |$node| are replaced by the result of parsing a markup string.
##
## Invoked as a class method:
##
##   $class->set_inner_html ($node, $string, $onerror);
##
##   $node    - A document node (node type 9) or an element node
##              (node type 1).
##   $string  - The markup to parse.  Only a reference to the caller's
##              scalar is taken; the string itself is not copied.
##   $onerror - Optional code reference, called with a list of
##              name/value pairs for each parse error.  If it is
##              false, the errors of the element case are reported
##              by |warn|.
##
## Dies if |$node| is neither a document nor an element.
sub set_inner_html ($$$) {
  my ($class, $node) = splice @_, 0, 2;
  my $s = \$_[0];
  my $onerror = $_[1];

  ## ISSUE: Should {confident} be true?

  my $nt = $node->node_type;
  unless ($nt == 9 or $nt == 1) {
    die "$0: |set_inner_html| is not defined for node of type $nt";
  }

  if ($nt == 9) {
    # MUST

    ## Step 1 # MUST
    ## TODO: If the document has an active parser, ...
    ## ISSUE: There is an issue in the spec.

    ## Step 2 # MUST
    ## The child list is copied before the loop, so that the loop does
    ## not walk a list from which nodes are being removed.
    my @old_children = @{$node->child_nodes};
    $node->remove_child ($_) for @old_children;

    ## Step 3, 4, 5 # MUST
    return $class->parse_string ($$s => $node, $onerror);
  }

  ## From here on, |$node| is an element.

  ## TODO: If non-html element

  ## NOTE: Most of this code is copied from |parse_string|

  ## Step 1 # MUST
  ## The string is parsed into a separate, temporary HTML document.
  my $this_doc = $node->owner_document;
  my $doc = $this_doc->implementation->create_document;
  $doc->manakai_is_html (1);
  my $p = $class->new;
  $p->{document} = $doc;

  ## Step 8 # MUST
  ## Input stream: |$i| is the offset of the next character in the
  ## string, |$line| and |$column| are the current position as used
  ## in the error reports.
  my $i = 0;
  my $line = 1;
  my $column = 0;
  $p->{set_next_char} = sub {
    ## NOTE: The variable has to be named |$self|, since the |!!!|
    ## macros below are presumably expanded to code referring to it
    ## (to be confirmed against the macro definitions).
    my $self = shift;

    ## The character read last becomes the newest entry of the
    ## fixed-size history of previous characters.
    pop @{$self->{prev_char}};
    unshift @{$self->{prev_char}}, $self->{next_char};

    if ($i >= length $$s) {
      ## End of the input is represented by -1.
      $self->{next_char} = -1;
      return;
    }

    my $char = ord substr $$s, $i++, 1;
    $self->{next_char} = $char;
    $column++;

    if ($char == 0x000A) { # LF
      $line++;
      $column = 0;
      !!!cp ('i1');
    } elsif ($char == 0x000D) { # CR
      ## CR and CR LF are both reported as a single LF.
      $i++ if substr ($$s, $i, 1) eq "\x0A";
      $self->{next_char} = 0x000A; # LF # MUST
      $line++;
      $column = 0;
      !!!cp ('i2');
    } elsif ($char > 0x10FFFF) {
      $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
      !!!cp ('i3');
    } elsif ($char == 0x0000) { # NULL
      !!!cp ('i4');
      !!!parse-error (type => 'NULL');
      $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    }
  };
  $p->{prev_char} = [-1, -1, -1];
  $p->{next_char} = -1;

  ## Error reporting.  |line| and |column| are appended after the
  ## arguments given by the parser; a handler that turns its arguments
  ## into a hash (like the default one) therefore sees the current
  ## input position even if the parser gave keys of the same names.
  my $ponerror = $onerror ? $onerror : sub {
    my (%opt) = @_;
    warn "Parse error ($opt{type}) at line $opt{line} column $opt{column}\n";
  };
  $p->{parse_error} = sub {
    $ponerror->(@_, line => $line, column => $column);
  };

  $p->_initialize_tokenizer;
  $p->_initialize_tree_constructor;

  ## Step 2
  ## The initial content model depends on the local name of the
  ## context element; PCDATA is used for any name not listed.
  my $node_ln = $node->manakai_local_name;
  my %initial_content_model = (
    title => RCDATA_CONTENT_MODEL,
    textarea => RCDATA_CONTENT_MODEL,
    style => CDATA_CONTENT_MODEL,
    script => CDATA_CONTENT_MODEL,
    xmp => CDATA_CONTENT_MODEL,
    iframe => CDATA_CONTENT_MODEL,
    noembed => CDATA_CONTENT_MODEL,
    noframes => CDATA_CONTENT_MODEL,
    noscript => CDATA_CONTENT_MODEL,
    plaintext => PLAINTEXT_CONTENT_MODEL,
  );
  my $content_model = $initial_content_model{$node_ln};
  $p->{content_model} = defined $content_model
      ? $content_model : PCDATA_CONTENT_MODEL;
  ## ISSUE: What is "the name of the element"? local name?

  $p->{inner_html_node} = [$node, $node_ln];

  ## Step 3
  my $root = $doc->create_element_ns
      ('http://www.w3.org/1999/xhtml', [undef, 'html']);

  ## Step 4 # MUST
  $doc->append_child ($root);

  ## Step 5 # MUST
  push @{$p->{open_elements}}, [$root, 'html'];

  $p->{head_element} = undef;

  ## Step 6 # MUST
  $p->_reset_insertion_mode;

  ## Step 7 # MUST
  ## The form element pointer is set to the nearest XHTML |form|
  ## element among the context element and its ancestors, if any.
  for (my $ancestor = $node;
       defined $ancestor;
       $ancestor = $ancestor->parent_node) {
    next unless $ancestor->node_type == 1;
    my $nsuri = $ancestor->namespace_uri;
    next unless defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml';
    next unless $ancestor->manakai_local_name eq 'form';
    !!!cp ('i5');
    $p->{form_element} = $ancestor;
    last;
  }

  ## Step 9 # MUST
  {
    ## NOTE: |$self| is only defined for the sake of the macro.
    my $self = $p;
    !!!next-token;
  }
  $p->_tree_construction_main;

  ## Step 10 # MUST
  my @old_children = @{$node->child_nodes};
  $node->remove_child ($_) for @old_children;
  ## ISSUE: mutation events? read-only?

  ## Step 11 # MUST
  ## The parsed nodes are moved from the temporary document to the
  ## document of the context element.
  my @new_children = @{$root->child_nodes};
  for my $child (@new_children) {
    $this_doc->adopt_node ($child);
    $node->append_child ($child);
  }
  ## ISSUE: mutation events?

  $p->_terminate_tree_constructor;
} # set_inner_html
6575    
6576     } # tree construction stage
6577 wakaba 1.1
## Exception class derived from |Error|.
## NOTE(review): Presumably thrown to make the parser start over; the
## code that throws and catches it is not in this part of the file.
package Whatpm::HTML::RestartParser;
our @ISA;
push @ISA, 'Error';

1;
6582 wakaba 1.117 # $Date: 2008/03/17 13:23:39 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24