/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Contents of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.120 - (hide annotations) (download) (as text)
Thu Mar 20 03:57:00 2008 UTC (16 years, 7 months ago) by wakaba
Branch: MAIN
Changes since 1.119: +28 -28 lines
File MIME type: application/x-wais-source
++ whatpm/Whatpm/ChangeLog	20 Mar 2008 03:54:17 -0000
	* HTML.pm.src: Better line/column reporting for "duplicate attribute"
	errors.  Line/column markings for DOCTYPE, comment, and
	character tokens are reintroduced; otherwise,
	error location for "not HTML5" error and errors
	for implied elements are not attached.

2008-03-20  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.2 package Whatpm::HTML;
2 wakaba 1.1 use strict;
3 wakaba 1.119 our $VERSION=do{my @r=(q$Revision: 1.118 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.63 use Error qw(:try);
5 wakaba 1.1
6 wakaba 1.18 ## ISSUE:
7     ## var doc = implementation.createDocument (null, null, null);
8     ## doc.write ('');
9     ## alert (doc.compatMode);
10 wakaba 1.1
11 wakaba 1.70 ## TODO: Control characters and noncharacters are not allowed (HTML5 revision 1263)
12     ## TODO: 1252 parse error (revision 1264)
13     ## TODO: 8859-11 = 874 (revision 1271)
14    
## Start tag names after which the tokenizer accepts a "/" immediately
## followed by ">" without reporting a |nestc| parse error.
my $permitted_slash_tag_name = {
  map { $_ => 1 } qw(
    base link meta hr br img embed param area col input
  )
};
28    
## Code point substitution table for the range 0x80..0x9F.  Keys are the
## original code points, values are the code points they are replaced
## with; 0xFFFD (REPLACEMENT CHARACTER) marks positions with no mapping.
my $c1_entity_char = {};
{
  my $code = 0x80;
  $c1_entity_char->{$code++} = $_ for (
    0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, # 0x80..0x87
    0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD, # 0x88..0x8F
    0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, # 0x90..0x97
    0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178, # 0x98..0x9F
  );
} # $c1_entity_char
63 wakaba 1.1
## Element names that belong to the "special" category.
my $special_category = {
  map { $_ => 1 } qw(
    address area base basefont bgsound
    blockquote body br center col colgroup
    dd dir div dl dt embed fieldset
    form frame frameset h1 h2 h3
    h4 h5 h6 head hr iframe image
    img input isindex li link listing
    menu meta noembed noframes noscript
    ol optgroup option p param plaintext
    pre script select spacer style tbody
    textarea tfoot thead title tr ul wbr
  )
};
## Element names that belong to the "scoping" category.
my $scoping_category = {
  map { $_ => 1 } qw(
    applet button caption html marquee object
    table td th
  )
};
## Element names that belong to the "formatting" category.
##
## NOTE: The entry |strike| used to be misspelled as |strile|, so the
## |strike| element was never recognized as a formatting element.
my $formatting_category = {
  map { $_ => 1 } qw(
    a b big em font i nobr
    s small strike strong tt u
  )
};
84     # $phrasing_category: all other elements
85    
## Decode a byte string and parse the result as an HTML document.
##
##   $return = $parser_or_class->parse_byte_string
##       ($charset, $bytes, @rest);
##
## - The invocant is a parser object, or a class name on which |new| is
##   called.
## - $charset is the encoding label used to decode $bytes.  If it is
##   undefined, the encoding is guessed from the first 1024 bytes, with
##   |windows-1252| as the fallback, and |confident| is set to false.
## - $bytes is the byte string or a reference to it.
## - @rest is passed to |parse_char_string| after the decoded string.
##
## Returns whatever |parse_char_string| returns.  If the installed
## |change_encoding| handler throws |Whatpm::HTML::RestartParser|, the
## bytes are decoded again with the requested charset and parsed a
## second time.
sub parse_byte_string ($$$$;$) {
  my $self = ref $_[0] ? shift : shift->new;
  my $charset = shift;
  my $bytes_s = ref $_[0] ? $_[0] : \($_[0]);
  my $s;

  ## Every path below (explicit charset, sniffed charset, parser
  ## restart) calls |Encode::decode|, so the module is loaded once here
  ## rather than only in the explicit-charset branch.
  require Encode; ## TODO: decode(utf8) don't delete BOM

  if (defined $charset) {
    $s = \ (Encode::decode ($charset, $$bytes_s));
    $self->{input_encoding} = lc $charset; ## TODO: normalize name
    $self->{confident} = 1;
  } else {
    ## TODO: Implement HTML5 detection algorithm
    require Whatpm::Charset::UniversalCharDet;
    $charset = Whatpm::Charset::UniversalCharDet->detect_byte_string
        (substr ($$bytes_s, 0, 1024));
    $charset ||= 'windows-1252';
    $s = \ (Encode::decode ($charset, $$bytes_s));
    $self->{input_encoding} = $charset;
    $self->{confident} = 0;
  }

  $self->{change_encoding} = sub {
    my $self = shift;
    my $charset = lc shift;
    my $token = shift;
    ## TODO: if $charset is supported
    ## TODO: normalize charset name

    ## "Change the encoding" algorithm:

    ## Step 1
    if ($charset eq 'utf-16') { ## ISSUE: UTF-16BE -> UTF-8? UTF-16LE -> UTF-8?
      $charset = 'utf-8';
    }

    ## Step 2
    ## NOTE: $charset has been lowercased above, while the stored label
    ## might not be (the sniffing branch stores it as is), so the
    ## comparison is done on the lowercased form.
    if (defined $self->{input_encoding} and
        lc ($self->{input_encoding}) eq $charset) {
      $self->{confident} = 1;
      return;
    }

    !!!parse-error (type => 'charset label detected:'.$self->{input_encoding}.
        ':'.$charset, level => 'w', token => $token);

    ## Step 3
    # if (can) {
      ## change the encoding on the fly.
      #$self->{confident} = 1;
      #return;
    # }

    ## Step 4
    throw Whatpm::HTML::RestartParser (charset => $charset);
  }; # $self->{change_encoding}

  my @args = @_; shift @args; # $s
  my $return;
  try {
    $return = $self->parse_char_string ($s, @args);
  } catch Whatpm::HTML::RestartParser with {
    ## The handler asked for a restart: decode again with the new
    ## charset and parse from the beginning.
    my $charset = shift->{charset};
    $s = \ (Encode::decode ($charset, $$bytes_s));
    $self->{input_encoding} = $charset; ## TODO: normalize
    $self->{confident} = 1;
    $return = $self->parse_char_string ($s, @args);
  };
  return $return;
} # parse_byte_string
156    
157 wakaba 1.71 ## NOTE: HTML5 spec says that the encoding layer MUST NOT strip BOM
158     ## and the HTML layer MUST ignore it. However, we do strip the BOM in
159     ## the encoding layer and the HTML layer does not ignore any U+FEFF,
160     ## because the core part of our HTML parser expects a string of characters,
161     ## not a string of bytes or code units or anything which might contain a BOM.
162     ## Therefore, any parser interface that accepts a string of bytes,
163     ## such as |parse_byte_string| in this module, must ensure that it does
164     ## strip the BOM and never strips any ZWNBSP.
165    
166 wakaba 1.63 *parse_char_string = \&parse_string;
167    
## Parse a character string into a document.
##
##   $doc = $parser_or_class->parse_string ($string, $doc [, $onerror]);
##
## - The invocant is a parser object, or a class name on which |new| is
##   called.
## - $string is the character string or a reference to it.
## - $doc is the document object; its child nodes are removed first.
## - $onerror, if true, is called with a list of name/value pairs for
##   every parse error.  Otherwise errors are reported by |warn|.
##
## Returns $doc.
sub parse_string ($$$;$) {
  my $self = shift;
  $self = $self->new unless ref $self;

  my $s;
  if (ref $_[0]) {
    $s = $_[0];
  } else {
    $s = \($_[0]);
  }

  $self->{document} = $_[1];
  @{$self->{document}->child_nodes} = ();

  ## NOTE: |set_inner_html| copies most of this method's code

  exists $self->{confident} or $self->{confident} = 1;
  if (defined $self->{input_encoding}) {
    $self->{document}->input_encoding ($self->{input_encoding});
  }

  ## Input position (index into $$s) shared with the closure below.
  my $pos = 0;
  $self->{line} = 1;
  $self->{line_prev} = 1;
  $self->{column} = 0;
  $self->{column_prev} = 0;
  $self->{set_next_char} = sub {
    my $self = shift;

    ## Shift the character just consumed into the look-behind window.
    pop @{$self->{prev_char}};
    unshift @{$self->{prev_char}}, $self->{next_char};

    ## End of input: -1 is the EOF marker; line/column are left as is.
    if ($pos >= length $$s) {
      $self->{next_char} = -1;
      return;
    }

    my $code = ord substr $$s, $pos++, 1;
    $self->{next_char} = $code;

    @$self{qw(line_prev column_prev)} = @$self{qw(line column)};
    $self->{column}++;

    if ($code == 0x000A or $code == 0x000D) { # LF or CR
      if ($code == 0x000D) {
        ## CR and CR LF are both reported as a single LF.
        $pos++ if substr ($$s, $pos, 1) eq "\x0A";
        $self->{next_char} = 0x000A; # LF # MUST
      }
      $self->{line}++;
      $self->{column} = 0;
    } elsif ($code > 0x10FFFF) {
      $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    } elsif ($code == 0x0000) { # NULL
      !!!parse-error (type => 'NULL');
      $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    }
  };
  $self->{prev_char} = [-1, -1, -1];
  $self->{next_char} = -1;

  ## Default error handler: the position of the token, if one is
  ## given, takes precedence over the current input position.
  my $onerror = $_[2] || sub {
    my (%opt) = @_;
    my $where = $opt{token} ? $opt{token} : \%opt;
    my $line = $where->{line};
    my $column = $where->{column};
    warn "Parse error ($opt{type}) at line $line column $column\n";
  };
  $self->{parse_error} = sub {
    $onerror->(line => $self->{line}, column => $self->{column}, @_);
  };

  $self->_initialize_tokenizer;
  $self->_initialize_tree_constructor;
  $self->_construct_tree;
  $self->_terminate_tree_constructor;

  ## The |parse_error| closure refers to $self; drop it to break the
  ## reference loop.
  delete $self->{parse_error};

  return $self->{document};
} # parse_string
233    
## Create a new parser object with default (mostly no-op) handlers.
##
##   $parser = Whatpm::HTML->new;
##
## The handlers installed here are placeholders; |parse_string| and
## |parse_byte_string| replace |set_next_char|, |parse_error| and
## |change_encoding| with real implementations.
sub new ($) {
  my $class = shift;
  my $self = bless {}, $class;
  $self->{set_next_char} = sub {
    ## The parser is taken from the argument list, as the handler
    ## installed by |parse_string| does, instead of being captured
    ## from the enclosing scope.  Capturing it would make the object
    ## refer to itself through this closure, so that it would never be
    ## freed.
    my $self = shift;
    $self->{next_char} = -1;
  };
  $self->{parse_error} = sub {
    #
  };
  $self->{change_encoding} = sub {
    # if ($_[0] is a supported encoding) {
    #   run "change the encoding" algorithm;
    #   throw Whatpm::HTML::RestartParser (charset => $new_encoding);
    # }
  };
  $self->{application_cache_selection} = sub {
    #
  };
  return $self;
} # new
254    
## Content model flag bits.
use constant {
  CM_ENTITY => 0b001, # & markup in data
  CM_LIMITED_MARKUP => 0b010, # < markup in data (limited)
  CM_FULL_MARKUP => 0b100, # < markup in data (any)
};

## Content models, expressed as combinations of the flag bits above.
use constant {
  PLAINTEXT_CONTENT_MODEL => 0,
  CDATA_CONTENT_MODEL => CM_LIMITED_MARKUP,
  RCDATA_CONTENT_MODEL => CM_ENTITY | CM_LIMITED_MARKUP,
  PCDATA_CONTENT_MODEL => CM_ENTITY | CM_FULL_MARKUP,
};
263    
## Tokenizer states (values of |$self->{state}|).
use constant {
  DATA_STATE => 0,
  ENTITY_DATA_STATE => 1,
  TAG_OPEN_STATE => 2,
  CLOSE_TAG_OPEN_STATE => 3,
  TAG_NAME_STATE => 4,
  BEFORE_ATTRIBUTE_NAME_STATE => 5,
  ATTRIBUTE_NAME_STATE => 6,
  AFTER_ATTRIBUTE_NAME_STATE => 7,
  BEFORE_ATTRIBUTE_VALUE_STATE => 8,
  ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE => 9,
  ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE => 10,
  ATTRIBUTE_VALUE_UNQUOTED_STATE => 11,
  ENTITY_IN_ATTRIBUTE_VALUE_STATE => 12,
  MARKUP_DECLARATION_OPEN_STATE => 13,
  COMMENT_START_STATE => 14,
  COMMENT_START_DASH_STATE => 15,
  COMMENT_STATE => 16,
  COMMENT_END_STATE => 17,
  COMMENT_END_DASH_STATE => 18,
  BOGUS_COMMENT_STATE => 19,
  DOCTYPE_STATE => 20,
  BEFORE_DOCTYPE_NAME_STATE => 21,
  DOCTYPE_NAME_STATE => 22,
  AFTER_DOCTYPE_NAME_STATE => 23,
  BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE => 24,
  DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE => 25,
  DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE => 26,
  AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE => 27,
  BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE => 28,
  DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE => 29,
  DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE => 30,
  AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE => 31,
  BOGUS_DOCTYPE_STATE => 32,
  AFTER_ATTRIBUTE_VALUE_QUOTED_STATE => 33,
};
298 wakaba 1.57
## Token types (values of |$token->{type}|).
use constant {
  DOCTYPE_TOKEN => 1,
  COMMENT_TOKEN => 2,
  START_TAG_TOKEN => 3,
  END_TAG_TOKEN => 4,
  END_OF_FILE_TOKEN => 5,
  CHARACTER_TOKEN => 6,
};
305    
## Insertion mode group bits.  An insertion mode constant is built by
## or-ing one or more of these bits, optionally with a value in the
## lowest two bits.
use constant {
  AFTER_HTML_IMS => 0b100,
  HEAD_IMS => 0b1000,
  BODY_IMS => 0b10000,
  BODY_TABLE_IMS => 0b100000,
  TABLE_IMS => 0b1000000,
  ROW_IMS => 0b10000000,
  BODY_AFTER_IMS => 0b100000000,
  FRAME_IMS => 0b1000000000,
  SELECT_IMS => 0b10000000000,
};

## NOTE: "initial" and "before html" insertion modes have no constants.

## Insertion modes.
use constant {
  ## NOTE: "after after body" insertion mode.
  AFTER_HTML_BODY_IM => AFTER_HTML_IMS | BODY_AFTER_IMS,

  ## NOTE: "after after frameset" insertion mode.
  AFTER_HTML_FRAMESET_IM => AFTER_HTML_IMS | FRAME_IMS,

  IN_HEAD_IM => HEAD_IMS | 0b00,
  IN_HEAD_NOSCRIPT_IM => HEAD_IMS | 0b01,
  AFTER_HEAD_IM => HEAD_IMS | 0b10,
  BEFORE_HEAD_IM => HEAD_IMS | 0b11,
  IN_BODY_IM => BODY_IMS,
  IN_CELL_IM => BODY_IMS | BODY_TABLE_IMS | 0b01,
  IN_CAPTION_IM => BODY_IMS | BODY_TABLE_IMS | 0b10,
  IN_ROW_IM => TABLE_IMS | ROW_IMS | 0b01,
  IN_TABLE_BODY_IM => TABLE_IMS | ROW_IMS | 0b10,
  IN_TABLE_IM => TABLE_IMS,
  AFTER_BODY_IM => BODY_AFTER_IMS,
  IN_FRAMESET_IM => FRAME_IMS | 0b01,
  AFTER_FRAMESET_IM => FRAME_IMS | 0b10,
  IN_SELECT_IM => SELECT_IMS | 0b01,
  IN_SELECT_IN_TABLE_IM => SELECT_IMS | 0b10,
  IN_COLUMN_GROUP_IM => 0b10,
};
340    
341 wakaba 1.1 ## Implementations MUST act as if they used the state machine described in the spec.
342    
## Reset the tokenizer to its initial condition and read the first
## input character.
sub _initialize_tokenizer ($) {
  my $self = shift;

  ## The tokenizer starts in the data state with the PCDATA content
  ## model.
  $self->{state} = DATA_STATE;
  $self->{content_model} = PCDATA_CONTENT_MODEL;

  ## No token (start tag, end tag, comment, or DOCTYPE) or attribute is
  ## under construction, and nothing has been emitted yet.
  undef $self->{$_} for qw(
    current_token
    current_attribute
    last_emitted_start_tag_name
    last_attribute_value_state
  );

  $self->{char} = [];
  # $self->{next_char}
  !!!next-input-character;
  # $self->{escape}
  $self->{token} = [];
} # _initialize_tokenizer
357    
358     ## A token has:
359 wakaba 1.55 ## ->{type} == DOCTYPE_TOKEN, START_TAG_TOKEN, END_TAG_TOKEN, COMMENT_TOKEN,
360     ## CHARACTER_TOKEN, or END_OF_FILE_TOKEN
361     ## ->{name} (DOCTYPE_TOKEN)
362     ## ->{tag_name} (START_TAG_TOKEN, END_TAG_TOKEN)
363     ## ->{public_identifier} (DOCTYPE_TOKEN)
364     ## ->{system_identifier} (DOCTYPE_TOKEN)
365 wakaba 1.75 ## ->{quirks} == 1 or 0 (DOCTYPE_TOKEN): "force-quirks" flag
366 wakaba 1.55 ## ->{attributes} isa HASH (START_TAG_TOKEN, END_TAG_TOKEN)
367 wakaba 1.66 ## ->{name}
368     ## ->{value}
369     ## ->{has_reference} == 1 or 0
370 wakaba 1.55 ## ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN)
371 wakaba 1.1
372     ## Emitted token MUST immediately be handled by the tree construction state.
373    
374     ## Before each step, UA MAY check to see if either one of the scripts in
375     ## "list of scripts that will execute as soon as possible" or the first
376     ## script in the "list of scripts that will execute asynchronously",
377     ## has completed loading. If one has, then it MUST be executed
378     ## and removed from the list.
379    
380 wakaba 1.59 ## NOTE: HTML5 "Writing HTML documents" section, applied to
381     ## documents and not to user agents and conformance checkers,
382     ## contains some requirements that are not detected by the
383     ## parsing algorithm:
384     ## - Some requirements on character encoding declarations. ## TODO
385     ## - "Elements MUST NOT contain content that their content model disallows."
386     ## ... Some are parse error, some are not (will be reported by c.c.).
387     ## - Polytheistic slash SHOULD NOT be used. (Applied only to atheists.) ## TODO
388     ## - Text (in elements, attributes, and comments) SHOULD NOT contain
389     ## control characters other than space characters. ## TODO: (what is control character? C0, C1 and DEL? Unicode control character?)
390    
391     ## TODO: HTML5 poses authors two SHOULD-level requirements that cannot
392     ## be detected by the HTML5 parsing algorithm:
393     ## - Text,
394    
395 wakaba 1.1 sub _get_next_token ($) {
396     my $self = shift;
397     if (@{$self->{token}}) {
398     return shift @{$self->{token}};
399     }
400    
401     A: {
402 wakaba 1.57 if ($self->{state} == DATA_STATE) {
403 wakaba 1.76 if ($self->{next_char} == 0x0026) { # &
404 wakaba 1.72 if ($self->{content_model} & CM_ENTITY and # PCDATA | RCDATA
405     not $self->{escape}) {
406 wakaba 1.77 !!!cp (1);
407 wakaba 1.57 $self->{state} = ENTITY_DATA_STATE;
408 wakaba 1.1 !!!next-input-character;
409     redo A;
410     } else {
411 wakaba 1.77 !!!cp (2);
412 wakaba 1.1 #
413     }
414 wakaba 1.76 } elsif ($self->{next_char} == 0x002D) { # -
415 wakaba 1.40 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
416 wakaba 1.13 unless ($self->{escape}) {
417 wakaba 1.76 if ($self->{prev_char}->[0] == 0x002D and # -
418     $self->{prev_char}->[1] == 0x0021 and # !
419     $self->{prev_char}->[2] == 0x003C) { # <
420 wakaba 1.77 !!!cp (3);
421 wakaba 1.13 $self->{escape} = 1;
422 wakaba 1.77 } else {
423     !!!cp (4);
424 wakaba 1.13 }
425 wakaba 1.77 } else {
426     !!!cp (5);
427 wakaba 1.13 }
428     }
429    
430     #
431 wakaba 1.76 } elsif ($self->{next_char} == 0x003C) { # <
432 wakaba 1.40 if ($self->{content_model} & CM_FULL_MARKUP or # PCDATA
433     (($self->{content_model} & CM_LIMITED_MARKUP) and # CDATA | RCDATA
434 wakaba 1.13 not $self->{escape})) {
435 wakaba 1.77 !!!cp (6);
436 wakaba 1.57 $self->{state} = TAG_OPEN_STATE;
437 wakaba 1.1 !!!next-input-character;
438     redo A;
439     } else {
440 wakaba 1.77 !!!cp (7);
441 wakaba 1.1 #
442     }
443 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
444 wakaba 1.13 if ($self->{escape} and
445 wakaba 1.40 ($self->{content_model} & CM_LIMITED_MARKUP)) { # RCDATA | CDATA
446 wakaba 1.76 if ($self->{prev_char}->[0] == 0x002D and # -
447     $self->{prev_char}->[1] == 0x002D) { # -
448 wakaba 1.77 !!!cp (8);
449 wakaba 1.13 delete $self->{escape};
450 wakaba 1.77 } else {
451     !!!cp (9);
452 wakaba 1.13 }
453 wakaba 1.77 } else {
454     !!!cp (10);
455 wakaba 1.13 }
456    
457     #
458 wakaba 1.76 } elsif ($self->{next_char} == -1) {
459 wakaba 1.77 !!!cp (11);
460 wakaba 1.112 !!!emit ({type => END_OF_FILE_TOKEN,
461     line => $self->{line}, column => $self->{column}});
462 wakaba 1.1 last A; ## TODO: ok?
463 wakaba 1.77 } else {
464     !!!cp (12);
465 wakaba 1.1 }
466     # Anything else
467 wakaba 1.55 my $token = {type => CHARACTER_TOKEN,
468 wakaba 1.112 data => chr $self->{next_char},
469 wakaba 1.120 line => $self->{line}, column => $self->{column},
470 wakaba 1.118 };
471 wakaba 1.1 ## Stay in the data state
472     !!!next-input-character;
473    
474     !!!emit ($token);
475    
476     redo A;
477 wakaba 1.57 } elsif ($self->{state} == ENTITY_DATA_STATE) {
478 wakaba 1.1 ## (cannot happen in CDATA state)
479 wakaba 1.112
480 wakaba 1.120 my ($l, $c) = ($self->{line_prev}, $self->{column_prev});
481 wakaba 1.1
482 wakaba 1.72 my $token = $self->_tokenize_attempt_to_consume_an_entity (0, -1);
483 wakaba 1.1
484 wakaba 1.57 $self->{state} = DATA_STATE;
485 wakaba 1.1 # next-input-character is already done
486    
487     unless (defined $token) {
488 wakaba 1.77 !!!cp (13);
489 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '&',
490 wakaba 1.120 line => $l, column => $c,
491 wakaba 1.118 });
492 wakaba 1.1 } else {
493 wakaba 1.77 !!!cp (14);
494 wakaba 1.1 !!!emit ($token);
495     }
496    
497     redo A;
498 wakaba 1.57 } elsif ($self->{state} == TAG_OPEN_STATE) {
499 wakaba 1.40 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
500 wakaba 1.76 if ($self->{next_char} == 0x002F) { # /
501 wakaba 1.77 !!!cp (15);
502 wakaba 1.1 !!!next-input-character;
503 wakaba 1.57 $self->{state} = CLOSE_TAG_OPEN_STATE;
504 wakaba 1.1 redo A;
505     } else {
506 wakaba 1.77 !!!cp (16);
507 wakaba 1.1 ## reconsume
508 wakaba 1.57 $self->{state} = DATA_STATE;
509 wakaba 1.1
510 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '<',
511 wakaba 1.120 line => $self->{line_prev},
512     column => $self->{column_prev},
513 wakaba 1.118 });
514 wakaba 1.1
515     redo A;
516     }
517 wakaba 1.40 } elsif ($self->{content_model} & CM_FULL_MARKUP) { # PCDATA
518 wakaba 1.76 if ($self->{next_char} == 0x0021) { # !
519 wakaba 1.77 !!!cp (17);
520 wakaba 1.57 $self->{state} = MARKUP_DECLARATION_OPEN_STATE;
521 wakaba 1.1 !!!next-input-character;
522     redo A;
523 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
524 wakaba 1.77 !!!cp (18);
525 wakaba 1.57 $self->{state} = CLOSE_TAG_OPEN_STATE;
526 wakaba 1.1 !!!next-input-character;
527     redo A;
528 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
529     $self->{next_char} <= 0x005A) { # A..Z
530 wakaba 1.77 !!!cp (19);
531 wakaba 1.1 $self->{current_token}
532 wakaba 1.55 = {type => START_TAG_TOKEN,
533 wakaba 1.112 tag_name => chr ($self->{next_char} + 0x0020),
534     line => $self->{line_prev},
535     column => $self->{column_prev}};
536 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
537 wakaba 1.1 !!!next-input-character;
538     redo A;
539 wakaba 1.76 } elsif (0x0061 <= $self->{next_char} and
540     $self->{next_char} <= 0x007A) { # a..z
541 wakaba 1.77 !!!cp (20);
542 wakaba 1.55 $self->{current_token} = {type => START_TAG_TOKEN,
543 wakaba 1.112 tag_name => chr ($self->{next_char}),
544     line => $self->{line_prev},
545     column => $self->{column_prev}};
546 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
547 wakaba 1.1 !!!next-input-character;
548     redo A;
549 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
550 wakaba 1.77 !!!cp (21);
551 wakaba 1.115 !!!parse-error (type => 'empty start tag',
552     line => $self->{line_prev},
553     column => $self->{column_prev});
554 wakaba 1.57 $self->{state} = DATA_STATE;
555 wakaba 1.1 !!!next-input-character;
556    
557 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '<>',
558 wakaba 1.120 line => $self->{line_prev},
559     column => $self->{column_prev},
560 wakaba 1.118 });
561 wakaba 1.1
562     redo A;
563 wakaba 1.76 } elsif ($self->{next_char} == 0x003F) { # ?
564 wakaba 1.77 !!!cp (22);
565 wakaba 1.115 !!!parse-error (type => 'pio',
566     line => $self->{line_prev},
567     column => $self->{column_prev});
568 wakaba 1.57 $self->{state} = BOGUS_COMMENT_STATE;
569 wakaba 1.112 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
570 wakaba 1.120 line => $self->{line_prev},
571     column => $self->{column_prev},
572 wakaba 1.118 };
573 wakaba 1.76 ## $self->{next_char} is intentionally left as is
574 wakaba 1.1 redo A;
575     } else {
576 wakaba 1.77 !!!cp (23);
577 wakaba 1.3 !!!parse-error (type => 'bare stago');
578 wakaba 1.57 $self->{state} = DATA_STATE;
579 wakaba 1.1 ## reconsume
580    
581 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '<',
582 wakaba 1.120 line => $self->{line_prev},
583     column => $self->{column_prev},
584 wakaba 1.118 });
585 wakaba 1.1
586     redo A;
587     }
588     } else {
589 wakaba 1.40 die "$0: $self->{content_model} in tag open";
590 wakaba 1.1 }
591 wakaba 1.57 } elsif ($self->{state} == CLOSE_TAG_OPEN_STATE) {
592 wakaba 1.113 my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</"
593 wakaba 1.40 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
594 wakaba 1.23 if (defined $self->{last_emitted_start_tag_name}) {
595 wakaba 1.112
596 wakaba 1.30 ## NOTE: <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>
597 wakaba 1.23 my @next_char;
598     TAGNAME: for (my $i = 0; $i < length $self->{last_emitted_start_tag_name}; $i++) {
599 wakaba 1.76 push @next_char, $self->{next_char};
600 wakaba 1.23 my $c = ord substr ($self->{last_emitted_start_tag_name}, $i, 1);
601     my $C = 0x0061 <= $c && $c <= 0x007A ? $c - 0x0020 : $c;
602 wakaba 1.76 if ($self->{next_char} == $c or $self->{next_char} == $C) {
603 wakaba 1.77 !!!cp (24);
604 wakaba 1.23 !!!next-input-character;
605     next TAGNAME;
606     } else {
607 wakaba 1.77 !!!cp (25);
608 wakaba 1.76 $self->{next_char} = shift @next_char; # reconsume
609 wakaba 1.23 !!!back-next-input-character (@next_char);
610 wakaba 1.57 $self->{state} = DATA_STATE;
611 wakaba 1.23
612 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '</',
613 wakaba 1.120 line => $l, column => $c,
614 wakaba 1.118 });
615 wakaba 1.23
616     redo A;
617     }
618     }
619 wakaba 1.76 push @next_char, $self->{next_char};
620 wakaba 1.23
621 wakaba 1.76 unless ($self->{next_char} == 0x0009 or # HT
622     $self->{next_char} == 0x000A or # LF
623     $self->{next_char} == 0x000B or # VT
624     $self->{next_char} == 0x000C or # FF
625     $self->{next_char} == 0x0020 or # SP
626     $self->{next_char} == 0x003E or # >
627     $self->{next_char} == 0x002F or # /
628     $self->{next_char} == -1) {
629 wakaba 1.77 !!!cp (26);
630 wakaba 1.76 $self->{next_char} = shift @next_char; # reconsume
631 wakaba 1.1 !!!back-next-input-character (@next_char);
632 wakaba 1.57 $self->{state} = DATA_STATE;
633 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '</',
634 wakaba 1.120 line => $l, column => $c,
635 wakaba 1.118 });
636 wakaba 1.1 redo A;
637 wakaba 1.23 } else {
638 wakaba 1.77 !!!cp (27);
639 wakaba 1.76 $self->{next_char} = shift @next_char;
640 wakaba 1.23 !!!back-next-input-character (@next_char);
641     # and consume...
642 wakaba 1.1 }
643 wakaba 1.23 } else {
644     ## No start tag token has ever been emitted
645 wakaba 1.77 !!!cp (28);
646 wakaba 1.23 # next-input-character is already done
647 wakaba 1.57 $self->{state} = DATA_STATE;
648 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '</',
649 wakaba 1.120 line => $l, column => $c,
650 wakaba 1.118 });
651 wakaba 1.1 redo A;
652     }
653     }
654    
655 wakaba 1.76 if (0x0041 <= $self->{next_char} and
656     $self->{next_char} <= 0x005A) { # A..Z
657 wakaba 1.77 !!!cp (29);
658 wakaba 1.112 $self->{current_token}
659     = {type => END_TAG_TOKEN,
660     tag_name => chr ($self->{next_char} + 0x0020),
661     line => $l, column => $c};
662 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
663 wakaba 1.1 !!!next-input-character;
664     redo A;
665 wakaba 1.76 } elsif (0x0061 <= $self->{next_char} and
666     $self->{next_char} <= 0x007A) { # a..z
667 wakaba 1.77 !!!cp (30);
668 wakaba 1.55 $self->{current_token} = {type => END_TAG_TOKEN,
669 wakaba 1.112 tag_name => chr ($self->{next_char}),
670     line => $l, column => $c};
671 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
672 wakaba 1.1 !!!next-input-character;
673     redo A;
674 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
675 wakaba 1.77 !!!cp (31);
676 wakaba 1.115 !!!parse-error (type => 'empty end tag',
677     line => $self->{line_prev}, ## "<" in "</>"
678     column => $self->{column_prev} - 1);
679 wakaba 1.57 $self->{state} = DATA_STATE;
680 wakaba 1.1 !!!next-input-character;
681     redo A;
682 wakaba 1.76 } elsif ($self->{next_char} == -1) {
683 wakaba 1.77 !!!cp (32);
684 wakaba 1.3 !!!parse-error (type => 'bare etago');
685 wakaba 1.57 $self->{state} = DATA_STATE;
686 wakaba 1.1 # reconsume
687    
688 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '</',
689 wakaba 1.120 line => $l, column => $c,
690 wakaba 1.118 });
691 wakaba 1.1
692     redo A;
693     } else {
694 wakaba 1.77 !!!cp (33);
695 wakaba 1.3 !!!parse-error (type => 'bogus end tag');
696 wakaba 1.57 $self->{state} = BOGUS_COMMENT_STATE;
697 wakaba 1.112 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
698 wakaba 1.120 line => $self->{line_prev}, # "<" of "</"
699     column => $self->{column_prev} - 1,
700 wakaba 1.118 };
701 wakaba 1.76 ## $self->{next_char} is intentionally left as is
702 wakaba 1.1 redo A;
703     }
704 wakaba 1.57 } elsif ($self->{state} == TAG_NAME_STATE) {
705 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
706     $self->{next_char} == 0x000A or # LF
707     $self->{next_char} == 0x000B or # VT
708     $self->{next_char} == 0x000C or # FF
709     $self->{next_char} == 0x0020) { # SP
710 wakaba 1.77 !!!cp (34);
711 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
712 wakaba 1.1 !!!next-input-character;
713     redo A;
714 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
715 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
716 wakaba 1.77 !!!cp (35);
717 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
718 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
719 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
720 wakaba 1.78 #if ($self->{current_token}->{attributes}) {
721     # ## NOTE: This should never be reached.
722     # !!! cp (36);
723     # !!! parse-error (type => 'end tag attribute');
724     #} else {
725 wakaba 1.77 !!!cp (37);
726 wakaba 1.78 #}
727 wakaba 1.1 } else {
728     die "$0: $self->{current_token}->{type}: Unknown token type";
729     }
730 wakaba 1.57 $self->{state} = DATA_STATE;
731 wakaba 1.1 !!!next-input-character;
732    
733     !!!emit ($self->{current_token}); # start tag or end tag
734    
735     redo A;
736 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
737     $self->{next_char} <= 0x005A) { # A..Z
738 wakaba 1.77 !!!cp (38);
739 wakaba 1.76 $self->{current_token}->{tag_name} .= chr ($self->{next_char} + 0x0020);
740 wakaba 1.1 # start tag or end tag
741     ## Stay in this state
742     !!!next-input-character;
743     redo A;
744 wakaba 1.76 } elsif ($self->{next_char} == -1) {
745 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
746 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
747 wakaba 1.77 !!!cp (39);
748 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
749 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
750 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
751 wakaba 1.78 #if ($self->{current_token}->{attributes}) {
752     # ## NOTE: This state should never be reached.
753     # !!! cp (40);
754     # !!! parse-error (type => 'end tag attribute');
755     #} else {
756 wakaba 1.77 !!!cp (41);
757 wakaba 1.78 #}
758 wakaba 1.1 } else {
759     die "$0: $self->{current_token}->{type}: Unknown token type";
760     }
761 wakaba 1.57 $self->{state} = DATA_STATE;
762 wakaba 1.1 # reconsume
763    
764     !!!emit ($self->{current_token}); # start tag or end tag
765    
766     redo A;
767 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
768 wakaba 1.1 !!!next-input-character;
769 wakaba 1.76 if ($self->{next_char} == 0x003E and # >
770 wakaba 1.55 $self->{current_token}->{type} == START_TAG_TOKEN and
771 wakaba 1.1 $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
772     # permitted slash
773 wakaba 1.77 !!!cp (42);
774 wakaba 1.1 #
775     } else {
776 wakaba 1.77 !!!cp (43);
777 wakaba 1.3 !!!parse-error (type => 'nestc');
778 wakaba 1.1 }
779 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
780 wakaba 1.1 # next-input-character is already done
781     redo A;
782     } else {
783 wakaba 1.77 !!!cp (44);
784 wakaba 1.76 $self->{current_token}->{tag_name} .= chr $self->{next_char};
785 wakaba 1.1 # start tag or end tag
786     ## Stay in the state
787     !!!next-input-character;
788     redo A;
789     }
790 wakaba 1.57 } elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) {
791 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
792     $self->{next_char} == 0x000A or # LF
793     $self->{next_char} == 0x000B or # VT
794     $self->{next_char} == 0x000C or # FF
795     $self->{next_char} == 0x0020) { # SP
796 wakaba 1.77 !!!cp (45);
797 wakaba 1.1 ## Stay in the state
798     !!!next-input-character;
799     redo A;
800 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
801 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
802 wakaba 1.77 !!!cp (46);
803 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
804 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
805 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
806 wakaba 1.1 if ($self->{current_token}->{attributes}) {
807 wakaba 1.77 !!!cp (47);
808 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
809 wakaba 1.77 } else {
810     !!!cp (48);
811 wakaba 1.1 }
812     } else {
813     die "$0: $self->{current_token}->{type}: Unknown token type";
814     }
815 wakaba 1.57 $self->{state} = DATA_STATE;
816 wakaba 1.1 !!!next-input-character;
817    
818     !!!emit ($self->{current_token}); # start tag or end tag
819    
820     redo A;
821 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
822     $self->{next_char} <= 0x005A) { # A..Z
823 wakaba 1.77 !!!cp (49);
824 wakaba 1.119 $self->{current_attribute}
825     = {name => chr ($self->{next_char} + 0x0020),
826     value => '',
827     line => $self->{line}, column => $self->{column}};
828 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
829 wakaba 1.1 !!!next-input-character;
830     redo A;
831 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
832 wakaba 1.1 !!!next-input-character;
833 wakaba 1.76 if ($self->{next_char} == 0x003E and # >
834 wakaba 1.55 $self->{current_token}->{type} == START_TAG_TOKEN and
835 wakaba 1.1 $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
836     # permitted slash
837 wakaba 1.77 !!!cp (50);
838 wakaba 1.1 #
839     } else {
840 wakaba 1.77 !!!cp (51);
841 wakaba 1.3 !!!parse-error (type => 'nestc');
842 wakaba 1.1 }
843     ## Stay in the state
844     # next-input-character is already done
845     redo A;
846 wakaba 1.76 } elsif ($self->{next_char} == -1) {
847 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
848 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
849 wakaba 1.77 !!!cp (52);
850 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
851 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
852 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
853 wakaba 1.1 if ($self->{current_token}->{attributes}) {
854 wakaba 1.77 !!!cp (53);
855 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
856 wakaba 1.77 } else {
857     !!!cp (54);
858 wakaba 1.1 }
859     } else {
860     die "$0: $self->{current_token}->{type}: Unknown token type";
861     }
862 wakaba 1.57 $self->{state} = DATA_STATE;
863 wakaba 1.1 # reconsume
864    
865     !!!emit ($self->{current_token}); # start tag or end tag
866    
867     redo A;
868     } else {
869 wakaba 1.72 if ({
870     0x0022 => 1, # "
871     0x0027 => 1, # '
872     0x003D => 1, # =
873 wakaba 1.76 }->{$self->{next_char}}) {
874 wakaba 1.77 !!!cp (55);
875 wakaba 1.72 !!!parse-error (type => 'bad attribute name');
876 wakaba 1.77 } else {
877     !!!cp (56);
878 wakaba 1.72 }
879 wakaba 1.119 $self->{current_attribute}
880     = {name => chr ($self->{next_char}),
881     value => '',
882     line => $self->{line}, column => $self->{column}};
883 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
884 wakaba 1.1 !!!next-input-character;
885     redo A;
886     }
887 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_NAME_STATE) {
## $before_leave: helper closure invoked by the branches below that leave
## the attribute name state, i.e. once the name accumulated in
## $self->{current_attribute} is complete.
##   - If the current tag token already has an attribute with that name, a
##     'duplicate attribute:<name>' parse error is raised, located at the
##     line/column recorded in the attribute hash, and the attribute is not
##     stored (the entry already on the token is left as is).
##   - Otherwise the attribute hash is stored on the token, keyed by name.
## Takes no arguments; it works on $self from the enclosing scope.
888 wakaba 1.1 my $before_leave = sub {
889     if (exists $self->{current_token}->{attributes} # start tag or end tag
890     ->{$self->{current_attribute}->{name}}) { # MUST
891 wakaba 1.77 !!!cp (57);
892 wakaba 1.120 !!!parse-error (type => 'duplicate attribute:'.$self->{current_attribute}->{name}, line => $self->{current_attribute}->{line}, column => $self->{current_attribute}->{column});
893 wakaba 1.1 ## Discard $self->{current_attribute} # MUST
## NOTE: Nothing is cleared here; the hash is simply not attached to the
## token, so $self->{current_attribute} still refers to it afterwards.
894     } else {
895 wakaba 1.77 !!!cp (58);
896 wakaba 1.1 $self->{current_token}->{attributes}->{$self->{current_attribute}->{name}}
897     = $self->{current_attribute};
898     }
899     }; # $before_leave
900    
901 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
902     $self->{next_char} == 0x000A or # LF
903     $self->{next_char} == 0x000B or # VT
904     $self->{next_char} == 0x000C or # FF
905     $self->{next_char} == 0x0020) { # SP
906 wakaba 1.77 !!!cp (59);
907 wakaba 1.1 $before_leave->();
908 wakaba 1.57 $self->{state} = AFTER_ATTRIBUTE_NAME_STATE;
909 wakaba 1.1 !!!next-input-character;
910     redo A;
911 wakaba 1.76 } elsif ($self->{next_char} == 0x003D) { # =
912 wakaba 1.77 !!!cp (60);
913 wakaba 1.1 $before_leave->();
914 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_VALUE_STATE;
915 wakaba 1.1 !!!next-input-character;
916     redo A;
917 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
918 wakaba 1.1 $before_leave->();
919 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
920 wakaba 1.77 !!!cp (61);
921 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
922 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
923 wakaba 1.77 !!!cp (62);
924 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
925 wakaba 1.1 if ($self->{current_token}->{attributes}) {
926 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
927 wakaba 1.1 }
928     } else {
929     die "$0: $self->{current_token}->{type}: Unknown token type";
930     }
931 wakaba 1.57 $self->{state} = DATA_STATE;
932 wakaba 1.1 !!!next-input-character;
933    
934     !!!emit ($self->{current_token}); # start tag or end tag
935    
936     redo A;
937 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
938     $self->{next_char} <= 0x005A) { # A..Z
939 wakaba 1.77 !!!cp (63);
940 wakaba 1.76 $self->{current_attribute}->{name} .= chr ($self->{next_char} + 0x0020);
941 wakaba 1.1 ## Stay in the state
942     !!!next-input-character;
943     redo A;
944 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
945 wakaba 1.1 $before_leave->();
946     !!!next-input-character;
947 wakaba 1.76 if ($self->{next_char} == 0x003E and # >
948 wakaba 1.55 $self->{current_token}->{type} == START_TAG_TOKEN and
949 wakaba 1.1 $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
950     # permitted slash
951 wakaba 1.77 !!!cp (64);
952 wakaba 1.1 #
953     } else {
954 wakaba 1.77 !!!cp (65);
955 wakaba 1.3 !!!parse-error (type => 'nestc');
956 wakaba 1.1 }
957 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
958 wakaba 1.1 # next-input-character is already done
959     redo A;
960 wakaba 1.76 } elsif ($self->{next_char} == -1) {
961 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
962 wakaba 1.1 $before_leave->();
963 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
964 wakaba 1.77 !!!cp (66);
965 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
966 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
967 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
968 wakaba 1.1 if ($self->{current_token}->{attributes}) {
969 wakaba 1.77 !!!cp (67);
970 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
971 wakaba 1.77 } else {
972 wakaba 1.78 ## NOTE: This state should never be reached.
973 wakaba 1.77 !!!cp (68);
974 wakaba 1.1 }
975     } else {
976     die "$0: $self->{current_token}->{type}: Unknown token type";
977     }
978 wakaba 1.57 $self->{state} = DATA_STATE;
979 wakaba 1.1 # reconsume
980    
981     !!!emit ($self->{current_token}); # start tag or end tag
982    
983     redo A;
984     } else {
985 wakaba 1.76 if ($self->{next_char} == 0x0022 or # "
986     $self->{next_char} == 0x0027) { # '
987 wakaba 1.77 !!!cp (69);
988 wakaba 1.72 !!!parse-error (type => 'bad attribute name');
989 wakaba 1.77 } else {
990     !!!cp (70);
991 wakaba 1.72 }
992 wakaba 1.76 $self->{current_attribute}->{name} .= chr ($self->{next_char});
993 wakaba 1.1 ## Stay in the state
994     !!!next-input-character;
995     redo A;
996     }
997 wakaba 1.57 } elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) {
998 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
999     $self->{next_char} == 0x000A or # LF
1000     $self->{next_char} == 0x000B or # VT
1001     $self->{next_char} == 0x000C or # FF
1002     $self->{next_char} == 0x0020) { # SP
1003 wakaba 1.77 !!!cp (71);
1004 wakaba 1.1 ## Stay in the state
1005     !!!next-input-character;
1006     redo A;
1007 wakaba 1.76 } elsif ($self->{next_char} == 0x003D) { # =
1008 wakaba 1.77 !!!cp (72);
1009 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_VALUE_STATE;
1010 wakaba 1.1 !!!next-input-character;
1011     redo A;
1012 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1013 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1014 wakaba 1.77 !!!cp (73);
1015 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1016 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1017 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1018 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1019 wakaba 1.77 !!!cp (74);
1020 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1021 wakaba 1.77 } else {
1022 wakaba 1.78 ## NOTE: This state should never be reached.
1023 wakaba 1.77 !!!cp (75);
1024 wakaba 1.1 }
1025     } else {
1026     die "$0: $self->{current_token}->{type}: Unknown token type";
1027     }
1028 wakaba 1.57 $self->{state} = DATA_STATE;
1029 wakaba 1.1 !!!next-input-character;
1030    
1031     !!!emit ($self->{current_token}); # start tag or end tag
1032    
1033     redo A;
1034 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
1035     $self->{next_char} <= 0x005A) { # A..Z
1036 wakaba 1.77 !!!cp (76);
1037 wakaba 1.119 $self->{current_attribute}
1038     = {name => chr ($self->{next_char} + 0x0020),
1039     value => '',
1040     line => $self->{line}, column => $self->{column}};
1041 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
1042 wakaba 1.1 !!!next-input-character;
1043     redo A;
1044 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1045 wakaba 1.1 !!!next-input-character;
1046 wakaba 1.76 if ($self->{next_char} == 0x003E and # >
1047 wakaba 1.55 $self->{current_token}->{type} == START_TAG_TOKEN and
1048 wakaba 1.1 $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
1049     # permitted slash
1050 wakaba 1.77 !!!cp (77);
1051 wakaba 1.1 #
1052     } else {
1053 wakaba 1.77 !!!cp (78);
1054 wakaba 1.3 !!!parse-error (type => 'nestc');
1055 wakaba 1.33 ## TODO: Different error type for <aa / bb> than <aa/>
1056 wakaba 1.1 }
1057 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1058 wakaba 1.1 # next-input-character is already done
1059     redo A;
1060 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1061 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1062 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1063 wakaba 1.77 !!!cp (79);
1064 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1065 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1066 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1067 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1068 wakaba 1.77 !!!cp (80);
1069 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1070 wakaba 1.77 } else {
1071 wakaba 1.78 ## NOTE: This state should never be reached.
1072 wakaba 1.77 !!!cp (81);
1073 wakaba 1.1 }
1074     } else {
1075     die "$0: $self->{current_token}->{type}: Unknown token type";
1076     }
1077 wakaba 1.57 $self->{state} = DATA_STATE;
1078 wakaba 1.1 # reconsume
1079    
1080     !!!emit ($self->{current_token}); # start tag or end tag
1081    
1082     redo A;
1083     } else {
1084 wakaba 1.77 !!!cp (82);
1085 wakaba 1.119 $self->{current_attribute}
1086     = {name => chr ($self->{next_char}),
1087     value => '',
1088     line => $self->{line}, column => $self->{column}};
1089 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
1090 wakaba 1.1 !!!next-input-character;
1091     redo A;
1092     }
1093 wakaba 1.57 } elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) {
1094 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1095     $self->{next_char} == 0x000A or # LF
1096     $self->{next_char} == 0x000B or # VT
1097     $self->{next_char} == 0x000C or # FF
1098     $self->{next_char} == 0x0020) { # SP
1099 wakaba 1.77 !!!cp (83);
1100 wakaba 1.1 ## Stay in the state
1101     !!!next-input-character;
1102     redo A;
1103 wakaba 1.76 } elsif ($self->{next_char} == 0x0022) { # "
1104 wakaba 1.77 !!!cp (84);
1105 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
1106 wakaba 1.1 !!!next-input-character;
1107     redo A;
1108 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1109 wakaba 1.77 !!!cp (85);
1110 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
1111 wakaba 1.1 ## reconsume
1112     redo A;
1113 wakaba 1.76 } elsif ($self->{next_char} == 0x0027) { # '
1114 wakaba 1.77 !!!cp (86);
1115 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
1116 wakaba 1.1 !!!next-input-character;
1117     redo A;
1118 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1119 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1120 wakaba 1.77 !!!cp (87);
1121 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1122 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1123 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1124 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1125 wakaba 1.77 !!!cp (88);
1126 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1127 wakaba 1.77 } else {
1128 wakaba 1.78 ## NOTE: This state should never be reached.
1129 wakaba 1.77 !!!cp (89);
1130 wakaba 1.1 }
1131     } else {
1132     die "$0: $self->{current_token}->{type}: Unknown token type";
1133     }
1134 wakaba 1.57 $self->{state} = DATA_STATE;
1135 wakaba 1.1 !!!next-input-character;
1136    
1137     !!!emit ($self->{current_token}); # start tag or end tag
1138    
1139     redo A;
1140 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1141 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1142 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1143 wakaba 1.77 !!!cp (90);
1144 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1145 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1146 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1147 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1148 wakaba 1.77 !!!cp (91);
1149 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1150 wakaba 1.77 } else {
1151 wakaba 1.78 ## NOTE: This state should never be reached.
1152 wakaba 1.77 !!!cp (92);
1153 wakaba 1.1 }
1154     } else {
1155     die "$0: $self->{current_token}->{type}: Unknown token type";
1156     }
1157 wakaba 1.57 $self->{state} = DATA_STATE;
1158 wakaba 1.1 ## reconsume
1159    
1160     !!!emit ($self->{current_token}); # start tag or end tag
1161    
1162     redo A;
1163     } else {
1164 wakaba 1.76 if ($self->{next_char} == 0x003D) { # =
1165 wakaba 1.77 !!!cp (93);
1166 wakaba 1.72 !!!parse-error (type => 'bad attribute value');
1167 wakaba 1.77 } else {
1168     !!!cp (94);
1169 wakaba 1.72 }
1170 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1171 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
1172 wakaba 1.1 !!!next-input-character;
1173     redo A;
1174     }
1175 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {
1176 wakaba 1.76 if ($self->{next_char} == 0x0022) { # "
1177 wakaba 1.77 !!!cp (95);
1178 wakaba 1.72 $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1179 wakaba 1.1 !!!next-input-character;
1180     redo A;
1181 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1182 wakaba 1.77 !!!cp (96);
1183 wakaba 1.57 $self->{last_attribute_value_state} = $self->{state};
1184     $self->{state} = ENTITY_IN_ATTRIBUTE_VALUE_STATE;
1185 wakaba 1.1 !!!next-input-character;
1186     redo A;
1187 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1188 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
1189 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1190 wakaba 1.77 !!!cp (97);
1191 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1192 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1193 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1194 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1195 wakaba 1.77 !!!cp (98);
1196 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1197 wakaba 1.77 } else {
1198 wakaba 1.78 ## NOTE: This state should never be reached.
1199 wakaba 1.77 !!!cp (99);
1200 wakaba 1.1 }
1201     } else {
1202     die "$0: $self->{current_token}->{type}: Unknown token type";
1203     }
1204 wakaba 1.57 $self->{state} = DATA_STATE;
1205 wakaba 1.1 ## reconsume
1206    
1207     !!!emit ($self->{current_token}); # start tag or end tag
1208    
1209     redo A;
1210     } else {
1211 wakaba 1.77 !!!cp (100);
1212 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1213 wakaba 1.1 ## Stay in the state
1214     !!!next-input-character;
1215     redo A;
1216     }
1217 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {
1218 wakaba 1.76 if ($self->{next_char} == 0x0027) { # '
1219 wakaba 1.77 !!!cp (101);
1220 wakaba 1.72 $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1221 wakaba 1.1 !!!next-input-character;
1222     redo A;
1223 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1224 wakaba 1.77 !!!cp (102);
1225 wakaba 1.57 $self->{last_attribute_value_state} = $self->{state};
1226     $self->{state} = ENTITY_IN_ATTRIBUTE_VALUE_STATE;
1227 wakaba 1.1 !!!next-input-character;
1228     redo A;
1229 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1230 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
1231 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1232 wakaba 1.77 !!!cp (103);
1233 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1234 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1235 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1236 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1237 wakaba 1.77 !!!cp (104);
1238 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1239 wakaba 1.77 } else {
1240 wakaba 1.78 ## NOTE: This state should never be reached.
1241 wakaba 1.77 !!!cp (105);
1242 wakaba 1.1 }
1243     } else {
1244     die "$0: $self->{current_token}->{type}: Unknown token type";
1245     }
1246 wakaba 1.57 $self->{state} = DATA_STATE;
1247 wakaba 1.1 ## reconsume
1248    
1249     !!!emit ($self->{current_token}); # start tag or end tag
1250    
1251     redo A;
1252     } else {
1253 wakaba 1.77 !!!cp (106);
1254 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1255 wakaba 1.1 ## Stay in the state
1256     !!!next-input-character;
1257     redo A;
1258     }
1259 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) {
1260 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1261     $self->{next_char} == 0x000A or # LF
1262     $self->{next_char} == 0x000B or # VT
1263     $self->{next_char} == 0x000C or # FF
1264     $self->{next_char} == 0x0020) { # SP
1265 wakaba 1.77 !!!cp (107);
1266 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1267 wakaba 1.1 !!!next-input-character;
1268     redo A;
1269 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1270 wakaba 1.77 !!!cp (108);
1271 wakaba 1.57 $self->{last_attribute_value_state} = $self->{state};
1272     $self->{state} = ENTITY_IN_ATTRIBUTE_VALUE_STATE;
1273 wakaba 1.1 !!!next-input-character;
1274     redo A;
1275 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1276 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1277 wakaba 1.77 !!!cp (109);
1278 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1279 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1280 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1281 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1282 wakaba 1.77 !!!cp (110);
1283 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1284 wakaba 1.77 } else {
1285 wakaba 1.78 ## NOTE: This state should never be reached.
1286 wakaba 1.77 !!!cp (111);
1287 wakaba 1.1 }
1288     } else {
1289     die "$0: $self->{current_token}->{type}: Unknown token type";
1290     }
1291 wakaba 1.57 $self->{state} = DATA_STATE;
1292 wakaba 1.1 !!!next-input-character;
1293    
1294     !!!emit ($self->{current_token}); # start tag or end tag
1295    
1296     redo A;
1297 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1298 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1299 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1300 wakaba 1.77 !!!cp (112);
1301 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1302 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1303 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1304 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1305 wakaba 1.77 !!!cp (113);
1306 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1307 wakaba 1.77 } else {
1308 wakaba 1.78 ## NOTE: This state should never be reached.
1309 wakaba 1.77 !!!cp (114);
1310 wakaba 1.1 }
1311     } else {
1312     die "$0: $self->{current_token}->{type}: Unknown token type";
1313     }
1314 wakaba 1.57 $self->{state} = DATA_STATE;
1315 wakaba 1.1 ## reconsume
1316    
1317     !!!emit ($self->{current_token}); # start tag or end tag
1318    
1319     redo A;
1320     } else {
1321 wakaba 1.72 if ({
1322     0x0022 => 1, # "
1323     0x0027 => 1, # '
1324     0x003D => 1, # =
1325 wakaba 1.76 }->{$self->{next_char}}) {
1326 wakaba 1.77 !!!cp (115);
1327 wakaba 1.72 !!!parse-error (type => 'bad attribute value');
1328 wakaba 1.77 } else {
1329     !!!cp (116);
1330 wakaba 1.72 }
1331 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1332 wakaba 1.1 ## Stay in the state
1333     !!!next-input-character;
1334     redo A;
1335     }
1336 wakaba 1.57 } elsif ($self->{state} == ENTITY_IN_ATTRIBUTE_VALUE_STATE) {
1337 wakaba 1.72 my $token = $self->_tokenize_attempt_to_consume_an_entity
1338     (1,
1339     $self->{last_attribute_value_state}
1340     == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE ? 0x0022 : # "
1341     $self->{last_attribute_value_state}
1342     == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE ? 0x0027 : # '
1343     -1);
1344 wakaba 1.1
1345     unless (defined $token) {
1346 wakaba 1.77 !!!cp (117);
1347 wakaba 1.1 $self->{current_attribute}->{value} .= '&';
1348     } else {
1349 wakaba 1.77 !!!cp (118);
1350 wakaba 1.1 $self->{current_attribute}->{value} .= $token->{data};
1351 wakaba 1.66 $self->{current_attribute}->{has_reference} = $token->{has_reference};
1352 wakaba 1.1 ## ISSUE: spec says "append the returned character token to the current attribute's value"
1353     }
1354    
1355     $self->{state} = $self->{last_attribute_value_state};
1356     # next-input-character is already done
1357     redo A;
1358 wakaba 1.72 } elsif ($self->{state} == AFTER_ATTRIBUTE_VALUE_QUOTED_STATE) {
1359 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1360     $self->{next_char} == 0x000A or # LF
1361     $self->{next_char} == 0x000B or # VT
1362     $self->{next_char} == 0x000C or # FF
1363     $self->{next_char} == 0x0020) { # SP
1364 wakaba 1.77 !!!cp (118);
1365 wakaba 1.72 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1366     !!!next-input-character;
1367     redo A;
1368 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1369 wakaba 1.72 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1370 wakaba 1.77 !!!cp (119);
1371 wakaba 1.72 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1372     } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1373     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1374     if ($self->{current_token}->{attributes}) {
1375 wakaba 1.77 !!!cp (120);
1376 wakaba 1.72 !!!parse-error (type => 'end tag attribute');
1377 wakaba 1.77 } else {
1378 wakaba 1.78 ## NOTE: This state should never be reached.
1379 wakaba 1.77 !!!cp (121);
1380 wakaba 1.72 }
1381     } else {
1382     die "$0: $self->{current_token}->{type}: Unknown token type";
1383     }
1384     $self->{state} = DATA_STATE;
1385     !!!next-input-character;
1386    
1387     !!!emit ($self->{current_token}); # start tag or end tag
1388    
1389     redo A;
1390 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1391 wakaba 1.72 !!!next-input-character;
1392 wakaba 1.76 if ($self->{next_char} == 0x003E and # >
1393 wakaba 1.72 $self->{current_token}->{type} == START_TAG_TOKEN and
1394     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
1395     # permitted slash
1396 wakaba 1.77 !!!cp (122);
1397 wakaba 1.72 #
1398     } else {
1399 wakaba 1.77 !!!cp (123);
1400 wakaba 1.72 !!!parse-error (type => 'nestc');
1401     }
1402     $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1403     # next-input-character is already done
1404     redo A;
1405     } else {
1406 wakaba 1.77 !!!cp (124);
1407 wakaba 1.72 !!!parse-error (type => 'no space between attributes');
1408     $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1409     ## reconsume
1410     redo A;
1411     }
1412 wakaba 1.57 } elsif ($self->{state} == BOGUS_COMMENT_STATE) {
1413 wakaba 1.1 ## (only happens in the PCDATA state)
1414    
1415 wakaba 1.112 ## NOTE: Set by the previous state
1416     #my $token = {type => COMMENT_TOKEN, data => ''};
1417 wakaba 1.1
1418     BC: {
1419 wakaba 1.76 if ($self->{next_char} == 0x003E) { # >
1420 wakaba 1.77 !!!cp (124);
1421 wakaba 1.57 $self->{state} = DATA_STATE;
1422 wakaba 1.1 !!!next-input-character;
1423    
1424 wakaba 1.112 !!!emit ($self->{current_token}); # comment
1425 wakaba 1.1
1426     redo A;
1427 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1428 wakaba 1.77 !!!cp (125);
1429 wakaba 1.57 $self->{state} = DATA_STATE;
1430 wakaba 1.1 ## reconsume
1431    
1432 wakaba 1.112 !!!emit ($self->{current_token}); # comment
1433 wakaba 1.1
1434     redo A;
1435     } else {
1436 wakaba 1.77 !!!cp (126);
1437 wakaba 1.112 $self->{current_token}->{data} .= chr ($self->{next_char}); # comment
1438 wakaba 1.1 !!!next-input-character;
1439     redo BC;
1440     }
1441     } # BC
1442 wakaba 1.77
1443     die "$0: _get_next_token: unexpected case [BC]";
1444 wakaba 1.57 } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {
1445 wakaba 1.1 ## (only happens in the PCDATA state)
1446    
1447 wakaba 1.120 my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1);
1448 wakaba 1.112
1449 wakaba 1.1 my @next_char;
1450 wakaba 1.76 push @next_char, $self->{next_char};
1451 wakaba 1.1
1452 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
1453 wakaba 1.1 !!!next-input-character;
1454 wakaba 1.76 push @next_char, $self->{next_char};
1455     if ($self->{next_char} == 0x002D) { # -
1456 wakaba 1.77 !!!cp (127);
1457 wakaba 1.112 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
1458 wakaba 1.120 line => $l, column => $c,
1459 wakaba 1.118 };
1460 wakaba 1.57 $self->{state} = COMMENT_START_STATE;
1461 wakaba 1.1 !!!next-input-character;
1462     redo A;
1463 wakaba 1.77 } else {
1464     !!!cp (128);
1465 wakaba 1.1 }
1466 wakaba 1.76 } elsif ($self->{next_char} == 0x0044 or # D
1467     $self->{next_char} == 0x0064) { # d
1468 wakaba 1.1 !!!next-input-character;
1469 wakaba 1.76 push @next_char, $self->{next_char};
1470     if ($self->{next_char} == 0x004F or # O
1471     $self->{next_char} == 0x006F) { # o
1472 wakaba 1.1 !!!next-input-character;
1473 wakaba 1.76 push @next_char, $self->{next_char};
1474     if ($self->{next_char} == 0x0043 or # C
1475     $self->{next_char} == 0x0063) { # c
1476 wakaba 1.1 !!!next-input-character;
1477 wakaba 1.76 push @next_char, $self->{next_char};
1478     if ($self->{next_char} == 0x0054 or # T
1479     $self->{next_char} == 0x0074) { # t
1480 wakaba 1.1 !!!next-input-character;
1481 wakaba 1.76 push @next_char, $self->{next_char};
1482     if ($self->{next_char} == 0x0059 or # Y
1483     $self->{next_char} == 0x0079) { # y
1484 wakaba 1.1 !!!next-input-character;
1485 wakaba 1.76 push @next_char, $self->{next_char};
1486     if ($self->{next_char} == 0x0050 or # P
1487     $self->{next_char} == 0x0070) { # p
1488 wakaba 1.1 !!!next-input-character;
1489 wakaba 1.76 push @next_char, $self->{next_char};
1490     if ($self->{next_char} == 0x0045 or # E
1491     $self->{next_char} == 0x0065) { # e
1492 wakaba 1.77 !!!cp (129);
1493     ## TODO: This letter-by-letter nested matching of "DOCTYPE" is clumsy; simplify it.
1494 wakaba 1.57 $self->{state} = DOCTYPE_STATE;
1495 wakaba 1.112 $self->{current_token} = {type => DOCTYPE_TOKEN,
1496     quirks => 1,
1497 wakaba 1.120 line => $l, column => $c,
1498 wakaba 1.118 };
1499 wakaba 1.1 !!!next-input-character;
1500     redo A;
1501 wakaba 1.77 } else {
1502     !!!cp (130);
1503 wakaba 1.1 }
1504 wakaba 1.77 } else {
1505     !!!cp (131);
1506 wakaba 1.1 }
1507 wakaba 1.77 } else {
1508     !!!cp (132);
1509 wakaba 1.1 }
1510 wakaba 1.77 } else {
1511     !!!cp (133);
1512 wakaba 1.1 }
1513 wakaba 1.77 } else {
1514     !!!cp (134);
1515 wakaba 1.1 }
1516 wakaba 1.77 } else {
1517     !!!cp (135);
1518 wakaba 1.1 }
1519 wakaba 1.77 } else {
1520     !!!cp (136);
1521 wakaba 1.1 }
1522    
1523 wakaba 1.30 !!!parse-error (type => 'bogus comment');
1524 wakaba 1.76 $self->{next_char} = shift @next_char;
1525 wakaba 1.1 !!!back-next-input-character (@next_char);
1526 wakaba 1.57 $self->{state} = BOGUS_COMMENT_STATE;
1527 wakaba 1.112 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
1528 wakaba 1.120 line => $l, column => $c,
1529 wakaba 1.118 };
1530 wakaba 1.1 redo A;
1531    
1532     ## ISSUE: typos in spec: chacacters, is is a parse error
1533     ## ISSUE: spec is somewhat unclear on "is the first character that will be in the comment"; what is "that will be in the comment" is what the algorithm defines, isn't it?
1534 wakaba 1.57 } elsif ($self->{state} == COMMENT_START_STATE) {
1535 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
1536 wakaba 1.77 !!!cp (137);
1537 wakaba 1.57 $self->{state} = COMMENT_START_DASH_STATE;
1538 wakaba 1.23 !!!next-input-character;
1539     redo A;
1540 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1541 wakaba 1.77 !!!cp (138);
1542 wakaba 1.23 !!!parse-error (type => 'bogus comment');
1543 wakaba 1.57 $self->{state} = DATA_STATE;
1544 wakaba 1.23 !!!next-input-character;
1545    
1546     !!!emit ($self->{current_token}); # comment
1547    
1548     redo A;
1549 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1550 wakaba 1.77 !!!cp (139);
1551 wakaba 1.23 !!!parse-error (type => 'unclosed comment');
1552 wakaba 1.57 $self->{state} = DATA_STATE;
1553 wakaba 1.23 ## reconsume
1554    
1555     !!!emit ($self->{current_token}); # comment
1556    
1557     redo A;
1558     } else {
1559 wakaba 1.77 !!!cp (140);
1560 wakaba 1.23 $self->{current_token}->{data} # comment
1561 wakaba 1.76 .= chr ($self->{next_char});
1562 wakaba 1.57 $self->{state} = COMMENT_STATE;
1563 wakaba 1.23 !!!next-input-character;
1564     redo A;
1565     }
1566 wakaba 1.57 } elsif ($self->{state} == COMMENT_START_DASH_STATE) {
1567 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
1568 wakaba 1.77 !!!cp (141);
1569 wakaba 1.57 $self->{state} = COMMENT_END_STATE;
1570 wakaba 1.23 !!!next-input-character;
1571     redo A;
1572 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1573 wakaba 1.77 !!!cp (142);
1574 wakaba 1.23 !!!parse-error (type => 'bogus comment');
1575 wakaba 1.57 $self->{state} = DATA_STATE;
1576 wakaba 1.23 !!!next-input-character;
1577    
1578     !!!emit ($self->{current_token}); # comment
1579    
1580     redo A;
1581 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1582 wakaba 1.77 !!!cp (143);
1583 wakaba 1.23 !!!parse-error (type => 'unclosed comment');
1584 wakaba 1.57 $self->{state} = DATA_STATE;
1585 wakaba 1.23 ## reconsume
1586    
1587     !!!emit ($self->{current_token}); # comment
1588    
1589     redo A;
1590     } else {
1591 wakaba 1.77 !!!cp (144);
1592 wakaba 1.23 $self->{current_token}->{data} # comment
1593 wakaba 1.76 .= '-' . chr ($self->{next_char});
1594 wakaba 1.57 $self->{state} = COMMENT_STATE;
1595 wakaba 1.23 !!!next-input-character;
1596     redo A;
1597     }
1598 wakaba 1.57 } elsif ($self->{state} == COMMENT_STATE) {
1599 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
1600 wakaba 1.77 !!!cp (145);
1601 wakaba 1.57 $self->{state} = COMMENT_END_DASH_STATE;
1602 wakaba 1.1 !!!next-input-character;
1603     redo A;
1604 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1605 wakaba 1.77 !!!cp (146);
1606 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1607 wakaba 1.57 $self->{state} = DATA_STATE;
1608 wakaba 1.1 ## reconsume
1609    
1610     !!!emit ($self->{current_token}); # comment
1611    
1612     redo A;
1613     } else {
1614 wakaba 1.77 !!!cp (147);
1615 wakaba 1.76 $self->{current_token}->{data} .= chr ($self->{next_char}); # comment
1616 wakaba 1.1 ## Stay in the state
1617     !!!next-input-character;
1618     redo A;
1619     }
1620 wakaba 1.57 } elsif ($self->{state} == COMMENT_END_DASH_STATE) {
1621 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
1622 wakaba 1.77 !!!cp (148);
1623 wakaba 1.57 $self->{state} = COMMENT_END_STATE;
1624 wakaba 1.1 !!!next-input-character;
1625     redo A;
1626 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1627 wakaba 1.77 !!!cp (149);
1628 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1629 wakaba 1.57 $self->{state} = DATA_STATE;
1630 wakaba 1.1 ## reconsume
1631    
1632     !!!emit ($self->{current_token}); # comment
1633    
1634     redo A;
1635     } else {
1636 wakaba 1.77 !!!cp (150);
1637 wakaba 1.76 $self->{current_token}->{data} .= '-' . chr ($self->{next_char}); # comment
1638 wakaba 1.57 $self->{state} = COMMENT_STATE;
1639 wakaba 1.1 !!!next-input-character;
1640     redo A;
1641     }
1642 wakaba 1.57 } elsif ($self->{state} == COMMENT_END_STATE) {
1643 wakaba 1.76 if ($self->{next_char} == 0x003E) { # >
1644 wakaba 1.77 !!!cp (151);
1645 wakaba 1.57 $self->{state} = DATA_STATE;
1646 wakaba 1.1 !!!next-input-character;
1647    
1648     !!!emit ($self->{current_token}); # comment
1649    
1650     redo A;
1651 wakaba 1.76 } elsif ($self->{next_char} == 0x002D) { # -
1652 wakaba 1.77 !!!cp (152);
1653 wakaba 1.114 !!!parse-error (type => 'dash in comment',
1654     line => $self->{line_prev},
1655     column => $self->{column_prev});
1656 wakaba 1.1 $self->{current_token}->{data} .= '-'; # comment
1657     ## Stay in the state
1658     !!!next-input-character;
1659     redo A;
1660 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1661 wakaba 1.77 !!!cp (153);
1662 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1663 wakaba 1.57 $self->{state} = DATA_STATE;
1664 wakaba 1.1 ## reconsume
1665    
1666     !!!emit ($self->{current_token}); # comment
1667    
1668     redo A;
1669     } else {
1670 wakaba 1.77 !!!cp (154);
1671 wakaba 1.114 !!!parse-error (type => 'dash in comment',
1672     line => $self->{line_prev},
1673     column => $self->{column_prev});
1674 wakaba 1.76 $self->{current_token}->{data} .= '--' . chr ($self->{next_char}); # comment
1675 wakaba 1.57 $self->{state} = COMMENT_STATE;
1676 wakaba 1.1 !!!next-input-character;
1677     redo A;
1678     }
1679 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_STATE) {
1680 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1681     $self->{next_char} == 0x000A or # LF
1682     $self->{next_char} == 0x000B or # VT
1683     $self->{next_char} == 0x000C or # FF
1684     $self->{next_char} == 0x0020) { # SP
1685 wakaba 1.77 !!!cp (155);
1686 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
1687 wakaba 1.1 !!!next-input-character;
1688     redo A;
1689     } else {
1690 wakaba 1.77 !!!cp (156);
1691 wakaba 1.3 !!!parse-error (type => 'no space before DOCTYPE name');
1692 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
1693 wakaba 1.1 ## reconsume
1694     redo A;
1695     }
1696 wakaba 1.57 } elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) {
1697 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1698     $self->{next_char} == 0x000A or # LF
1699     $self->{next_char} == 0x000B or # VT
1700     $self->{next_char} == 0x000C or # FF
1701     $self->{next_char} == 0x0020) { # SP
1702 wakaba 1.77 !!!cp (157);
1703 wakaba 1.1 ## Stay in the state
1704     !!!next-input-character;
1705     redo A;
1706 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1707 wakaba 1.77 !!!cp (158);
1708 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
1709 wakaba 1.57 $self->{state} = DATA_STATE;
1710 wakaba 1.1 !!!next-input-character;
1711    
1712 wakaba 1.112 !!!emit ($self->{current_token}); # DOCTYPE (quirks)
1713 wakaba 1.1
1714     redo A;
1715 wakaba 1.77 } elsif ($self->{next_char} == -1) {
1716     !!!cp (159);
1717 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
1718 wakaba 1.57 $self->{state} = DATA_STATE;
1719 wakaba 1.1 ## reconsume
1720    
1721 wakaba 1.112 !!!emit ($self->{current_token}); # DOCTYPE (quirks)
1722 wakaba 1.1
1723     redo A;
1724     } else {
1725 wakaba 1.77 !!!cp (160);
1726 wakaba 1.112 $self->{current_token}->{name} = chr $self->{next_char};
1727     delete $self->{current_token}->{quirks};
1728 wakaba 1.4 ## ISSUE: "Set the token's name name to the" in the spec
1729 wakaba 1.57 $self->{state} = DOCTYPE_NAME_STATE;
1730 wakaba 1.1 !!!next-input-character;
1731     redo A;
1732     }
1733 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_NAME_STATE) {
1734 wakaba 1.18 ## ISSUE: Redundant "First," in the spec.
1735 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1736     $self->{next_char} == 0x000A or # LF
1737     $self->{next_char} == 0x000B or # VT
1738     $self->{next_char} == 0x000C or # FF
1739     $self->{next_char} == 0x0020) { # SP
1740 wakaba 1.77 !!!cp (161);
1741 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_NAME_STATE;
1742 wakaba 1.1 !!!next-input-character;
1743     redo A;
1744 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1745 wakaba 1.77 !!!cp (162);
1746 wakaba 1.57 $self->{state} = DATA_STATE;
1747 wakaba 1.1 !!!next-input-character;
1748    
1749     !!!emit ($self->{current_token}); # DOCTYPE
1750    
1751     redo A;
1752 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1753 wakaba 1.77 !!!cp (163);
1754 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1755 wakaba 1.57 $self->{state} = DATA_STATE;
1756 wakaba 1.1 ## reconsume
1757    
1758 wakaba 1.75 $self->{current_token}->{quirks} = 1;
1759 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
1760 wakaba 1.1
1761     redo A;
1762     } else {
1763 wakaba 1.77 !!!cp (164);
1764 wakaba 1.1 $self->{current_token}->{name}
1765 wakaba 1.76 .= chr ($self->{next_char}); # DOCTYPE
1766 wakaba 1.1 ## Stay in the state
1767     !!!next-input-character;
1768     redo A;
1769     }
1770 wakaba 1.57 } elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) {
1771 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1772     $self->{next_char} == 0x000A or # LF
1773     $self->{next_char} == 0x000B or # VT
1774     $self->{next_char} == 0x000C or # FF
1775     $self->{next_char} == 0x0020) { # SP
1776 wakaba 1.77 !!!cp (165);
1777 wakaba 1.1 ## Stay in the state
1778     !!!next-input-character;
1779     redo A;
1780 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1781 wakaba 1.77 !!!cp (166);
1782 wakaba 1.57 $self->{state} = DATA_STATE;
1783 wakaba 1.1 !!!next-input-character;
1784    
1785     !!!emit ($self->{current_token}); # DOCTYPE
1786    
1787     redo A;
1788 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1789 wakaba 1.77 !!!cp (167);
1790 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1791 wakaba 1.57 $self->{state} = DATA_STATE;
1792 wakaba 1.1 ## reconsume
1793    
1794 wakaba 1.75 $self->{current_token}->{quirks} = 1;
1795 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
1796    
1797     redo A;
1798 wakaba 1.76 } elsif ($self->{next_char} == 0x0050 or # P
1799     $self->{next_char} == 0x0070) { # p
1800 wakaba 1.18 !!!next-input-character;
1801 wakaba 1.76 if ($self->{next_char} == 0x0055 or # U
1802     $self->{next_char} == 0x0075) { # u
1803 wakaba 1.18 !!!next-input-character;
1804 wakaba 1.76 if ($self->{next_char} == 0x0042 or # B
1805     $self->{next_char} == 0x0062) { # b
1806 wakaba 1.18 !!!next-input-character;
1807 wakaba 1.76 if ($self->{next_char} == 0x004C or # L
1808     $self->{next_char} == 0x006C) { # l
1809 wakaba 1.18 !!!next-input-character;
1810 wakaba 1.76 if ($self->{next_char} == 0x0049 or # I
1811     $self->{next_char} == 0x0069) { # i
1812 wakaba 1.18 !!!next-input-character;
1813 wakaba 1.76 if ($self->{next_char} == 0x0043 or # C
1814     $self->{next_char} == 0x0063) { # c
1815 wakaba 1.77 !!!cp (168);
1816 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
1817 wakaba 1.18 !!!next-input-character;
1818     redo A;
1819 wakaba 1.77 } else {
1820     !!!cp (169);
1821 wakaba 1.18 }
1822 wakaba 1.77 } else {
1823     !!!cp (170);
1824 wakaba 1.18 }
1825 wakaba 1.77 } else {
1826     !!!cp (171);
1827 wakaba 1.18 }
1828 wakaba 1.77 } else {
1829     !!!cp (172);
1830 wakaba 1.18 }
1831 wakaba 1.77 } else {
1832     !!!cp (173);
1833 wakaba 1.18 }
1834    
1835     #
1836 wakaba 1.76 } elsif ($self->{next_char} == 0x0053 or # S
1837     $self->{next_char} == 0x0073) { # s
1838 wakaba 1.18 !!!next-input-character;
1839 wakaba 1.76 if ($self->{next_char} == 0x0059 or # Y
1840     $self->{next_char} == 0x0079) { # y
1841 wakaba 1.18 !!!next-input-character;
1842 wakaba 1.76 if ($self->{next_char} == 0x0053 or # S
1843     $self->{next_char} == 0x0073) { # s
1844 wakaba 1.18 !!!next-input-character;
1845 wakaba 1.76 if ($self->{next_char} == 0x0054 or # T
1846     $self->{next_char} == 0x0074) { # t
1847 wakaba 1.18 !!!next-input-character;
1848 wakaba 1.76 if ($self->{next_char} == 0x0045 or # E
1849     $self->{next_char} == 0x0065) { # e
1850 wakaba 1.18 !!!next-input-character;
1851 wakaba 1.76 if ($self->{next_char} == 0x004D or # M
1852     $self->{next_char} == 0x006D) { # m
1853 wakaba 1.77 !!!cp (174);
1854 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
1855 wakaba 1.18 !!!next-input-character;
1856     redo A;
1857 wakaba 1.77 } else {
1858     !!!cp (175);
1859 wakaba 1.18 }
1860 wakaba 1.77 } else {
1861     !!!cp (176);
1862 wakaba 1.18 }
1863 wakaba 1.77 } else {
1864     !!!cp (177);
1865 wakaba 1.18 }
1866 wakaba 1.77 } else {
1867     !!!cp (178);
1868 wakaba 1.18 }
1869 wakaba 1.77 } else {
1870     !!!cp (179);
1871 wakaba 1.18 }
1872    
1873     #
1874     } else {
1875 wakaba 1.77 !!!cp (180);
1876 wakaba 1.18 !!!next-input-character;
1877     #
1878     }
1879    
1880     !!!parse-error (type => 'string after DOCTYPE name');
1881 wakaba 1.75 $self->{current_token}->{quirks} = 1;
1882 wakaba 1.73
1883 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
1884 wakaba 1.18 # next-input-character is already done
1885     redo A;
1886 wakaba 1.57 } elsif ($self->{state} == BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {
1887 wakaba 1.18 if ({
1888     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
1889     #0x000D => 1, # HT, LF, VT, FF, SP, CR
1890 wakaba 1.76 }->{$self->{next_char}}) {
1891 wakaba 1.77 !!!cp (181);
1892 wakaba 1.18 ## Stay in the state
1893     !!!next-input-character;
1894     redo A;
1895 wakaba 1.76 } elsif ($self->{next_char} eq 0x0022) { # "
1896 wakaba 1.77 !!!cp (182);
1897 wakaba 1.18 $self->{current_token}->{public_identifier} = ''; # DOCTYPE
1898 wakaba 1.57 $self->{state} = DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE;
1899 wakaba 1.18 !!!next-input-character;
1900     redo A;
1901 wakaba 1.76 } elsif ($self->{next_char} eq 0x0027) { # '
1902 wakaba 1.77 !!!cp (183);
1903 wakaba 1.18 $self->{current_token}->{public_identifier} = ''; # DOCTYPE
1904 wakaba 1.57 $self->{state} = DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE;
1905 wakaba 1.18 !!!next-input-character;
1906     redo A;
1907 wakaba 1.76 } elsif ($self->{next_char} eq 0x003E) { # >
1908 wakaba 1.77 !!!cp (184);
1909 wakaba 1.18 !!!parse-error (type => 'no PUBLIC literal');
1910    
1911 wakaba 1.57 $self->{state} = DATA_STATE;
1912 wakaba 1.18 !!!next-input-character;
1913    
1914 wakaba 1.75 $self->{current_token}->{quirks} = 1;
1915 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
1916    
1917     redo A;
1918 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1919 wakaba 1.77 !!!cp (185);
1920 wakaba 1.18 !!!parse-error (type => 'unclosed DOCTYPE');
1921    
1922 wakaba 1.57 $self->{state} = DATA_STATE;
1923 wakaba 1.18 ## reconsume
1924    
1925 wakaba 1.75 $self->{current_token}->{quirks} = 1;
1926 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
1927    
1928     redo A;
1929     } else {
1930 wakaba 1.77 !!!cp (186);
1931 wakaba 1.18 !!!parse-error (type => 'string after PUBLIC');
1932 wakaba 1.75 $self->{current_token}->{quirks} = 1;
1933 wakaba 1.73
1934 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
1935 wakaba 1.18 !!!next-input-character;
1936     redo A;
1937     }
1938 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE) {
1939 wakaba 1.76 if ($self->{next_char} == 0x0022) { # "
1940 wakaba 1.77 !!!cp (187);
1941 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
1942 wakaba 1.18 !!!next-input-character;
1943     redo A;
1944 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1945 wakaba 1.77 !!!cp (188);
1946 wakaba 1.69 !!!parse-error (type => 'unclosed PUBLIC literal');
1947    
1948     $self->{state} = DATA_STATE;
1949     !!!next-input-character;
1950    
1951 wakaba 1.75 $self->{current_token}->{quirks} = 1;
1952 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
1953    
1954     redo A;
1955 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1956 wakaba 1.77 !!!cp (189);
1957 wakaba 1.18 !!!parse-error (type => 'unclosed PUBLIC literal');
1958    
1959 wakaba 1.57 $self->{state} = DATA_STATE;
1960 wakaba 1.18 ## reconsume
1961    
1962 wakaba 1.75 $self->{current_token}->{quirks} = 1;
1963 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
1964    
1965     redo A;
1966     } else {
1967 wakaba 1.77 !!!cp (190);
1968 wakaba 1.18 $self->{current_token}->{public_identifier} # DOCTYPE
1969 wakaba 1.76 .= chr $self->{next_char};
1970 wakaba 1.18 ## Stay in the state
1971     !!!next-input-character;
1972     redo A;
1973     }
1974 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE) {
1975 wakaba 1.76 if ($self->{next_char} == 0x0027) { # '
1976 wakaba 1.77 !!!cp (191);
1977 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
1978 wakaba 1.18 !!!next-input-character;
1979     redo A;
1980 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1981 wakaba 1.77 !!!cp (192);
1982 wakaba 1.69 !!!parse-error (type => 'unclosed PUBLIC literal');
1983    
1984     $self->{state} = DATA_STATE;
1985     !!!next-input-character;
1986    
1987 wakaba 1.75 $self->{current_token}->{quirks} = 1;
1988 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
1989    
1990     redo A;
1991 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1992 wakaba 1.77 !!!cp (193);
1993 wakaba 1.18 !!!parse-error (type => 'unclosed PUBLIC literal');
1994    
1995 wakaba 1.57 $self->{state} = DATA_STATE;
1996 wakaba 1.18 ## reconsume
1997    
1998 wakaba 1.75 $self->{current_token}->{quirks} = 1;
1999 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2000    
2001     redo A;
2002     } else {
2003 wakaba 1.77 !!!cp (194);
2004 wakaba 1.18 $self->{current_token}->{public_identifier} # DOCTYPE
2005 wakaba 1.76 .= chr $self->{next_char};
2006 wakaba 1.18 ## Stay in the state
2007     !!!next-input-character;
2008     redo A;
2009     }
2010 wakaba 1.57 } elsif ($self->{state} == AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {
2011 wakaba 1.18 if ({
2012     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2013     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2014 wakaba 1.76 }->{$self->{next_char}}) {
2015 wakaba 1.77 !!!cp (195);
2016 wakaba 1.18 ## Stay in the state
2017     !!!next-input-character;
2018     redo A;
2019 wakaba 1.76 } elsif ($self->{next_char} == 0x0022) { # "
2020 wakaba 1.77 !!!cp (196);
2021 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2022 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
2023 wakaba 1.18 !!!next-input-character;
2024     redo A;
2025 wakaba 1.76 } elsif ($self->{next_char} == 0x0027) { # '
2026 wakaba 1.77 !!!cp (197);
2027 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2028 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
2029 wakaba 1.18 !!!next-input-character;
2030     redo A;
2031 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2032 wakaba 1.77 !!!cp (198);
2033 wakaba 1.57 $self->{state} = DATA_STATE;
2034 wakaba 1.18 !!!next-input-character;
2035    
2036     !!!emit ($self->{current_token}); # DOCTYPE
2037    
2038     redo A;
2039 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2040 wakaba 1.77 !!!cp (199);
2041 wakaba 1.18 !!!parse-error (type => 'unclosed DOCTYPE');
2042    
2043 wakaba 1.57 $self->{state} = DATA_STATE;
2044 wakaba 1.26 ## reconsume
2045 wakaba 1.18
2046 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2047 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2048    
2049     redo A;
2050     } else {
2051 wakaba 1.77 !!!cp (200);
2052 wakaba 1.18 !!!parse-error (type => 'string after PUBLIC literal');
2053 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2054 wakaba 1.73
2055 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2056 wakaba 1.18 !!!next-input-character;
2057     redo A;
2058     }
2059 wakaba 1.57 } elsif ($self->{state} == BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
2060 wakaba 1.18 if ({
2061     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2062     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2063 wakaba 1.76 }->{$self->{next_char}}) {
2064 wakaba 1.77 !!!cp (201);
2065 wakaba 1.18 ## Stay in the state
2066     !!!next-input-character;
2067     redo A;
2068 wakaba 1.76 } elsif ($self->{next_char} == 0x0022) { # "
2069 wakaba 1.77 !!!cp (202);
2070 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2071 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
2072 wakaba 1.18 !!!next-input-character;
2073     redo A;
2074 wakaba 1.76 } elsif ($self->{next_char} == 0x0027) { # '
2075 wakaba 1.77 !!!cp (203);
2076 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2077 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
2078 wakaba 1.18 !!!next-input-character;
2079     redo A;
2080 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2081 wakaba 1.77 !!!cp (204);
2082 wakaba 1.18 !!!parse-error (type => 'no SYSTEM literal');
2083 wakaba 1.57 $self->{state} = DATA_STATE;
2084 wakaba 1.18 !!!next-input-character;
2085    
2086 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2087 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2088    
2089     redo A;
2090 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2091 wakaba 1.77 !!!cp (205);
2092 wakaba 1.18 !!!parse-error (type => 'unclosed DOCTYPE');
2093    
2094 wakaba 1.57 $self->{state} = DATA_STATE;
2095 wakaba 1.26 ## reconsume
2096 wakaba 1.18
2097 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2098 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2099    
2100     redo A;
2101     } else {
2102 wakaba 1.77 !!!cp (206);
2103 wakaba 1.30 !!!parse-error (type => 'string after SYSTEM');
2104 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2105 wakaba 1.73
2106 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2107 wakaba 1.18 !!!next-input-character;
2108     redo A;
2109     }
2110 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE) {
2111 wakaba 1.76 if ($self->{next_char} == 0x0022) { # "
2112 wakaba 1.77 !!!cp (207);
2113 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
2114 wakaba 1.18 !!!next-input-character;
2115     redo A;
2116 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2117 wakaba 1.77 !!!cp (208);
2118 wakaba 1.69 !!!parse-error (type => 'unclosed PUBLIC literal');
2119    
2120     $self->{state} = DATA_STATE;
2121     !!!next-input-character;
2122    
2123 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2124 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
2125    
2126     redo A;
2127 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2128 wakaba 1.77 !!!cp (209);
2129 wakaba 1.18 !!!parse-error (type => 'unclosed SYSTEM literal');
2130    
2131 wakaba 1.57 $self->{state} = DATA_STATE;
2132 wakaba 1.18 ## reconsume
2133    
2134 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2135 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2136    
2137     redo A;
2138     } else {
2139 wakaba 1.77 !!!cp (210);
2140 wakaba 1.18 $self->{current_token}->{system_identifier} # DOCTYPE
2141 wakaba 1.76 .= chr $self->{next_char};
2142 wakaba 1.18 ## Stay in the state
2143     !!!next-input-character;
2144     redo A;
2145     }
2146 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE) {
2147 wakaba 1.76 if ($self->{next_char} == 0x0027) { # '
2148 wakaba 1.77 !!!cp (211);
2149 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
2150 wakaba 1.18 !!!next-input-character;
2151     redo A;
2152 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2153 wakaba 1.77 !!!cp (212);
2154 wakaba 1.69 !!!parse-error (type => 'unclosed PUBLIC literal');
2155    
2156     $self->{state} = DATA_STATE;
2157     !!!next-input-character;
2158    
2159 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2160 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
2161    
2162     redo A;
2163 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2164 wakaba 1.77 !!!cp (213);
2165 wakaba 1.18 !!!parse-error (type => 'unclosed SYSTEM literal');
2166    
2167 wakaba 1.57 $self->{state} = DATA_STATE;
2168 wakaba 1.18 ## reconsume
2169    
2170 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2171 wakaba 1.1 !!!emit ($self->{current_token}); # DOCTYPE
2172    
2173     redo A;
2174     } else {
2175 wakaba 1.77 !!!cp (214);
2176 wakaba 1.18 $self->{current_token}->{system_identifier} # DOCTYPE
2177 wakaba 1.76 .= chr $self->{next_char};
2178 wakaba 1.18 ## Stay in the state
2179     !!!next-input-character;
2180     redo A;
2181     }
2182 wakaba 1.57 } elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
2183 wakaba 1.18 if ({
2184     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2185     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2186 wakaba 1.76 }->{$self->{next_char}}) {
2187 wakaba 1.77 !!!cp (215);
2188 wakaba 1.18 ## Stay in the state
2189     !!!next-input-character;
2190     redo A;
2191 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2192 wakaba 1.77 !!!cp (216);
2193 wakaba 1.57 $self->{state} = DATA_STATE;
2194 wakaba 1.18 !!!next-input-character;
2195    
2196     !!!emit ($self->{current_token}); # DOCTYPE
2197    
2198     redo A;
2199 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2200 wakaba 1.77 !!!cp (217);
2201 wakaba 1.18 !!!parse-error (type => 'unclosed DOCTYPE');
2202    
2203 wakaba 1.57 $self->{state} = DATA_STATE;
2204 wakaba 1.26 ## reconsume
2205 wakaba 1.18
2206 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2207 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2208    
2209     redo A;
2210     } else {
2211 wakaba 1.77 !!!cp (218);
2212 wakaba 1.18 !!!parse-error (type => 'string after SYSTEM literal');
2213 wakaba 1.75 #$self->{current_token}->{quirks} = 1;
2214 wakaba 1.73
2215 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2216 wakaba 1.1 !!!next-input-character;
2217     redo A;
2218     }
2219 wakaba 1.57 } elsif ($self->{state} == BOGUS_DOCTYPE_STATE) {
2220 wakaba 1.76 if ($self->{next_char} == 0x003E) { # >
2221 wakaba 1.77 !!!cp (219);
2222 wakaba 1.57 $self->{state} = DATA_STATE;
2223 wakaba 1.1 !!!next-input-character;
2224    
2225     !!!emit ($self->{current_token}); # DOCTYPE
2226    
2227     redo A;
2228 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2229 wakaba 1.77 !!!cp (220);
2230 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
2231 wakaba 1.57 $self->{state} = DATA_STATE;
2232 wakaba 1.1 ## reconsume
2233    
2234     !!!emit ($self->{current_token}); # DOCTYPE
2235    
2236     redo A;
2237     } else {
2238 wakaba 1.77 !!!cp (221);
2239 wakaba 1.1 ## Stay in the state
2240     !!!next-input-character;
2241     redo A;
2242     }
2243     } else {
2244     die "$0: $self->{state}: Unknown state";
2245     }
2246     } # A
2247    
2248     die "$0: _get_next_token: unexpected case";
2249     } # _get_next_token
2250    
2251 wakaba 1.72 sub _tokenize_attempt_to_consume_an_entity ($$$) {
2252     my ($self, $in_attr, $additional) = @_;
2253 wakaba 1.20
2254 wakaba 1.112 my ($l, $c) = ($self->{line_prev}, $self->{column_prev});
2255    
2256 wakaba 1.20 if ({
2257     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, # HT, LF, VT, FF,
2258     0x0020 => 1, 0x003C => 1, 0x0026 => 1, -1 => 1, # SP, <, & # 0x000D # CR
2259 wakaba 1.72 $additional => 1,
2260 wakaba 1.76 }->{$self->{next_char}}) {
2261 wakaba 1.78 !!!cp (1001);
2262 wakaba 1.20 ## Don't consume
2263     ## No error
2264     return undef;
2265 wakaba 1.76 } elsif ($self->{next_char} == 0x0023) { # #
2266 wakaba 1.1 !!!next-input-character;
2267 wakaba 1.76 if ($self->{next_char} == 0x0078 or # x
2268     $self->{next_char} == 0x0058) { # X
2269 wakaba 1.26 my $code;
2270 wakaba 1.1 X: {
2271 wakaba 1.76 my $x_char = $self->{next_char};
2272 wakaba 1.1 !!!next-input-character;
2273 wakaba 1.76 if (0x0030 <= $self->{next_char} and
2274     $self->{next_char} <= 0x0039) { # 0..9
2275 wakaba 1.78 !!!cp (1002);
2276 wakaba 1.26 $code ||= 0;
2277     $code *= 0x10;
2278 wakaba 1.76 $code += $self->{next_char} - 0x0030;
2279 wakaba 1.1 redo X;
2280 wakaba 1.76 } elsif (0x0061 <= $self->{next_char} and
2281     $self->{next_char} <= 0x0066) { # a..f
2282 wakaba 1.78 !!!cp (1003);
2283 wakaba 1.26 $code ||= 0;
2284     $code *= 0x10;
2285 wakaba 1.76 $code += $self->{next_char} - 0x0060 + 9;
2286 wakaba 1.1 redo X;
2287 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
2288     $self->{next_char} <= 0x0046) { # A..F
2289 wakaba 1.78 !!!cp (1004);
2290 wakaba 1.26 $code ||= 0;
2291     $code *= 0x10;
2292 wakaba 1.76 $code += $self->{next_char} - 0x0040 + 9;
2293 wakaba 1.1 redo X;
2294 wakaba 1.26 } elsif (not defined $code) { # no hexadecimal digit
2295 wakaba 1.78 !!!cp (1005);
2296 wakaba 1.112 !!!parse-error (type => 'bare hcro', line => $l, column => $c);
2297 wakaba 1.76 !!!back-next-input-character ($x_char, $self->{next_char});
2298     $self->{next_char} = 0x0023; # #
2299 wakaba 1.1 return undef;
2300 wakaba 1.76 } elsif ($self->{next_char} == 0x003B) { # ;
2301 wakaba 1.78 !!!cp (1006);
2302 wakaba 1.1 !!!next-input-character;
2303     } else {
2304 wakaba 1.78 !!!cp (1007);
2305 wakaba 1.112 !!!parse-error (type => 'no refc', line => $l, column => $c);
2306 wakaba 1.1 }
2307    
2308 wakaba 1.26 if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) {
2309 wakaba 1.78 !!!cp (1008);
2310 wakaba 1.112 !!!parse-error (type => (sprintf 'invalid character reference:U+%04X', $code), line => $l, column => $c);
2311 wakaba 1.26 $code = 0xFFFD;
2312     } elsif ($code > 0x10FFFF) {
2313 wakaba 1.78 !!!cp (1009);
2314 wakaba 1.112 !!!parse-error (type => (sprintf 'invalid character reference:U-%08X', $code), line => $l, column => $c);
2315 wakaba 1.26 $code = 0xFFFD;
2316     } elsif ($code == 0x000D) {
2317 wakaba 1.78 !!!cp (1010);
2318 wakaba 1.112 !!!parse-error (type => 'CR character reference', line => $l, column => $c);
2319 wakaba 1.26 $code = 0x000A;
2320     } elsif (0x80 <= $code and $code <= 0x9F) {
2321 wakaba 1.78 !!!cp (1011);
2322 wakaba 1.112 !!!parse-error (type => (sprintf 'C1 character reference:U+%04X', $code), line => $l, column => $c);
2323 wakaba 1.26 $code = $c1_entity_char->{$code};
2324 wakaba 1.1 }
2325    
2326 wakaba 1.66 return {type => CHARACTER_TOKEN, data => chr $code,
2327 wakaba 1.118 has_reference => 1,
2328 wakaba 1.120 line => $l, column => $c,
2329 wakaba 1.118 };
2330 wakaba 1.1 } # X
2331 wakaba 1.76 } elsif (0x0030 <= $self->{next_char} and
2332     $self->{next_char} <= 0x0039) { # 0..9
2333     my $code = $self->{next_char} - 0x0030;
2334 wakaba 1.1 !!!next-input-character;
2335    
2336 wakaba 1.76 while (0x0030 <= $self->{next_char} and
2337     $self->{next_char} <= 0x0039) { # 0..9
2338 wakaba 1.78 !!!cp (1012);
2339 wakaba 1.1 $code *= 10;
2340 wakaba 1.76 $code += $self->{next_char} - 0x0030;
2341 wakaba 1.1
2342     !!!next-input-character;
2343     }
2344    
2345 wakaba 1.76 if ($self->{next_char} == 0x003B) { # ;
2346 wakaba 1.78 !!!cp (1013);
2347 wakaba 1.1 !!!next-input-character;
2348     } else {
2349 wakaba 1.78 !!!cp (1014);
2350 wakaba 1.112 !!!parse-error (type => 'no refc', line => $l, column => $c);
2351 wakaba 1.1 }
2352    
2353 wakaba 1.26 if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) {
2354 wakaba 1.78 !!!cp (1015);
2355 wakaba 1.112 !!!parse-error (type => (sprintf 'invalid character reference:U+%04X', $code), line => $l, column => $c);
2356 wakaba 1.26 $code = 0xFFFD;
2357     } elsif ($code > 0x10FFFF) {
2358 wakaba 1.78 !!!cp (1016);
2359 wakaba 1.112 !!!parse-error (type => (sprintf 'invalid character reference:U-%08X', $code), line => $l, column => $c);
2360 wakaba 1.26 $code = 0xFFFD;
2361     } elsif ($code == 0x000D) {
2362 wakaba 1.78 !!!cp (1017);
2363 wakaba 1.112 !!!parse-error (type => 'CR character reference', line => $l, column => $c);
2364 wakaba 1.26 $code = 0x000A;
2365 wakaba 1.4 } elsif (0x80 <= $code and $code <= 0x9F) {
2366 wakaba 1.78 !!!cp (1018);
2367 wakaba 1.112 !!!parse-error (type => (sprintf 'C1 character reference:U+%04X', $code), line => $l, column => $c);
2368 wakaba 1.4 $code = $c1_entity_char->{$code};
2369 wakaba 1.1 }
2370    
2371 wakaba 1.112 return {type => CHARACTER_TOKEN, data => chr $code, has_reference => 1,
2372 wakaba 1.120 line => $l, column => $c,
2373 wakaba 1.118 };
2374 wakaba 1.1 } else {
2375 wakaba 1.78 !!!cp (1019);
2376 wakaba 1.112 !!!parse-error (type => 'bare nero', line => $l, column => $c);
2377 wakaba 1.76 !!!back-next-input-character ($self->{next_char});
2378     $self->{next_char} = 0x0023; # #
2379 wakaba 1.1 return undef;
2380     }
2381 wakaba 1.76 } elsif ((0x0041 <= $self->{next_char} and
2382     $self->{next_char} <= 0x005A) or
2383     (0x0061 <= $self->{next_char} and
2384     $self->{next_char} <= 0x007A)) {
2385     my $entity_name = chr $self->{next_char};
2386 wakaba 1.1 !!!next-input-character;
2387    
2388     my $value = $entity_name;
2389 wakaba 1.37 my $match = 0;
2390 wakaba 1.16 require Whatpm::_NamedEntityList;
2391     our $EntityChar;
2392 wakaba 1.1
2393     while (length $entity_name < 10 and
2394     ## NOTE: Some number greater than the maximum length of entity name
2395 wakaba 1.76 ((0x0041 <= $self->{next_char} and # a
2396     $self->{next_char} <= 0x005A) or # x
2397     (0x0061 <= $self->{next_char} and # a
2398     $self->{next_char} <= 0x007A) or # z
2399     (0x0030 <= $self->{next_char} and # 0
2400     $self->{next_char} <= 0x0039) or # 9
2401     $self->{next_char} == 0x003B)) { # ;
2402     $entity_name .= chr $self->{next_char};
2403 wakaba 1.16 if (defined $EntityChar->{$entity_name}) {
2404 wakaba 1.76 if ($self->{next_char} == 0x003B) { # ;
2405 wakaba 1.78 !!!cp (1020);
2406 wakaba 1.26 $value = $EntityChar->{$entity_name};
2407 wakaba 1.16 $match = 1;
2408     !!!next-input-character;
2409     last;
2410 wakaba 1.37 } else {
2411 wakaba 1.78 !!!cp (1021);
2412 wakaba 1.26 $value = $EntityChar->{$entity_name};
2413     $match = -1;
2414 wakaba 1.37 !!!next-input-character;
2415 wakaba 1.16 }
2416 wakaba 1.1 } else {
2417 wakaba 1.78 !!!cp (1022);
2418 wakaba 1.76 $value .= chr $self->{next_char};
2419 wakaba 1.37 $match *= 2;
2420     !!!next-input-character;
2421 wakaba 1.1 }
2422     }
2423    
2424 wakaba 1.16 if ($match > 0) {
2425 wakaba 1.78 !!!cp (1023);
2426 wakaba 1.112 return {type => CHARACTER_TOKEN, data => $value, has_reference => 1,
2427 wakaba 1.120 line => $l, column => $c,
2428 wakaba 1.118 };
2429 wakaba 1.16 } elsif ($match < 0) {
2430 wakaba 1.112 !!!parse-error (type => 'no refc', line => $l, column => $c);
2431 wakaba 1.37 if ($in_attr and $match < -1) {
2432 wakaba 1.78 !!!cp (1024);
2433 wakaba 1.112 return {type => CHARACTER_TOKEN, data => '&'.$entity_name,
2434 wakaba 1.120 line => $l, column => $c,
2435 wakaba 1.118 };
2436 wakaba 1.37 } else {
2437 wakaba 1.78 !!!cp (1025);
2438 wakaba 1.112 return {type => CHARACTER_TOKEN, data => $value, has_reference => 1,
2439 wakaba 1.120 line => $l, column => $c,
2440 wakaba 1.118 };
2441 wakaba 1.37 }
2442 wakaba 1.1 } else {
2443 wakaba 1.78 !!!cp (1026);
2444 wakaba 1.112 !!!parse-error (type => 'bare ero', line => $l, column => $c);
2445 wakaba 1.66 ## NOTE: "No characters are consumed" in the spec.
2446 wakaba 1.112 return {type => CHARACTER_TOKEN, data => '&'.$value,
2447 wakaba 1.120 line => $l, column => $c,
2448 wakaba 1.118 };
2449 wakaba 1.1 }
2450     } else {
2451 wakaba 1.78 !!!cp (1027);
2452 wakaba 1.1 ## no characters are consumed
2453 wakaba 1.112 !!!parse-error (type => 'bare ero', line => $l, column => $c);
2454 wakaba 1.1 return undef;
2455     }
2456     } # _tokenize_attempt_to_consume_an_entity
2457    
## Prepare the document for the tree construction stage.
##
## NOTE: $self->{document} MUST be set before this method is called.
## Strict error checking is switched off on the document and the
## document is flagged as an HTML document (MUST).  The value of the
## |manakai_is_html| call is what the method evaluates to.
sub _initialize_tree_constructor ($) {
  my ($self) = @_;
  my $doc = $self->{document};

  $doc->strict_error_checking (0);
  ## TODO: Turn mutation events off # MUST
  ## TODO: Turn loose Document option (manakai extension) on
  $doc->manakai_is_html (1); # MUST
} # _initialize_tree_constructor
2466    
## Finish the tree construction stage: switch strict error checking
## on the document back on.
sub _terminate_tree_constructor ($) {
  my ($self) = @_;
  my $doc = $self->{document};

  $doc->strict_error_checking (1);
  ## TODO: Turn mutation events on
} # _terminate_tree_constructor
2472    
2473     ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
2474    
2475 wakaba 1.3 { # tree construction stage
2476     my $token;
2477    
## Drive the whole tree construction stage: fetch the first token,
## reset the per-parse state kept on $self, then run the "initial",
## "before html" and remaining insertion modes in that order.
sub _construct_tree ($) {
  my $self = shift;

  ## NOTE: When an interactive UA makes $self->{document} available
  ## to the user, or when it begins accepting user input, is not
  ## defined.

  ## NOTE: Appending a character means collecting it and all
  ## subsequent consecutive characters and inserting one Text node
  ## whose data is the concatenation of all those characters. # MUST

  !!!next-token;

  ## Reset the tree construction state.
  $self->{open_elements} = [];
  $self->{$_} = undef for qw(form_element head_element inner_html_node);

  ## NOTE: The "initial" insertion mode.
  $self->_tree_construction_initial; # MUST

  ## NOTE: The "before html" insertion mode.
  $self->_tree_construction_root_element;
  $self->{insertion_mode} = BEFORE_HEAD_IM;

  ## NOTE: The "before head" insertion mode and so on.
  return $self->_tree_construction_main;
} # _construct_tree
2506    
## Process tokens in the "initial" insertion mode.
##
## The file-scoped $token is consumed as follows:
##   - DOCTYPE token: a "not HTML5" error is reported where
##     applicable, a DocumentType node is appended to the document,
##     the compat mode of the document is chosen from the DOCTYPE,
##     the next token is fetched, and the method returns.
##   - Start tag, end tag, end-of-file: a "no DOCTYPE" error is
##     reported, the document is put in quirks mode, and the method
##     returns with the token left to be reprocessed.
##   - Character token: leading white space is dropped; if nothing
##     remains the next token is fetched and processing continues,
##     otherwise it is handled like the previous case.
##   - Comment token: a Comment node is appended to the document and
##     processing continues with the next token.
## Any other token type is a fatal internal error.
sub _tree_construction_initial ($) {
  my $self = shift;

  ## NOTE: "initial" insertion mode

  INITIAL: {
    if ($token->{type} == DOCTYPE_TOKEN) {
      ## NOTE: Conformance checkers MAY, instead of reporting "not HTML5"
      ## error, switch to a conformance checking mode for another
      ## language.
      my $doctype_name = $token->{name};
      $doctype_name = '' unless defined $doctype_name;
      $doctype_name =~ tr/a-z/A-Z/;
      if (not defined $token->{name} or # <!DOCTYPE>
          defined $token->{public_identifier} or
          defined $token->{system_identifier}) {
        !!!cp ('t1');
        !!!parse-error (type => 'not HTML5', token => $token);
      } elsif ($doctype_name ne 'HTML') {
        !!!cp ('t2');
        ## ISSUE: ASCII case-insensitive? (in fact it does not matter)
        !!!parse-error (type => 'not HTML5', token => $token);
      } else {
        !!!cp ('t3');
      }

      my $doctype = $self->{document}->create_document_type_definition
          ($token->{name}); ## ISSUE: If name is missing (e.g. <!DOCTYPE>)?
      $doctype->public_id ($token->{public_identifier})
          if defined $token->{public_identifier};
      $doctype->system_id ($token->{system_identifier})
          if defined $token->{system_identifier};
      ## NOTE: Other DocumentType attributes are null or empty lists.
      ## ISSUE: internalSubset = null??
      $self->{document}->append_child ($doctype);

      if ($token->{quirks} or $doctype_name ne 'HTML') {
        !!!cp ('t4');
        $self->{document}->manakai_compat_mode ('quirks');
      } elsif (defined $token->{public_identifier}) {
        ## Public identifiers are compared in uppercased form.
        my $pubid = $token->{public_identifier};
        $pubid =~ tr/a-z/A-Z/;
        if ({
          "+//SILMARIL//DTD HTML PRO V0R11 19970101//EN" => 1,
          "-//ADVASOFT LTD//DTD HTML 3.0 ASWEDIT + EXTENSIONS//EN" => 1,
          "-//AS//DTD HTML 3.0 ASWEDIT + EXTENSIONS//EN" => 1,
          "-//IETF//DTD HTML 2.0 LEVEL 1//EN" => 1,
          "-//IETF//DTD HTML 2.0 LEVEL 2//EN" => 1,
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 1//EN" => 1,
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 2//EN" => 1,
          "-//IETF//DTD HTML 2.0 STRICT//EN" => 1,
          "-//IETF//DTD HTML 2.0//EN" => 1,
          "-//IETF//DTD HTML 2.1E//EN" => 1,
          "-//IETF//DTD HTML 3.0//EN" => 1,
          "-//IETF//DTD HTML 3.0//EN//" => 1,
          "-//IETF//DTD HTML 3.2 FINAL//EN" => 1,
          "-//IETF//DTD HTML 3.2//EN" => 1,
          "-//IETF//DTD HTML 3//EN" => 1,
          "-//IETF//DTD HTML LEVEL 0//EN" => 1,
          "-//IETF//DTD HTML LEVEL 0//EN//2.0" => 1,
          "-//IETF//DTD HTML LEVEL 1//EN" => 1,
          "-//IETF//DTD HTML LEVEL 1//EN//2.0" => 1,
          "-//IETF//DTD HTML LEVEL 2//EN" => 1,
          "-//IETF//DTD HTML LEVEL 2//EN//2.0" => 1,
          "-//IETF//DTD HTML LEVEL 3//EN" => 1,
          "-//IETF//DTD HTML LEVEL 3//EN//3.0" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 0//EN" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 0//EN//2.0" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 1//EN" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 1//EN//2.0" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 2//EN" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 2//EN//2.0" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 3//EN" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 3//EN//3.0" => 1,
          "-//IETF//DTD HTML STRICT//EN" => 1,
          "-//IETF//DTD HTML STRICT//EN//2.0" => 1,
          "-//IETF//DTD HTML STRICT//EN//3.0" => 1,
          "-//IETF//DTD HTML//EN" => 1,
          "-//IETF//DTD HTML//EN//2.0" => 1,
          "-//IETF//DTD HTML//EN//3.0" => 1,
          "-//METRIUS//DTD METRIUS PRESENTATIONAL//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML STRICT//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 TABLES//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML STRICT//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 TABLES//EN" => 1,
          "-//NETSCAPE COMM. CORP.//DTD HTML//EN" => 1,
          "-//NETSCAPE COMM. CORP.//DTD STRICT HTML//EN" => 1,
          "-//O'REILLY AND ASSOCIATES//DTD HTML 2.0//EN" => 1,
          "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED 1.0//EN" => 1,
          "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED RELAXED 1.0//EN" => 1,
          "-//SOFTQUAD SOFTWARE//DTD HOTMETAL PRO 6.0::19990601::EXTENSIONS TO HTML 4.0//EN" => 1,
          "-//SOFTQUAD//DTD HOTMETAL PRO 4.0::19971010::EXTENSIONS TO HTML 4.0//EN" => 1,
          "-//SPYGLASS//DTD HTML 2.0 EXTENDED//EN" => 1,
          "-//SQ//DTD HTML 2.0 HOTMETAL + EXTENSIONS//EN" => 1,
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA HTML//EN" => 1,
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA STRICT HTML//EN" => 1,
          "-//W3C//DTD HTML 3 1995-03-24//EN" => 1,
          "-//W3C//DTD HTML 3.2 DRAFT//EN" => 1,
          "-//W3C//DTD HTML 3.2 FINAL//EN" => 1,
          "-//W3C//DTD HTML 3.2//EN" => 1,
          "-//W3C//DTD HTML 3.2S DRAFT//EN" => 1,
          "-//W3C//DTD HTML 4.0 FRAMESET//EN" => 1,
          "-//W3C//DTD HTML 4.0 TRANSITIONAL//EN" => 1,
          "-//W3C//DTD HTML EXPERIMENTAL 19960712//EN" => 1,
          "-//W3C//DTD HTML EXPERIMENTAL 970421//EN" => 1,
          "-//W3C//DTD W3 HTML//EN" => 1,
          "-//W3O//DTD W3 HTML 3.0//EN" => 1,
          "-//W3O//DTD W3 HTML 3.0//EN//" => 1,
          "-//W3O//DTD W3 HTML STRICT 3.0//EN//" => 1,
          "-//WEBTECHS//DTD MOZILLA HTML 2.0//EN" => 1,
          "-//WEBTECHS//DTD MOZILLA HTML//EN" => 1,
          "-/W3C/DTD HTML 4.0 TRANSITIONAL/EN" => 1,
          "HTML" => 1,
        }->{$pubid}) {
          !!!cp ('t5');
          $self->{document}->manakai_compat_mode ('quirks');
        } elsif ($pubid eq "-//W3C//DTD HTML 4.01 FRAMESET//EN" or
                 $pubid eq "-//W3C//DTD HTML 4.01 TRANSITIONAL//EN") {
          ## NOTE: For these two public identifiers the mode depends
          ## on the system identifier: quirks if it is missing,
          ## limited quirks if it is present.
          if (defined $token->{system_identifier}) {
            !!!cp ('t6');
            $self->{document}->manakai_compat_mode ('limited quirks');
          } else {
            !!!cp ('t7');
            $self->{document}->manakai_compat_mode ('quirks');
          }
        } elsif ($pubid eq "-//W3C//DTD XHTML 1.0 FRAMESET//EN" or
                 $pubid eq "-//W3C//DTD XHTML 1.0 TRANSITIONAL//EN") {
          !!!cp ('t8');
          $self->{document}->manakai_compat_mode ('limited quirks');
        } else {
          !!!cp ('t9');
        }
      } else {
        !!!cp ('t10');
      }
      if (defined $token->{system_identifier}) {
        ## System identifiers are compared in lowercased form.  This
        ## check runs last, so it overrides a mode chosen above.
        my $sysid = $token->{system_identifier};
        $sysid =~ tr/A-Z/a-z/;
        if ($sysid eq "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
          ## TODO: Check the spec: PUBLIC "(limited quirks)" "(quirks)"
          $self->{document}->manakai_compat_mode ('quirks');
          !!!cp ('t11');
        } else {
          !!!cp ('t12');
        }
      } else {
        !!!cp ('t13');
      }

      ## Go to the "before html" insertion mode.
      !!!next-token;
      return;
    } elsif ({
              START_TAG_TOKEN, 1,
              END_TAG_TOKEN, 1,
              END_OF_FILE_TOKEN, 1,
             }->{$token->{type}}) {
      !!!cp ('t14');
      !!!parse-error (type => 'no DOCTYPE', token => $token);
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the "before html" insertion mode.
      ## reprocess
      return;
    } elsif ($token->{type} == CHARACTER_TOKEN) {
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D
        ## Ignore the token

        unless (length $token->{data}) {
          !!!cp ('t15');
          ## Stay in the insertion mode.
          !!!next-token;
          redo INITIAL;
        } else {
          !!!cp ('t16');
        }
      } else {
        !!!cp ('t17');
      }

      !!!parse-error (type => 'no DOCTYPE', token => $token);
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the "before html" insertion mode.
      ## reprocess
      return;
    } elsif ($token->{type} == COMMENT_TOKEN) {
      !!!cp ('t18');
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);

      ## Stay in the insertion mode.
      !!!next-token;
      redo INITIAL;
    } else {
      die "$0: $token->{type}: Unknown token type";
    }
  } # INITIAL

  die "$0: _tree_construction_initial: This should be never reached";
} # _tree_construction_initial
2708    
## Process tokens in the "before html" insertion mode.
##
## DOCTYPE tokens are reported as errors and ignored; comments are
## appended to the document; white space at the beginning of a
## character token is dropped.  An |html| start tag creates the root
## element from the token.  Any other token makes an attribute-less
## |html| root element be created, and the token is left to be
## reprocessed by the caller.
##
## The $self->{application_cache_selection} callback is invoked
## exactly once: with the value of the |manifest| attribute when the
## |html| start tag has a true {manifest} entry, with |undef|
## otherwise.
sub _tree_construction_root_element ($) {
  my $self = shift;

  ## NOTE: "before html" insertion mode.

  B: {
    if ($token->{type} == DOCTYPE_TOKEN) {
      !!!cp ('t19');
      !!!parse-error (type => 'in html:#DOCTYPE', token => $token);
      ## Ignore the token
      ## Stay in the insertion mode.
      !!!next-token;
      redo B;
    } elsif ($token->{type} == COMMENT_TOKEN) {
      !!!cp ('t20');
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);
      ## Stay in the insertion mode.
      !!!next-token;
      redo B;
    } elsif ($token->{type} == CHARACTER_TOKEN) {
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D
        ## Ignore the token.

        unless (length $token->{data}) {
          !!!cp ('t21');
          ## Stay in the insertion mode.
          !!!next-token;
          redo B;
        } else {
          !!!cp ('t22');
        }
      } else {
        !!!cp ('t23');
      }

      ## NOTE: The application cache selection callback is not
      ## invoked here; it is invoked once, below, after the implied
      ## |html| element has been created.

      #
    } elsif ($token->{type} == START_TAG_TOKEN) {
      if ($token->{tag_name} eq 'html') {
        my $root_element;
        !!!create-element ($root_element, $token->{tag_name}, $token->{attributes}, $token);
        $self->{document}->append_child ($root_element);
        push @{$self->{open_elements}}, [$root_element, 'html'];

        if ($token->{attributes}->{manifest}) {
          !!!cp ('t24');
          $self->{application_cache_selection}
              ->($token->{attributes}->{manifest}->{value});
          ## ISSUE: Spec is unclear on relative references.
          ## According to Hixie (#whatwg 2008-03-19), it should be
          ## resolved against the base URI of the document in HTML
          ## or xml:base of the element in XHTML.
        } else {
          !!!cp ('t25');
          $self->{application_cache_selection}->(undef);
        }

        !!!next-token;
        return; ## Go to the "before head" insertion mode.
      } else {
        !!!cp ('t25.1');
        #
      }
    } elsif ({
              END_TAG_TOKEN, 1,
              END_OF_FILE_TOKEN, 1,
             }->{$token->{type}}) {
      !!!cp ('t26');
      #
    } else {
      die "$0: $token->{type}: Unknown token type";
    }

    ## Anything that falls through to here implies the |html| element.
    my $root_element; !!!create-element ($root_element, 'html',, $token);
    $self->{document}->append_child ($root_element);
    push @{$self->{open_elements}}, [$root_element, 'html'];

    $self->{application_cache_selection}->(undef);

    ## NOTE: Reprocess the token.
    return; ## Go to the "before head" insertion mode.

    ## ISSUE: There is an issue in the spec
  } # B

  die "$0: _tree_construction_root_element: This should never be reached";
} # _tree_construction_root_element
2798    
## Reset the insertion mode appropriately.
##
## Walks $self->{open_elements} from the last entry towards the first
## one and sets $self->{insertion_mode} according to the first entry
## whose element name (the second item of the entry) selects a mode.
## When the first entry of the stack is reached and
## $self->{inner_html_node} is defined, that node is examined in its
## place unless it is a |td| or |th|.
sub _reset_insertion_mode ($) {
  my $self = shift;

  ## Step 1
  my $last;

  ## Step 2
  my $i = -1;
  my $node = $self->{open_elements}->[$i];

  ## Step 3
  S3: {
    if ($self->{open_elements}->[0]->[0] eq $node->[0]) {
      $last = 1;
      if (defined $self->{inner_html_node}) {
        if ($self->{inner_html_node}->[1] eq 'td' or
            $self->{inner_html_node}->[1] eq 'th') {
          !!!cp ('t27');
          #
        } else {
          !!!cp ('t28');
          $node = $self->{inner_html_node};
        }
      }
    }

    ## Step 4..13
    my $new_mode = {
      select => IN_SELECT_IM,
      ## NOTE: |option| and |optgroup| do not set
      ## insertion mode to "in select" by themselves.
      td => IN_CELL_IM,
      th => IN_CELL_IM,
      tr => IN_ROW_IM,
      tbody => IN_TABLE_BODY_IM,
      thead => IN_TABLE_BODY_IM,
      tfoot => IN_TABLE_BODY_IM,
      caption => IN_CAPTION_IM,
      colgroup => IN_COLUMN_GROUP_IM,
      table => IN_TABLE_IM,
      head => IN_BODY_IM, # not in head!
      body => IN_BODY_IM,
      frameset => IN_FRAMESET_IM,
    }->{$node->[1]};
    if (defined $new_mode) {
      ## NOTE: The assignment and the |return| are separate
      ## statements so that the method returns whatever the numeric
      ## value of the insertion mode constant is.
      $self->{insertion_mode} = $new_mode;
      return;
    }

    ## Step 14
    if ($node->[1] eq 'html') {
      unless (defined $self->{head_element}) {
        !!!cp ('t29');
        $self->{insertion_mode} = BEFORE_HEAD_IM;
      } else {
        ## ISSUE: Can this state be reached?
        !!!cp ('t30');
        $self->{insertion_mode} = AFTER_HEAD_IM;
      }
      return;
    } else {
      !!!cp ('t31');
    }

    ## Step 15
    if ($last) {
      $self->{insertion_mode} = IN_BODY_IM;
      return;
    }

    ## Step 16
    $i--;
    $node = $self->{open_elements}->[$i];

    ## Step 17
    redo S3;
  } # S3

  die "$0: _reset_insertion_mode: This line should never be reached";
} # _reset_insertion_mode
2873    
2874     sub _tree_construction_main ($) {
2875     my $self = shift;
2876    
2877 wakaba 1.1 my $active_formatting_elements = [];
2878    
## Reconstruct the active formatting elements. # MUST
##
## The only argument is a code reference that is called with each
## newly cloned element node in order to insert it into the tree.
## Entries are [node, element name] pairs; a marker entry has the
## string '#marker' as its first item.
my $reconstruct_active_formatting_elements = sub {
  my $insert_node = shift;

  ## True if the node of the given entry is in the stack of open
  ## elements.
  my $is_open = sub {
    my $candidate = shift;
    for my $open (@{$self->{open_elements}}) {
      return 1 if $candidate->[0] eq $open->[0];
    }
    return 0;
  };

  ## Step 1
  return unless @$active_formatting_elements;

  ## Step 3
  my $i = -1;
  my $entry = $active_formatting_elements->[$i];

  ## Step 2
  return if $entry->[0] eq '#marker';
  if ($is_open->($entry)) {
    !!!cp ('t32');
    return;
  }

  ## Rewind towards the beginning of the list until the first entry,
  ## a marker, or an entry that is still open is found.
  REWIND: while (1) {
    ## Step 4
    last REWIND if $active_formatting_elements->[0]->[0] eq $entry->[0];

    ## Step 5
    $i--;
    $entry = $active_formatting_elements->[$i];

    ## Step 6
    if ($entry->[0] eq '#marker') {
      !!!cp ('t33_1');
    } elsif ($is_open->($entry)) {
      !!!cp ('t33');
      !!!cp ('t34');
    } else {
      ## NOTE: <!DOCTYPE HTML><p><b><i><u></p> <p>X
      !!!cp ('t35');
      next REWIND;
    }

    ## Step 7
    $i++;
    $entry = $active_formatting_elements->[$i];
    last REWIND;
  } # REWIND

  ## Clone every entry from the current one up to the last one.
  CREATE: while (1) {
    ## Step 8
    my $clone = [$entry->[0]->clone_node (0), $entry->[1]];

    ## Step 9
    $insert_node->($clone->[0]);
    push @{$self->{open_elements}}, $clone;

    ## Step 10
    $active_formatting_elements->[$i] = $self->{open_elements}->[-1];

    ## Step 11
    last CREATE if $clone->[0] eq $active_formatting_elements->[-1]->[0];

    !!!cp ('t36');
    ## Step 7'
    $i++;
    $entry = $active_formatting_elements->[$i];
  } # CREATE

  !!!cp ('t37');
}; # $reconstruct_active_formatting_elements
2958    
## Clear the list of active formatting elements up to the last
## marker: the last '#marker' entry and everything after it are
## removed.  The list is left untouched if it has no marker.
my $clear_up_to_marker = sub {
  my $pos = $#$active_formatting_elements;
  while ($pos >= 0) {
    if ($active_formatting_elements->[$pos]->[0] eq '#marker') {
      !!!cp ('t38');
      splice @$active_formatting_elements, $pos;
      return;
    }
    $pos--;
  }

  !!!cp ('t39');
}; # $clear_up_to_marker
2970    
2971 wakaba 1.96 my $insert;
2972    
## Generic CDATA/RCDATA element parsing.
##
## The only argument is the content model flag (CDATA_CONTENT_MODEL
## or RCDATA_CONTENT_MODEL) to tokenize the element content with.
## The current $token supplies the tag name and the attributes of
## the element.  Any other flag value is fatal once an unexpected
## token is found after the content.
my $parse_rcdata = sub ($) {
  my $model = shift;

  ## Step 1
  my $element_name = $token->{tag_name};
  my $element;
  !!!create-element ($element, $element_name, $token->{attributes}, $token);

  ## Step 2
  $insert->($element);

  ## Step 3
  $self->{content_model} = $model; # CDATA or RCDATA
  delete $self->{escape}; # MUST

  ## Step 4
  my @chunk;
  !!!next-token;
  while ($token->{type} == CHARACTER_TOKEN) { # or until stop tokenizing
    !!!cp ('t40');
    push @chunk, $token->{data};
    !!!next-token;
  }
  my $data = join '', @chunk;

  ## Step 5
  if (length $data) {
    !!!cp ('t41');
    $element->append_child ($self->{document}->create_text_node ($data));
  }

  ## Step 6
  $self->{content_model} = PCDATA_CONTENT_MODEL;

  ## Step 7
  if ($token->{type} == END_TAG_TOKEN and
      $token->{tag_name} eq $element_name) {
    !!!cp ('t42');
    ## Ignore the token
  } elsif ($model == CDATA_CONTENT_MODEL) {
    ## NOTE: An end-of-file token.
    !!!cp ('t43');
    !!!parse-error (type => 'in CDATA:#'.$token->{type}, token => $token);
  } elsif ($model == RCDATA_CONTENT_MODEL) {
    ## NOTE: An end-of-file token.
    !!!cp ('t44');
    !!!parse-error (type => 'in RCDATA:#'.$token->{type}, token => $token);
  } else {
    die "$0: $model in parse_rcdata";
  }
  !!!next-token;
}; # $parse_rcdata
3026 wakaba 1.1
## Process a |script| start tag.
##
## A |script| element is created from the current $token, the
## following character tokens (tokenized as CDATA) become its text,
## and the element is inserted by $insert unless
## $self->{inner_html_node} is defined.
my $script_start_tag = sub () {
  my $script;
  !!!create-element ($script, 'script', $token->{attributes}, $token);
  ## TODO: mark as "parser-inserted"

  $self->{content_model} = CDATA_CONTENT_MODEL;
  delete $self->{escape}; # MUST

  ## Collect the script text; the loop stops at the first
  ## non-character token or when the tokenizer stops tokenising.
  my @chunk;
  !!!next-token;
  while ($token->{type} == CHARACTER_TOKEN) {
    !!!cp ('t45');
    push @chunk, $token->{data};
    !!!next-token;
  }
  my $source = join '', @chunk;
  if (length $source) {
    !!!cp ('t46');
    $script->manakai_append_text ($source);
  }

  $self->{content_model} = PCDATA_CONTENT_MODEL;

  if ($token->{type} == END_TAG_TOKEN and
      $token->{tag_name} eq 'script') {
    !!!cp ('t47');
    ## Ignore the token
  } else {
    !!!cp ('t48');
    !!!parse-error (type => 'in CDATA:#'.$token->{type}, token => $token);
    ## ISSUE: And ignore?
    ## TODO: mark as "already executed"
  }

  if (not defined $self->{inner_html_node}) {
    !!!cp ('t50');
    ## TODO: $old_insertion_point = current insertion point
    ## TODO: insertion point = just before the next input character

    $insert->($script);

    ## TODO: insertion point = $old_insertion_point (might be "undefined")

    ## TODO: if there is a script that will execute as soon as the parser resume, then...
  } else {
    !!!cp ('t49');
    ## TODO: mark as "already executed"
  }

  !!!next-token;
}; # $script_start_tag
3077    
3078 wakaba 1.102 ## NOTE: $open_tables->[-1]->[0] is the "current table" element node.
3079     ## NOTE: $open_tables->[-1]->[1] is the "tainted" flag.
3080     my $open_tables = [[$self->{open_elements}->[0]->[0]]];
3081    
3082 wakaba 1.1 my $formatting_end_tag = sub {
3083 wakaba 1.113 my $end_tag_token = shift;
3084     my $tag_name = $end_tag_token->{tag_name};
3085 wakaba 1.1
3086 wakaba 1.103 ## NOTE: The adoption agency algorithm (AAA).
3087 wakaba 1.102
3088 wakaba 1.1 FET: {
3089     ## Step 1
3090     my $formatting_element;
3091     my $formatting_element_i_in_active;
3092     AFE: for (reverse 0..$#$active_formatting_elements) {
3093     if ($active_formatting_elements->[$_]->[1] eq $tag_name) {
3094 wakaba 1.79 !!!cp ('t51');
3095 wakaba 1.1 $formatting_element = $active_formatting_elements->[$_];
3096     $formatting_element_i_in_active = $_;
3097     last AFE;
3098     } elsif ($active_formatting_elements->[$_]->[0] eq '#marker') {
3099 wakaba 1.79 !!!cp ('t52');
3100 wakaba 1.1 last AFE;
3101     }
3102     } # AFE
3103     unless (defined $formatting_element) {
3104 wakaba 1.79 !!!cp ('t53');
3105 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$tag_name, token => $end_tag_token);
3106 wakaba 1.1 ## Ignore the token
3107     !!!next-token;
3108     return;
3109     }
3110     ## has an element in scope
3111     my $in_scope = 1;
3112     my $formatting_element_i_in_open;
3113 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3114     my $node = $self->{open_elements}->[$_];
3115 wakaba 1.1 if ($node->[0] eq $formatting_element->[0]) {
3116     if ($in_scope) {
3117 wakaba 1.79 !!!cp ('t54');
3118 wakaba 1.1 $formatting_element_i_in_open = $_;
3119     last INSCOPE;
3120     } else { # in open elements but not in scope
3121 wakaba 1.79 !!!cp ('t55');
3122 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name},
3123     token => $end_tag_token);
3124 wakaba 1.1 ## Ignore the token
3125     !!!next-token;
3126     return;
3127     }
3128     } elsif ({
3129 wakaba 1.103 applet => 1, table => 1, caption => 1, td => 1, th => 1,
3130 wakaba 1.1 button => 1, marquee => 1, object => 1, html => 1,
3131     }->{$node->[1]}) {
3132 wakaba 1.79 !!!cp ('t56');
3133 wakaba 1.1 $in_scope = 0;
3134     }
3135     } # INSCOPE
3136     unless (defined $formatting_element_i_in_open) {
3137 wakaba 1.79 !!!cp ('t57');
3138 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name},
3139     token => $end_tag_token);
3140 wakaba 1.1 pop @$active_formatting_elements; # $formatting_element
3141     !!!next-token; ## TODO: ok?
3142     return;
3143     }
3144 wakaba 1.3 if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
3145 wakaba 1.79 !!!cp ('t58');
3146 wakaba 1.113 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1],
3147     token => $end_tag_token);
3148 wakaba 1.1 }
3149    
3150     ## Step 2
3151     my $furthest_block;
3152     my $furthest_block_i_in_open;
3153 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3154     my $node = $self->{open_elements}->[$_];
3155 wakaba 1.1 if (not $formatting_category->{$node->[1]} and
3156     #not $phrasing_category->{$node->[1]} and
3157     ($special_category->{$node->[1]} or
3158 wakaba 1.103 $scoping_category->{$node->[1]})) { ## Scoping is redundant, maybe
3159 wakaba 1.79 !!!cp ('t59');
3160 wakaba 1.1 $furthest_block = $node;
3161     $furthest_block_i_in_open = $_;
3162     } elsif ($node->[0] eq $formatting_element->[0]) {
3163 wakaba 1.79 !!!cp ('t60');
3164 wakaba 1.1 last OE;
3165     }
3166     } # OE
3167    
3168     ## Step 3
3169     unless (defined $furthest_block) { # MUST
3170 wakaba 1.79 !!!cp ('t61');
3171 wakaba 1.3 splice @{$self->{open_elements}}, $formatting_element_i_in_open;
3172 wakaba 1.1 splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
3173     !!!next-token;
3174     return;
3175     }
3176    
3177     ## Step 4
3178 wakaba 1.3 my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1];
3179 wakaba 1.1
3180     ## Step 5
3181     my $furthest_block_parent = $furthest_block->[0]->parent_node;
3182     if (defined $furthest_block_parent) {
3183 wakaba 1.79 !!!cp ('t62');
3184 wakaba 1.1 $furthest_block_parent->remove_child ($furthest_block->[0]);
3185     }
3186    
3187     ## Step 6
3188     my $bookmark_prev_el
3189     = $active_formatting_elements->[$formatting_element_i_in_active - 1]
3190     ->[0];
3191    
3192     ## Step 7
3193     my $node = $furthest_block;
3194     my $node_i_in_open = $furthest_block_i_in_open;
3195     my $last_node = $furthest_block;
3196     S7: {
3197     ## Step 1
3198     $node_i_in_open--;
3199 wakaba 1.3 $node = $self->{open_elements}->[$node_i_in_open];
3200 wakaba 1.1
3201     ## Step 2
3202     my $node_i_in_active;
3203     S7S2: {
3204     for (reverse 0..$#$active_formatting_elements) {
3205     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
3206 wakaba 1.79 !!!cp ('t63');
3207 wakaba 1.1 $node_i_in_active = $_;
3208     last S7S2;
3209     }
3210     }
3211 wakaba 1.3 splice @{$self->{open_elements}}, $node_i_in_open, 1;
3212 wakaba 1.1 redo S7;
3213     } # S7S2
3214    
3215     ## Step 3
3216     last S7 if $node->[0] eq $formatting_element->[0];
3217    
3218     ## Step 4
3219     if ($last_node->[0] eq $furthest_block->[0]) {
3220 wakaba 1.79 !!!cp ('t64');
3221 wakaba 1.1 $bookmark_prev_el = $node->[0];
3222     }
3223    
3224     ## Step 5
3225     if ($node->[0]->has_child_nodes ()) {
3226 wakaba 1.79 !!!cp ('t65');
3227 wakaba 1.1 my $clone = [$node->[0]->clone_node (0), $node->[1]];
3228     $active_formatting_elements->[$node_i_in_active] = $clone;
3229 wakaba 1.3 $self->{open_elements}->[$node_i_in_open] = $clone;
3230 wakaba 1.1 $node = $clone;
3231     }
3232    
3233     ## Step 6
3234     $node->[0]->append_child ($last_node->[0]);
3235    
3236     ## Step 7
3237     $last_node = $node;
3238    
3239     ## Step 8
3240     redo S7;
3241     } # S7
3242    
3243     ## Step 8
3244 wakaba 1.102 if ({
3245     table => 1, tbody => 1, tfoot => 1, thead => 1, tr => 1,
3246     }->{$common_ancestor_node->[1]}) {
3247     my $foster_parent_element;
3248     my $next_sibling;
3249     OE: for (reverse 0..$#{$self->{open_elements}}) {
3250     if ($self->{open_elements}->[$_]->[1] eq 'table') {
3251     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
3252     if (defined $parent and $parent->node_type == 1) {
3253     !!!cp ('t65.1');
3254     $foster_parent_element = $parent;
3255     $next_sibling = $self->{open_elements}->[$_]->[0];
3256     } else {
3257     !!!cp ('t65.2');
3258     $foster_parent_element
3259     = $self->{open_elements}->[$_ - 1]->[0];
3260     }
3261     last OE;
3262     }
3263     } # OE
3264     $foster_parent_element = $self->{open_elements}->[0]->[0]
3265     unless defined $foster_parent_element;
3266     $foster_parent_element->insert_before ($last_node->[0], $next_sibling);
3267     $open_tables->[-1]->[1] = 1; # tainted
3268     } else {
3269     !!!cp ('t65.3');
3270     $common_ancestor_node->[0]->append_child ($last_node->[0]);
3271     }
3272 wakaba 1.1
3273     ## Step 9
3274     my $clone = [$formatting_element->[0]->clone_node (0),
3275     $formatting_element->[1]];
3276    
3277     ## Step 10
3278     my @cn = @{$furthest_block->[0]->child_nodes};
3279     $clone->[0]->append_child ($_) for @cn;
3280    
3281     ## Step 11
3282     $furthest_block->[0]->append_child ($clone->[0]);
3283    
3284     ## Step 12
3285     my $i;
3286     AFE: for (reverse 0..$#$active_formatting_elements) {
3287     if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
3288 wakaba 1.79 !!!cp ('t66');
3289 wakaba 1.1 splice @$active_formatting_elements, $_, 1;
3290     $i-- and last AFE if defined $i;
3291     } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
3292 wakaba 1.79 !!!cp ('t67');
3293 wakaba 1.1 $i = $_;
3294     }
3295     } # AFE
3296     splice @$active_formatting_elements, $i + 1, 0, $clone;
3297    
3298     ## Step 13
3299     undef $i;
3300 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3301     if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
3302 wakaba 1.79 !!!cp ('t68');
3303 wakaba 1.3 splice @{$self->{open_elements}}, $_, 1;
3304 wakaba 1.1 $i-- and last OE if defined $i;
3305 wakaba 1.3 } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
3306 wakaba 1.79 !!!cp ('t69');
3307 wakaba 1.1 $i = $_;
3308     }
3309     } # OE
3310 wakaba 1.3 splice @{$self->{open_elements}}, $i + 1, 1, $clone;
3311 wakaba 1.1
3312     ## Step 14
3313     redo FET;
3314     } # FET
3315     }; # $formatting_end_tag
3316    
## $insert_to_current->($node): default insertion routine.  Appends the
## given node ($_[0]) as the last child of the node held by the last entry
## of |$self->{open_elements}|.  The same code reference is also stored in
## |$insert| by the chained assignment below.
3317 wakaba 1.96 $insert = my $insert_to_current = sub {
3318 wakaba 1.25 $self->{open_elements}->[-1]->[0]->append_child ($_[0]);
3319 wakaba 1.1 }; # $insert_to_current
3320    
## $insert_to_foster->($child): insertion routine with foster parenting.
##
## If the name (|->[1]|) of the last entry of |$self->{open_elements}| is
## one of |table|, |tbody|, |tfoot|, |thead| or |tr|, $child is not
## appended there; it is inserted into a "foster parent" chosen as follows:
##   - Scan |$self->{open_elements}| from the last entry towards the first
##     for an entry named |table|.
##   - If that table node has a parent whose |node_type| is 1 (presumably
##     ELEMENT_NODE -- confirm against the DOM implementation), the parent
##     is the foster parent and $child is inserted just before the table.
##   - Otherwise the foster parent is the node of the entry immediately
##     before the table in |$self->{open_elements}|.
##   - If no |table| entry is found, the node of the first entry of
##     |$self->{open_elements}| is used.
## In the latter two cases |$next_sibling| stays undefined when passed to
## |insert_before| (presumably this appends -- TODO confirm).
## After fostering, |$open_tables->[-1]->[1]| is set to 1 ("tainted").
##
## For any other last open element, $child is simply appended to it.
3321     my $insert_to_foster = sub {
3322 wakaba 1.95 my $child = shift;
3323     if ({
3324     table => 1, tbody => 1, tfoot => 1, thead => 1, tr => 1,
3325     }->{$self->{open_elements}->[-1]->[1]}) {
3326     # MUST
3327     my $foster_parent_element;
3328     my $next_sibling;
## Find the |table| entry nearest to the end of the stack.
3329 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3330     if ($self->{open_elements}->[$_]->[1] eq 'table') {
3331     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
3332 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
3333 wakaba 1.79 !!!cp ('t70');
## Table has a parent with node_type 1: insert before the table there.
3334 wakaba 1.1 $foster_parent_element = $parent;
3335 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
3336 wakaba 1.1 } else {
3337 wakaba 1.79 !!!cp ('t71');
## No such parent: use the entry just before the table in the stack.
3338 wakaba 1.1 $foster_parent_element
3339 wakaba 1.3 = $self->{open_elements}->[$_ - 1]->[0];
3340 wakaba 1.1 }
3341     last OE;
3342     }
3343     } # OE
## No |table| entry was found: fall back to the first open element.
3344 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0]
3345 wakaba 1.1 unless defined $foster_parent_element;
3346     $foster_parent_element->insert_before
3347     ($child, $next_sibling);
3348 wakaba 1.95 $open_tables->[-1]->[1] = 1; # tainted
3349     } else {
3350     !!!cp ('t72');
## Last open element is not table-related: plain append.
3351     $self->{open_elements}->[-1]->[0]->append_child ($child);
3352     }
3353 wakaba 1.1 }; # $insert_to_foster
3354    
3355 wakaba 1.52 B: {
3356 wakaba 1.55 if ($token->{type} == DOCTYPE_TOKEN) {
3357 wakaba 1.79 !!!cp ('t73');
3358 wakaba 1.113 !!!parse-error (type => 'DOCTYPE in the middle', token => $token);
3359 wakaba 1.52 ## Ignore the token
3360     ## Stay in the phase
3361     !!!next-token;
3362     redo B;
3363 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN and
3364 wakaba 1.52 $token->{tag_name} eq 'html') {
3365 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
3366 wakaba 1.79 !!!cp ('t79');
3367 wakaba 1.113 !!!parse-error (type => 'after html:html', token => $token);
3368 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
3369     } elsif ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
3370 wakaba 1.79 !!!cp ('t80');
3371 wakaba 1.113 !!!parse-error (type => 'after html:html', token => $token);
3372 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
3373 wakaba 1.79 } else {
3374     !!!cp ('t81');
3375 wakaba 1.52 }
3376    
3377 wakaba 1.84 !!!cp ('t82');
3378 wakaba 1.113 !!!parse-error (type => 'not first start tag', token => $token);
3379 wakaba 1.52 my $top_el = $self->{open_elements}->[0]->[0];
3380     for my $attr_name (keys %{$token->{attributes}}) {
3381     unless ($top_el->has_attribute_ns (undef, $attr_name)) {
3382 wakaba 1.79 !!!cp ('t84');
3383 wakaba 1.52 $top_el->set_attribute_ns
3384     (undef, [undef, $attr_name],
3385     $token->{attributes}->{$attr_name}->{value});
3386     }
3387     }
3388     !!!next-token;
3389     redo B;
3390 wakaba 1.55 } elsif ($token->{type} == COMMENT_TOKEN) {
3391 wakaba 1.52 my $comment = $self->{document}->create_comment ($token->{data});
3392 wakaba 1.56 if ($self->{insertion_mode} & AFTER_HTML_IMS) {
3393 wakaba 1.79 !!!cp ('t85');
3394 wakaba 1.52 $self->{document}->append_child ($comment);
3395 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_BODY_IM) {
3396 wakaba 1.79 !!!cp ('t86');
3397 wakaba 1.52 $self->{open_elements}->[0]->[0]->append_child ($comment);
3398     } else {
3399 wakaba 1.79 !!!cp ('t87');
3400 wakaba 1.52 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3401     }
3402     !!!next-token;
3403     redo B;
3404 wakaba 1.56 } elsif ($self->{insertion_mode} & HEAD_IMS) {
3405 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
3406 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3407 wakaba 1.99 unless ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3408     !!!cp ('t88.2');
3409     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3410     } else {
3411     !!!cp ('t88.1');
3412     ## Ignore the token.
3413     !!!next-token;
3414     redo B;
3415     }
3416 wakaba 1.52 unless (length $token->{data}) {
3417 wakaba 1.79 !!!cp ('t88');
3418 wakaba 1.52 !!!next-token;
3419     redo B;
3420 wakaba 1.1 }
3421     }
3422 wakaba 1.52
3423 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3424 wakaba 1.79 !!!cp ('t89');
3425 wakaba 1.52 ## As if <head>
3426 wakaba 1.116 !!!create-element ($self->{head_element}, 'head',, $token);
3427 wakaba 1.52 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3428     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3429    
3430     ## Reprocess in the "in head" insertion mode...
3431     pop @{$self->{open_elements}};
3432    
3433     ## Reprocess in the "after head" insertion mode...
3434 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3435 wakaba 1.79 !!!cp ('t90');
3436 wakaba 1.52 ## As if </noscript>
3437     pop @{$self->{open_elements}};
3438 wakaba 1.113 !!!parse-error (type => 'in noscript:#character', token => $token);
3439 wakaba 1.1
3440 wakaba 1.52 ## Reprocess in the "in head" insertion mode...
3441     ## As if </head>
3442     pop @{$self->{open_elements}};
3443    
3444     ## Reprocess in the "after head" insertion mode...
3445 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
3446 wakaba 1.79 !!!cp ('t91');
3447 wakaba 1.52 pop @{$self->{open_elements}};
3448    
3449     ## Reprocess in the "after head" insertion mode...
3450 wakaba 1.79 } else {
3451     !!!cp ('t92');
3452 wakaba 1.1 }
3453 wakaba 1.52
3454     ## "after head" insertion mode
3455     ## As if <body>
3456 wakaba 1.116 !!!insert-element ('body',, $token);
3457 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
3458 wakaba 1.52 ## reprocess
3459     redo B;
3460 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
3461 wakaba 1.52 if ($token->{tag_name} eq 'head') {
3462 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3463 wakaba 1.79 !!!cp ('t93');
3464 wakaba 1.116 !!!create-element ($self->{head_element}, $token->{tag_name}, $token->{attributes}, $token);
3465 wakaba 1.52 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3466     push @{$self->{open_elements}}, [$self->{head_element}, $token->{tag_name}];
3467 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
3468 wakaba 1.52 !!!next-token;
3469     redo B;
3470 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
3471 wakaba 1.79 !!!cp ('t94');
3472 wakaba 1.54 #
3473     } else {
3474 wakaba 1.79 !!!cp ('t95');
3475 wakaba 1.113 !!!parse-error (type => 'in head:head', token => $token); # or in head noscript
3476 wakaba 1.52 ## Ignore the token
3477     !!!next-token;
3478     redo B;
3479     }
3480 wakaba 1.54 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3481 wakaba 1.79 !!!cp ('t96');
3482 wakaba 1.52 ## As if <head>
3483 wakaba 1.116 !!!create-element ($self->{head_element}, 'head',, $token);
3484 wakaba 1.52 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3485     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3486    
3487 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
3488 wakaba 1.52 ## Reprocess in the "in head" insertion mode...
3489 wakaba 1.79 } else {
3490     !!!cp ('t97');
3491 wakaba 1.1 }
3492 wakaba 1.52
3493 wakaba 1.49 if ($token->{tag_name} eq 'base') {
3494 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3495 wakaba 1.79 !!!cp ('t98');
3496 wakaba 1.49 ## As if </noscript>
3497     pop @{$self->{open_elements}};
3498 wakaba 1.113 !!!parse-error (type => 'in noscript:base', token => $token);
3499 wakaba 1.49
3500 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
3501 wakaba 1.49 ## Reprocess in the "in head" insertion mode...
3502 wakaba 1.79 } else {
3503     !!!cp ('t99');
3504 wakaba 1.49 }
3505    
3506     ## NOTE: There is a "as if in head" code clone.
3507 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
3508 wakaba 1.79 !!!cp ('t100');
3509 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
3510 wakaba 1.49 push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3511 wakaba 1.79 } else {
3512     !!!cp ('t101');
3513 wakaba 1.49 }
3514 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3515 wakaba 1.49 pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
3516 wakaba 1.100 pop @{$self->{open_elements}} # <head>
3517 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
3518 wakaba 1.49 !!!next-token;
3519     redo B;
3520     } elsif ($token->{tag_name} eq 'link') {
3521 wakaba 1.25 ## NOTE: There is a "as if in head" code clone.
3522 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
3523 wakaba 1.79 !!!cp ('t102');
3524 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
3525 wakaba 1.25 push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3526 wakaba 1.79 } else {
3527     !!!cp ('t103');
3528 wakaba 1.25 }
3529 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3530 wakaba 1.25 pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
3531 wakaba 1.100 pop @{$self->{open_elements}} # <head>
3532 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
3533 wakaba 1.1 !!!next-token;
3534 wakaba 1.25 redo B;
3535 wakaba 1.34 } elsif ($token->{tag_name} eq 'meta') {
3536     ## NOTE: There is a "as if in head" code clone.
3537 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
3538 wakaba 1.79 !!!cp ('t104');
3539 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
3540 wakaba 1.34 push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3541 wakaba 1.79 } else {
3542     !!!cp ('t105');
3543 wakaba 1.34 }
3544 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3545 wakaba 1.66 my $meta_el = pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
3546 wakaba 1.34
3547     unless ($self->{confident}) {
3548     if ($token->{attributes}->{charset}) { ## TODO: And if supported
3549 wakaba 1.79 !!!cp ('t106');
3550 wakaba 1.63 $self->{change_encoding}
3551 wakaba 1.114 ->($self, $token->{attributes}->{charset}->{value},
3552     $token);
3553 wakaba 1.66
3554     $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
3555     ->set_user_data (manakai_has_reference =>
3556     $token->{attributes}->{charset}
3557     ->{has_reference});
3558 wakaba 1.63 } elsif ($token->{attributes}->{content}) {
3559 wakaba 1.35 ## ISSUE: The algorithm name in the spec was incorrect, so it was not linked to the definition.
3560 wakaba 1.63 if ($token->{attributes}->{content}->{value}
3561 wakaba 1.70 =~ /\A[^;]*;[\x09-\x0D\x20]*[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
3562     [\x09-\x0D\x20]*=
3563 wakaba 1.34 [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
3564     ([^"'\x09-\x0D\x20][^\x09-\x0D\x20]*))/x) {
3565 wakaba 1.79 !!!cp ('t107');
3566 wakaba 1.63 $self->{change_encoding}
3567 wakaba 1.114 ->($self, defined $1 ? $1 : defined $2 ? $2 : $3,
3568     $token);
3569 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
3570     ->set_user_data (manakai_has_reference =>
3571     $token->{attributes}->{content}
3572     ->{has_reference});
3573 wakaba 1.79 } else {
3574     !!!cp ('t108');
3575 wakaba 1.63 }
3576 wakaba 1.34 }
3577 wakaba 1.66 } else {
3578     if ($token->{attributes}->{charset}) {
3579 wakaba 1.79 !!!cp ('t109');
3580 wakaba 1.66 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
3581     ->set_user_data (manakai_has_reference =>
3582     $token->{attributes}->{charset}
3583     ->{has_reference});
3584     }
3585 wakaba 1.68 if ($token->{attributes}->{content}) {
3586 wakaba 1.79 !!!cp ('t110');
3587 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
3588     ->set_user_data (manakai_has_reference =>
3589     $token->{attributes}->{content}
3590     ->{has_reference});
3591     }
3592 wakaba 1.34 }
3593    
3594 wakaba 1.100 pop @{$self->{open_elements}} # <head>
3595 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
3596 wakaba 1.34 !!!next-token;
3597     redo B;
3598 wakaba 1.49 } elsif ($token->{tag_name} eq 'title') {
3599 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3600 wakaba 1.79 !!!cp ('t111');
3601 wakaba 1.49 ## As if </noscript>
3602     pop @{$self->{open_elements}};
3603 wakaba 1.113 !!!parse-error (type => 'in noscript:title', token => $token);
3604 wakaba 1.49
3605 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
3606 wakaba 1.49 ## Reprocess in the "in head" insertion mode...
3607 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
3608 wakaba 1.79 !!!cp ('t112');
3609 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
3610 wakaba 1.25 push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3611 wakaba 1.79 } else {
3612     !!!cp ('t113');
3613 wakaba 1.25 }
3614 wakaba 1.49
3615     ## NOTE: There is a "as if in head" code clone.
3616 wakaba 1.31 my $parent = defined $self->{head_element} ? $self->{head_element}
3617     : $self->{open_elements}->[-1]->[0];
3618 wakaba 1.96 $parse_rcdata->(RCDATA_CONTENT_MODEL);
3619 wakaba 1.100 pop @{$self->{open_elements}} # <head>
3620 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
3621 wakaba 1.25 redo B;
3622     } elsif ($token->{tag_name} eq 'style') {
3623     ## NOTE: Or (scripting is enabled and tag_name eq 'noscript' and
3624 wakaba 1.54 ## insertion mode IN_HEAD_IM)
3625 wakaba 1.25 ## NOTE: There is a "as if in head" code clone.
3626 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
3627 wakaba 1.79 !!!cp ('t114');
3628 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
3629 wakaba 1.25 push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3630 wakaba 1.79 } else {
3631     !!!cp ('t115');
3632 wakaba 1.25 }
3633 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
3634 wakaba 1.100 pop @{$self->{open_elements}} # <head>
3635 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
3636 wakaba 1.25 redo B;
3637     } elsif ($token->{tag_name} eq 'noscript') {
3638 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_IM) {
3639 wakaba 1.79 !!!cp ('t116');
3640 wakaba 1.25 ## NOTE: and scripting is disabled
3641 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3642 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_NOSCRIPT_IM;
3643 wakaba 1.1 !!!next-token;
3644 wakaba 1.25 redo B;
3645 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3646 wakaba 1.79 !!!cp ('t117');
3647 wakaba 1.113 !!!parse-error (type => 'in noscript:noscript', token => $token);
3648 wakaba 1.1 ## Ignore the token
3649 wakaba 1.41 !!!next-token;
3650 wakaba 1.25 redo B;
3651 wakaba 1.1 } else {
3652 wakaba 1.79 !!!cp ('t118');
3653 wakaba 1.25 #
3654 wakaba 1.1 }
3655 wakaba 1.49 } elsif ($token->{tag_name} eq 'script') {
3656 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3657 wakaba 1.79 !!!cp ('t119');
3658 wakaba 1.49 ## As if </noscript>
3659     pop @{$self->{open_elements}};
3660 wakaba 1.113 !!!parse-error (type => 'in noscript:script', token => $token);
3661 wakaba 1.49
3662 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
3663 wakaba 1.49 ## Reprocess in the "in head" insertion mode...
3664 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
3665 wakaba 1.79 !!!cp ('t120');
3666 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
3667 wakaba 1.25 push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3668 wakaba 1.79 } else {
3669     !!!cp ('t121');
3670 wakaba 1.25 }
3671 wakaba 1.49
3672 wakaba 1.25 ## NOTE: There is a "as if in head" code clone.
3673 wakaba 1.100 $script_start_tag->();
3674     pop @{$self->{open_elements}} # <head>
3675 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
3676 wakaba 1.1 redo B;
3677 wakaba 1.49 } elsif ($token->{tag_name} eq 'body' or
3678 wakaba 1.25 $token->{tag_name} eq 'frameset') {
3679 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3680 wakaba 1.79 !!!cp ('t122');
3681 wakaba 1.49 ## As if </noscript>
3682     pop @{$self->{open_elements}};
3683 wakaba 1.113 !!!parse-error (type => 'in noscript:'.$token->{tag_name}, token => $token);
3684 wakaba 1.49
3685     ## Reprocess in the "in head" insertion mode...
3686     ## As if </head>
3687     pop @{$self->{open_elements}};
3688    
3689     ## Reprocess in the "after head" insertion mode...
3690 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
3691 wakaba 1.79 !!!cp ('t124');
3692 wakaba 1.49 pop @{$self->{open_elements}};
3693    
3694     ## Reprocess in the "after head" insertion mode...
3695 wakaba 1.79 } else {
3696     !!!cp ('t125');
3697 wakaba 1.49 }
3698    
3699     ## "after head" insertion mode
3700 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3701 wakaba 1.54 if ($token->{tag_name} eq 'body') {
3702 wakaba 1.79 !!!cp ('t126');
3703 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
3704     } elsif ($token->{tag_name} eq 'frameset') {
3705 wakaba 1.79 !!!cp ('t127');
3706 wakaba 1.54 $self->{insertion_mode} = IN_FRAMESET_IM;
3707     } else {
3708     die "$0: tag name: $self->{tag_name}";
3709     }
3710 wakaba 1.1 !!!next-token;
3711     redo B;
3712     } else {
3713 wakaba 1.79 !!!cp ('t128');
3714 wakaba 1.1 #
3715     }
3716 wakaba 1.49
3717 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3718 wakaba 1.79 !!!cp ('t129');
3719 wakaba 1.49 ## As if </noscript>
3720     pop @{$self->{open_elements}};
3721 wakaba 1.113 !!!parse-error (type => 'in noscript:/'.$token->{tag_name}, token => $token);
3722 wakaba 1.49
3723     ## Reprocess in the "in head" insertion mode...
3724     ## As if </head>
3725 wakaba 1.25 pop @{$self->{open_elements}};
3726 wakaba 1.49
3727     ## Reprocess in the "after head" insertion mode...
3728 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
3729 wakaba 1.79 !!!cp ('t130');
3730 wakaba 1.49 ## As if </head>
3731 wakaba 1.25 pop @{$self->{open_elements}};
3732 wakaba 1.49
3733     ## Reprocess in the "after head" insertion mode...
3734 wakaba 1.79 } else {
3735     !!!cp ('t131');
3736 wakaba 1.49 }
3737    
3738     ## "after head" insertion mode
3739     ## As if <body>
3740 wakaba 1.116 !!!insert-element ('body',, $token);
3741 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
3742 wakaba 1.49 ## reprocess
3743     redo B;
3744 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
3745 wakaba 1.49 if ($token->{tag_name} eq 'head') {
3746 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3747 wakaba 1.79 !!!cp ('t132');
3748 wakaba 1.50 ## As if <head>
3749 wakaba 1.116 !!!create-element ($self->{head_element}, 'head',, $token);
3750 wakaba 1.50 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3751     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3752    
3753     ## Reprocess in the "in head" insertion mode...
3754     pop @{$self->{open_elements}};
3755 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
3756 wakaba 1.50 !!!next-token;
3757     redo B;
3758 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3759 wakaba 1.79 !!!cp ('t133');
3760 wakaba 1.49 ## As if </noscript>
3761     pop @{$self->{open_elements}};
3762 wakaba 1.113 !!!parse-error (type => 'in noscript:/head', token => $token);
3763 wakaba 1.49
3764     ## Reprocess in the "in head" insertion mode...
3765 wakaba 1.50 pop @{$self->{open_elements}};
3766 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
3767 wakaba 1.50 !!!next-token;
3768     redo B;
3769 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
3770 wakaba 1.79 !!!cp ('t134');
3771 wakaba 1.49 pop @{$self->{open_elements}};
3772 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
3773 wakaba 1.49 !!!next-token;
3774     redo B;
3775     } else {
3776 wakaba 1.79 !!!cp ('t135');
3777 wakaba 1.49 #
3778     }
3779     } elsif ($token->{tag_name} eq 'noscript') {
3780 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3781 wakaba 1.79 !!!cp ('t136');
3782 wakaba 1.49 pop @{$self->{open_elements}};
3783 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
3784 wakaba 1.49 !!!next-token;
3785     redo B;
3786 wakaba 1.54 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3787 wakaba 1.79 !!!cp ('t137');
3788 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:noscript', token => $token);
3789 wakaba 1.50 ## Ignore the token ## ISSUE: An issue in the spec.
3790     !!!next-token;
3791     redo B;
3792 wakaba 1.49 } else {
3793 wakaba 1.79 !!!cp ('t138');
3794 wakaba 1.49 #
3795     }
3796     } elsif ({
3797 wakaba 1.31 body => 1, html => 1,
3798     }->{$token->{tag_name}}) {
3799 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3800 wakaba 1.79 !!!cp ('t139');
3801 wakaba 1.50 ## As if <head>
3802 wakaba 1.116 !!!create-element ($self->{head_element}, 'head',, $token);
3803 wakaba 1.50 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3804     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3805    
3806 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
3807 wakaba 1.50 ## Reprocess in the "in head" insertion mode...
3808 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3809 wakaba 1.79 !!!cp ('t140');
3810 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
3811 wakaba 1.49 ## Ignore the token
3812     !!!next-token;
3813     redo B;
3814 wakaba 1.79 } else {
3815     !!!cp ('t141');
3816 wakaba 1.49 }
3817 wakaba 1.50
3818     #
3819 wakaba 1.49 } elsif ({
3820 wakaba 1.31 p => 1, br => 1,
3821     }->{$token->{tag_name}}) {
3822 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3823 wakaba 1.79 !!!cp ('t142');
3824 wakaba 1.50 ## As if <head>
3825 wakaba 1.116 !!!create-element ($self->{head_element}, 'head',, $token);
3826 wakaba 1.50 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3827     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3828    
3829 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
3830 wakaba 1.50 ## Reprocess in the "in head" insertion mode...
3831 wakaba 1.79 } else {
3832     !!!cp ('t143');
3833 wakaba 1.50 }
3834    
3835 wakaba 1.1 #
3836 wakaba 1.25 } else {
3837 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
3838 wakaba 1.79 !!!cp ('t144');
3839 wakaba 1.54 #
3840     } else {
3841 wakaba 1.79 !!!cp ('t145');
3842 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
3843 wakaba 1.49 ## Ignore the token
3844     !!!next-token;
3845     redo B;
3846     }
3847     }
3848    
3849 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3850 wakaba 1.79 !!!cp ('t146');
3851 wakaba 1.49 ## As if </noscript>
3852     pop @{$self->{open_elements}};
3853 wakaba 1.113 !!!parse-error (type => 'in noscript:/'.$token->{tag_name}, token => $token);
3854 wakaba 1.49
3855     ## Reprocess in the "in head" insertion mode...
3856     ## As if </head>
3857     pop @{$self->{open_elements}};
3858    
3859     ## Reprocess in the "after head" insertion mode...
3860 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
3861 wakaba 1.79 !!!cp ('t147');
3862 wakaba 1.49 ## As if </head>
3863     pop @{$self->{open_elements}};
3864    
3865     ## Reprocess in the "after head" insertion mode...
3866 wakaba 1.54 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3867 wakaba 1.82 ## ISSUE: This case cannot be reached?
3868 wakaba 1.79 !!!cp ('t148');
3869 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
3870 wakaba 1.50 ## Ignore the token ## ISSUE: An issue in the spec.
3871     !!!next-token;
3872     redo B;
3873 wakaba 1.79 } else {
3874     !!!cp ('t149');
3875 wakaba 1.1 }
3876    
3877 wakaba 1.49 ## "after head" insertion mode
3878     ## As if <body>
3879 wakaba 1.116 !!!insert-element ('body',, $token);
3880 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
3881 wakaba 1.52 ## reprocess
3882     redo B;
3883 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
3884     if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3885     !!!cp ('t149.1');
3886    
3887     ## NOTE: As if <head>
3888 wakaba 1.116 !!!create-element ($self->{head_element}, 'head',, $token);
3889 wakaba 1.104 $self->{open_elements}->[-1]->[0]->append_child
3890     ($self->{head_element});
3891     #push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3892     #$self->{insertion_mode} = IN_HEAD_IM;
3893     ## NOTE: Reprocess.
3894    
3895     ## NOTE: As if </head>
3896     #pop @{$self->{open_elements}};
3897     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
3898     ## NOTE: Reprocess.
3899    
3900     #
3901     } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
3902     !!!cp ('t149.2');
3903    
3904     ## NOTE: As if </head>
3905     pop @{$self->{open_elements}};
3906     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
3907     ## NOTE: Reprocess.
3908    
3909     #
3910     } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3911     !!!cp ('t149.3');
3912    
3913 wakaba 1.113 !!!parse-error (type => 'in noscript:#eof', token => $token);
3914 wakaba 1.104
3915     ## As if </noscript>
3916     pop @{$self->{open_elements}};
3917     #$self->{insertion_mode} = IN_HEAD_IM;
3918     ## NOTE: Reprocess.
3919    
3920     ## NOTE: As if </head>
3921     pop @{$self->{open_elements}};
3922     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
3923     ## NOTE: Reprocess.
3924    
3925     #
3926     } else {
3927     !!!cp ('t149.4');
3928     #
3929     }
3930    
3931     ## NOTE: As if <body>
3932 wakaba 1.116 !!!insert-element ('body',, $token);
3933 wakaba 1.104 $self->{insertion_mode} = IN_BODY_IM;
3934     ## NOTE: Reprocess.
3935     redo B;
3936     } else {
3937     die "$0: $token->{type}: Unknown token type";
3938     }
3939 wakaba 1.52
3940     ## ISSUE: An issue in the spec.
3941 wakaba 1.56 } elsif ($self->{insertion_mode} & BODY_IMS) {
3942 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
3943 wakaba 1.79 !!!cp ('t150');
3944 wakaba 1.52 ## NOTE: There is a code clone of "character in body".
3945     $reconstruct_active_formatting_elements->($insert_to_current);
3946    
3947     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3948    
3949     !!!next-token;
3950     redo B;
3951 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
3952 wakaba 1.52 if ({
3953     caption => 1, col => 1, colgroup => 1, tbody => 1,
3954     td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
3955     }->{$token->{tag_name}}) {
3956 wakaba 1.54 if ($self->{insertion_mode} == IN_CELL_IM) {
3957 wakaba 1.52 ## have an element in table scope
3958 wakaba 1.108 for (reverse 0..$#{$self->{open_elements}}) {
3959 wakaba 1.52 my $node = $self->{open_elements}->[$_];
3960     if ($node->[1] eq 'td' or $node->[1] eq 'th') {
3961 wakaba 1.79 !!!cp ('t151');
3962 wakaba 1.108
3963     ## Close the cell
3964     !!!back-token; # <?>
3965 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => $node->[1],
3966     line => $token->{line},
3967     column => $token->{column}};
3968 wakaba 1.108 redo B;
3969 wakaba 1.52 } elsif ({
3970     table => 1, html => 1,
3971     }->{$node->[1]}) {
3972 wakaba 1.79 !!!cp ('t152');
3973 wakaba 1.108 ## ISSUE: This case can never be reached, maybe.
3974     last;
3975 wakaba 1.52 }
3976 wakaba 1.108 }
3977    
3978     !!!cp ('t153');
3979     !!!parse-error (type => 'start tag not allowed',
3980 wakaba 1.113 value => $token->{tag_name}, token => $token);
3981 wakaba 1.108 ## Ignore the token
3982     !!!next-token;
3983 wakaba 1.52 redo B;
3984 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_CAPTION_IM) {
3985 wakaba 1.113 !!!parse-error (type => 'not closed:caption', token => $token);
3986 wakaba 1.52
3987 wakaba 1.108 ## NOTE: As if </caption>.
3988 wakaba 1.52 ## have a table element in table scope
3989     my $i;
3990 wakaba 1.108 INSCOPE: {
3991     for (reverse 0..$#{$self->{open_elements}}) {
3992     my $node = $self->{open_elements}->[$_];
3993     if ($node->[1] eq 'caption') {
3994     !!!cp ('t155');
3995     $i = $_;
3996     last INSCOPE;
3997     } elsif ({
3998     table => 1, html => 1,
3999     }->{$node->[1]}) {
4000     !!!cp ('t156');
4001     last;
4002     }
4003 wakaba 1.52 }
4004 wakaba 1.108
4005     !!!cp ('t157');
4006     !!!parse-error (type => 'start tag not allowed',
4007 wakaba 1.113 value => $token->{tag_name}, token => $token);
4008 wakaba 1.108 ## Ignore the token
4009     !!!next-token;
4010     redo B;
4011 wakaba 1.52 } # INSCOPE
4012    
4013     ## generate implied end tags
4014 wakaba 1.86 while ({
4015     dd => 1, dt => 1, li => 1, p => 1,
4016     }->{$self->{open_elements}->[-1]->[1]}) {
4017 wakaba 1.79 !!!cp ('t158');
4018 wakaba 1.86 pop @{$self->{open_elements}};
4019 wakaba 1.52 }
4020    
4021     if ($self->{open_elements}->[-1]->[1] ne 'caption') {
4022 wakaba 1.79 !!!cp ('t159');
4023 wakaba 1.113 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1], token => $token);
4024 wakaba 1.79 } else {
4025     !!!cp ('t160');
4026 wakaba 1.52 }
4027    
4028     splice @{$self->{open_elements}}, $i;
4029    
4030     $clear_up_to_marker->();
4031    
4032 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
4033 wakaba 1.52
4034     ## reprocess
4035     redo B;
4036     } else {
4037 wakaba 1.79 !!!cp ('t161');
4038 wakaba 1.52 #
4039     }
4040     } else {
4041 wakaba 1.79 !!!cp ('t162');
4042 wakaba 1.52 #
4043     }
4044 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
4045 wakaba 1.52 if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
4046 wakaba 1.54 if ($self->{insertion_mode} == IN_CELL_IM) {
4047 wakaba 1.43 ## have an element in table scope
4048 wakaba 1.52 my $i;
4049 wakaba 1.43 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4050     my $node = $self->{open_elements}->[$_];
4051 wakaba 1.52 if ($node->[1] eq $token->{tag_name}) {
4052 wakaba 1.79 !!!cp ('t163');
4053 wakaba 1.52 $i = $_;
4054 wakaba 1.43 last INSCOPE;
4055     } elsif ({
4056     table => 1, html => 1,
4057     }->{$node->[1]}) {
4058 wakaba 1.79 !!!cp ('t164');
4059 wakaba 1.43 last INSCOPE;
4060     }
4061     } # INSCOPE
4062 wakaba 1.52 unless (defined $i) {
4063 wakaba 1.79 !!!cp ('t165');
4064 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4065 wakaba 1.43 ## Ignore the token
4066     !!!next-token;
4067     redo B;
4068     }
4069    
4070 wakaba 1.52 ## generate implied end tags
4071 wakaba 1.86 while ({
4072     dd => 1, dt => 1, li => 1, p => 1,
4073     }->{$self->{open_elements}->[-1]->[1]}) {
4074 wakaba 1.79 !!!cp ('t166');
4075 wakaba 1.86 pop @{$self->{open_elements}};
4076 wakaba 1.52 }
4077 wakaba 1.86
4078 wakaba 1.52 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
4079 wakaba 1.79 !!!cp ('t167');
4080 wakaba 1.113 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1], token => $token);
4081 wakaba 1.79 } else {
4082     !!!cp ('t168');
4083 wakaba 1.52 }
4084    
4085     splice @{$self->{open_elements}}, $i;
4086    
4087     $clear_up_to_marker->();
4088    
4089 wakaba 1.54 $self->{insertion_mode} = IN_ROW_IM;
4090 wakaba 1.52
4091     !!!next-token;
4092 wakaba 1.43 redo B;
4093 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_CAPTION_IM) {
4094 wakaba 1.79 !!!cp ('t169');
4095 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4096 wakaba 1.52 ## Ignore the token
4097     !!!next-token;
4098     redo B;
4099     } else {
4100 wakaba 1.79 !!!cp ('t170');
4101 wakaba 1.52 #
4102     }
4103     } elsif ($token->{tag_name} eq 'caption') {
4104 wakaba 1.54 if ($self->{insertion_mode} == IN_CAPTION_IM) {
4105 wakaba 1.43 ## have a table element in table scope
4106     my $i;
4107 wakaba 1.108 INSCOPE: {
4108     for (reverse 0..$#{$self->{open_elements}}) {
4109     my $node = $self->{open_elements}->[$_];
4110     if ($node->[1] eq $token->{tag_name}) {
4111     !!!cp ('t171');
4112     $i = $_;
4113     last INSCOPE;
4114     } elsif ({
4115     table => 1, html => 1,
4116     }->{$node->[1]}) {
4117     !!!cp ('t172');
4118     last;
4119     }
4120 wakaba 1.43 }
4121 wakaba 1.108
4122     !!!cp ('t173');
4123     !!!parse-error (type => 'unmatched end tag',
4124 wakaba 1.113 value => $token->{tag_name}, token => $token);
4125 wakaba 1.108 ## Ignore the token
4126     !!!next-token;
4127     redo B;
4128 wakaba 1.43 } # INSCOPE
4129    
4130     ## generate implied end tags
4131 wakaba 1.86 while ({
4132     dd => 1, dt => 1, li => 1, p => 1,
4133     }->{$self->{open_elements}->[-1]->[1]}) {
4134 wakaba 1.79 !!!cp ('t174');
4135 wakaba 1.86 pop @{$self->{open_elements}};
4136 wakaba 1.43 }
4137 wakaba 1.52
4138     if ($self->{open_elements}->[-1]->[1] ne 'caption') {
4139 wakaba 1.79 !!!cp ('t175');
4140 wakaba 1.113 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1], token => $token);
4141 wakaba 1.79 } else {
4142     !!!cp ('t176');
4143 wakaba 1.52 }
4144    
4145     splice @{$self->{open_elements}}, $i;
4146    
4147     $clear_up_to_marker->();
4148    
4149 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
4150 wakaba 1.52
4151     !!!next-token;
4152     redo B;
4153 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_CELL_IM) {
4154 wakaba 1.79 !!!cp ('t177');
4155 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4156 wakaba 1.52 ## Ignore the token
4157     !!!next-token;
4158     redo B;
4159     } else {
4160 wakaba 1.79 !!!cp ('t178');
4161 wakaba 1.52 #
4162     }
4163     } elsif ({
4164     table => 1, tbody => 1, tfoot => 1,
4165     thead => 1, tr => 1,
4166     }->{$token->{tag_name}} and
4167 wakaba 1.54 $self->{insertion_mode} == IN_CELL_IM) {
4168 wakaba 1.52 ## have an element in table scope
4169     my $i;
4170     my $tn;
4171 wakaba 1.108 INSCOPE: {
4172     for (reverse 0..$#{$self->{open_elements}}) {
4173     my $node = $self->{open_elements}->[$_];
4174     if ($node->[1] eq $token->{tag_name}) {
4175     !!!cp ('t179');
4176     $i = $_;
4177    
4178     ## Close the cell
4179     !!!back-token; # </?>
4180 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => $tn,
4181     line => $token->{line},
4182     column => $token->{column}};
4183 wakaba 1.108 redo B;
4184     } elsif ($node->[1] eq 'td' or $node->[1] eq 'th') {
4185     !!!cp ('t180');
4186     $tn = $node->[1];
4187     ## NOTE: There is exactly one |td| or |th| element
4188     ## in scope in the stack of open elements by definition.
4189     } elsif ({
4190     table => 1, html => 1,
4191     }->{$node->[1]}) {
4192     ## ISSUE: Can this be reached?
4193     !!!cp ('t181');
4194     last;
4195     }
4196 wakaba 1.52 }
4197 wakaba 1.108
4198 wakaba 1.79 !!!cp ('t182');
4199 wakaba 1.108 !!!parse-error (type => 'unmatched end tag',
4200 wakaba 1.113 value => $token->{tag_name}, token => $token);
4201 wakaba 1.52 ## Ignore the token
4202     !!!next-token;
4203     redo B;
4204 wakaba 1.108 } # INSCOPE
4205 wakaba 1.52 } elsif ($token->{tag_name} eq 'table' and
4206 wakaba 1.54 $self->{insertion_mode} == IN_CAPTION_IM) {
4207 wakaba 1.113 !!!parse-error (type => 'not closed:caption', token => $token);
4208 wakaba 1.52
4209     ## As if </caption>
4210     ## have a table element in table scope
4211     my $i;
4212     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4213     my $node = $self->{open_elements}->[$_];
4214     if ($node->[1] eq 'caption') {
4215 wakaba 1.79 !!!cp ('t184');
4216 wakaba 1.52 $i = $_;
4217     last INSCOPE;
4218     } elsif ({
4219     table => 1, html => 1,
4220     }->{$node->[1]}) {
4221 wakaba 1.79 !!!cp ('t185');
4222 wakaba 1.52 last INSCOPE;
4223     }
4224     } # INSCOPE
4225     unless (defined $i) {
4226 wakaba 1.79 !!!cp ('t186');
4227 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:caption', token => $token);
4228 wakaba 1.52 ## Ignore the token
4229     !!!next-token;
4230     redo B;
4231     }
4232    
4233     ## generate implied end tags
4234 wakaba 1.86 while ({
4235     dd => 1, dt => 1, li => 1, p => 1,
4236     }->{$self->{open_elements}->[-1]->[1]}) {
4237 wakaba 1.79 !!!cp ('t187');
4238 wakaba 1.86 pop @{$self->{open_elements}};
4239 wakaba 1.52 }
4240    
4241     if ($self->{open_elements}->[-1]->[1] ne 'caption') {
4242 wakaba 1.79 !!!cp ('t188');
4243 wakaba 1.113 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1], token => $token);
4244 wakaba 1.79 } else {
4245     !!!cp ('t189');
4246 wakaba 1.52 }
4247    
4248     splice @{$self->{open_elements}}, $i;
4249    
4250     $clear_up_to_marker->();
4251    
4252 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
4253 wakaba 1.52
4254     ## reprocess
4255     redo B;
4256     } elsif ({
4257     body => 1, col => 1, colgroup => 1, html => 1,
4258     }->{$token->{tag_name}}) {
4259 wakaba 1.56 if ($self->{insertion_mode} & BODY_TABLE_IMS) {
4260 wakaba 1.79 !!!cp ('t190');
4261 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4262 wakaba 1.52 ## Ignore the token
4263     !!!next-token;
4264     redo B;
4265     } else {
4266 wakaba 1.79 !!!cp ('t191');
4267 wakaba 1.52 #
4268     }
4269     } elsif ({
4270     tbody => 1, tfoot => 1,
4271     thead => 1, tr => 1,
4272     }->{$token->{tag_name}} and
4273 wakaba 1.54 $self->{insertion_mode} == IN_CAPTION_IM) {
4274 wakaba 1.79 !!!cp ('t192');
4275 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4276 wakaba 1.52 ## Ignore the token
4277     !!!next-token;
4278     redo B;
4279     } else {
4280 wakaba 1.79 !!!cp ('t193');
4281 wakaba 1.52 #
4282     }
4283 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4284     for my $entry (@{$self->{open_elements}}) {
4285     if (not {
4286     dd => 1, dt => 1, li => 1, p => 1, tbody => 1, td => 1, tfoot => 1,
4287     th => 1, thead => 1, tr => 1, body => 1, html => 1,
4288     }->{$entry->[1]}) {
4289     !!!cp ('t75');
4290 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
4291 wakaba 1.104 last;
4292     }
4293     }
4294    
4295     ## Stop parsing.
4296     last B;
4297 wakaba 1.52 } else {
4298     die "$0: $token->{type}: Unknown token type";
4299     }
4300    
4301     $insert = $insert_to_current;
4302     #
4303 wakaba 1.56 } elsif ($self->{insertion_mode} & TABLE_IMS) {
4304 wakaba 1.58 if ($token->{type} == CHARACTER_TOKEN) {
4305 wakaba 1.95 if (not $open_tables->[-1]->[1] and # tainted
4306     $token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4307     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4308 wakaba 1.52
4309 wakaba 1.95 unless (length $token->{data}) {
4310     !!!cp ('t194');
4311     !!!next-token;
4312     redo B;
4313     } else {
4314     !!!cp ('t195');
4315     }
4316     }
4317 wakaba 1.52
4318 wakaba 1.113 !!!parse-error (type => 'in table:#character', token => $token);
4319 wakaba 1.52
4320     ## As if in body, but insert into foster parent element
4321     ## ISSUE: Spec says that "whenever a node would be inserted
4322     ## into the current node" while characters might not be
4323     ## result in a new Text node.
4324     $reconstruct_active_formatting_elements->($insert_to_foster);
4325    
4326     if ({
4327     table => 1, tbody => 1, tfoot => 1,
4328     thead => 1, tr => 1,
4329     }->{$self->{open_elements}->[-1]->[1]}) {
4330     # MUST
4331     my $foster_parent_element;
4332     my $next_sibling;
4333     my $prev_sibling;
4334     OE: for (reverse 0..$#{$self->{open_elements}}) {
4335     if ($self->{open_elements}->[$_]->[1] eq 'table') {
4336     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
4337     if (defined $parent and $parent->node_type == 1) {
4338 wakaba 1.79 !!!cp ('t196');
4339 wakaba 1.52 $foster_parent_element = $parent;
4340     $next_sibling = $self->{open_elements}->[$_]->[0];
4341     $prev_sibling = $next_sibling->previous_sibling;
4342     } else {
4343 wakaba 1.79 !!!cp ('t197');
4344 wakaba 1.52 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
4345     $prev_sibling = $foster_parent_element->last_child;
4346     }
4347     last OE;
4348     }
4349     } # OE
4350     $foster_parent_element = $self->{open_elements}->[0]->[0] and
4351     $prev_sibling = $foster_parent_element->last_child
4352     unless defined $foster_parent_element;
4353     if (defined $prev_sibling and
4354     $prev_sibling->node_type == 3) {
4355 wakaba 1.79 !!!cp ('t198');
4356 wakaba 1.52 $prev_sibling->manakai_append_text ($token->{data});
4357     } else {
4358 wakaba 1.79 !!!cp ('t199');
4359 wakaba 1.52 $foster_parent_element->insert_before
4360     ($self->{document}->create_text_node ($token->{data}),
4361     $next_sibling);
4362     }
4363 wakaba 1.95 $open_tables->[-1]->[1] = 1; # tainted
4364     } else {
4365     !!!cp ('t200');
4366     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4367     }
4368 wakaba 1.52
4369 wakaba 1.95 !!!next-token;
4370     redo B;
4371 wakaba 1.58 } elsif ($token->{type} == START_TAG_TOKEN) {
4372 wakaba 1.52 if ({
4373 wakaba 1.54 tr => ($self->{insertion_mode} != IN_ROW_IM),
4374 wakaba 1.52 th => 1, td => 1,
4375     }->{$token->{tag_name}}) {
4376 wakaba 1.54 if ($self->{insertion_mode} == IN_TABLE_IM) {
4377 wakaba 1.52 ## Clear back to table context
4378     while ($self->{open_elements}->[-1]->[1] ne 'table' and
4379     $self->{open_elements}->[-1]->[1] ne 'html') {
4380 wakaba 1.79 !!!cp ('t201');
4381 wakaba 1.52 pop @{$self->{open_elements}};
4382 wakaba 1.43 }
4383    
4384 wakaba 1.116 !!!insert-element ('tbody',, $token);
4385 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
4386 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
4387     }
4388    
4389 wakaba 1.54 if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
4390 wakaba 1.52 unless ($token->{tag_name} eq 'tr') {
4391 wakaba 1.79 !!!cp ('t202');
4392 wakaba 1.113 !!!parse-error (type => 'missing start tag:tr', token => $token);
4393 wakaba 1.52 }
4394 wakaba 1.43
4395 wakaba 1.52 ## Clear back to table body context
4396     while (not {
4397     tbody => 1, tfoot => 1, thead => 1, html => 1,
4398     }->{$self->{open_elements}->[-1]->[1]}) {
4399 wakaba 1.79 !!!cp ('t203');
4400 wakaba 1.83 ## ISSUE: Can this case be reached?
4401 wakaba 1.52 pop @{$self->{open_elements}};
4402     }
4403 wakaba 1.43
4404 wakaba 1.54 $self->{insertion_mode} = IN_ROW_IM;
4405 wakaba 1.52 if ($token->{tag_name} eq 'tr') {
4406 wakaba 1.79 !!!cp ('t204');
4407 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4408 wakaba 1.52 !!!next-token;
4409     redo B;
4410     } else {
4411 wakaba 1.79 !!!cp ('t205');
4412 wakaba 1.116 !!!insert-element ('tr',, $token);
4413 wakaba 1.52 ## reprocess in the "in row" insertion mode
4414     }
4415 wakaba 1.79 } else {
4416     !!!cp ('t206');
4417 wakaba 1.52 }
4418    
4419     ## Clear back to table row context
4420     while (not {
4421     tr => 1, html => 1,
4422     }->{$self->{open_elements}->[-1]->[1]}) {
4423 wakaba 1.79 !!!cp ('t207');
4424 wakaba 1.52 pop @{$self->{open_elements}};
4425 wakaba 1.43 }
4426 wakaba 1.52
4427 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4428 wakaba 1.54 $self->{insertion_mode} = IN_CELL_IM;
4429 wakaba 1.52
4430     push @$active_formatting_elements, ['#marker', ''];
4431    
4432     !!!next-token;
4433     redo B;
4434     } elsif ({
4435     caption => 1, col => 1, colgroup => 1,
4436     tbody => 1, tfoot => 1, thead => 1,
4437 wakaba 1.54 tr => 1, # $self->{insertion_mode} == IN_ROW_IM
4438 wakaba 1.52 }->{$token->{tag_name}}) {
4439 wakaba 1.54 if ($self->{insertion_mode} == IN_ROW_IM) {
4440 wakaba 1.52 ## As if </tr>
4441 wakaba 1.43 ## have an element in table scope
4442     my $i;
4443     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4444     my $node = $self->{open_elements}->[$_];
4445 wakaba 1.52 if ($node->[1] eq 'tr') {
4446 wakaba 1.79 !!!cp ('t208');
4447 wakaba 1.43 $i = $_;
4448     last INSCOPE;
4449     } elsif ({
4450 wakaba 1.83 html => 1,
4451    
4452     ## NOTE: This element does not appear here, maybe.
4453     table => 1,
4454 wakaba 1.43 }->{$node->[1]}) {
4455 wakaba 1.79 !!!cp ('t209');
4456 wakaba 1.43 last INSCOPE;
4457     }
4458     } # INSCOPE
4459 wakaba 1.79 unless (defined $i) {
4460     !!!cp ('t210');
4461 wakaba 1.83 ## TODO: This type is wrong.
4462 wakaba 1.113 !!!parse-error (type => 'unmacthed end tag:'.$token->{tag_name}, token => $token);
4463 wakaba 1.52 ## Ignore the token
4464     !!!next-token;
4465 wakaba 1.43 redo B;
4466     }
4467    
4468 wakaba 1.52 ## Clear back to table row context
4469     while (not {
4470     tr => 1, html => 1,
4471     }->{$self->{open_elements}->[-1]->[1]}) {
4472 wakaba 1.79 !!!cp ('t211');
4473 wakaba 1.83 ## ISSUE: Can this case be reached?
4474 wakaba 1.52 pop @{$self->{open_elements}};
4475 wakaba 1.1 }
4476 wakaba 1.43
4477 wakaba 1.52 pop @{$self->{open_elements}}; # tr
4478 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
4479 wakaba 1.52 if ($token->{tag_name} eq 'tr') {
4480 wakaba 1.79 !!!cp ('t212');
4481 wakaba 1.52 ## reprocess
4482     redo B;
4483     } else {
4484 wakaba 1.79 !!!cp ('t213');
4485 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
4486     }
4487 wakaba 1.1 }
4488 wakaba 1.52
4489 wakaba 1.54 if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
4490 wakaba 1.52 ## have an element in table scope
4491 wakaba 1.43 my $i;
4492     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4493     my $node = $self->{open_elements}->[$_];
4494 wakaba 1.52 if ({
4495     tbody => 1, thead => 1, tfoot => 1,
4496     }->{$node->[1]}) {
4497 wakaba 1.79 !!!cp ('t214');
4498 wakaba 1.43 $i = $_;
4499     last INSCOPE;
4500     } elsif ({
4501     table => 1, html => 1,
4502     }->{$node->[1]}) {
4503 wakaba 1.79 !!!cp ('t215');
4504 wakaba 1.43 last INSCOPE;
4505     }
4506     } # INSCOPE
4507 wakaba 1.52 unless (defined $i) {
4508 wakaba 1.79 !!!cp ('t216');
4509 wakaba 1.82 ## TODO: This error type is wrong.
4510 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4511 wakaba 1.52 ## Ignore the token
4512     !!!next-token;
4513 wakaba 1.43 redo B;
4514     }
4515 wakaba 1.52
4516     ## Clear back to table body context
4517     while (not {
4518     tbody => 1, tfoot => 1, thead => 1, html => 1,
4519     }->{$self->{open_elements}->[-1]->[1]}) {
4520 wakaba 1.79 !!!cp ('t217');
4521 wakaba 1.83 ## ISSUE: Can this state be reached?
4522 wakaba 1.52 pop @{$self->{open_elements}};
4523 wakaba 1.43 }
4524    
4525 wakaba 1.52 ## As if <{current node}>
4526     ## have an element in table scope
4527     ## true by definition
4528 wakaba 1.43
4529 wakaba 1.52 ## Clear back to table body context
4530     ## nop by definition
4531 wakaba 1.43
4532 wakaba 1.52 pop @{$self->{open_elements}};
4533 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
4534 wakaba 1.52 ## reprocess in "in table" insertion mode...
4535 wakaba 1.79 } else {
4536     !!!cp ('t218');
4537 wakaba 1.52 }
4538    
4539     if ($token->{tag_name} eq 'col') {
4540     ## Clear back to table context
4541     while ($self->{open_elements}->[-1]->[1] ne 'table' and
4542     $self->{open_elements}->[-1]->[1] ne 'html') {
4543 wakaba 1.79 !!!cp ('t219');
4544 wakaba 1.83 ## ISSUE: Can this state be reached?
4545 wakaba 1.52 pop @{$self->{open_elements}};
4546     }
4547 wakaba 1.43
4548 wakaba 1.116 !!!insert-element ('colgroup',, $token);
4549 wakaba 1.54 $self->{insertion_mode} = IN_COLUMN_GROUP_IM;
4550 wakaba 1.52 ## reprocess
4551 wakaba 1.43 redo B;
4552 wakaba 1.52 } elsif ({
4553     caption => 1,
4554     colgroup => 1,
4555     tbody => 1, tfoot => 1, thead => 1,
4556     }->{$token->{tag_name}}) {
4557     ## Clear back to table context
4558     while ($self->{open_elements}->[-1]->[1] ne 'table' and
4559     $self->{open_elements}->[-1]->[1] ne 'html') {
4560 wakaba 1.79 !!!cp ('t220');
4561 wakaba 1.83 ## ISSUE: Can this state be reached?
4562 wakaba 1.52 pop @{$self->{open_elements}};
4563 wakaba 1.1 }
4564 wakaba 1.52
4565     push @$active_formatting_elements, ['#marker', '']
4566     if $token->{tag_name} eq 'caption';
4567    
4568 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4569 wakaba 1.52 $self->{insertion_mode} = {
4570 wakaba 1.54 caption => IN_CAPTION_IM,
4571     colgroup => IN_COLUMN_GROUP_IM,
4572     tbody => IN_TABLE_BODY_IM,
4573     tfoot => IN_TABLE_BODY_IM,
4574     thead => IN_TABLE_BODY_IM,
4575 wakaba 1.52 }->{$token->{tag_name}};
4576 wakaba 1.1 !!!next-token;
4577     redo B;
4578 wakaba 1.52 } else {
4579     die "$0: in table: <>: $token->{tag_name}";
4580 wakaba 1.1 }
4581 wakaba 1.52 } elsif ($token->{tag_name} eq 'table') {
4582 wakaba 1.113 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1], token => $token);
4583 wakaba 1.1
4584 wakaba 1.52 ## As if </table>
4585 wakaba 1.1 ## have a table element in table scope
4586     my $i;
4587 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4588     my $node = $self->{open_elements}->[$_];
4589 wakaba 1.52 if ($node->[1] eq 'table') {
4590 wakaba 1.79 !!!cp ('t221');
4591 wakaba 1.1 $i = $_;
4592     last INSCOPE;
4593     } elsif ({
4594 wakaba 1.83 #table => 1,
4595     html => 1,
4596 wakaba 1.1 }->{$node->[1]}) {
4597 wakaba 1.79 !!!cp ('t222');
4598 wakaba 1.1 last INSCOPE;
4599     }
4600     } # INSCOPE
4601     unless (defined $i) {
4602 wakaba 1.79 !!!cp ('t223');
4603 wakaba 1.83 ## TODO: The following is wrong, maybe.
4604 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:table', token => $token);
4605 wakaba 1.52 ## Ignore tokens </table><table>
4606 wakaba 1.1 !!!next-token;
4607     redo B;
4608     }
4609    
4610 wakaba 1.106 ## TODO: The following steps are removed from the latest spec.
4611 wakaba 1.1 ## generate implied end tags
4612 wakaba 1.86 while ({
4613     dd => 1, dt => 1, li => 1, p => 1,
4614     }->{$self->{open_elements}->[-1]->[1]}) {
4615 wakaba 1.79 !!!cp ('t224');
4616 wakaba 1.86 pop @{$self->{open_elements}};
4617 wakaba 1.1 }
4618    
4619 wakaba 1.52 if ($self->{open_elements}->[-1]->[1] ne 'table') {
4620 wakaba 1.79 !!!cp ('t225');
4621 wakaba 1.83 ## ISSUE: Can this case be reached?
4622 wakaba 1.113 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1], token => $token);
4623 wakaba 1.79 } else {
4624     !!!cp ('t226');
4625 wakaba 1.1 }
4626    
4627 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4628 wakaba 1.95 pop @{$open_tables};
4629 wakaba 1.1
4630 wakaba 1.52 $self->_reset_insertion_mode;
4631 wakaba 1.1
4632     ## reprocess
4633     redo B;
4634 wakaba 1.100 } elsif ($token->{tag_name} eq 'style') {
4635     if (not $open_tables->[-1]->[1]) { # tainted
4636     !!!cp ('t227.8');
4637     ## NOTE: This is a "as if in head" code clone.
4638     $parse_rcdata->(CDATA_CONTENT_MODEL);
4639     redo B;
4640     } else {
4641     !!!cp ('t227.7');
4642     #
4643     }
4644     } elsif ($token->{tag_name} eq 'script') {
4645     if (not $open_tables->[-1]->[1]) { # tainted
4646     !!!cp ('t227.6');
4647     ## NOTE: This is a "as if in head" code clone.
4648     $script_start_tag->();
4649     redo B;
4650     } else {
4651     !!!cp ('t227.5');
4652     #
4653     }
4654 wakaba 1.98 } elsif ($token->{tag_name} eq 'input') {
4655     if (not $open_tables->[-1]->[1]) { # tainted
4656     if ($token->{attributes}->{type}) { ## TODO: case
4657     my $type = lc $token->{attributes}->{type}->{value};
4658     if ($type eq 'hidden') {
4659     !!!cp ('t227.3');
4660 wakaba 1.113 !!!parse-error (type => 'in table:'.$token->{tag_name}, token => $token);
4661 wakaba 1.98
4662 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4663 wakaba 1.98
4664     ## TODO: form element pointer
4665    
4666     pop @{$self->{open_elements}};
4667    
4668     !!!next-token;
4669     redo B;
4670     } else {
4671     !!!cp ('t227.2');
4672     #
4673     }
4674     } else {
4675     !!!cp ('t227.1');
4676     #
4677     }
4678     } else {
4679     !!!cp ('t227.4');
4680     #
4681     }
4682 wakaba 1.58 } else {
4683 wakaba 1.79 !!!cp ('t227');
4684 wakaba 1.58 #
4685     }
4686 wakaba 1.98
4687 wakaba 1.113 !!!parse-error (type => 'in table:'.$token->{tag_name}, token => $token);
4688 wakaba 1.98
4689     $insert = $insert_to_foster;
4690     #
4691 wakaba 1.58 } elsif ($token->{type} == END_TAG_TOKEN) {
4692 wakaba 1.52 if ($token->{tag_name} eq 'tr' and
4693 wakaba 1.54 $self->{insertion_mode} == IN_ROW_IM) {
4694 wakaba 1.52 ## have an element in table scope
4695     my $i;
4696     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4697     my $node = $self->{open_elements}->[$_];
4698     if ($node->[1] eq $token->{tag_name}) {
4699 wakaba 1.79 !!!cp ('t228');
4700 wakaba 1.52 $i = $_;
4701     last INSCOPE;
4702     } elsif ({
4703     table => 1, html => 1,
4704     }->{$node->[1]}) {
4705 wakaba 1.79 !!!cp ('t229');
4706 wakaba 1.52 last INSCOPE;
4707     }
4708     } # INSCOPE
4709     unless (defined $i) {
4710 wakaba 1.79 !!!cp ('t230');
4711 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4712 wakaba 1.52 ## Ignore the token
4713 wakaba 1.42 !!!next-token;
4714     redo B;
4715 wakaba 1.79 } else {
4716     !!!cp ('t232');
4717 wakaba 1.42 }
4718    
4719 wakaba 1.52 ## Clear back to table row context
4720     while (not {
4721     tr => 1, html => 1,
4722     }->{$self->{open_elements}->[-1]->[1]}) {
4723 wakaba 1.79 !!!cp ('t231');
4724 wakaba 1.83 ## ISSUE: Can this state be reached?
4725 wakaba 1.52 pop @{$self->{open_elements}};
4726     }
4727 wakaba 1.42
4728 wakaba 1.52 pop @{$self->{open_elements}}; # tr
4729 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
4730 wakaba 1.52 !!!next-token;
4731     redo B;
4732     } elsif ($token->{tag_name} eq 'table') {
4733 wakaba 1.54 if ($self->{insertion_mode} == IN_ROW_IM) {
4734 wakaba 1.52 ## As if </tr>
4735     ## have an element in table scope
4736     my $i;
4737     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4738     my $node = $self->{open_elements}->[$_];
4739     if ($node->[1] eq 'tr') {
4740 wakaba 1.79 !!!cp ('t233');
4741 wakaba 1.52 $i = $_;
4742     last INSCOPE;
4743     } elsif ({
4744     table => 1, html => 1,
4745     }->{$node->[1]}) {
4746 wakaba 1.79 !!!cp ('t234');
4747 wakaba 1.52 last INSCOPE;
4748 wakaba 1.42 }
4749 wakaba 1.52 } # INSCOPE
4750     unless (defined $i) {
4751 wakaba 1.79 !!!cp ('t235');
4752 wakaba 1.83 ## TODO: The following is wrong.
4753 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{type}, token => $token);
4754 wakaba 1.52 ## Ignore the token
4755     !!!next-token;
4756     redo B;
4757 wakaba 1.42 }
4758 wakaba 1.52
4759     ## Clear back to table row context
4760     while (not {
4761     tr => 1, html => 1,
4762     }->{$self->{open_elements}->[-1]->[1]}) {
4763 wakaba 1.79 !!!cp ('t236');
4764 wakaba 1.83 ## ISSUE: Can this state be reached?
4765 wakaba 1.46 pop @{$self->{open_elements}};
4766 wakaba 1.1 }
4767 wakaba 1.46
4768 wakaba 1.52 pop @{$self->{open_elements}}; # tr
4769 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
4770 wakaba 1.46 ## reprocess in the "in table body" insertion mode...
4771 wakaba 1.1 }
4772    
4773 wakaba 1.54 if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
4774 wakaba 1.52 ## have an element in table scope
4775     my $i;
4776     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4777     my $node = $self->{open_elements}->[$_];
4778     if ({
4779     tbody => 1, thead => 1, tfoot => 1,
4780     }->{$node->[1]}) {
4781 wakaba 1.79 !!!cp ('t237');
4782 wakaba 1.52 $i = $_;
4783     last INSCOPE;
4784     } elsif ({
4785     table => 1, html => 1,
4786     }->{$node->[1]}) {
4787 wakaba 1.79 !!!cp ('t238');
4788 wakaba 1.52 last INSCOPE;
4789     }
4790     } # INSCOPE
4791     unless (defined $i) {
4792 wakaba 1.79 !!!cp ('t239');
4793 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4794 wakaba 1.52 ## Ignore the token
4795     !!!next-token;
4796     redo B;
4797 wakaba 1.47 }
4798    
4799     ## Clear back to table body context
4800     while (not {
4801     tbody => 1, tfoot => 1, thead => 1, html => 1,
4802     }->{$self->{open_elements}->[-1]->[1]}) {
4803 wakaba 1.79 !!!cp ('t240');
4804 wakaba 1.47 pop @{$self->{open_elements}};
4805     }
4806    
4807 wakaba 1.52 ## As if <{current node}>
4808     ## have an element in table scope
4809     ## true by definition
4810    
4811     ## Clear back to table body context
4812     ## nop by definition
4813    
4814     pop @{$self->{open_elements}};
4815 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
4816 wakaba 1.52 ## reprocess in the "in table" insertion mode...
4817     }
4818    
4819 wakaba 1.94 ## NOTE: </table> in the "in table" insertion mode.
4820     ## When you edit the code fragment below, please ensure that
4821     ## the code for <table> in the "in table" insertion mode
4822     ## is synced with it.
4823    
4824 wakaba 1.52 ## have a table element in table scope
4825     my $i;
4826     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4827     my $node = $self->{open_elements}->[$_];
4828     if ($node->[1] eq $token->{tag_name}) {
4829 wakaba 1.79 !!!cp ('t241');
4830 wakaba 1.52 $i = $_;
4831     last INSCOPE;
4832     } elsif ({
4833     table => 1, html => 1,
4834     }->{$node->[1]}) {
4835 wakaba 1.79 !!!cp ('t242');
4836 wakaba 1.52 last INSCOPE;
4837 wakaba 1.47 }
4838 wakaba 1.52 } # INSCOPE
4839     unless (defined $i) {
4840 wakaba 1.79 !!!cp ('t243');
4841 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4842 wakaba 1.52 ## Ignore the token
4843     !!!next-token;
4844     redo B;
4845 wakaba 1.3 }
4846 wakaba 1.52
4847     splice @{$self->{open_elements}}, $i;
4848 wakaba 1.95 pop @{$open_tables};
4849 wakaba 1.1
4850 wakaba 1.52 $self->_reset_insertion_mode;
4851 wakaba 1.47
4852     !!!next-token;
4853     redo B;
4854     } elsif ({
4855 wakaba 1.48 tbody => 1, tfoot => 1, thead => 1,
4856 wakaba 1.52 }->{$token->{tag_name}} and
4857 wakaba 1.56 $self->{insertion_mode} & ROW_IMS) {
4858 wakaba 1.54 if ($self->{insertion_mode} == IN_ROW_IM) {
4859 wakaba 1.52 ## have an element in table scope
4860     my $i;
4861     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4862     my $node = $self->{open_elements}->[$_];
4863     if ($node->[1] eq $token->{tag_name}) {
4864 wakaba 1.79 !!!cp ('t247');
4865 wakaba 1.52 $i = $_;
4866     last INSCOPE;
4867     } elsif ({
4868     table => 1, html => 1,
4869     }->{$node->[1]}) {
4870 wakaba 1.79 !!!cp ('t248');
4871 wakaba 1.52 last INSCOPE;
4872     }
4873     } # INSCOPE
4874     unless (defined $i) {
4875 wakaba 1.79 !!!cp ('t249');
4876 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4877 wakaba 1.52 ## Ignore the token
4878     !!!next-token;
4879     redo B;
4880     }
4881    
4882 wakaba 1.48 ## As if </tr>
4883     ## have an element in table scope
4884     my $i;
4885     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4886     my $node = $self->{open_elements}->[$_];
4887     if ($node->[1] eq 'tr') {
4888 wakaba 1.79 !!!cp ('t250');
4889 wakaba 1.48 $i = $_;
4890     last INSCOPE;
4891     } elsif ({
4892     table => 1, html => 1,
4893     }->{$node->[1]}) {
4894 wakaba 1.79 !!!cp ('t251');
4895 wakaba 1.48 last INSCOPE;
4896     }
4897     } # INSCOPE
4898 wakaba 1.52 unless (defined $i) {
4899 wakaba 1.79 !!!cp ('t252');
4900 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:tr', token => $token);
4901 wakaba 1.52 ## Ignore the token
4902     !!!next-token;
4903     redo B;
4904     }
4905 wakaba 1.48
4906     ## Clear back to table row context
4907     while (not {
4908     tr => 1, html => 1,
4909     }->{$self->{open_elements}->[-1]->[1]}) {
4910 wakaba 1.79 !!!cp ('t253');
4911 wakaba 1.83 ## ISSUE: Can this case be reached?
4912 wakaba 1.48 pop @{$self->{open_elements}};
4913     }
4914    
4915     pop @{$self->{open_elements}}; # tr
4916 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
4917 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
4918     }
4919    
4920     ## have an element in table scope
4921     my $i;
4922     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4923     my $node = $self->{open_elements}->[$_];
4924     if ($node->[1] eq $token->{tag_name}) {
4925 wakaba 1.79 !!!cp ('t254');
4926 wakaba 1.52 $i = $_;
4927     last INSCOPE;
4928     } elsif ({
4929     table => 1, html => 1,
4930     }->{$node->[1]}) {
4931 wakaba 1.79 !!!cp ('t255');
4932 wakaba 1.52 last INSCOPE;
4933     }
4934     } # INSCOPE
4935     unless (defined $i) {
4936 wakaba 1.79 !!!cp ('t256');
4937 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4938 wakaba 1.52 ## Ignore the token
4939     !!!next-token;
4940     redo B;
4941     }
4942    
4943     ## Clear back to table body context
4944     while (not {
4945     tbody => 1, tfoot => 1, thead => 1, html => 1,
4946     }->{$self->{open_elements}->[-1]->[1]}) {
4947 wakaba 1.79 !!!cp ('t257');
4948 wakaba 1.83 ## ISSUE: Can this case be reached?
4949 wakaba 1.52 pop @{$self->{open_elements}};
4950     }
4951    
4952     pop @{$self->{open_elements}};
4953 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
4954 wakaba 1.52 !!!next-token;
4955     redo B;
4956     } elsif ({
4957     body => 1, caption => 1, col => 1, colgroup => 1,
4958     html => 1, td => 1, th => 1,
4959 wakaba 1.54 tr => 1, # $self->{insertion_mode} == IN_ROW_IM
4960     tbody => 1, tfoot => 1, thead => 1, # $self->{insertion_mode} == IN_TABLE_IM
4961 wakaba 1.52 }->{$token->{tag_name}}) {
4962 wakaba 1.79 !!!cp ('t258');
4963 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4964 wakaba 1.52 ## Ignore the token
4965     !!!next-token;
4966     redo B;
4967 wakaba 1.58 } else {
4968 wakaba 1.79 !!!cp ('t259');
4969 wakaba 1.113 !!!parse-error (type => 'in table:/'.$token->{tag_name}, token => $token);
4970 wakaba 1.52
4971 wakaba 1.58 $insert = $insert_to_foster;
4972     #
4973     }
4974 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4975     unless ($self->{open_elements}->[-1]->[1] eq 'html' and
4976     @{$self->{open_elements}} == 1) { # redundant, maybe
4977 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
4978 wakaba 1.104 !!!cp ('t259.1');
4979 wakaba 1.105 #
4980 wakaba 1.104 } else {
4981     !!!cp ('t259.2');
4982 wakaba 1.105 #
4983 wakaba 1.104 }
4984    
4985     ## Stop parsing
4986     last B;
4987 wakaba 1.58 } else {
4988     die "$0: $token->{type}: Unknown token type";
4989     }
4990 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_COLUMN_GROUP_IM) {
4991 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
4992 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4993     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4994     unless (length $token->{data}) {
4995 wakaba 1.79 !!!cp ('t260');
4996 wakaba 1.52 !!!next-token;
4997     redo B;
4998     }
4999     }
5000    
5001 wakaba 1.79 !!!cp ('t261');
5002 wakaba 1.52 #
5003 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
5004 wakaba 1.52 if ($token->{tag_name} eq 'col') {
5005 wakaba 1.79 !!!cp ('t262');
5006 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5007 wakaba 1.52 pop @{$self->{open_elements}};
5008     !!!next-token;
5009     redo B;
5010     } else {
5011 wakaba 1.79 !!!cp ('t263');
5012 wakaba 1.52 #
5013     }
5014 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
5015 wakaba 1.52 if ($token->{tag_name} eq 'colgroup') {
5016     if ($self->{open_elements}->[-1]->[1] eq 'html') {
5017 wakaba 1.79 !!!cp ('t264');
5018 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:colgroup', token => $token);
5019 wakaba 1.52 ## Ignore the token
5020     !!!next-token;
5021     redo B;
5022     } else {
5023 wakaba 1.79 !!!cp ('t265');
5024 wakaba 1.52 pop @{$self->{open_elements}}; # colgroup
5025 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5026 wakaba 1.52 !!!next-token;
5027     redo B;
5028     }
5029     } elsif ($token->{tag_name} eq 'col') {
5030 wakaba 1.79 !!!cp ('t266');
5031 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:col', token => $token);
5032 wakaba 1.52 ## Ignore the token
5033     !!!next-token;
5034     redo B;
5035     } else {
5036 wakaba 1.79 !!!cp ('t267');
5037 wakaba 1.52 #
5038     }
5039 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
5040     if ($self->{open_elements}->[-1]->[1] eq 'html' or
5041     @{$self->{open_elements}} == 1) { # redundant, maybe
5042     !!!cp ('t270.2');
5043     ## Stop parsing.
5044     last B;
5045     } else {
5046     ## NOTE: As if </colgroup>.
5047     !!!cp ('t270.1');
5048     pop @{$self->{open_elements}}; # colgroup
5049     $self->{insertion_mode} = IN_TABLE_IM;
5050     ## Reprocess.
5051     redo B;
5052     }
5053     } else {
5054     die "$0: $token->{type}: Unknown token type";
5055     }
5056 wakaba 1.52
5057     ## As if </colgroup>
5058     if ($self->{open_elements}->[-1]->[1] eq 'html') {
5059 wakaba 1.79 !!!cp ('t269');
5060 wakaba 1.104 ## TODO: Wrong error type?
5061 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:colgroup', token => $token);
5062 wakaba 1.52 ## Ignore the token
5063     !!!next-token;
5064     redo B;
5065     } else {
5066 wakaba 1.79 !!!cp ('t270');
5067 wakaba 1.52 pop @{$self->{open_elements}}; # colgroup
5068 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5069 wakaba 1.52 ## reprocess
5070     redo B;
5071     }
5072 wakaba 1.101 } elsif ($self->{insertion_mode} & SELECT_IMS) {
5073 wakaba 1.58 if ($token->{type} == CHARACTER_TOKEN) {
5074 wakaba 1.79 !!!cp ('t271');
5075 wakaba 1.58 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
5076     !!!next-token;
5077     redo B;
5078     } elsif ($token->{type} == START_TAG_TOKEN) {
5079 wakaba 1.52 if ($token->{tag_name} eq 'option') {
5080     if ($self->{open_elements}->[-1]->[1] eq 'option') {
5081 wakaba 1.79 !!!cp ('t272');
5082 wakaba 1.52 ## As if </option>
5083     pop @{$self->{open_elements}};
5084 wakaba 1.79 } else {
5085     !!!cp ('t273');
5086 wakaba 1.52 }
5087    
5088 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5089 wakaba 1.52 !!!next-token;
5090     redo B;
5091     } elsif ($token->{tag_name} eq 'optgroup') {
5092     if ($self->{open_elements}->[-1]->[1] eq 'option') {
5093 wakaba 1.79 !!!cp ('t274');
5094 wakaba 1.52 ## As if </option>
5095     pop @{$self->{open_elements}};
5096 wakaba 1.79 } else {
5097     !!!cp ('t275');
5098 wakaba 1.52 }
5099    
5100     if ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
5101 wakaba 1.79 !!!cp ('t276');
5102 wakaba 1.52 ## As if </optgroup>
5103     pop @{$self->{open_elements}};
5104 wakaba 1.79 } else {
5105     !!!cp ('t277');
5106 wakaba 1.52 }
5107    
5108 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5109 wakaba 1.52 !!!next-token;
5110     redo B;
5111 wakaba 1.101 } elsif ($token->{tag_name} eq 'select' or
5112     $token->{tag_name} eq 'input' or
5113     ($self->{insertion_mode} == IN_SELECT_IN_TABLE_IM and
5114     {
5115     caption => 1, table => 1,
5116     tbody => 1, tfoot => 1, thead => 1,
5117     tr => 1, td => 1, th => 1,
5118     }->{$token->{tag_name}})) {
5119     ## TODO: The type below is not good - <select> is replaced by </select>
5120 wakaba 1.113 !!!parse-error (type => 'not closed:select', token => $token);
5121 wakaba 1.101 ## NOTE: As if the token were </select> (<select> case) or
5122     ## as if there were </select> (otherwise).
5123 wakaba 1.52 ## have an element in table scope
5124     my $i;
5125     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5126     my $node = $self->{open_elements}->[$_];
5127 wakaba 1.101 if ($node->[1] eq 'select') {
5128 wakaba 1.79 !!!cp ('t278');
5129 wakaba 1.52 $i = $_;
5130     last INSCOPE;
5131     } elsif ({
5132     table => 1, html => 1,
5133     }->{$node->[1]}) {
5134 wakaba 1.79 !!!cp ('t279');
5135 wakaba 1.52 last INSCOPE;
5136 wakaba 1.47 }
5137 wakaba 1.52 } # INSCOPE
5138     unless (defined $i) {
5139 wakaba 1.79 !!!cp ('t280');
5140 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:select', token => $token);
5141 wakaba 1.52 ## Ignore the token
5142     !!!next-token;
5143     redo B;
5144 wakaba 1.47 }
5145 wakaba 1.52
5146 wakaba 1.79 !!!cp ('t281');
5147 wakaba 1.52 splice @{$self->{open_elements}}, $i;
5148    
5149     $self->_reset_insertion_mode;
5150 wakaba 1.47
5151 wakaba 1.101 if ($token->{tag_name} eq 'select') {
5152     !!!cp ('t281.2');
5153     !!!next-token;
5154     redo B;
5155     } else {
5156     !!!cp ('t281.1');
5157     ## Reprocess the token.
5158     redo B;
5159     }
5160 wakaba 1.58 } else {
5161 wakaba 1.79 !!!cp ('t282');
5162 wakaba 1.113 !!!parse-error (type => 'in select:'.$token->{tag_name}, token => $token);
5163 wakaba 1.58 ## Ignore the token
5164     !!!next-token;
5165     redo B;
5166     }
5167     } elsif ($token->{type} == END_TAG_TOKEN) {
5168 wakaba 1.52 if ($token->{tag_name} eq 'optgroup') {
5169     if ($self->{open_elements}->[-1]->[1] eq 'option' and
5170     $self->{open_elements}->[-2]->[1] eq 'optgroup') {
5171 wakaba 1.79 !!!cp ('t283');
5172 wakaba 1.52 ## As if </option>
5173     splice @{$self->{open_elements}}, -2;
5174     } elsif ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
5175 wakaba 1.79 !!!cp ('t284');
5176 wakaba 1.52 pop @{$self->{open_elements}};
5177     } else {
5178 wakaba 1.79 !!!cp ('t285');
5179 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5180 wakaba 1.52 ## Ignore the token
5181     }
5182     !!!next-token;
5183     redo B;
5184     } elsif ($token->{tag_name} eq 'option') {
5185     if ($self->{open_elements}->[-1]->[1] eq 'option') {
5186 wakaba 1.79 !!!cp ('t286');
5187 wakaba 1.47 pop @{$self->{open_elements}};
5188 wakaba 1.52 } else {
5189 wakaba 1.79 !!!cp ('t287');
5190 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5191 wakaba 1.52 ## Ignore the token
5192 wakaba 1.1 }
5193 wakaba 1.52 !!!next-token;
5194     redo B;
5195     } elsif ($token->{tag_name} eq 'select') {
5196     ## have an element in table scope
5197     my $i;
5198     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5199     my $node = $self->{open_elements}->[$_];
5200     if ($node->[1] eq $token->{tag_name}) {
5201 wakaba 1.79 !!!cp ('t288');
5202 wakaba 1.52 $i = $_;
5203     last INSCOPE;
5204     } elsif ({
5205     table => 1, html => 1,
5206     }->{$node->[1]}) {
5207 wakaba 1.79 !!!cp ('t289');
5208 wakaba 1.52 last INSCOPE;
5209 wakaba 1.48 }
5210 wakaba 1.52 } # INSCOPE
5211     unless (defined $i) {
5212 wakaba 1.79 !!!cp ('t290');
5213 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5214 wakaba 1.52 ## Ignore the token
5215     !!!next-token;
5216 wakaba 1.48 redo B;
5217 wakaba 1.52 }
5218    
5219 wakaba 1.79 !!!cp ('t291');
5220 wakaba 1.52 splice @{$self->{open_elements}}, $i;
5221    
5222     $self->_reset_insertion_mode;
5223    
5224     !!!next-token;
5225     redo B;
5226 wakaba 1.101 } elsif ($self->{insertion_mode} == IN_SELECT_IN_TABLE_IM and
5227     {
5228     caption => 1, table => 1, tbody => 1,
5229     tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
5230     }->{$token->{tag_name}}) {
5231 wakaba 1.83 ## TODO: The following is wrong?
5232 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5233 wakaba 1.52
5234     ## have an element in table scope
5235     my $i;
5236     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5237     my $node = $self->{open_elements}->[$_];
5238     if ($node->[1] eq $token->{tag_name}) {
5239 wakaba 1.79 !!!cp ('t292');
5240 wakaba 1.52 $i = $_;
5241     last INSCOPE;
5242     } elsif ({
5243     table => 1, html => 1,
5244     }->{$node->[1]}) {
5245 wakaba 1.79 !!!cp ('t293');
5246 wakaba 1.52 last INSCOPE;
5247 wakaba 1.1 }
5248 wakaba 1.52 } # INSCOPE
5249     unless (defined $i) {
5250 wakaba 1.79 !!!cp ('t294');
5251 wakaba 1.52 ## Ignore the token
5252 wakaba 1.1 !!!next-token;
5253     redo B;
5254     }
5255 wakaba 1.52
5256     ## As if </select>
5257     ## have an element in table scope
5258     undef $i;
5259 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5260     my $node = $self->{open_elements}->[$_];
5261 wakaba 1.52 if ($node->[1] eq 'select') {
5262 wakaba 1.79 !!!cp ('t295');
5263 wakaba 1.1 $i = $_;
5264     last INSCOPE;
5265     } elsif ({
5266     table => 1, html => 1,
5267 wakaba 1.52 }->{$node->[1]}) {
5268 wakaba 1.83 ## ISSUE: Can this state be reached?
5269 wakaba 1.79 !!!cp ('t296');
5270 wakaba 1.52 last INSCOPE;
5271     }
5272     } # INSCOPE
5273     unless (defined $i) {
5274 wakaba 1.79 !!!cp ('t297');
5275 wakaba 1.83 ## TODO: Is the following error type correct?
5276 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:select', token => $token);
5277 wakaba 1.52 ## Ignore the </select> token
5278     !!!next-token; ## TODO: ok?
5279     redo B;
5280     }
5281    
5282 wakaba 1.79 !!!cp ('t298');
5283 wakaba 1.52 splice @{$self->{open_elements}}, $i;
5284    
5285     $self->_reset_insertion_mode;
5286    
5287     ## reprocess
5288     redo B;
5289 wakaba 1.58 } else {
5290 wakaba 1.79 !!!cp ('t299');
5291 wakaba 1.113 !!!parse-error (type => 'in select:/'.$token->{tag_name}, token => $token);
5292 wakaba 1.52 ## Ignore the token
5293     !!!next-token;
5294     redo B;
5295 wakaba 1.58 }
5296 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
5297     unless ($self->{open_elements}->[-1]->[1] eq 'html' and
5298     @{$self->{open_elements}} == 1) { # redundant, maybe
5299     !!!cp ('t299.1');
5300 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
5301 wakaba 1.104 } else {
5302     !!!cp ('t299.2');
5303     }
5304    
5305     ## Stop parsing.
5306     last B;
5307 wakaba 1.58 } else {
5308     die "$0: $token->{type}: Unknown token type";
5309     }
5310 wakaba 1.56 } elsif ($self->{insertion_mode} & BODY_AFTER_IMS) {
5311 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
5312 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5313     my $data = $1;
5314     ## As if in body
5315     $reconstruct_active_formatting_elements->($insert_to_current);
5316    
5317     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5318    
5319     unless (length $token->{data}) {
5320 wakaba 1.79 !!!cp ('t300');
5321 wakaba 1.52 !!!next-token;
5322     redo B;
5323     }
5324     }
5325    
5326 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
5327 wakaba 1.79 !!!cp ('t301');
5328 wakaba 1.113 !!!parse-error (type => 'after html:#character', token => $token);
5329 wakaba 1.52
5330 wakaba 1.84 ## Reprocess in the "after body" insertion mode.
5331 wakaba 1.79 } else {
5332     !!!cp ('t302');
5333 wakaba 1.52 }
5334    
5335     ## "after body" insertion mode
5336 wakaba 1.113 !!!parse-error (type => 'after body:#character', token => $token);
5337 wakaba 1.52
5338 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
5339 wakaba 1.52 ## reprocess
5340     redo B;
5341 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
5342 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
5343 wakaba 1.79 !!!cp ('t303');
5344 wakaba 1.113 !!!parse-error (type => 'after html:'.$token->{tag_name}, token => $token);
5345 wakaba 1.52
5346 wakaba 1.84 ## Reprocess in the "after body" insertion mode.
5347 wakaba 1.79 } else {
5348     !!!cp ('t304');
5349 wakaba 1.52 }
5350    
5351     ## "after body" insertion mode
5352 wakaba 1.113 !!!parse-error (type => 'after body:'.$token->{tag_name}, token => $token);
5353 wakaba 1.52
5354 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
5355 wakaba 1.52 ## reprocess
5356     redo B;
5357 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
5358 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
5359 wakaba 1.79 !!!cp ('t305');
5360 wakaba 1.113 !!!parse-error (type => 'after html:/'.$token->{tag_name}, token => $token);
5361 wakaba 1.52
5362 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
5363 wakaba 1.84 ## Reprocess in the "after body" insertion mode.
5364 wakaba 1.79 } else {
5365     !!!cp ('t306');
5366 wakaba 1.52 }
5367    
5368     ## "after body" insertion mode
5369     if ($token->{tag_name} eq 'html') {
5370     if (defined $self->{inner_html_node}) {
5371 wakaba 1.79 !!!cp ('t307');
5372 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:html', token => $token);
5373 wakaba 1.52 ## Ignore the token
5374     !!!next-token;
5375     redo B;
5376     } else {
5377 wakaba 1.79 !!!cp ('t308');
5378 wakaba 1.54 $self->{insertion_mode} = AFTER_HTML_BODY_IM;
5379 wakaba 1.52 !!!next-token;
5380     redo B;
5381     }
5382     } else {
5383 wakaba 1.79 !!!cp ('t309');
5384 wakaba 1.113 !!!parse-error (type => 'after body:/'.$token->{tag_name}, token => $token);
5385 wakaba 1.52
5386 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
5387 wakaba 1.52 ## reprocess
5388     redo B;
5389     }
5390 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
5391     !!!cp ('t309.2');
5392     ## Stop parsing
5393     last B;
5394 wakaba 1.52 } else {
5395     die "$0: $token->{type}: Unknown token type";
5396     }
5397 wakaba 1.56 } elsif ($self->{insertion_mode} & FRAME_IMS) {
5398 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
5399 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5400     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5401    
5402     unless (length $token->{data}) {
5403 wakaba 1.79 !!!cp ('t310');
5404 wakaba 1.52 !!!next-token;
5405     redo B;
5406     }
5407     }
5408    
5409     if ($token->{data} =~ s/^[^\x09\x0A\x0B\x0C\x20]+//) {
5410 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
5411 wakaba 1.79 !!!cp ('t311');
5412 wakaba 1.113 !!!parse-error (type => 'in frameset:#character', token => $token);
5413 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
5414 wakaba 1.79 !!!cp ('t312');
5415 wakaba 1.113 !!!parse-error (type => 'after frameset:#character', token => $token);
5416 wakaba 1.52 } else { # "after html frameset"
5417 wakaba 1.79 !!!cp ('t313');
5418 wakaba 1.113 !!!parse-error (type => 'after html:#character', token => $token);
5419 wakaba 1.52
5420 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
5421 wakaba 1.84 ## Reprocess in the "after frameset" insertion mode.
5422 wakaba 1.113 !!!parse-error (type => 'after frameset:#character', token => $token);
5423 wakaba 1.52 }
5424    
5425     ## Ignore the token.
5426     if (length $token->{data}) {
5427 wakaba 1.79 !!!cp ('t314');
5428 wakaba 1.52 ## reprocess the rest of characters
5429     } else {
5430 wakaba 1.79 !!!cp ('t315');
5431 wakaba 1.52 !!!next-token;
5432     }
5433     redo B;
5434     }
5435    
5436     die qq[$0: Character "$token->{data}"];
5437 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
5438 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
5439 wakaba 1.79 !!!cp ('t316');
5440 wakaba 1.113 !!!parse-error (type => 'after html:'.$token->{tag_name}, token => $token);
5441 wakaba 1.1
5442 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
5443 wakaba 1.84 ## Process in the "after frameset" insertion mode.
5444 wakaba 1.79 } else {
5445     !!!cp ('t317');
5446     }
5447 wakaba 1.1
5448 wakaba 1.52 if ($token->{tag_name} eq 'frameset' and
5449 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
5450 wakaba 1.79 !!!cp ('t318');
5451 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5452 wakaba 1.52 !!!next-token;
5453     redo B;
5454     } elsif ($token->{tag_name} eq 'frame' and
5455 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
5456 wakaba 1.79 !!!cp ('t319');
5457 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5458 wakaba 1.52 pop @{$self->{open_elements}};
5459     !!!next-token;
5460     redo B;
5461     } elsif ($token->{tag_name} eq 'noframes') {
5462 wakaba 1.79 !!!cp ('t320');
5463 wakaba 1.52 ## NOTE: As if in body.
5464 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
5465 wakaba 1.52 redo B;
5466     } else {
5467 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
5468 wakaba 1.79 !!!cp ('t321');
5469 wakaba 1.113 !!!parse-error (type => 'in frameset:'.$token->{tag_name}, token => $token);
5470 wakaba 1.52 } else {
5471 wakaba 1.79 !!!cp ('t322');
5472 wakaba 1.113 !!!parse-error (type => 'after frameset:'.$token->{tag_name}, token => $token);
5473 wakaba 1.52 }
5474     ## Ignore the token
5475     !!!next-token;
5476     redo B;
5477     }
5478 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
5479 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
5480 wakaba 1.79 !!!cp ('t323');
5481 wakaba 1.113 !!!parse-error (type => 'after html:/'.$token->{tag_name}, token => $token);
5482 wakaba 1.1
5483 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
5484 wakaba 1.84 ## Process in the "after frameset" insertion mode.
5485 wakaba 1.79 } else {
5486     !!!cp ('t324');
5487 wakaba 1.52 }
5488 wakaba 1.1
5489 wakaba 1.52 if ($token->{tag_name} eq 'frameset' and
5490 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
5491 wakaba 1.52 if ($self->{open_elements}->[-1]->[1] eq 'html' and
5492     @{$self->{open_elements}} == 1) {
5493 wakaba 1.79 !!!cp ('t325');
5494 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5495 wakaba 1.52 ## Ignore the token
5496     !!!next-token;
5497     } else {
5498 wakaba 1.79 !!!cp ('t326');
5499 wakaba 1.52 pop @{$self->{open_elements}};
5500     !!!next-token;
5501     }
5502 wakaba 1.47
5503 wakaba 1.52 if (not defined $self->{inner_html_node} and
5504     $self->{open_elements}->[-1]->[1] ne 'frameset') {
5505 wakaba 1.79 !!!cp ('t327');
5506 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
5507 wakaba 1.79 } else {
5508     !!!cp ('t328');
5509 wakaba 1.52 }
5510     redo B;
5511     } elsif ($token->{tag_name} eq 'html' and
5512 wakaba 1.54 $self->{insertion_mode} == AFTER_FRAMESET_IM) {
5513 wakaba 1.79 !!!cp ('t329');
5514 wakaba 1.54 $self->{insertion_mode} = AFTER_HTML_FRAMESET_IM;
5515 wakaba 1.52 !!!next-token;
5516     redo B;
5517     } else {
5518 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
5519 wakaba 1.79 !!!cp ('t330');
5520 wakaba 1.113 !!!parse-error (type => 'in frameset:/'.$token->{tag_name}, token => $token);
5521 wakaba 1.52 } else {
5522 wakaba 1.79 !!!cp ('t331');
5523 wakaba 1.113 !!!parse-error (type => 'after frameset:/'.$token->{tag_name}, token => $token);
5524 wakaba 1.52 }
5525     ## Ignore the token
5526     !!!next-token;
5527     redo B;
5528     }
5529 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
5530     unless ($self->{open_elements}->[-1]->[1] eq 'html' and
5531     @{$self->{open_elements}} == 1) { # redundant, maybe
5532     !!!cp ('t331.1');
5533 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
5534 wakaba 1.104 } else {
5535     !!!cp ('t331.2');
5536     }
5537    
5538     ## Stop parsing
5539     last B;
5540 wakaba 1.52 } else {
5541     die "$0: $token->{type}: Unknown token type";
5542     }
5543 wakaba 1.47
5544 wakaba 1.52 ## ISSUE: An issue in spec here
5545     } else {
5546     die "$0: $self->{insertion_mode}: Unknown insertion mode";
5547     }
5548 wakaba 1.47
5549 wakaba 1.52 ## "in body" insertion mode
5550 wakaba 1.55 if ($token->{type} == START_TAG_TOKEN) {
5551 wakaba 1.52 if ($token->{tag_name} eq 'script') {
5552 wakaba 1.79 !!!cp ('t332');
5553 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
5554 wakaba 1.100 $script_start_tag->();
5555 wakaba 1.53 redo B;
5556 wakaba 1.52 } elsif ($token->{tag_name} eq 'style') {
5557 wakaba 1.79 !!!cp ('t333');
5558 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
5559 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
5560 wakaba 1.53 redo B;
5561 wakaba 1.52 } elsif ({
5562     base => 1, link => 1,
5563     }->{$token->{tag_name}}) {
5564 wakaba 1.79 !!!cp ('t334');
5565 wakaba 1.52 ## NOTE: This is an "as if in head" code clone, only "-t" differs
5566 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
5567 wakaba 1.52 pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
5568     !!!next-token;
5569 wakaba 1.53 redo B;
5570 wakaba 1.52 } elsif ($token->{tag_name} eq 'meta') {
5571     ## NOTE: This is an "as if in head" code clone, only "-t" differs
5572 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
5573 wakaba 1.66 my $meta_el = pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
5574 wakaba 1.46
5575 wakaba 1.52 unless ($self->{confident}) {
5576     if ($token->{attributes}->{charset}) { ## TODO: And if supported
5577 wakaba 1.79 !!!cp ('t335');
5578 wakaba 1.63 $self->{change_encoding}
5579 wakaba 1.114 ->($self, $token->{attributes}->{charset}->{value}, $token);
5580 wakaba 1.66
5581     $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
5582     ->set_user_data (manakai_has_reference =>
5583     $token->{attributes}->{charset}
5584     ->{has_reference});
5585 wakaba 1.63 } elsif ($token->{attributes}->{content}) {
5586 wakaba 1.52 ## ISSUE: The algorithm name in the spec was incorrect, so it was not linked to its definition.
5587 wakaba 1.63 if ($token->{attributes}->{content}->{value}
5588 wakaba 1.70 =~ /\A[^;]*;[\x09-\x0D\x20]*[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
5589     [\x09-\x0D\x20]*=
5590 wakaba 1.52 [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
5591     ([^"'\x09-\x0D\x20][^\x09-\x0D\x20]*))/x) {
5592 wakaba 1.79 !!!cp ('t336');
5593 wakaba 1.63 $self->{change_encoding}
5594 wakaba 1.114 ->($self, defined $1 ? $1 : defined $2 ? $2 : $3, $token);
5595 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
5596     ->set_user_data (manakai_has_reference =>
5597     $token->{attributes}->{content}
5598     ->{has_reference});
5599 wakaba 1.63 }
5600 wakaba 1.52 }
5601 wakaba 1.66 } else {
5602     if ($token->{attributes}->{charset}) {
5603 wakaba 1.79 !!!cp ('t337');
5604 wakaba 1.66 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
5605     ->set_user_data (manakai_has_reference =>
5606     $token->{attributes}->{charset}
5607     ->{has_reference});
5608     }
5609 wakaba 1.68 if ($token->{attributes}->{content}) {
5610 wakaba 1.79 !!!cp ('t338');
5611 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
5612     ->set_user_data (manakai_has_reference =>
5613     $token->{attributes}->{content}
5614     ->{has_reference});
5615     }
5616 wakaba 1.52 }
5617 wakaba 1.1
5618 wakaba 1.52 !!!next-token;
5619 wakaba 1.53 redo B;
5620 wakaba 1.52 } elsif ($token->{tag_name} eq 'title') {
5621 wakaba 1.79 !!!cp ('t341');
5622 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
5623 wakaba 1.96 $parse_rcdata->(RCDATA_CONTENT_MODEL);
5624 wakaba 1.53 redo B;
5625 wakaba 1.52 } elsif ($token->{tag_name} eq 'body') {
5626 wakaba 1.113 !!!parse-error (type => 'in body:body', token => $token);
5627 wakaba 1.46
5628 wakaba 1.52 if (@{$self->{open_elements}} == 1 or
5629     $self->{open_elements}->[1]->[1] ne 'body') {
5630 wakaba 1.79 !!!cp ('t342');
5631 wakaba 1.52 ## Ignore the token
5632     } else {
5633     my $body_el = $self->{open_elements}->[1]->[0];
5634     for my $attr_name (keys %{$token->{attributes}}) {
5635     unless ($body_el->has_attribute_ns (undef, $attr_name)) {
5636 wakaba 1.79 !!!cp ('t343');
5637 wakaba 1.52 $body_el->set_attribute_ns
5638     (undef, [undef, $attr_name],
5639     $token->{attributes}->{$attr_name}->{value});
5640     }
5641     }
5642     }
5643     !!!next-token;
5644 wakaba 1.53 redo B;
5645 wakaba 1.52 } elsif ({
5646     address => 1, blockquote => 1, center => 1, dir => 1,
5647 wakaba 1.85 div => 1, dl => 1, fieldset => 1,
5648     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
5649 wakaba 1.97 menu => 1, ol => 1, p => 1, ul => 1,
5650     pre => 1, listing => 1,
5651 wakaba 1.109 form => 1,
5652     table => 1,
5653     hr => 1,
5654 wakaba 1.52 }->{$token->{tag_name}}) {
5655 wakaba 1.109 if ($token->{tag_name} eq 'form' and defined $self->{form_element}) {
5656     !!!cp ('t350');
5657 wakaba 1.113 !!!parse-error (type => 'in form:form', token => $token);
5658 wakaba 1.109 ## Ignore the token
5659     !!!next-token;
5660     redo B;
5661     }
5662    
5663 wakaba 1.52 ## has a p element in scope
5664     INSCOPE: for (reverse @{$self->{open_elements}}) {
5665     if ($_->[1] eq 'p') {
5666 wakaba 1.79 !!!cp ('t344');
5667 wakaba 1.52 !!!back-token;
5668 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'p',
5669     line => $token->{line}, column => $token->{column}};
5670 wakaba 1.53 redo B;
5671 wakaba 1.52 } elsif ({
5672 wakaba 1.103 applet => 1, table => 1, caption => 1, td => 1, th => 1,
5673 wakaba 1.52 button => 1, marquee => 1, object => 1, html => 1,
5674     }->{$_->[1]}) {
5675 wakaba 1.79 !!!cp ('t345');
5676 wakaba 1.52 last INSCOPE;
5677     }
5678     } # INSCOPE
5679    
5680 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
5681 wakaba 1.97 if ($token->{tag_name} eq 'pre' or $token->{tag_name} eq 'listing') {
5682 wakaba 1.52 !!!next-token;
5683 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
5684 wakaba 1.52 $token->{data} =~ s/^\x0A//;
5685     unless (length $token->{data}) {
5686 wakaba 1.79 !!!cp ('t346');
5687 wakaba 1.1 !!!next-token;
5688 wakaba 1.79 } else {
5689     !!!cp ('t349');
5690 wakaba 1.52 }
5691 wakaba 1.79 } else {
5692     !!!cp ('t348');
5693 wakaba 1.52 }
5694 wakaba 1.109 } elsif ($token->{tag_name} eq 'form') {
5695     !!!cp ('t347.1');
5696     $self->{form_element} = $self->{open_elements}->[-1]->[0];
5697    
5698     !!!next-token;
5699     } elsif ($token->{tag_name} eq 'table') {
5700     !!!cp ('t382');
5701     push @{$open_tables}, [$self->{open_elements}->[-1]->[0]];
5702    
5703     $self->{insertion_mode} = IN_TABLE_IM;
5704    
5705     !!!next-token;
5706     } elsif ($token->{tag_name} eq 'hr') {
5707     !!!cp ('t386');
5708     pop @{$self->{open_elements}};
5709    
5710     !!!next-token;
5711 wakaba 1.52 } else {
5712 wakaba 1.79 !!!cp ('t347');
5713 wakaba 1.52 !!!next-token;
5714     }
5715 wakaba 1.53 redo B;
5716 wakaba 1.109 } elsif ({li => 1, dt => 1, dd => 1}->{$token->{tag_name}}) {
5717 wakaba 1.52 ## has a p element in scope
5718     INSCOPE: for (reverse @{$self->{open_elements}}) {
5719     if ($_->[1] eq 'p') {
5720 wakaba 1.79 !!!cp ('t353');
5721 wakaba 1.52 !!!back-token;
5722 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'p',
5723     line => $token->{line}, column => $token->{column}};
5724 wakaba 1.53 redo B;
5725 wakaba 1.52 } elsif ({
5726 wakaba 1.103 applet => 1, table => 1, caption => 1, td => 1, th => 1,
5727 wakaba 1.52 button => 1, marquee => 1, object => 1, html => 1,
5728     }->{$_->[1]}) {
5729 wakaba 1.79 !!!cp ('t354');
5730 wakaba 1.52 last INSCOPE;
5731     }
5732     } # INSCOPE
5733    
5734     ## Step 1
5735     my $i = -1;
5736     my $node = $self->{open_elements}->[$i];
5737 wakaba 1.109 my $li_or_dtdd = {li => {li => 1},
5738     dt => {dt => 1, dd => 1},
5739     dd => {dt => 1, dd => 1}}->{$token->{tag_name}};
5740 wakaba 1.52 LI: {
5741     ## Step 2
5742 wakaba 1.109 if ($li_or_dtdd->{$node->[1]}) {
5743 wakaba 1.52 if ($i != -1) {
5744 wakaba 1.79 !!!cp ('t355');
5745 wakaba 1.52 !!!parse-error (type => 'end tag missing:'.
5746 wakaba 1.113 $self->{open_elements}->[-1]->[1], token => $token);
5747 wakaba 1.79 } else {
5748     !!!cp ('t356');
5749 wakaba 1.52 }
5750     splice @{$self->{open_elements}}, $i;
5751     last LI;
5752 wakaba 1.79 } else {
5753     !!!cp ('t357');
5754 wakaba 1.52 }
5755    
5756     ## Step 3
5757     if (not $formatting_category->{$node->[1]} and
5758     #not $phrasing_category->{$node->[1]} and
5759     ($special_category->{$node->[1]} or
5760     $scoping_category->{$node->[1]}) and
5761     $node->[1] ne 'address' and $node->[1] ne 'div') {
5762 wakaba 1.79 !!!cp ('t358');
5763 wakaba 1.52 last LI;
5764     }
5765    
5766 wakaba 1.79 !!!cp ('t359');
5767 wakaba 1.52 ## Step 4
5768     $i--;
5769     $node = $self->{open_elements}->[$i];
5770     redo LI;
5771     } # LI
5772    
5773 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
5774 wakaba 1.52 !!!next-token;
5775 wakaba 1.53 redo B;
5776 wakaba 1.52 } elsif ($token->{tag_name} eq 'plaintext') {
5777     ## has a p element in scope
5778     INSCOPE: for (reverse @{$self->{open_elements}}) {
5779     if ($_->[1] eq 'p') {
5780 wakaba 1.79 !!!cp ('t367');
5781 wakaba 1.52 !!!back-token;
5782 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'p',
5783     line => $token->{line}, column => $token->{column}};
5784 wakaba 1.53 redo B;
5785 wakaba 1.52 } elsif ({
5786 wakaba 1.103 applet => 1, table => 1, caption => 1, td => 1, th => 1,
5787 wakaba 1.52 button => 1, marquee => 1, object => 1, html => 1,
5788     }->{$_->[1]}) {
5789 wakaba 1.79 !!!cp ('t368');
5790 wakaba 1.52 last INSCOPE;
5791 wakaba 1.46 }
5792 wakaba 1.52 } # INSCOPE
5793    
5794 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
5795 wakaba 1.52
5796     $self->{content_model} = PLAINTEXT_CONTENT_MODEL;
5797    
5798     !!!next-token;
5799 wakaba 1.53 redo B;
5800 wakaba 1.52 } elsif ($token->{tag_name} eq 'a') {
5801     AFE: for my $i (reverse 0..$#$active_formatting_elements) {
5802     my $node = $active_formatting_elements->[$i];
5803     if ($node->[1] eq 'a') {
5804 wakaba 1.79 !!!cp ('t371');
5805 wakaba 1.113 !!!parse-error (type => 'in a:a', token => $token);
5806 wakaba 1.52
5807     !!!back-token;
5808 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'a',
5809     line => $token->{line}, column => $token->{column}};
5810 wakaba 1.113 $formatting_end_tag->($token);
5811 wakaba 1.52
5812     AFE2: for (reverse 0..$#$active_formatting_elements) {
5813     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
5814 wakaba 1.79 !!!cp ('t372');
5815 wakaba 1.52 splice @$active_formatting_elements, $_, 1;
5816     last AFE2;
5817 wakaba 1.1 }
5818 wakaba 1.52 } # AFE2
5819     OE: for (reverse 0..$#{$self->{open_elements}}) {
5820     if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
5821 wakaba 1.79 !!!cp ('t373');
5822 wakaba 1.52 splice @{$self->{open_elements}}, $_, 1;
5823     last OE;
5824 wakaba 1.1 }
5825 wakaba 1.52 } # OE
5826     last AFE;
5827     } elsif ($node->[0] eq '#marker') {
5828 wakaba 1.79 !!!cp ('t374');
5829 wakaba 1.52 last AFE;
5830     }
5831     } # AFE
5832    
5833     $reconstruct_active_formatting_elements->($insert_to_current);
5834 wakaba 1.1
5835 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
5836 wakaba 1.52 push @$active_formatting_elements, $self->{open_elements}->[-1];
5837 wakaba 1.1
5838 wakaba 1.52 !!!next-token;
5839 wakaba 1.53 redo B;
5840 wakaba 1.52 } elsif ($token->{tag_name} eq 'nobr') {
5841     $reconstruct_active_formatting_elements->($insert_to_current);
5842 wakaba 1.1
5843 wakaba 1.52 ## has a |nobr| element in scope
5844     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5845     my $node = $self->{open_elements}->[$_];
5846     if ($node->[1] eq 'nobr') {
5847 wakaba 1.79 !!!cp ('t376');
5848 wakaba 1.113 !!!parse-error (type => 'in nobr:nobr', token => $token);
5849 wakaba 1.52 !!!back-token;
5850 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'nobr',
5851     line => $token->{line}, column => $token->{column}};
5852 wakaba 1.53 redo B;
5853 wakaba 1.52 } elsif ({
5854 wakaba 1.103 applet => 1, table => 1, caption => 1, td => 1, th => 1,
5855 wakaba 1.52 button => 1, marquee => 1, object => 1, html => 1,
5856     }->{$node->[1]}) {
5857 wakaba 1.79 !!!cp ('t377');
5858 wakaba 1.52 last INSCOPE;
5859     }
5860     } # INSCOPE
5861    
5862 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
5863 wakaba 1.52 push @$active_formatting_elements, $self->{open_elements}->[-1];
5864    
5865     !!!next-token;
5866 wakaba 1.53 redo B;
5867 wakaba 1.52 } elsif ($token->{tag_name} eq 'button') {
5868     ## has a button element in scope
5869     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5870     my $node = $self->{open_elements}->[$_];
5871     if ($node->[1] eq 'button') {
5872 wakaba 1.79 !!!cp ('t378');
5873 wakaba 1.113 !!!parse-error (type => 'in button:button', token => $token);
5874 wakaba 1.52 !!!back-token;
5875 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'button',
5876     line => $token->{line}, column => $token->{column}};
5877 wakaba 1.53 redo B;
5878 wakaba 1.52 } elsif ({
5879 wakaba 1.103 applet => 1, table => 1, caption => 1, td => 1, th => 1,
5880 wakaba 1.52 button => 1, marquee => 1, object => 1, html => 1,
5881     }->{$node->[1]}) {
5882 wakaba 1.79 !!!cp ('t379');
5883 wakaba 1.52 last INSCOPE;
5884     }
5885     } # INSCOPE
5886    
5887     $reconstruct_active_formatting_elements->($insert_to_current);
5888    
5889 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
5890 wakaba 1.85
5891     ## TODO: associate with $self->{form_element} if defined
5892    
5893 wakaba 1.52 push @$active_formatting_elements, ['#marker', ''];
5894 wakaba 1.1
5895 wakaba 1.52 !!!next-token;
5896 wakaba 1.53 redo B;
5897 wakaba 1.103 } elsif ({
5898 wakaba 1.109 xmp => 1,
5899     iframe => 1,
5900     noembed => 1,
5901     noframes => 1,
5902     noscript => 0, ## TODO: 1 if scripting is enabled
5903 wakaba 1.103 }->{$token->{tag_name}}) {
5904 wakaba 1.109 if ($token->{tag_name} eq 'xmp') {
5905     !!!cp ('t381');
5906     $reconstruct_active_formatting_elements->($insert_to_current);
5907     } else {
5908     !!!cp ('t399');
5909     }
5910     ## NOTE: There is an "as if in body" code clone.
5911 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
5912 wakaba 1.53 redo B;
5913 wakaba 1.52 } elsif ($token->{tag_name} eq 'isindex') {
5914 wakaba 1.113 !!!parse-error (type => 'isindex', token => $token);
5915 wakaba 1.52
5916     if (defined $self->{form_element}) {
5917 wakaba 1.79 !!!cp ('t389');
5918 wakaba 1.52 ## Ignore the token
5919     !!!next-token;
5920 wakaba 1.53 redo B;
5921 wakaba 1.52 } else {
5922     my $at = $token->{attributes};
5923     my $form_attrs;
5924     $form_attrs->{action} = $at->{action} if $at->{action};
5925     my $prompt_attr = $at->{prompt};
5926     $at->{name} = {name => 'name', value => 'isindex'};
5927     delete $at->{action};
5928     delete $at->{prompt};
5929     my @tokens = (
5930 wakaba 1.55 {type => START_TAG_TOKEN, tag_name => 'form',
5931 wakaba 1.114 attributes => $form_attrs,
5932     line => $token->{line}, column => $token->{column}},
5933     {type => START_TAG_TOKEN, tag_name => 'hr',
5934     line => $token->{line}, column => $token->{column}},
5935     {type => START_TAG_TOKEN, tag_name => 'p',
5936     line => $token->{line}, column => $token->{column}},
5937     {type => START_TAG_TOKEN, tag_name => 'label',
5938     line => $token->{line}, column => $token->{column}},
5939 wakaba 1.52 );
5940     if ($prompt_attr) {
5941 wakaba 1.79 !!!cp ('t390');
5942 wakaba 1.114 push @tokens, {type => CHARACTER_TOKEN, data => $prompt_attr->{value},
5943 wakaba 1.118 #line => $token->{line}, column => $token->{column},
5944     };
5945 wakaba 1.1 } else {
5946 wakaba 1.79 !!!cp ('t391');
5947 wakaba 1.55 push @tokens, {type => CHARACTER_TOKEN,
5948 wakaba 1.114 data => 'This is a searchable index. Insert your search keywords here: ',
5949 wakaba 1.118 #line => $token->{line}, column => $token->{column},
5950     }; # SHOULD
5951 wakaba 1.52 ## TODO: make this configurable
5952 wakaba 1.1 }
5953 wakaba 1.52 push @tokens,
5954 wakaba 1.114 {type => START_TAG_TOKEN, tag_name => 'input', attributes => $at,
5955     line => $token->{line}, column => $token->{column}},
5956 wakaba 1.55 #{type => CHARACTER_TOKEN, data => ''}, # SHOULD
5957 wakaba 1.114 {type => END_TAG_TOKEN, tag_name => 'label',
5958     line => $token->{line}, column => $token->{column}},
5959     {type => END_TAG_TOKEN, tag_name => 'p',
5960     line => $token->{line}, column => $token->{column}},
5961     {type => START_TAG_TOKEN, tag_name => 'hr',
5962     line => $token->{line}, column => $token->{column}},
5963     {type => END_TAG_TOKEN, tag_name => 'form',
5964     line => $token->{line}, column => $token->{column}};
5965 wakaba 1.52 $token = shift @tokens;
5966     !!!back-token (@tokens);
5967 wakaba 1.53 redo B;
5968 wakaba 1.52 }
5969     } elsif ($token->{tag_name} eq 'textarea') {
5970     my $tag_name = $token->{tag_name};
5971     my $el;
5972 wakaba 1.116 !!!create-element ($el, $token->{tag_name}, $token->{attributes}, $token);
5973 wakaba 1.52
5974     ## TODO: $self->{form_element} if defined
5975     $self->{content_model} = RCDATA_CONTENT_MODEL;
5976     delete $self->{escape}; # MUST
5977    
5978     $insert->($el);
5979    
5980     my $text = '';
5981     !!!next-token;
5982 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
5983 wakaba 1.52 $token->{data} =~ s/^\x0A//;
5984 wakaba 1.51 unless (length $token->{data}) {
5985 wakaba 1.79 !!!cp ('t392');
5986 wakaba 1.51 !!!next-token;
5987 wakaba 1.79 } else {
5988     !!!cp ('t393');
5989 wakaba 1.51 }
5990 wakaba 1.79 } else {
5991     !!!cp ('t394');
5992 wakaba 1.51 }
5993 wakaba 1.55 while ($token->{type} == CHARACTER_TOKEN) {
5994 wakaba 1.79 !!!cp ('t395');
5995 wakaba 1.52 $text .= $token->{data};
5996     !!!next-token;
5997     }
5998     if (length $text) {
5999 wakaba 1.79 !!!cp ('t396');
6000 wakaba 1.52 $el->manakai_append_text ($text);
6001     }
6002    
6003     $self->{content_model} = PCDATA_CONTENT_MODEL;
6004 wakaba 1.51
6005 wakaba 1.55 if ($token->{type} == END_TAG_TOKEN and
6006 wakaba 1.52 $token->{tag_name} eq $tag_name) {
6007 wakaba 1.79 !!!cp ('t397');
6008 wakaba 1.52 ## Ignore the token
6009     } else {
6010 wakaba 1.79 !!!cp ('t398');
6011 wakaba 1.113 !!!parse-error (type => 'in RCDATA:#'.$token->{type}, token => $token);
6012 wakaba 1.51 }
6013 wakaba 1.52 !!!next-token;
6014 wakaba 1.53 redo B;
6015 wakaba 1.52 } elsif ({
6016     caption => 1, col => 1, colgroup => 1, frame => 1,
6017     frameset => 1, head => 1, option => 1, optgroup => 1,
6018     tbody => 1, td => 1, tfoot => 1, th => 1,
6019     thead => 1, tr => 1,
6020     }->{$token->{tag_name}}) {
6021 wakaba 1.79 !!!cp ('t401');
6022 wakaba 1.113 !!!parse-error (type => 'in body:'.$token->{tag_name}, token => $token);
6023 wakaba 1.52 ## Ignore the token
6024     !!!next-token;
6025 wakaba 1.53 redo B;
6026 wakaba 1.52
6027     ## ISSUE: An issue on HTML5 new elements in the spec.
6028     } else {
6029 wakaba 1.110 if ($token->{tag_name} eq 'image') {
6030     !!!cp ('t384');
6031 wakaba 1.113 !!!parse-error (type => 'image', token => $token);
6032 wakaba 1.110 $token->{tag_name} = 'img';
6033     } else {
6034     !!!cp ('t385');
6035     }
6036    
6037     ## NOTE: There is an "as if <br>" code clone.
6038 wakaba 1.52 $reconstruct_active_formatting_elements->($insert_to_current);
6039    
6040 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6041 wakaba 1.109
6042 wakaba 1.110 if ({
6043     applet => 1, marquee => 1, object => 1,
6044     }->{$token->{tag_name}}) {
6045     !!!cp ('t380');
6046     push @$active_formatting_elements, ['#marker', ''];
6047     } elsif ({
6048     b => 1, big => 1, em => 1, font => 1, i => 1,
6049     s => 1, small => 1, strile => 1,
6050     strong => 1, tt => 1, u => 1,
6051     }->{$token->{tag_name}}) {
6052     !!!cp ('t375');
6053     push @$active_formatting_elements, $self->{open_elements}->[-1];
6054     } elsif ($token->{tag_name} eq 'input') {
6055     !!!cp ('t388');
6056     ## TODO: associate with $self->{form_element} if defined
6057     pop @{$self->{open_elements}};
6058     } elsif ({
6059     area => 1, basefont => 1, bgsound => 1, br => 1,
6060     embed => 1, img => 1, param => 1, spacer => 1, wbr => 1,
6061     #image => 1,
6062     }->{$token->{tag_name}}) {
6063     !!!cp ('t388.1');
6064     pop @{$self->{open_elements}};
6065     } elsif ($token->{tag_name} eq 'select') {
6066 wakaba 1.109 ## TODO: associate with $self->{form_element} if defined
6067    
6068     if ($self->{insertion_mode} & TABLE_IMS or
6069     $self->{insertion_mode} & BODY_TABLE_IMS or
6070     $self->{insertion_mode} == IN_COLUMN_GROUP_IM) {
6071     !!!cp ('t400.1');
6072     $self->{insertion_mode} = IN_SELECT_IN_TABLE_IM;
6073     } else {
6074     !!!cp ('t400.2');
6075     $self->{insertion_mode} = IN_SELECT_IM;
6076     }
6077 wakaba 1.110 } else {
6078     !!!cp ('t402');
6079 wakaba 1.109 }
6080 wakaba 1.51
6081 wakaba 1.52 !!!next-token;
6082 wakaba 1.53 redo B;
6083 wakaba 1.52 }
6084 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
6085 wakaba 1.52 if ($token->{tag_name} eq 'body') {
6086 wakaba 1.107 ## has a |body| element in scope
6087     my $i;
6088 wakaba 1.111 INSCOPE: {
6089     for (reverse @{$self->{open_elements}}) {
6090     if ($_->[1] eq 'body') {
6091     !!!cp ('t405');
6092     $i = $_;
6093     last INSCOPE;
6094     } elsif ({
6095     applet => 1, table => 1, caption => 1, td => 1, th => 1,
6096     button => 1, marquee => 1, object => 1, html => 1,
6097     }->{$_->[1]}) {
6098     !!!cp ('t405.1');
6099     last;
6100     }
6101 wakaba 1.52 }
6102 wakaba 1.111
6103     !!!parse-error (type => 'start tag not allowed',
6104 wakaba 1.113 value => $token->{tag_name}, token => $token);
6105 wakaba 1.107 ## NOTE: Ignore the token.
6106 wakaba 1.52 !!!next-token;
6107 wakaba 1.53 redo B;
6108 wakaba 1.111 } # INSCOPE
6109 wakaba 1.107
6110     for (@{$self->{open_elements}}) {
6111     unless ({
6112     dd => 1, dt => 1, li => 1, p => 1, td => 1,
6113     th => 1, tr => 1, body => 1, html => 1,
6114     tbody => 1, tfoot => 1, thead => 1,
6115     }->{$_->[1]}) {
6116     !!!cp ('t403');
6117 wakaba 1.113 !!!parse-error (type => 'not closed:'.$_->[1], token => $token);
6118 wakaba 1.107 last;
6119     } else {
6120     !!!cp ('t404');
6121     }
6122     }
6123    
6124     $self->{insertion_mode} = AFTER_BODY_IM;
6125     !!!next-token;
6126     redo B;
6127 wakaba 1.52 } elsif ($token->{tag_name} eq 'html') {
6128     if (@{$self->{open_elements}} > 1 and $self->{open_elements}->[1]->[1] eq 'body') {
6129     ## ISSUE: There is an issue in the spec.
6130     if ($self->{open_elements}->[-1]->[1] ne 'body') {
6131 wakaba 1.79 !!!cp ('t406');
6132 wakaba 1.113 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[1]->[1], token => $token);
6133 wakaba 1.79 } else {
6134     !!!cp ('t407');
6135 wakaba 1.1 }
6136 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
6137 wakaba 1.52 ## reprocess
6138 wakaba 1.53 redo B;
6139 wakaba 1.51 } else {
6140 wakaba 1.79 !!!cp ('t408');
6141 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6142 wakaba 1.52 ## Ignore the token
6143     !!!next-token;
6144 wakaba 1.53 redo B;
6145 wakaba 1.51 }
6146 wakaba 1.52 } elsif ({
6147     address => 1, blockquote => 1, center => 1, dir => 1,
6148     div => 1, dl => 1, fieldset => 1, listing => 1,
6149     menu => 1, ol => 1, pre => 1, ul => 1,
6150     dd => 1, dt => 1, li => 1,
6151 wakaba 1.103 applet => 1, button => 1, marquee => 1, object => 1,
6152 wakaba 1.52 }->{$token->{tag_name}}) {
6153     ## has an element in scope
6154     my $i;
6155     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6156     my $node = $self->{open_elements}->[$_];
6157     if ($node->[1] eq $token->{tag_name}) {
6158 wakaba 1.79 !!!cp ('t410');
6159 wakaba 1.52 $i = $_;
6160 wakaba 1.87 last INSCOPE;
6161 wakaba 1.52 } elsif ({
6162 wakaba 1.103 applet => 1, table => 1, caption => 1, td => 1, th => 1,
6163 wakaba 1.52 button => 1, marquee => 1, object => 1, html => 1,
6164     }->{$node->[1]}) {
6165 wakaba 1.79 !!!cp ('t411');
6166 wakaba 1.52 last INSCOPE;
6167 wakaba 1.51 }
6168 wakaba 1.52 } # INSCOPE
6169 wakaba 1.89
6170     unless (defined $i) { # has an element in scope
6171     !!!cp ('t413');
6172 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6173 wakaba 1.89 } else {
6174     ## Step 1. generate implied end tags
6175     while ({
6176     dd => ($token->{tag_name} ne 'dd'),
6177     dt => ($token->{tag_name} ne 'dt'),
6178     li => ($token->{tag_name} ne 'li'),
6179     p => 1,
6180     }->{$self->{open_elements}->[-1]->[1]}) {
6181     !!!cp ('t409');
6182     pop @{$self->{open_elements}};
6183     }
6184    
6185     ## Step 2.
6186     if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
6187 wakaba 1.79 !!!cp ('t412');
6188 wakaba 1.113 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1], token => $token);
6189 wakaba 1.51 } else {
6190 wakaba 1.89 !!!cp ('t414');
6191 wakaba 1.51 }
6192 wakaba 1.89
6193     ## Step 3.
6194 wakaba 1.52 splice @{$self->{open_elements}}, $i;
6195 wakaba 1.89
6196     ## Step 4.
6197     $clear_up_to_marker->()
6198     if {
6199 wakaba 1.103 applet => 1, button => 1, marquee => 1, object => 1,
6200 wakaba 1.89 }->{$token->{tag_name}};
6201 wakaba 1.51 }
6202 wakaba 1.52 !!!next-token;
6203 wakaba 1.53 redo B;
6204 wakaba 1.52 } elsif ($token->{tag_name} eq 'form') {
6205 wakaba 1.92 undef $self->{form_element};
6206    
6207 wakaba 1.52 ## has an element in scope
6208 wakaba 1.92 my $i;
6209 wakaba 1.52 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6210     my $node = $self->{open_elements}->[$_];
6211     if ($node->[1] eq $token->{tag_name}) {
6212 wakaba 1.79 !!!cp ('t418');
6213 wakaba 1.92 $i = $_;
6214 wakaba 1.52 last INSCOPE;
6215     } elsif ({
6216 wakaba 1.103 applet => 1, table => 1, caption => 1, td => 1, th => 1,
6217 wakaba 1.52 button => 1, marquee => 1, object => 1, html => 1,
6218     }->{$node->[1]}) {
6219 wakaba 1.79 !!!cp ('t419');
6220 wakaba 1.52 last INSCOPE;
6221     }
6222     } # INSCOPE
6223 wakaba 1.92
6224     unless (defined $i) { # has an element in scope
6225 wakaba 1.79 !!!cp ('t421');
6226 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6227 wakaba 1.92 } else {
6228     ## Step 1. generate implied end tags
6229     while ({
6230     dd => 1, dt => 1, li => 1, p => 1,
6231     }->{$self->{open_elements}->[-1]->[1]}) {
6232     !!!cp ('t417');
6233     pop @{$self->{open_elements}};
6234     }
6235    
6236     ## Step 2.
6237     if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
6238     !!!cp ('t417.1');
6239 wakaba 1.113 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1], token => $token);
6240 wakaba 1.92 } else {
6241     !!!cp ('t420');
6242     }
6243    
6244     ## Step 3.
6245     splice @{$self->{open_elements}}, $i;
6246 wakaba 1.52 }
6247    
6248     !!!next-token;
6249 wakaba 1.53 redo B;
6250 wakaba 1.52 } elsif ({
6251     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
6252     }->{$token->{tag_name}}) {
6253     ## has an element in scope
6254     my $i;
6255     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6256     my $node = $self->{open_elements}->[$_];
6257     if ({
6258     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
6259     }->{$node->[1]}) {
6260 wakaba 1.79 !!!cp ('t423');
6261 wakaba 1.52 $i = $_;
6262     last INSCOPE;
6263     } elsif ({
6264 wakaba 1.103 applet => 1, table => 1, caption => 1, td => 1, th => 1,
6265 wakaba 1.52 button => 1, marquee => 1, object => 1, html => 1,
6266     }->{$node->[1]}) {
6267 wakaba 1.79 !!!cp ('t424');
6268 wakaba 1.52 last INSCOPE;
6269 wakaba 1.51 }
6270 wakaba 1.52 } # INSCOPE
6271 wakaba 1.93
6272     unless (defined $i) { # has an element in scope
6273     !!!cp ('t425.1');
6274 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6275 wakaba 1.79 } else {
6276 wakaba 1.93 ## Step 1. generate implied end tags
6277     while ({
6278     dd => 1, dt => 1, li => 1, p => 1,
6279     }->{$self->{open_elements}->[-1]->[1]}) {
6280     !!!cp ('t422');
6281     pop @{$self->{open_elements}};
6282     }
6283    
6284     ## Step 2.
6285     if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
6286     !!!cp ('t425');
6287 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6288 wakaba 1.93 } else {
6289     !!!cp ('t426');
6290     }
6291    
6292     ## Step 3.
6293     splice @{$self->{open_elements}}, $i;
6294 wakaba 1.36 }
6295 wakaba 1.52
6296     !!!next-token;
6297 wakaba 1.53 redo B;
6298 wakaba 1.87 } elsif ($token->{tag_name} eq 'p') {
6299     ## has an element in scope
6300     my $i;
6301     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6302     my $node = $self->{open_elements}->[$_];
6303     if ($node->[1] eq $token->{tag_name}) {
6304     !!!cp ('t410.1');
6305     $i = $_;
6306 wakaba 1.88 last INSCOPE;
6307 wakaba 1.87 } elsif ({
6308 wakaba 1.103 applet => 1, table => 1, caption => 1, td => 1, th => 1,
6309 wakaba 1.87 button => 1, marquee => 1, object => 1, html => 1,
6310     }->{$node->[1]}) {
6311     !!!cp ('t411.1');
6312     last INSCOPE;
6313     }
6314     } # INSCOPE
6315 wakaba 1.91
6316     if (defined $i) {
6317     if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
6318 wakaba 1.87 !!!cp ('t412.1');
6319 wakaba 1.113 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1], token => $token);
6320 wakaba 1.87 } else {
6321 wakaba 1.91 !!!cp ('t414.1');
6322 wakaba 1.87 }
6323 wakaba 1.91
6324 wakaba 1.87 splice @{$self->{open_elements}}, $i;
6325     } else {
6326 wakaba 1.91 !!!cp ('t413.1');
6327 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6328 wakaba 1.91
6329 wakaba 1.87 !!!cp ('t415.1');
6330     ## As if <p>, then reprocess the current token
6331     my $el;
6332 wakaba 1.116 !!!create-element ($el, 'p',, $token);
6333 wakaba 1.87 $insert->($el);
6334 wakaba 1.91 ## NOTE: Not inserted into |$self->{open_elements}|.
6335 wakaba 1.87 }
6336 wakaba 1.91
6337 wakaba 1.87 !!!next-token;
6338     redo B;
6339 wakaba 1.52 } elsif ({
6340     a => 1,
6341     b => 1, big => 1, em => 1, font => 1, i => 1,
6342     nobr => 1, s => 1, small => 1, strile => 1,
6343     strong => 1, tt => 1, u => 1,
6344     }->{$token->{tag_name}}) {
6345 wakaba 1.79 !!!cp ('t427');
6346 wakaba 1.113 $formatting_end_tag->($token);
6347 wakaba 1.53 redo B;
6348 wakaba 1.52 } elsif ($token->{tag_name} eq 'br') {
6349 wakaba 1.79 !!!cp ('t428');
6350 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:br', token => $token);
6351 wakaba 1.52
6352     ## As if <br>
6353     $reconstruct_active_formatting_elements->($insert_to_current);
6354    
6355     my $el;
6356 wakaba 1.116 !!!create-element ($el, 'br',, $token);
6357 wakaba 1.52 $insert->($el);
6358    
6359     ## Ignore the token.
6360     !!!next-token;
6361 wakaba 1.53 redo B;
6362 wakaba 1.52 } elsif ({
6363     caption => 1, col => 1, colgroup => 1, frame => 1,
6364     frameset => 1, head => 1, option => 1, optgroup => 1,
6365     tbody => 1, td => 1, tfoot => 1, th => 1,
6366     thead => 1, tr => 1,
6367     area => 1, basefont => 1, bgsound => 1,
6368     embed => 1, hr => 1, iframe => 1, image => 1,
6369     img => 1, input => 1, isindex => 1, noembed => 1,
6370     noframes => 1, param => 1, select => 1, spacer => 1,
6371     table => 1, textarea => 1, wbr => 1,
6372     noscript => 0, ## TODO: if scripting is enabled
6373     }->{$token->{tag_name}}) {
6374 wakaba 1.79 !!!cp ('t429');
6375 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6376 wakaba 1.52 ## Ignore the token
6377     !!!next-token;
6378 wakaba 1.53 redo B;
6379 wakaba 1.52
6380     ## ISSUE: Issue on HTML5 new elements in spec
6381    
6382     } else {
6383     ## Step 1
6384     my $node_i = -1;
6385     my $node = $self->{open_elements}->[$node_i];
6386 wakaba 1.51
6387 wakaba 1.52 ## Step 2
6388     S2: {
6389     if ($node->[1] eq $token->{tag_name}) {
6390     ## Step 1
6391     ## generate implied end tags
6392 wakaba 1.86 while ({
6393     dd => 1, dt => 1, li => 1, p => 1,
6394     }->{$self->{open_elements}->[-1]->[1]}) {
6395 wakaba 1.79 !!!cp ('t430');
6396 wakaba 1.83 ## ISSUE: Can this case be reached?
6397 wakaba 1.86 pop @{$self->{open_elements}};
6398 wakaba 1.52 }
6399    
6400     ## Step 2
6401     if ($token->{tag_name} ne $self->{open_elements}->[-1]->[1]) {
6402 wakaba 1.79 !!!cp ('t431');
6403 wakaba 1.58 ## NOTE: <x><y></x>
6404 wakaba 1.113 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1], token => $token);
6405 wakaba 1.79 } else {
6406     !!!cp ('t432');
6407 wakaba 1.52 }
6408    
6409     ## Step 3
6410     splice @{$self->{open_elements}}, $node_i;
6411 wakaba 1.51
6412 wakaba 1.1 !!!next-token;
6413 wakaba 1.52 last S2;
6414 wakaba 1.1 } else {
6415 wakaba 1.52 ## Step 3
6416     if (not $formatting_category->{$node->[1]} and
6417     #not $phrasing_category->{$node->[1]} and
6418     ($special_category->{$node->[1]} or
6419     $scoping_category->{$node->[1]})) {
6420 wakaba 1.79 !!!cp ('t433');
6421 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6422 wakaba 1.52 ## Ignore the token
6423     !!!next-token;
6424     last S2;
6425     }
6426 wakaba 1.79
6427     !!!cp ('t434');
6428 wakaba 1.1 }
6429 wakaba 1.52
6430     ## Step 4
6431     $node_i--;
6432     $node = $self->{open_elements}->[$node_i];
6433    
6434     ## Step 5;
6435     redo S2;
6436     } # S2
6437 wakaba 1.53 redo B;
6438 wakaba 1.1 }
6439     }
6440 wakaba 1.52 redo B;
6441 wakaba 1.1 } # B
6442    
6443     ## Stop parsing # MUST
6444    
6445     ## TODO: script stuffs
6446 wakaba 1.3 } # _tree_construct_main
6447    
## Implements the |innerHTML| setter: replaces the children of |$node|
## with the nodes obtained by parsing a string as HTML.
##
## Arguments:
##   $class - Class on which |new| and |parse_string| are invoked.
##   $node  - Target node.  Node types 9 (document) and 1 (element)
##            are supported; any other node type is a fatal error.
##   $_[0]  - The markup string, accessed through a reference to the
##            caller's argument.
##   $_[1]  - Optional error handler.  It receives a list of name/value
##            pairs that always contains |line| and |column| and, as
##            supplied by the reporter, keys such as |type| and |token|.
##            When omitted, errors are reported via |warn|.
##
## Dies if the node type is neither 9 nor 1.
sub set_inner_html ($$$) {
  my $class = shift;
  my $node = shift;
  my $s = \$_[0];
  my $onerror = $_[1];

  ## ISSUE: Should {confident} be true?

  my $nt = $node->node_type;
  if ($nt == 9) { # document node
    # MUST

    ## Step 1 # MUST
    ## TODO: If the document has an active parser, ...
    ## ISSUE: There is an issue in the spec.

    ## Step 2 # MUST
    ## Iterate over a copy of the child list, since children are removed
    ## while looping.
    my @cn = @{$node->child_nodes};
    for (@cn) {
      $node->remove_child ($_);
    }

    ## Step 3, 4, 5 # MUST
    $class->parse_string ($$s => $node, $onerror);
  } elsif ($nt == 1) { # element node
    ## TODO: If non-html element

    ## NOTE: Most of this code is copied from |parse_string|

    ## Step 1 # MUST
    ## The fragment is parsed into a fresh scratch document; the result
    ## is moved under |$node| in Step 11.
    my $this_doc = $node->owner_document;
    my $doc = $this_doc->implementation->create_document;
    $doc->manakai_is_html (1);
    my $p = $class->new;
    $p->{document} = $doc;

    ## Step 8 # MUST
    my $i = 0;       # index of the next unread character in |$$s|
    my $line = 1;    # current input position, used for error reports
    my $column = 0;
    $p->{set_next_char} = sub {
      my $self = shift;

      ## Shift the three-entry history: drop the last entry and put the
      ## character that is about to be replaced at the front.
      pop @{$self->{prev_char}};
      unshift @{$self->{prev_char}}, $self->{next_char};

      if ($i >= length $$s) {
        $self->{next_char} = -1; # no more input
        return;
      }
      $self->{next_char} = ord substr $$s, $i++, 1;
      $column++;

      if ($self->{next_char} == 0x000A) { # LF
        $line++;
        $column = 0;
        !!!cp ('i1');
      } elsif ($self->{next_char} == 0x000D) { # CR
        ## CR LF is consumed as a single newline.
        $i++ if substr ($$s, $i, 1) eq "\x0A";
        $self->{next_char} = 0x000A; # LF # MUST
        $line++;
        $column = 0;
        !!!cp ('i2');
      } elsif ($self->{next_char} > 0x10FFFF) {
        $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
        !!!cp ('i3');
      } elsif ($self->{next_char} == 0x0000) { # NULL
        !!!cp ('i4');
        !!!parse-error (type => 'NULL');
        $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
      }
    };
    $p->{prev_char} = [-1, -1, -1];
    $p->{next_char} = -1;

    ## Fallback handler used when the caller supplies none.  A location
    ## recorded on the offending token, if any, is preferred over the
    ## current input position.
    my $ponerror = $onerror || sub {
      my (%opt) = @_;
      my $err_line = $opt{line};
      my $err_column = $opt{column};
      if (defined $opt{token} and defined $opt{token}->{line}) {
        $err_line = $opt{token}->{line};
        $err_column = $opt{token}->{column};
      }
      warn "Parse error ($opt{type}) at line $err_line column $err_column\n";
    };
    $p->{parse_error} = sub {
      ## The current input position is passed first so that it acts only
      ## as a default: an explicit |line|/|column| given by the reporter
      ## comes later in the list and therefore wins in |%opt|.
      $ponerror->(line => $line, column => $column, @_);
    };

    $p->_initialize_tokenizer;
    $p->_initialize_tree_constructor;

    ## Step 2
    ## The initial content model depends on the context element.
    my $node_ln = $node->manakai_local_name;
    $p->{content_model} = {
      title => RCDATA_CONTENT_MODEL,
      textarea => RCDATA_CONTENT_MODEL,
      style => CDATA_CONTENT_MODEL,
      script => CDATA_CONTENT_MODEL,
      xmp => CDATA_CONTENT_MODEL,
      iframe => CDATA_CONTENT_MODEL,
      noembed => CDATA_CONTENT_MODEL,
      noframes => CDATA_CONTENT_MODEL,
      noscript => CDATA_CONTENT_MODEL,
      plaintext => PLAINTEXT_CONTENT_MODEL,
    }->{$node_ln};
    $p->{content_model} = PCDATA_CONTENT_MODEL
        unless defined $p->{content_model};
        ## ISSUE: What is "the name of the element"? local name?

    $p->{inner_html_node} = [$node, $node_ln];

    ## Step 3
    my $root = $doc->create_element_ns
      ('http://www.w3.org/1999/xhtml', [undef, 'html']);

    ## Step 4 # MUST
    $doc->append_child ($root);

    ## Step 5 # MUST
    push @{$p->{open_elements}}, [$root, 'html'];

    undef $p->{head_element};

    ## Step 6 # MUST
    $p->_reset_insertion_mode;

    ## Step 7 # MUST
    ## Walk from the context node towards the root; the nearest XHTML
    ## |form| element, if any, becomes the parser's form element.
    my $anode = $node;
    AN: while (defined $anode) {
      if ($anode->node_type == 1) {
        my $nsuri = $anode->namespace_uri;
        if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml') {
          if ($anode->manakai_local_name eq 'form') {
            !!!cp ('i5');
            $p->{form_element} = $anode;
            last AN;
          }
        }
      }
      $anode = $anode->parent_node;
    } # AN

    ## Step 9 # MUST
    {
      ## The macro below operates on a variable named |$self|.
      my $self = $p;
      !!!next-token;
    }
    $p->_tree_construction_main;

    ## Step 10 # MUST
    my @cn = @{$node->child_nodes};
    for (@cn) {
      $node->remove_child ($_);
    }
    ## ISSUE: mutation events? read-only?

    ## Step 11 # MUST
    @cn = @{$root->child_nodes};
    for (@cn) {
      $this_doc->adopt_node ($_);
      $node->append_child ($_);
    }
    ## ISSUE: mutation events?

    $p->_terminate_tree_constructor;
  } else {
    die "$0: |set_inner_html| is not defined for node of type $nt";
  }
} # set_inner_html
6609    
6610     } # tree construction stage
6611 wakaba 1.1
## Exception class: makes |Whatpm::HTML::RestartParser| a subclass of
## |Error| by appending 'Error' to the package's |@ISA|.
## NOTE(review): presumably thrown to make a caller restart parsing; no
## throw site is visible in this part of the file -- confirm.
6612 wakaba 1.63 package Whatpm::HTML::RestartParser;
6613     push our @ISA, 'Error';
6614    
6615 wakaba 1.1 1;
6616 wakaba 1.119 # $Date: 2008/03/20 01:34:00 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24