/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.63 - (hide annotations) (download)
Sun Nov 4 04:15:06 2007 UTC (17 years ago) by wakaba
Branch: MAIN
Changes since 1.62: +21 -2 lines
++ whatpm/Whatpm/ChangeLog	4 Nov 2007 04:14:45 -0000
	* HTML.pm.src: Support for application cache selection algorithm
	callback.

2007-11-04  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.2 package Whatpm::HTML;
2 wakaba 1.1 use strict;
3 wakaba 1.61 our $VERSION=do{my @r=(q$Revision: 1.60 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.1
5 wakaba 1.18 ## ISSUE:
6     ## var doc = implementation.createDocument (null, null, null);
7     ## doc.write ('');
8     ## alert (doc.compatMode);
9 wakaba 1.1
10 wakaba 1.31 ## ISSUE: HTML5 revision 967 says that the encoding layer MUST NOT
11     ## strip BOM and the HTML layer MUST ignore it. Whether we can do it
12     ## is not yet clear.
13     ## "{U+FEFF}..." in UTF-16BE/UTF-16LE is three or four characters?
14     ## "{U+FEFF}..." in GB18030?
15    
## Start tags on which a trailing "/" (as in |<br/>|) is tolerated
## without raising a |nestc| parse error.  Keys are lowercase tag names;
## every value is a true flag.
my $permitted_slash_tag_name = {
  map { $_ => 1 } qw(
    base link meta hr br img embed param area col input
  )
};
29    
## Replacement code points for numeric character references in the
## range 0x80..0x9F.  Positions mapped to U+FFFD have no usable
## replacement in this table.
my $c1_entity_char = do {
  my @replacement = (
    0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, # 0x80-0x87
    0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD, # 0x88-0x8F
    0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, # 0x90-0x97
    0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178, # 0x98-0x9F
  );
  my %char_for;
  @char_for{0x80 .. 0x9F} = @replacement;
  \%char_for;
}; # $c1_entity_char
64 wakaba 1.1
## Element categories used by the tree construction stage.  Each table
## maps a lowercase element name to a true flag; an element that appears
## in none of them belongs to the "phrasing" category.

## "Special" elements.
my $special_category = {
  address => 1, area => 1, base => 1, basefont => 1, bgsound => 1,
  blockquote => 1, body => 1, br => 1, center => 1, col => 1, colgroup => 1,
  dd => 1, dir => 1, div => 1, dl => 1, dt => 1, embed => 1, fieldset => 1,
  form => 1, frame => 1, frameset => 1, h1 => 1, h2 => 1, h3 => 1,
  h4 => 1, h5 => 1, h6 => 1, head => 1, hr => 1, iframe => 1, image => 1,
  img => 1, input => 1, isindex => 1, li => 1, link => 1, listing => 1,
  menu => 1, meta => 1, noembed => 1, noframes => 1, noscript => 1,
  ol => 1, optgroup => 1, option => 1, p => 1, param => 1, plaintext => 1,
  pre => 1, script => 1, select => 1, spacer => 1, style => 1, tbody => 1,
  textarea => 1, tfoot => 1, thead => 1, title => 1, tr => 1, ul => 1, wbr => 1,
};

## "Scoping" elements.
my $scoping_category = {
  button => 1, caption => 1, html => 1, marquee => 1, object => 1,
  table => 1, td => 1, th => 1,
};

## "Formatting" elements.
## NOTE: The key used to be misspelled |strile|, so |strike| elements
## were never recognized as formatting elements.
my $formatting_category = {
  a => 1, b => 1, big => 1, em => 1, font => 1, i => 1, nobr => 1,
  s => 1, small => 1, strike => 1, strong => 1, tt => 1, u => 1,
};
# $phrasing_category: all other elements
86    
## Parse a string into a document.
##
##   $doc = Whatpm::HTML->parse_string ($string, $document, $onerror);
##
## A new parser object is created through |new|.  The string is read
## through a reference to the caller's argument (no copy is made).  The
## second argument is stored as |$self->{document}| and is also the
## return value.  The optional third argument is a code reference
## invoked for every parse error with |type => ...| plus the current
## |line| and |column|; if it is omitted, errors are reported by |warn|.
sub parse_string ($$$;$) {
  my $class = shift;
  my $self = $class->new;
  my $input = \$_[0];
  $self->{document} = $_[1];

  ## NOTE: |set_inner_html| copies most of this method's code

  my $pos = 0;     # index of the next unread character in |$$input|
  my $line = 1;
  my $column = 0;
  $self->{set_next_input_character} = sub {
    my $self = shift;

    ## Keep a window of the three most recently consumed characters.
    pop @{$self->{prev_input_character}};
    unshift @{$self->{prev_input_character}}, $self->{next_input_character};

    ## End of input is signalled by -1.
    if ($pos >= length $$input) {
      $self->{next_input_character} = -1;
      return;
    }

    my $char = $self->{next_input_character} = ord substr $$input, $pos++, 1;
    $column++;

    if ($char == 0x000A) { # LF
      $line++;
      $column = 0;
    } elsif ($char == 0x000D) { # CR
      ## CR and CR LF are both reported as a single LF.
      $pos++ if substr ($$input, $pos, 1) eq "\x0A";
      $self->{next_input_character} = 0x000A; # LF # MUST
      $line++;
      $column = 0;
    } elsif ($char > 0x10FFFF) {
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    } elsif ($char == 0x0000) { # NULL
      $self->{parse_error}-> (type => 'NULL');
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    }
  };
  $self->{prev_input_character} = [-1, -1, -1];
  $self->{next_input_character} = -1;

  ## Default error handler: print a warning with the position.
  my $onerror = $_[2] || sub {
    my (%opt) = @_;
    warn "Parse error ($opt{type}) at line $opt{line} column $opt{column}\n";
  };
  ## Wrap the handler so that every report carries the current position.
  $self->{parse_error} = sub {
    $onerror->(@_, line => $line, column => $column);
  };

  $self->_initialize_tokenizer;
  $self->_initialize_tree_constructor;
  $self->_construct_tree;
  $self->_terminate_tree_constructor;

  return $self->{document};
} # parse_string
140    
## Create a parser object with default (do-nothing) callbacks.
##
## The callbacks installed here are:
##   - |set_next_input_character|: invoked with the parser object as its
##     first argument; the default behaves as an empty input and sets
##     |next_input_character| to -1 (end of input).
##   - |parse_error|: the default ignores the error.
##   - |application_cache_selection|: the default does nothing.
##
## NOTE: The default |set_next_input_character| used to close over the
## outer |$self|.  The object then referenced a closure that referenced
## the object, and such a cycle is never released by Perl's reference
## counting.  It now works on the object passed as its argument, just
## like the replacement callback installed by |parse_string|.
sub new ($) {
  my $class = shift;
  my $self = bless {}, $class;
  $self->{set_next_input_character} = sub {
    $_[0]->{next_input_character} = -1;
  };
  $self->{parse_error} = sub {
    #
  };
  $self->{application_cache_selection} = sub {
    #
  };
  return $self;
} # new
155    
## Content model flags: which kinds of markup are recognized in data.
use constant {
  CM_ENTITY         => 0b001, # & markup in data
  CM_LIMITED_MARKUP => 0b010, # < markup in data (limited)
  CM_FULL_MARKUP    => 0b100, # < markup in data (any)
};

## Content models, expressed as combinations of the flags above.
use constant {
  PLAINTEXT_CONTENT_MODEL => 0,
  CDATA_CONTENT_MODEL     => CM_LIMITED_MARKUP,
  RCDATA_CONTENT_MODEL    => CM_ENTITY | CM_LIMITED_MARKUP,
  PCDATA_CONTENT_MODEL    => CM_ENTITY | CM_FULL_MARKUP,
};
164    
## Tokenizer states, stored in |$self->{state}|.
use constant {
  DATA_STATE                                    => 0,
  ENTITY_DATA_STATE                             => 1,
  TAG_OPEN_STATE                                => 2,
  CLOSE_TAG_OPEN_STATE                          => 3,
  TAG_NAME_STATE                                => 4,
  BEFORE_ATTRIBUTE_NAME_STATE                   => 5,
  ATTRIBUTE_NAME_STATE                          => 6,
  AFTER_ATTRIBUTE_NAME_STATE                    => 7,
  BEFORE_ATTRIBUTE_VALUE_STATE                  => 8,
  ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE           => 9,
  ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE           => 10,
  ATTRIBUTE_VALUE_UNQUOTED_STATE                => 11,
  ENTITY_IN_ATTRIBUTE_VALUE_STATE               => 12,
  MARKUP_DECLARATION_OPEN_STATE                 => 13,
  COMMENT_START_STATE                           => 14,
  COMMENT_START_DASH_STATE                      => 15,
  COMMENT_STATE                                 => 16,
  COMMENT_END_STATE                             => 17,
  COMMENT_END_DASH_STATE                        => 18,
  BOGUS_COMMENT_STATE                           => 19,
  DOCTYPE_STATE                                 => 20,
  BEFORE_DOCTYPE_NAME_STATE                     => 21,
  DOCTYPE_NAME_STATE                            => 22,
  AFTER_DOCTYPE_NAME_STATE                      => 23,
  BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE        => 24,
  DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE => 25,
  DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE => 26,
  AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE         => 27,
  BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE        => 28,
  DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE => 29,
  DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE => 30,
  AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE         => 31,
  BOGUS_DOCTYPE_STATE                           => 32,
};
198    
## Token types, stored in the |type| member of a token.
use constant {
  DOCTYPE_TOKEN     => 1,
  COMMENT_TOKEN     => 2,
  START_TAG_TOKEN   => 3,
  END_TAG_TOKEN     => 4,
  END_OF_FILE_TOKEN => 5,
  CHARACTER_TOKEN   => 6,
};
205    
## Insertion mode group flags.  Each group occupies one bit so that a
## mode can be tested for membership with a bitwise AND.
use constant {
  AFTER_HTML_IMS => 0b100,
  HEAD_IMS       => 0b1000,
  BODY_IMS       => 0b10000,
  BODY_TABLE_IMS => 0b100000,
  TABLE_IMS      => 0b1000000,
  ROW_IMS        => 0b10000000,
  BODY_AFTER_IMS => 0b100000000,
  FRAME_IMS      => 0b1000000000,
};

## Insertion modes: group flags combined with a two-bit discriminator.
use constant {
  AFTER_HTML_BODY_IM     => AFTER_HTML_IMS | BODY_AFTER_IMS,
  AFTER_HTML_FRAMESET_IM => AFTER_HTML_IMS | FRAME_IMS,
  IN_HEAD_IM             => HEAD_IMS | 0b00,
  IN_HEAD_NOSCRIPT_IM    => HEAD_IMS | 0b01,
  AFTER_HEAD_IM          => HEAD_IMS | 0b10,
  BEFORE_HEAD_IM         => HEAD_IMS | 0b11,
  IN_BODY_IM             => BODY_IMS,
  IN_CELL_IM             => BODY_IMS | BODY_TABLE_IMS | 0b01,
  IN_CAPTION_IM          => BODY_IMS | BODY_TABLE_IMS | 0b10,
  IN_ROW_IM              => TABLE_IMS | ROW_IMS | 0b01,
  IN_TABLE_BODY_IM       => TABLE_IMS | ROW_IMS | 0b10,
  IN_TABLE_IM            => TABLE_IMS,
  AFTER_BODY_IM          => BODY_AFTER_IMS,
  IN_FRAMESET_IM         => FRAME_IMS | 0b01,
  AFTER_FRAMESET_IM      => FRAME_IMS | 0b10,
  IN_SELECT_IM           => 0b01,
  IN_COLUMN_GROUP_IM     => 0b10,
};
232    
233 wakaba 1.1 ## Implementations MUST act as if they used the state machine in the spec
234    
## Reset the tokenizer to its initial condition and read the first
## input character through the |set_next_input_character| callback.
sub _initialize_tokenizer ($) {
  my $self = shift;
  $self->{state} = DATA_STATE; # MUST
  $self->{content_model} = PCDATA_CONTENT_MODEL; # be

  ## |current_token| is a start tag, end tag, comment, or DOCTYPE.
  $self->{$_} = undef
      for qw(current_token current_attribute
             last_emitted_start_tag_name last_attribute_value_state);

  ## Queue of characters pushed back for reconsumption.  It has just
  ## been emptied, so the first character always comes from the
  ## input callback.
  $self->{char} = [];
  # $self->{next_input_character}
  $self->{set_next_input_character}->($self);

  ## Queue of tokens that are ready to be returned.
  $self->{token} = [];
  # $self->{escape}
} # _initialize_tokenizer
255    
256     ## A token has:
257 wakaba 1.57 ## ->{type} == DOCTYPE_TOKEN, START_TAG_TOKEN, END_TAG_TOKEN, COMMENT_TOKEN,
258     ## CHARACTER_TOKEN, or END_OF_FILE_TOKEN
259     ## ->{name} (DOCTYPE_TOKEN)
260     ## ->{tag_name} (START_TAG_TOKEN, END_TAG_TOKEN)
261     ## ->{public_identifier} (DOCTYPE_TOKEN)
262     ## ->{system_identifier} (DOCTYPE_TOKEN)
263     ## ->{correct} == 1 or 0 (DOCTYPE_TOKEN)
264     ## ->{attributes} isa HASH (START_TAG_TOKEN, END_TAG_TOKEN)
265     ## ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN)
266 wakaba 1.1
267     ## Emitted token MUST immediately be handled by the tree construction state.
268    
269     ## Before each step, UA MAY check to see if either one of the scripts in
270     ## "list of scripts that will execute as soon as possible" or the first
271     ## script in the "list of scripts that will execute asynchronously",
272     ## has completed loading. If one has, then it MUST be executed
273     ## and removed from the list.
274    
275 wakaba 1.61 ## NOTE: HTML5 "Writing HTML documents" section, applied to
276     ## documents and not to user agents and conformance checkers,
277     ## contains some requirements that are not detected by the
278     ## parsing algorithm:
279     ## - Some requirements on character encoding declarations. ## TODO
280     ## - "Elements MUST NOT contain content that their content model disallows."
281     ## ... Some are parse error, some are not (will be reported by c.c.).
282     ## - Polytheistic slash SHOULD NOT be used. (Applied only to atheists.) ## TODO
283     ## - Text (in elements, attributes, and comments) SHOULD NOT contain
284     ## control characters other than space characters. ## TODO: (what is control character? C0, C1 and DEL? Unicode control character?)
285    
286     ## TODO: HTML5 poses authors two SHOULD-level requirements that cannot
287     ## be detected by the HTML5 parsing algorithm:
288     ## - Text,
289    
290 wakaba 1.1 sub _get_next_token ($) {
291     my $self = shift;
292     if (@{$self->{token}}) {
293     return shift @{$self->{token}};
294     }
295    
296     A: {
297 wakaba 1.59 if ($self->{state} == DATA_STATE) {
298 wakaba 1.1 if ($self->{next_input_character} == 0x0026) { # &
299 wakaba 1.41 if ($self->{content_model} & CM_ENTITY) { # PCDATA | RCDATA
300 wakaba 1.59 $self->{state} = ENTITY_DATA_STATE;
301 wakaba 1.1
302     if (@{$self->{char}}) {
303     $self->{next_input_character} = shift @{$self->{char}};
304     } else {
305     $self->{set_next_input_character}->($self);
306     }
307    
308     redo A;
309     } else {
310     #
311     }
312 wakaba 1.13 } elsif ($self->{next_input_character} == 0x002D) { # -
313 wakaba 1.41 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
314 wakaba 1.13 unless ($self->{escape}) {
315     if ($self->{prev_input_character}->[0] == 0x002D and # -
316     $self->{prev_input_character}->[1] == 0x0021 and # !
317     $self->{prev_input_character}->[2] == 0x003C) { # <
318     $self->{escape} = 1;
319     }
320     }
321     }
322    
323     #
324 wakaba 1.1 } elsif ($self->{next_input_character} == 0x003C) { # <
325 wakaba 1.41 if ($self->{content_model} & CM_FULL_MARKUP or # PCDATA
326     (($self->{content_model} & CM_LIMITED_MARKUP) and # CDATA | RCDATA
327 wakaba 1.13 not $self->{escape})) {
328 wakaba 1.59 $self->{state} = TAG_OPEN_STATE;
329 wakaba 1.1
330     if (@{$self->{char}}) {
331     $self->{next_input_character} = shift @{$self->{char}};
332     } else {
333     $self->{set_next_input_character}->($self);
334     }
335    
336     redo A;
337     } else {
338     #
339     }
340 wakaba 1.13 } elsif ($self->{next_input_character} == 0x003E) { # >
341     if ($self->{escape} and
342 wakaba 1.41 ($self->{content_model} & CM_LIMITED_MARKUP)) { # RCDATA | CDATA
343 wakaba 1.13 if ($self->{prev_input_character}->[0] == 0x002D and # -
344     $self->{prev_input_character}->[1] == 0x002D) { # -
345     delete $self->{escape};
346     }
347     }
348    
349     #
350 wakaba 1.1 } elsif ($self->{next_input_character} == -1) {
351 wakaba 1.57 return ({type => END_OF_FILE_TOKEN});
352 wakaba 1.1 last A; ## TODO: ok?
353     }
354     # Anything else
355 wakaba 1.57 my $token = {type => CHARACTER_TOKEN,
356 wakaba 1.1 data => chr $self->{next_input_character}};
357     ## Stay in the data state
358    
359     if (@{$self->{char}}) {
360     $self->{next_input_character} = shift @{$self->{char}};
361     } else {
362     $self->{set_next_input_character}->($self);
363     }
364    
365    
366     return ($token);
367    
368     redo A;
369 wakaba 1.59 } elsif ($self->{state} == ENTITY_DATA_STATE) {
370 wakaba 1.1 ## (cannot happen in CDATA state)
371    
372 wakaba 1.26 my $token = $self->_tokenize_attempt_to_consume_an_entity (0);
373 wakaba 1.1
374 wakaba 1.59 $self->{state} = DATA_STATE;
375 wakaba 1.1 # next-input-character is already done
376    
377     unless (defined $token) {
378 wakaba 1.57 return ({type => CHARACTER_TOKEN, data => '&'});
379 wakaba 1.1 } else {
380     return ($token);
381     }
382    
383     redo A;
384 wakaba 1.59 } elsif ($self->{state} == TAG_OPEN_STATE) {
385 wakaba 1.41 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
386 wakaba 1.1 if ($self->{next_input_character} == 0x002F) { # /
387    
388     if (@{$self->{char}}) {
389     $self->{next_input_character} = shift @{$self->{char}};
390     } else {
391     $self->{set_next_input_character}->($self);
392     }
393    
394 wakaba 1.59 $self->{state} = CLOSE_TAG_OPEN_STATE;
395 wakaba 1.1 redo A;
396     } else {
397     ## reconsume
398 wakaba 1.59 $self->{state} = DATA_STATE;
399 wakaba 1.1
400 wakaba 1.57 return ({type => CHARACTER_TOKEN, data => '<'});
401 wakaba 1.1
402     redo A;
403     }
404 wakaba 1.41 } elsif ($self->{content_model} & CM_FULL_MARKUP) { # PCDATA
405 wakaba 1.1 if ($self->{next_input_character} == 0x0021) { # !
406 wakaba 1.59 $self->{state} = MARKUP_DECLARATION_OPEN_STATE;
407 wakaba 1.1
408     if (@{$self->{char}}) {
409     $self->{next_input_character} = shift @{$self->{char}};
410     } else {
411     $self->{set_next_input_character}->($self);
412     }
413    
414     redo A;
415     } elsif ($self->{next_input_character} == 0x002F) { # /
416 wakaba 1.59 $self->{state} = CLOSE_TAG_OPEN_STATE;
417 wakaba 1.1
418     if (@{$self->{char}}) {
419     $self->{next_input_character} = shift @{$self->{char}};
420     } else {
421     $self->{set_next_input_character}->($self);
422     }
423    
424     redo A;
425     } elsif (0x0041 <= $self->{next_input_character} and
426     $self->{next_input_character} <= 0x005A) { # A..Z
427     $self->{current_token}
428 wakaba 1.57 = {type => START_TAG_TOKEN,
429 wakaba 1.1 tag_name => chr ($self->{next_input_character} + 0x0020)};
430 wakaba 1.59 $self->{state} = TAG_NAME_STATE;
431 wakaba 1.1
432     if (@{$self->{char}}) {
433     $self->{next_input_character} = shift @{$self->{char}};
434     } else {
435     $self->{set_next_input_character}->($self);
436     }
437    
438     redo A;
439     } elsif (0x0061 <= $self->{next_input_character} and
440     $self->{next_input_character} <= 0x007A) { # a..z
441 wakaba 1.57 $self->{current_token} = {type => START_TAG_TOKEN,
442 wakaba 1.1 tag_name => chr ($self->{next_input_character})};
443 wakaba 1.59 $self->{state} = TAG_NAME_STATE;
444 wakaba 1.1
445     if (@{$self->{char}}) {
446     $self->{next_input_character} = shift @{$self->{char}};
447     } else {
448     $self->{set_next_input_character}->($self);
449     }
450    
451     redo A;
452     } elsif ($self->{next_input_character} == 0x003E) { # >
453 wakaba 1.3 $self->{parse_error}-> (type => 'empty start tag');
454 wakaba 1.59 $self->{state} = DATA_STATE;
455 wakaba 1.1
456     if (@{$self->{char}}) {
457     $self->{next_input_character} = shift @{$self->{char}};
458     } else {
459     $self->{set_next_input_character}->($self);
460     }
461    
462    
463 wakaba 1.57 return ({type => CHARACTER_TOKEN, data => '<>'});
464 wakaba 1.1
465     redo A;
466     } elsif ($self->{next_input_character} == 0x003F) { # ?
467 wakaba 1.3 $self->{parse_error}-> (type => 'pio');
468 wakaba 1.59 $self->{state} = BOGUS_COMMENT_STATE;
469 wakaba 1.1 ## $self->{next_input_character} is intentionally left as is
470     redo A;
471     } else {
472 wakaba 1.3 $self->{parse_error}-> (type => 'bare stago');
473 wakaba 1.59 $self->{state} = DATA_STATE;
474 wakaba 1.1 ## reconsume
475    
476 wakaba 1.57 return ({type => CHARACTER_TOKEN, data => '<'});
477 wakaba 1.1
478     redo A;
479     }
480     } else {
481 wakaba 1.41 die "$0: $self->{content_model} in tag open";
482 wakaba 1.1 }
483 wakaba 1.59 } elsif ($self->{state} == CLOSE_TAG_OPEN_STATE) {
484 wakaba 1.41 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
485 wakaba 1.23 if (defined $self->{last_emitted_start_tag_name}) {
486 wakaba 1.30 ## NOTE: <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>
487 wakaba 1.23 my @next_char;
488     TAGNAME: for (my $i = 0; $i < length $self->{last_emitted_start_tag_name}; $i++) {
489     push @next_char, $self->{next_input_character};
490     my $c = ord substr ($self->{last_emitted_start_tag_name}, $i, 1);
491     my $C = 0x0061 <= $c && $c <= 0x007A ? $c - 0x0020 : $c;
492     if ($self->{next_input_character} == $c or $self->{next_input_character} == $C) {
493    
494 wakaba 1.1 if (@{$self->{char}}) {
495     $self->{next_input_character} = shift @{$self->{char}};
496     } else {
497     $self->{set_next_input_character}->($self);
498     }
499    
500 wakaba 1.23 next TAGNAME;
501     } else {
502     $self->{next_input_character} = shift @next_char; # reconsume
503     unshift @{$self->{char}}, (@next_char);
504 wakaba 1.59 $self->{state} = DATA_STATE;
505 wakaba 1.23
506 wakaba 1.57 return ({type => CHARACTER_TOKEN, data => '</'});
507 wakaba 1.23
508     redo A;
509     }
510     }
511     push @next_char, $self->{next_input_character};
512    
513     unless ($self->{next_input_character} == 0x0009 or # HT
514     $self->{next_input_character} == 0x000A or # LF
515     $self->{next_input_character} == 0x000B or # VT
516     $self->{next_input_character} == 0x000C or # FF
517     $self->{next_input_character} == 0x0020 or # SP
518     $self->{next_input_character} == 0x003E or # >
519     $self->{next_input_character} == 0x002F or # /
520     $self->{next_input_character} == -1) {
521 wakaba 1.1 $self->{next_input_character} = shift @next_char; # reconsume
522     unshift @{$self->{char}}, (@next_char);
523 wakaba 1.59 $self->{state} = DATA_STATE;
524 wakaba 1.57 return ({type => CHARACTER_TOKEN, data => '</'});
525 wakaba 1.1 redo A;
526 wakaba 1.23 } else {
527     $self->{next_input_character} = shift @next_char;
528     unshift @{$self->{char}}, (@next_char);
529     # and consume...
530 wakaba 1.1 }
531 wakaba 1.23 } else {
532     ## No start tag token has ever been emitted
533     # next-input-character is already done
534 wakaba 1.59 $self->{state} = DATA_STATE;
535 wakaba 1.57 return ({type => CHARACTER_TOKEN, data => '</'});
536 wakaba 1.1 redo A;
537     }
538     }
539    
540     if (0x0041 <= $self->{next_input_character} and
541     $self->{next_input_character} <= 0x005A) { # A..Z
542 wakaba 1.57 $self->{current_token} = {type => END_TAG_TOKEN,
543 wakaba 1.1 tag_name => chr ($self->{next_input_character} + 0x0020)};
544 wakaba 1.59 $self->{state} = TAG_NAME_STATE;
545 wakaba 1.1
546     if (@{$self->{char}}) {
547     $self->{next_input_character} = shift @{$self->{char}};
548     } else {
549     $self->{set_next_input_character}->($self);
550     }
551    
552     redo A;
553     } elsif (0x0061 <= $self->{next_input_character} and
554     $self->{next_input_character} <= 0x007A) { # a..z
555 wakaba 1.57 $self->{current_token} = {type => END_TAG_TOKEN,
556 wakaba 1.1 tag_name => chr ($self->{next_input_character})};
557 wakaba 1.59 $self->{state} = TAG_NAME_STATE;
558 wakaba 1.1
559     if (@{$self->{char}}) {
560     $self->{next_input_character} = shift @{$self->{char}};
561     } else {
562     $self->{set_next_input_character}->($self);
563     }
564    
565     redo A;
566     } elsif ($self->{next_input_character} == 0x003E) { # >
567 wakaba 1.3 $self->{parse_error}-> (type => 'empty end tag');
568 wakaba 1.59 $self->{state} = DATA_STATE;
569 wakaba 1.1
570     if (@{$self->{char}}) {
571     $self->{next_input_character} = shift @{$self->{char}};
572     } else {
573     $self->{set_next_input_character}->($self);
574     }
575    
576     redo A;
577     } elsif ($self->{next_input_character} == -1) {
578 wakaba 1.3 $self->{parse_error}-> (type => 'bare etago');
579 wakaba 1.59 $self->{state} = DATA_STATE;
580 wakaba 1.1 # reconsume
581    
582 wakaba 1.57 return ({type => CHARACTER_TOKEN, data => '</'});
583 wakaba 1.1
584     redo A;
585     } else {
586 wakaba 1.3 $self->{parse_error}-> (type => 'bogus end tag');
587 wakaba 1.59 $self->{state} = BOGUS_COMMENT_STATE;
588 wakaba 1.1 ## $self->{next_input_character} is intentionally left as is
589     redo A;
590     }
591 wakaba 1.59 } elsif ($self->{state} == TAG_NAME_STATE) {
592 wakaba 1.1 if ($self->{next_input_character} == 0x0009 or # HT
593     $self->{next_input_character} == 0x000A or # LF
594     $self->{next_input_character} == 0x000B or # VT
595     $self->{next_input_character} == 0x000C or # FF
596     $self->{next_input_character} == 0x0020) { # SP
597 wakaba 1.59 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
598 wakaba 1.1
599     if (@{$self->{char}}) {
600     $self->{next_input_character} = shift @{$self->{char}};
601     } else {
602     $self->{set_next_input_character}->($self);
603     }
604    
605     redo A;
606     } elsif ($self->{next_input_character} == 0x003E) { # >
607 wakaba 1.57 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
608 wakaba 1.28 $self->{current_token}->{first_start_tag}
609     = not defined $self->{last_emitted_start_tag_name};
610 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
611 wakaba 1.57 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
612 wakaba 1.41 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
613 wakaba 1.1 if ($self->{current_token}->{attributes}) {
614 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
615 wakaba 1.1 }
616     } else {
617     die "$0: $self->{current_token}->{type}: Unknown token type";
618     }
619 wakaba 1.59 $self->{state} = DATA_STATE;
620 wakaba 1.1
621     if (@{$self->{char}}) {
622     $self->{next_input_character} = shift @{$self->{char}};
623     } else {
624     $self->{set_next_input_character}->($self);
625     }
626    
627    
628     return ($self->{current_token}); # start tag or end tag
629    
630     redo A;
631     } elsif (0x0041 <= $self->{next_input_character} and
632     $self->{next_input_character} <= 0x005A) { # A..Z
633     $self->{current_token}->{tag_name} .= chr ($self->{next_input_character} + 0x0020);
634     # start tag or end tag
635     ## Stay in this state
636    
637     if (@{$self->{char}}) {
638     $self->{next_input_character} = shift @{$self->{char}};
639     } else {
640     $self->{set_next_input_character}->($self);
641     }
642    
643     redo A;
644 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
645 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed tag');
646 wakaba 1.57 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
647 wakaba 1.28 $self->{current_token}->{first_start_tag}
648     = not defined $self->{last_emitted_start_tag_name};
649 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
650 wakaba 1.57 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
651 wakaba 1.41 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
652 wakaba 1.1 if ($self->{current_token}->{attributes}) {
653 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
654 wakaba 1.1 }
655     } else {
656     die "$0: $self->{current_token}->{type}: Unknown token type";
657     }
658 wakaba 1.59 $self->{state} = DATA_STATE;
659 wakaba 1.1 # reconsume
660    
661     return ($self->{current_token}); # start tag or end tag
662    
663     redo A;
664     } elsif ($self->{next_input_character} == 0x002F) { # /
665    
666     if (@{$self->{char}}) {
667     $self->{next_input_character} = shift @{$self->{char}};
668     } else {
669     $self->{set_next_input_character}->($self);
670     }
671    
672     if ($self->{next_input_character} == 0x003E and # >
673 wakaba 1.57 $self->{current_token}->{type} == START_TAG_TOKEN and
674 wakaba 1.1 $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
675     # permitted slash
676     #
677     } else {
678 wakaba 1.3 $self->{parse_error}-> (type => 'nestc');
679 wakaba 1.1 }
680 wakaba 1.59 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
681 wakaba 1.1 # next-input-character is already done
682     redo A;
683     } else {
684     $self->{current_token}->{tag_name} .= chr $self->{next_input_character};
685     # start tag or end tag
686     ## Stay in the state
687    
688     if (@{$self->{char}}) {
689     $self->{next_input_character} = shift @{$self->{char}};
690     } else {
691     $self->{set_next_input_character}->($self);
692     }
693    
694     redo A;
695     }
696 wakaba 1.59 } elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) {
697 wakaba 1.1 if ($self->{next_input_character} == 0x0009 or # HT
698     $self->{next_input_character} == 0x000A or # LF
699     $self->{next_input_character} == 0x000B or # VT
700     $self->{next_input_character} == 0x000C or # FF
701     $self->{next_input_character} == 0x0020) { # SP
702     ## Stay in the state
703    
704     if (@{$self->{char}}) {
705     $self->{next_input_character} = shift @{$self->{char}};
706     } else {
707     $self->{set_next_input_character}->($self);
708     }
709    
710     redo A;
711     } elsif ($self->{next_input_character} == 0x003E) { # >
712 wakaba 1.57 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
713 wakaba 1.28 $self->{current_token}->{first_start_tag}
714     = not defined $self->{last_emitted_start_tag_name};
715 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
716 wakaba 1.57 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
717 wakaba 1.41 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
718 wakaba 1.1 if ($self->{current_token}->{attributes}) {
719 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
720 wakaba 1.1 }
721     } else {
722     die "$0: $self->{current_token}->{type}: Unknown token type";
723     }
724 wakaba 1.59 $self->{state} = DATA_STATE;
725 wakaba 1.1
726     if (@{$self->{char}}) {
727     $self->{next_input_character} = shift @{$self->{char}};
728     } else {
729     $self->{set_next_input_character}->($self);
730     }
731    
732    
733     return ($self->{current_token}); # start tag or end tag
734    
735     redo A;
736     } elsif (0x0041 <= $self->{next_input_character} and
737     $self->{next_input_character} <= 0x005A) { # A..Z
738     $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
739     value => ''};
740 wakaba 1.59 $self->{state} = ATTRIBUTE_NAME_STATE;
741 wakaba 1.1
742     if (@{$self->{char}}) {
743     $self->{next_input_character} = shift @{$self->{char}};
744     } else {
745     $self->{set_next_input_character}->($self);
746     }
747    
748     redo A;
749     } elsif ($self->{next_input_character} == 0x002F) { # /
750    
751     if (@{$self->{char}}) {
752     $self->{next_input_character} = shift @{$self->{char}};
753     } else {
754     $self->{set_next_input_character}->($self);
755     }
756    
757     if ($self->{next_input_character} == 0x003E and # >
758 wakaba 1.57 $self->{current_token}->{type} == START_TAG_TOKEN and
759 wakaba 1.1 $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
760     # permitted slash
761     #
762     } else {
763 wakaba 1.3 $self->{parse_error}-> (type => 'nestc');
764 wakaba 1.1 }
765     ## Stay in the state
766     # next-input-character is already done
767     redo A;
768 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
769 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed tag');
770 wakaba 1.57 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
771 wakaba 1.28 $self->{current_token}->{first_start_tag}
772     = not defined $self->{last_emitted_start_tag_name};
773 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
774 wakaba 1.57 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
775 wakaba 1.41 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
776 wakaba 1.1 if ($self->{current_token}->{attributes}) {
777 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
778 wakaba 1.1 }
779     } else {
780     die "$0: $self->{current_token}->{type}: Unknown token type";
781     }
782 wakaba 1.59 $self->{state} = DATA_STATE;
783 wakaba 1.1 # reconsume
784    
785     return ($self->{current_token}); # start tag or end tag
786    
787     redo A;
788     } else {
789     $self->{current_attribute} = {name => chr ($self->{next_input_character}),
790     value => ''};
791 wakaba 1.59 $self->{state} = ATTRIBUTE_NAME_STATE;
792 wakaba 1.1
793     if (@{$self->{char}}) {
794     $self->{next_input_character} = shift @{$self->{char}};
795     } else {
796     $self->{set_next_input_character}->($self);
797     }
798    
799     redo A;
800     }
801 wakaba 1.59 } elsif ($self->{state} == ATTRIBUTE_NAME_STATE) {
802 wakaba 1.1 my $before_leave = sub {
803     if (exists $self->{current_token}->{attributes} # start tag or end tag
804     ->{$self->{current_attribute}->{name}}) { # MUST
805 wakaba 1.40 $self->{parse_error}-> (type => 'duplicate attribute:'.$self->{current_attribute}->{name});
806 wakaba 1.1 ## Discard $self->{current_attribute} # MUST
807     } else {
808     $self->{current_token}->{attributes}->{$self->{current_attribute}->{name}}
809     = $self->{current_attribute};
810     }
811     }; # $before_leave
812    
813     if ($self->{next_input_character} == 0x0009 or # HT
814     $self->{next_input_character} == 0x000A or # LF
815     $self->{next_input_character} == 0x000B or # VT
816     $self->{next_input_character} == 0x000C or # FF
817     $self->{next_input_character} == 0x0020) { # SP
818     $before_leave->();
819 wakaba 1.59 $self->{state} = AFTER_ATTRIBUTE_NAME_STATE;
820 wakaba 1.1
821     if (@{$self->{char}}) {
822     $self->{next_input_character} = shift @{$self->{char}};
823     } else {
824     $self->{set_next_input_character}->($self);
825     }
826    
827     redo A;
828     } elsif ($self->{next_input_character} == 0x003D) { # =
829     $before_leave->();
830 wakaba 1.59 $self->{state} = BEFORE_ATTRIBUTE_VALUE_STATE;
831 wakaba 1.1
832     if (@{$self->{char}}) {
833     $self->{next_input_character} = shift @{$self->{char}};
834     } else {
835     $self->{set_next_input_character}->($self);
836     }
837    
838     redo A;
839     } elsif ($self->{next_input_character} == 0x003E) { # >
840     $before_leave->();
841 wakaba 1.57 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
842 wakaba 1.28 $self->{current_token}->{first_start_tag}
843     = not defined $self->{last_emitted_start_tag_name};
844 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
845 wakaba 1.57 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
846 wakaba 1.41 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
847 wakaba 1.1 if ($self->{current_token}->{attributes}) {
848 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
849 wakaba 1.1 }
850     } else {
851     die "$0: $self->{current_token}->{type}: Unknown token type";
852     }
853 wakaba 1.59 $self->{state} = DATA_STATE;
854 wakaba 1.1
855     if (@{$self->{char}}) {
856     $self->{next_input_character} = shift @{$self->{char}};
857     } else {
858     $self->{set_next_input_character}->($self);
859     }
860    
861    
862     return ($self->{current_token}); # start tag or end tag
863    
864     redo A;
865     } elsif (0x0041 <= $self->{next_input_character} and
866     $self->{next_input_character} <= 0x005A) { # A..Z
867     $self->{current_attribute}->{name} .= chr ($self->{next_input_character} + 0x0020);
868     ## Stay in the state
869    
870     if (@{$self->{char}}) {
871     $self->{next_input_character} = shift @{$self->{char}};
872     } else {
873     $self->{set_next_input_character}->($self);
874     }
875    
876     redo A;
877     } elsif ($self->{next_input_character} == 0x002F) { # /
878     $before_leave->();
879    
880     if (@{$self->{char}}) {
881     $self->{next_input_character} = shift @{$self->{char}};
882     } else {
883     $self->{set_next_input_character}->($self);
884     }
885    
886     if ($self->{next_input_character} == 0x003E and # >
887 wakaba 1.57 $self->{current_token}->{type} == START_TAG_TOKEN and
888 wakaba 1.1 $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
889     # permitted slash
890     #
891     } else {
892 wakaba 1.3 $self->{parse_error}-> (type => 'nestc');
893 wakaba 1.1 }
894 wakaba 1.59 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
895 wakaba 1.1 # next-input-character is already done
896     redo A;
897 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
898 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed tag');
899 wakaba 1.1 $before_leave->();
900 wakaba 1.57 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
901 wakaba 1.28 $self->{current_token}->{first_start_tag}
902     = not defined $self->{last_emitted_start_tag_name};
903 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
904 wakaba 1.57 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
905 wakaba 1.41 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
906 wakaba 1.1 if ($self->{current_token}->{attributes}) {
907 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
908 wakaba 1.1 }
909     } else {
910     die "$0: $self->{current_token}->{type}: Unknown token type";
911     }
912 wakaba 1.59 $self->{state} = DATA_STATE;
913 wakaba 1.1 # reconsume
914    
915     return ($self->{current_token}); # start tag or end tag
916    
917     redo A;
918     } else {
919     $self->{current_attribute}->{name} .= chr ($self->{next_input_character});
920     ## Stay in the state
921    
922     if (@{$self->{char}}) {
923     $self->{next_input_character} = shift @{$self->{char}};
924     } else {
925     $self->{set_next_input_character}->($self);
926     }
927    
928     redo A;
929     }
930 wakaba 1.59 } elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) {
931 wakaba 1.1 if ($self->{next_input_character} == 0x0009 or # HT
932     $self->{next_input_character} == 0x000A or # LF
933     $self->{next_input_character} == 0x000B or # VT
934     $self->{next_input_character} == 0x000C or # FF
935     $self->{next_input_character} == 0x0020) { # SP
936     ## Stay in the state
937    
938     if (@{$self->{char}}) {
939     $self->{next_input_character} = shift @{$self->{char}};
940     } else {
941     $self->{set_next_input_character}->($self);
942     }
943    
944     redo A;
945     } elsif ($self->{next_input_character} == 0x003D) { # =
946 wakaba 1.59 $self->{state} = BEFORE_ATTRIBUTE_VALUE_STATE;
947 wakaba 1.1
948     if (@{$self->{char}}) {
949     $self->{next_input_character} = shift @{$self->{char}};
950     } else {
951     $self->{set_next_input_character}->($self);
952     }
953    
954     redo A;
955     } elsif ($self->{next_input_character} == 0x003E) { # >
956 wakaba 1.57 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
957 wakaba 1.28 $self->{current_token}->{first_start_tag}
958     = not defined $self->{last_emitted_start_tag_name};
959 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
960 wakaba 1.57 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
961 wakaba 1.41 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
962 wakaba 1.1 if ($self->{current_token}->{attributes}) {
963 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
964 wakaba 1.1 }
965     } else {
966     die "$0: $self->{current_token}->{type}: Unknown token type";
967     }
968 wakaba 1.59 $self->{state} = DATA_STATE;
969 wakaba 1.1
970     if (@{$self->{char}}) {
971     $self->{next_input_character} = shift @{$self->{char}};
972     } else {
973     $self->{set_next_input_character}->($self);
974     }
975    
976    
977     return ($self->{current_token}); # start tag or end tag
978    
979     redo A;
980     } elsif (0x0041 <= $self->{next_input_character} and
981     $self->{next_input_character} <= 0x005A) { # A..Z
982     $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
983     value => ''};
984 wakaba 1.59 $self->{state} = ATTRIBUTE_NAME_STATE;
985 wakaba 1.1
986     if (@{$self->{char}}) {
987     $self->{next_input_character} = shift @{$self->{char}};
988     } else {
989     $self->{set_next_input_character}->($self);
990     }
991    
992     redo A;
993     } elsif ($self->{next_input_character} == 0x002F) { # /
994    
995     if (@{$self->{char}}) {
996     $self->{next_input_character} = shift @{$self->{char}};
997     } else {
998     $self->{set_next_input_character}->($self);
999     }
1000    
1001     if ($self->{next_input_character} == 0x003E and # >
1002 wakaba 1.57 $self->{current_token}->{type} == START_TAG_TOKEN and
1003 wakaba 1.1 $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
1004     # permitted slash
1005     #
1006     } else {
1007 wakaba 1.3 $self->{parse_error}-> (type => 'nestc');
1008 wakaba 1.33 ## TODO: Different error type for <aa / bb> than <aa/>
1009 wakaba 1.1 }
1010 wakaba 1.59 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1011 wakaba 1.1 # next-input-character is already done
1012     redo A;
1013 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
1014 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed tag');
1015 wakaba 1.57 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1016 wakaba 1.28 $self->{current_token}->{first_start_tag}
1017     = not defined $self->{last_emitted_start_tag_name};
1018 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1019 wakaba 1.57 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1020 wakaba 1.41 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1021 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1022 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
1023 wakaba 1.1 }
1024     } else {
1025     die "$0: $self->{current_token}->{type}: Unknown token type";
1026     }
1027 wakaba 1.59 $self->{state} = DATA_STATE;
1028 wakaba 1.1 # reconsume
1029    
1030     return ($self->{current_token}); # start tag or end tag
1031    
1032     redo A;
1033     } else {
1034     $self->{current_attribute} = {name => chr ($self->{next_input_character}),
1035     value => ''};
1036 wakaba 1.59 $self->{state} = ATTRIBUTE_NAME_STATE;
1037 wakaba 1.1
1038     if (@{$self->{char}}) {
1039     $self->{next_input_character} = shift @{$self->{char}};
1040     } else {
1041     $self->{set_next_input_character}->($self);
1042     }
1043    
1044     redo A;
1045     }
1046 wakaba 1.59 } elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) {
1047 wakaba 1.1 if ($self->{next_input_character} == 0x0009 or # HT
1048     $self->{next_input_character} == 0x000A or # LF
1049     $self->{next_input_character} == 0x000B or # VT
1050     $self->{next_input_character} == 0x000C or # FF
1051     $self->{next_input_character} == 0x0020) { # SP
1052     ## Stay in the state
1053    
1054     if (@{$self->{char}}) {
1055     $self->{next_input_character} = shift @{$self->{char}};
1056     } else {
1057     $self->{set_next_input_character}->($self);
1058     }
1059    
1060     redo A;
1061     } elsif ($self->{next_input_character} == 0x0022) { # "
1062 wakaba 1.59 $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
1063 wakaba 1.1
1064     if (@{$self->{char}}) {
1065     $self->{next_input_character} = shift @{$self->{char}};
1066     } else {
1067     $self->{set_next_input_character}->($self);
1068     }
1069    
1070     redo A;
1071     } elsif ($self->{next_input_character} == 0x0026) { # &
1072 wakaba 1.59 $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
1073 wakaba 1.1 ## reconsume
1074     redo A;
1075     } elsif ($self->{next_input_character} == 0x0027) { # '
1076 wakaba 1.59 $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
1077 wakaba 1.1
1078     if (@{$self->{char}}) {
1079     $self->{next_input_character} = shift @{$self->{char}};
1080     } else {
1081     $self->{set_next_input_character}->($self);
1082     }
1083    
1084     redo A;
1085     } elsif ($self->{next_input_character} == 0x003E) { # >
1086 wakaba 1.57 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1087 wakaba 1.28 $self->{current_token}->{first_start_tag}
1088     = not defined $self->{last_emitted_start_tag_name};
1089 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1090 wakaba 1.57 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1091 wakaba 1.41 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1092 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1093 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
1094 wakaba 1.1 }
1095     } else {
1096     die "$0: $self->{current_token}->{type}: Unknown token type";
1097     }
1098 wakaba 1.59 $self->{state} = DATA_STATE;
1099 wakaba 1.1
1100     if (@{$self->{char}}) {
1101     $self->{next_input_character} = shift @{$self->{char}};
1102     } else {
1103     $self->{set_next_input_character}->($self);
1104     }
1105    
1106    
1107     return ($self->{current_token}); # start tag or end tag
1108    
1109     redo A;
1110 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
1111 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed tag');
1112 wakaba 1.57 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1113 wakaba 1.28 $self->{current_token}->{first_start_tag}
1114     = not defined $self->{last_emitted_start_tag_name};
1115 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1116 wakaba 1.57 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1117 wakaba 1.41 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1118 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1119 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
1120 wakaba 1.1 }
1121     } else {
1122     die "$0: $self->{current_token}->{type}: Unknown token type";
1123     }
1124 wakaba 1.59 $self->{state} = DATA_STATE;
1125 wakaba 1.1 ## reconsume
1126    
1127     return ($self->{current_token}); # start tag or end tag
1128    
1129     redo A;
1130     } else {
1131     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1132 wakaba 1.59 $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
1133 wakaba 1.1
1134     if (@{$self->{char}}) {
1135     $self->{next_input_character} = shift @{$self->{char}};
1136     } else {
1137     $self->{set_next_input_character}->($self);
1138     }
1139    
1140     redo A;
1141     }
1142 wakaba 1.59 } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {
1143 wakaba 1.1 if ($self->{next_input_character} == 0x0022) { # "
1144 wakaba 1.59 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1145 wakaba 1.1
1146     if (@{$self->{char}}) {
1147     $self->{next_input_character} = shift @{$self->{char}};
1148     } else {
1149     $self->{set_next_input_character}->($self);
1150     }
1151    
1152     redo A;
1153     } elsif ($self->{next_input_character} == 0x0026) { # &
1154 wakaba 1.59 $self->{last_attribute_value_state} = $self->{state};
1155     $self->{state} = ENTITY_IN_ATTRIBUTE_VALUE_STATE;
1156 wakaba 1.1
1157     if (@{$self->{char}}) {
1158     $self->{next_input_character} = shift @{$self->{char}};
1159     } else {
1160     $self->{set_next_input_character}->($self);
1161     }
1162    
1163     redo A;
1164     } elsif ($self->{next_input_character} == -1) {
1165 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed attribute value');
1166 wakaba 1.57 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1167 wakaba 1.28 $self->{current_token}->{first_start_tag}
1168     = not defined $self->{last_emitted_start_tag_name};
1169 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1170 wakaba 1.57 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1171 wakaba 1.41 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1172 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1173 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
1174 wakaba 1.1 }
1175     } else {
1176     die "$0: $self->{current_token}->{type}: Unknown token type";
1177     }
1178 wakaba 1.59 $self->{state} = DATA_STATE;
1179 wakaba 1.1 ## reconsume
1180    
1181     return ($self->{current_token}); # start tag or end tag
1182    
1183     redo A;
1184     } else {
1185     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1186     ## Stay in the state
1187    
1188     if (@{$self->{char}}) {
1189     $self->{next_input_character} = shift @{$self->{char}};
1190     } else {
1191     $self->{set_next_input_character}->($self);
1192     }
1193    
1194     redo A;
1195     }
1196 wakaba 1.59 } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {
1197 wakaba 1.1 if ($self->{next_input_character} == 0x0027) { # '
1198 wakaba 1.59 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1199 wakaba 1.1
1200     if (@{$self->{char}}) {
1201     $self->{next_input_character} = shift @{$self->{char}};
1202     } else {
1203     $self->{set_next_input_character}->($self);
1204     }
1205    
1206     redo A;
1207     } elsif ($self->{next_input_character} == 0x0026) { # &
1208 wakaba 1.59 $self->{last_attribute_value_state} = $self->{state};
1209     $self->{state} = ENTITY_IN_ATTRIBUTE_VALUE_STATE;
1210 wakaba 1.1
1211     if (@{$self->{char}}) {
1212     $self->{next_input_character} = shift @{$self->{char}};
1213     } else {
1214     $self->{set_next_input_character}->($self);
1215     }
1216    
1217     redo A;
1218     } elsif ($self->{next_input_character} == -1) {
1219 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed attribute value');
1220 wakaba 1.57 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1221 wakaba 1.28 $self->{current_token}->{first_start_tag}
1222     = not defined $self->{last_emitted_start_tag_name};
1223 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1224 wakaba 1.57 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1225 wakaba 1.41 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1226 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1227 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
1228 wakaba 1.1 }
1229     } else {
1230     die "$0: $self->{current_token}->{type}: Unknown token type";
1231     }
1232 wakaba 1.59 $self->{state} = DATA_STATE;
1233 wakaba 1.1 ## reconsume
1234    
1235     return ($self->{current_token}); # start tag or end tag
1236    
1237     redo A;
1238     } else {
1239     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1240     ## Stay in the state
1241    
1242     if (@{$self->{char}}) {
1243     $self->{next_input_character} = shift @{$self->{char}};
1244     } else {
1245     $self->{set_next_input_character}->($self);
1246     }
1247    
1248     redo A;
1249     }
1250 wakaba 1.59 } elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) {
1251 wakaba 1.1 if ($self->{next_input_character} == 0x0009 or # HT
1252     $self->{next_input_character} == 0x000A or # LF
1253     $self->{next_input_character} == 0x000B or # VT
1254     $self->{next_input_character} == 0x000C or # FF
1255     $self->{next_input_character} == 0x0020) { # SP
1256 wakaba 1.59 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1257 wakaba 1.1
1258     if (@{$self->{char}}) {
1259     $self->{next_input_character} = shift @{$self->{char}};
1260     } else {
1261     $self->{set_next_input_character}->($self);
1262     }
1263    
1264     redo A;
1265     } elsif ($self->{next_input_character} == 0x0026) { # &
1266 wakaba 1.59 $self->{last_attribute_value_state} = $self->{state};
1267     $self->{state} = ENTITY_IN_ATTRIBUTE_VALUE_STATE;
1268 wakaba 1.1
1269     if (@{$self->{char}}) {
1270     $self->{next_input_character} = shift @{$self->{char}};
1271     } else {
1272     $self->{set_next_input_character}->($self);
1273     }
1274    
1275     redo A;
1276     } elsif ($self->{next_input_character} == 0x003E) { # >
1277 wakaba 1.57 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1278 wakaba 1.28 $self->{current_token}->{first_start_tag}
1279     = not defined $self->{last_emitted_start_tag_name};
1280 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1281 wakaba 1.57 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1282 wakaba 1.41 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1283 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1284 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
1285 wakaba 1.1 }
1286     } else {
1287     die "$0: $self->{current_token}->{type}: Unknown token type";
1288     }
1289 wakaba 1.59 $self->{state} = DATA_STATE;
1290 wakaba 1.1
1291     if (@{$self->{char}}) {
1292     $self->{next_input_character} = shift @{$self->{char}};
1293     } else {
1294     $self->{set_next_input_character}->($self);
1295     }
1296    
1297    
1298     return ($self->{current_token}); # start tag or end tag
1299    
1300     redo A;
1301 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
1302 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed tag');
1303 wakaba 1.57 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1304 wakaba 1.28 $self->{current_token}->{first_start_tag}
1305     = not defined $self->{last_emitted_start_tag_name};
1306 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1307 wakaba 1.57 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1308 wakaba 1.41 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1309 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1310 wakaba 1.3 $self->{parse_error}-> (type => 'end tag attribute');
1311 wakaba 1.1 }
1312     } else {
1313     die "$0: $self->{current_token}->{type}: Unknown token type";
1314     }
1315 wakaba 1.59 $self->{state} = DATA_STATE;
1316 wakaba 1.1 ## reconsume
1317    
1318     return ($self->{current_token}); # start tag or end tag
1319    
1320     redo A;
1321     } else {
1322     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1323     ## Stay in the state
1324    
1325     if (@{$self->{char}}) {
1326     $self->{next_input_character} = shift @{$self->{char}};
1327     } else {
1328     $self->{set_next_input_character}->($self);
1329     }
1330    
1331     redo A;
1332     }
1333 wakaba 1.59 } elsif ($self->{state} == ENTITY_IN_ATTRIBUTE_VALUE_STATE) {
1334 wakaba 1.26 my $token = $self->_tokenize_attempt_to_consume_an_entity (1);
1335 wakaba 1.1
1336     unless (defined $token) {
1337     $self->{current_attribute}->{value} .= '&';
1338     } else {
1339     $self->{current_attribute}->{value} .= $token->{data};
1340     ## ISSUE: spec says "append the returned character token to the current attribute's value"
1341     }
1342    
1343     $self->{state} = $self->{last_attribute_value_state};
1344     # next-input-character is already done
1345     redo A;
1346 wakaba 1.59 } elsif ($self->{state} == BOGUS_COMMENT_STATE) {
1347     ## (only happens in the PCDATA state)
1348    
1349 wakaba 1.57 my $token = {type => COMMENT_TOKEN, data => ''};
1350 wakaba 1.1
1351     BC: {
1352     if ($self->{next_input_character} == 0x003E) { # >
1353 wakaba 1.59 $self->{state} = DATA_STATE;
1354 wakaba 1.1
1355     if (@{$self->{char}}) {
1356     $self->{next_input_character} = shift @{$self->{char}};
1357     } else {
1358     $self->{set_next_input_character}->($self);
1359     }
1360    
1361    
1362     return ($token);
1363    
1364     redo A;
1365     } elsif ($self->{next_input_character} == -1) {
1366 wakaba 1.59 $self->{state} = DATA_STATE;
1367 wakaba 1.1 ## reconsume
1368    
1369     return ($token);
1370    
1371     redo A;
1372     } else {
1373     $token->{data} .= chr ($self->{next_input_character});
1374    
1375     if (@{$self->{char}}) {
1376     $self->{next_input_character} = shift @{$self->{char}};
1377     } else {
1378     $self->{set_next_input_character}->($self);
1379     }
1380    
1381     redo BC;
1382     }
1383     } # BC
1384 wakaba 1.59 } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {
1385 wakaba 1.1 ## (only happens in the PCDATA state)
1386    
1387     my @next_char;
1388     push @next_char, $self->{next_input_character};
1389    
1390     if ($self->{next_input_character} == 0x002D) { # -
1391    
1392     if (@{$self->{char}}) {
1393     $self->{next_input_character} = shift @{$self->{char}};
1394     } else {
1395     $self->{set_next_input_character}->($self);
1396     }
1397    
1398     push @next_char, $self->{next_input_character};
1399     if ($self->{next_input_character} == 0x002D) { # -
1400 wakaba 1.57 $self->{current_token} = {type => COMMENT_TOKEN, data => ''};
1401 wakaba 1.59 $self->{state} = COMMENT_START_STATE;
1402 wakaba 1.1
1403     if (@{$self->{char}}) {
1404     $self->{next_input_character} = shift @{$self->{char}};
1405     } else {
1406     $self->{set_next_input_character}->($self);
1407     }
1408    
1409     redo A;
1410     }
1411     } elsif ($self->{next_input_character} == 0x0044 or # D
1412     $self->{next_input_character} == 0x0064) { # d
1413    
1414     if (@{$self->{char}}) {
1415     $self->{next_input_character} = shift @{$self->{char}};
1416     } else {
1417     $self->{set_next_input_character}->($self);
1418     }
1419    
1420     push @next_char, $self->{next_input_character};
1421     if ($self->{next_input_character} == 0x004F or # O
1422     $self->{next_input_character} == 0x006F) { # o
1423    
1424     if (@{$self->{char}}) {
1425     $self->{next_input_character} = shift @{$self->{char}};
1426     } else {
1427     $self->{set_next_input_character}->($self);
1428     }
1429    
1430     push @next_char, $self->{next_input_character};
1431     if ($self->{next_input_character} == 0x0043 or # C
1432     $self->{next_input_character} == 0x0063) { # c
1433    
1434     if (@{$self->{char}}) {
1435     $self->{next_input_character} = shift @{$self->{char}};
1436     } else {
1437     $self->{set_next_input_character}->($self);
1438     }
1439    
1440     push @next_char, $self->{next_input_character};
1441     if ($self->{next_input_character} == 0x0054 or # T
1442     $self->{next_input_character} == 0x0074) { # t
1443    
1444     if (@{$self->{char}}) {
1445     $self->{next_input_character} = shift @{$self->{char}};
1446     } else {
1447     $self->{set_next_input_character}->($self);
1448     }
1449    
1450     push @next_char, $self->{next_input_character};
1451     if ($self->{next_input_character} == 0x0059 or # Y
1452     $self->{next_input_character} == 0x0079) { # y
1453    
1454     if (@{$self->{char}}) {
1455     $self->{next_input_character} = shift @{$self->{char}};
1456     } else {
1457     $self->{set_next_input_character}->($self);
1458     }
1459    
1460     push @next_char, $self->{next_input_character};
1461     if ($self->{next_input_character} == 0x0050 or # P
1462     $self->{next_input_character} == 0x0070) { # p
1463    
1464     if (@{$self->{char}}) {
1465     $self->{next_input_character} = shift @{$self->{char}};
1466     } else {
1467     $self->{set_next_input_character}->($self);
1468     }
1469    
1470     push @next_char, $self->{next_input_character};
1471     if ($self->{next_input_character} == 0x0045 or # E
1472     $self->{next_input_character} == 0x0065) { # e
1473     ## ISSUE: What a stupid code this is!
1474 wakaba 1.59 $self->{state} = DOCTYPE_STATE;
1475 wakaba 1.1
1476     if (@{$self->{char}}) {
1477     $self->{next_input_character} = shift @{$self->{char}};
1478     } else {
1479     $self->{set_next_input_character}->($self);
1480     }
1481    
1482     redo A;
1483     }
1484     }
1485     }
1486     }
1487     }
1488     }
1489     }
1490    
1491 wakaba 1.30 $self->{parse_error}-> (type => 'bogus comment');
1492 wakaba 1.1 $self->{next_input_character} = shift @next_char;
1493     unshift @{$self->{char}}, (@next_char);
1494 wakaba 1.59 $self->{state} = BOGUS_COMMENT_STATE;
1495 wakaba 1.1 redo A;
1496    
1497     ## ISSUE: typos in spec: chacacters, is is a parse error
1498     ## ISSUE: spec is somewhat unclear on "is the first character that will be in the comment"; what is "that will be in the comment" is what the algorithm defines, isn't it?
1499 wakaba 1.59 } elsif ($self->{state} == COMMENT_START_STATE) {
1500 wakaba 1.23 if ($self->{next_input_character} == 0x002D) { # -
1501 wakaba 1.59 $self->{state} = COMMENT_START_DASH_STATE;
1502 wakaba 1.23
1503     if (@{$self->{char}}) {
1504     $self->{next_input_character} = shift @{$self->{char}};
1505     } else {
1506     $self->{set_next_input_character}->($self);
1507     }
1508    
1509     redo A;
1510     } elsif ($self->{next_input_character} == 0x003E) { # >
1511     $self->{parse_error}-> (type => 'bogus comment');
1512 wakaba 1.59 $self->{state} = DATA_STATE;
1513 wakaba 1.23
1514     if (@{$self->{char}}) {
1515     $self->{next_input_character} = shift @{$self->{char}};
1516     } else {
1517     $self->{set_next_input_character}->($self);
1518     }
1519    
1520    
1521     return ($self->{current_token}); # comment
1522    
1523     redo A;
1524     } elsif ($self->{next_input_character} == -1) {
1525     $self->{parse_error}-> (type => 'unclosed comment');
1526 wakaba 1.59 $self->{state} = DATA_STATE;
1527 wakaba 1.23 ## reconsume
1528    
1529     return ($self->{current_token}); # comment
1530    
1531     redo A;
1532     } else {
1533     $self->{current_token}->{data} # comment
1534     .= chr ($self->{next_input_character});
1535 wakaba 1.59 $self->{state} = COMMENT_STATE;
1536 wakaba 1.23
1537     if (@{$self->{char}}) {
1538     $self->{next_input_character} = shift @{$self->{char}};
1539     } else {
1540     $self->{set_next_input_character}->($self);
1541     }
1542    
1543     redo A;
1544     }
1545 wakaba 1.59 } elsif ($self->{state} == COMMENT_START_DASH_STATE) {
1546 wakaba 1.23 if ($self->{next_input_character} == 0x002D) { # -
1547 wakaba 1.59 $self->{state} = COMMENT_END_STATE;
1548 wakaba 1.23
1549     if (@{$self->{char}}) {
1550     $self->{next_input_character} = shift @{$self->{char}};
1551     } else {
1552     $self->{set_next_input_character}->($self);
1553     }
1554    
1555     redo A;
1556     } elsif ($self->{next_input_character} == 0x003E) { # >
1557     $self->{parse_error}-> (type => 'bogus comment');
1558 wakaba 1.59 $self->{state} = DATA_STATE;
1559 wakaba 1.23
1560     if (@{$self->{char}}) {
1561     $self->{next_input_character} = shift @{$self->{char}};
1562     } else {
1563     $self->{set_next_input_character}->($self);
1564     }
1565    
1566    
1567     return ($self->{current_token}); # comment
1568    
1569     redo A;
1570     } elsif ($self->{next_input_character} == -1) {
1571     $self->{parse_error}-> (type => 'unclosed comment');
1572 wakaba 1.59 $self->{state} = DATA_STATE;
1573 wakaba 1.23 ## reconsume
1574    
1575     return ($self->{current_token}); # comment
1576    
1577     redo A;
1578     } else {
1579     $self->{current_token}->{data} # comment
1580 wakaba 1.33 .= '-' . chr ($self->{next_input_character});
1581 wakaba 1.59 $self->{state} = COMMENT_STATE;
1582 wakaba 1.23
1583     if (@{$self->{char}}) {
1584     $self->{next_input_character} = shift @{$self->{char}};
1585     } else {
1586     $self->{set_next_input_character}->($self);
1587     }
1588    
1589     redo A;
1590     }
1591 wakaba 1.59 } elsif ($self->{state} == COMMENT_STATE) {
1592 wakaba 1.1 if ($self->{next_input_character} == 0x002D) { # -
1593 wakaba 1.59 $self->{state} = COMMENT_END_DASH_STATE;
1594 wakaba 1.1
1595     if (@{$self->{char}}) {
1596     $self->{next_input_character} = shift @{$self->{char}};
1597     } else {
1598     $self->{set_next_input_character}->($self);
1599     }
1600    
1601     redo A;
1602     } elsif ($self->{next_input_character} == -1) {
1603 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed comment');
1604 wakaba 1.59 $self->{state} = DATA_STATE;
1605 wakaba 1.1 ## reconsume
1606    
1607     return ($self->{current_token}); # comment
1608    
1609     redo A;
1610     } else {
1611     $self->{current_token}->{data} .= chr ($self->{next_input_character}); # comment
1612     ## Stay in the state
1613    
1614     if (@{$self->{char}}) {
1615     $self->{next_input_character} = shift @{$self->{char}};
1616     } else {
1617     $self->{set_next_input_character}->($self);
1618     }
1619    
1620     redo A;
1621     }
1622 wakaba 1.59 } elsif ($self->{state} == COMMENT_END_DASH_STATE) {
1623 wakaba 1.1 if ($self->{next_input_character} == 0x002D) { # -
1624 wakaba 1.59 $self->{state} = COMMENT_END_STATE;
1625 wakaba 1.1
1626     if (@{$self->{char}}) {
1627     $self->{next_input_character} = shift @{$self->{char}};
1628     } else {
1629     $self->{set_next_input_character}->($self);
1630     }
1631    
1632     redo A;
1633     } elsif ($self->{next_input_character} == -1) {
1634 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed comment');
1635 wakaba 1.59 $self->{state} = DATA_STATE;
1636 wakaba 1.1 ## reconsume
1637    
1638     return ($self->{current_token}); # comment
1639    
1640     redo A;
1641     } else {
1642     $self->{current_token}->{data} .= '-' . chr ($self->{next_input_character}); # comment
1643 wakaba 1.59 $self->{state} = COMMENT_STATE;
1644 wakaba 1.1
1645     if (@{$self->{char}}) {
1646     $self->{next_input_character} = shift @{$self->{char}};
1647     } else {
1648     $self->{set_next_input_character}->($self);
1649     }
1650    
1651     redo A;
1652     }
1653 wakaba 1.59 } elsif ($self->{state} == COMMENT_END_STATE) {
1654 wakaba 1.1 if ($self->{next_input_character} == 0x003E) { # >
1655 wakaba 1.59 $self->{state} = DATA_STATE;
1656 wakaba 1.1
1657     if (@{$self->{char}}) {
1658     $self->{next_input_character} = shift @{$self->{char}};
1659     } else {
1660     $self->{set_next_input_character}->($self);
1661     }
1662    
1663    
1664     return ($self->{current_token}); # comment
1665    
1666     redo A;
1667     } elsif ($self->{next_input_character} == 0x002D) { # -
1668 wakaba 1.3 $self->{parse_error}-> (type => 'dash in comment');
1669 wakaba 1.1 $self->{current_token}->{data} .= '-'; # comment
1670     ## Stay in the state
1671    
1672     if (@{$self->{char}}) {
1673     $self->{next_input_character} = shift @{$self->{char}};
1674     } else {
1675     $self->{set_next_input_character}->($self);
1676     }
1677    
1678     redo A;
1679     } elsif ($self->{next_input_character} == -1) {
1680 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed comment');
1681 wakaba 1.59 $self->{state} = DATA_STATE;
1682 wakaba 1.1 ## reconsume
1683    
1684     return ($self->{current_token}); # comment
1685    
1686     redo A;
1687     } else {
1688 wakaba 1.3 $self->{parse_error}-> (type => 'dash in comment');
1689 wakaba 1.1 $self->{current_token}->{data} .= '--' . chr ($self->{next_input_character}); # comment
1690 wakaba 1.59 $self->{state} = COMMENT_STATE;
1691 wakaba 1.1
1692     if (@{$self->{char}}) {
1693     $self->{next_input_character} = shift @{$self->{char}};
1694     } else {
1695     $self->{set_next_input_character}->($self);
1696     }
1697    
1698     redo A;
1699     }
1700 wakaba 1.59 } elsif ($self->{state} == DOCTYPE_STATE) {
1701 wakaba 1.1 if ($self->{next_input_character} == 0x0009 or # HT
1702     $self->{next_input_character} == 0x000A or # LF
1703     $self->{next_input_character} == 0x000B or # VT
1704     $self->{next_input_character} == 0x000C or # FF
1705     $self->{next_input_character} == 0x0020) { # SP
1706 wakaba 1.59 $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
1707 wakaba 1.1
1708     if (@{$self->{char}}) {
1709     $self->{next_input_character} = shift @{$self->{char}};
1710     } else {
1711     $self->{set_next_input_character}->($self);
1712     }
1713    
1714     redo A;
1715     } else {
1716 wakaba 1.3 $self->{parse_error}-> (type => 'no space before DOCTYPE name');
1717 wakaba 1.59 $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
1718 wakaba 1.1 ## reconsume
1719     redo A;
1720     }
1721 wakaba 1.59 } elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) {
1722 wakaba 1.1 if ($self->{next_input_character} == 0x0009 or # HT
1723     $self->{next_input_character} == 0x000A or # LF
1724     $self->{next_input_character} == 0x000B or # VT
1725     $self->{next_input_character} == 0x000C or # FF
1726     $self->{next_input_character} == 0x0020) { # SP
1727     ## Stay in the state
1728    
1729     if (@{$self->{char}}) {
1730     $self->{next_input_character} = shift @{$self->{char}};
1731     } else {
1732     $self->{set_next_input_character}->($self);
1733     }
1734    
1735     redo A;
1736 wakaba 1.18 } elsif ($self->{next_input_character} == 0x003E) { # >
1737     $self->{parse_error}-> (type => 'no DOCTYPE name');
1738 wakaba 1.59 $self->{state} = DATA_STATE;
1739 wakaba 1.18
1740     if (@{$self->{char}}) {
1741     $self->{next_input_character} = shift @{$self->{char}};
1742     } else {
1743     $self->{set_next_input_character}->($self);
1744     }
1745    
1746    
1747 wakaba 1.57 return ({type => DOCTYPE_TOKEN}); # incorrect
1748 wakaba 1.18
1749     redo A;
1750     } elsif ($self->{next_input_character} == -1) {
1751     $self->{parse_error}-> (type => 'no DOCTYPE name');
1752 wakaba 1.59 $self->{state} = DATA_STATE;
1753 wakaba 1.18 ## reconsume
1754    
1755 wakaba 1.57 return ({type => DOCTYPE_TOKEN}); # incorrect
1756 wakaba 1.18
1757     redo A;
1758     } else {
1759     $self->{current_token}
1760 wakaba 1.57 = {type => DOCTYPE_TOKEN,
1761 wakaba 1.18 name => chr ($self->{next_input_character}),
1762     correct => 1};
1763 wakaba 1.4 ## ISSUE: "Set the token's name name to the" in the spec
1764 wakaba 1.59 $self->{state} = DOCTYPE_NAME_STATE;
1765 wakaba 1.1
1766     if (@{$self->{char}}) {
1767     $self->{next_input_character} = shift @{$self->{char}};
1768     } else {
1769     $self->{set_next_input_character}->($self);
1770     }
1771    
1772     redo A;
1773 wakaba 1.18 }
1774 wakaba 1.59 } elsif ($self->{state} == DOCTYPE_NAME_STATE) {
1775 wakaba 1.18 ## ISSUE: Redundant "First," in the spec.
1776     if ($self->{next_input_character} == 0x0009 or # HT
1777     $self->{next_input_character} == 0x000A or # LF
1778     $self->{next_input_character} == 0x000B or # VT
1779     $self->{next_input_character} == 0x000C or # FF
1780     $self->{next_input_character} == 0x0020) { # SP
1781 wakaba 1.59 $self->{state} = AFTER_DOCTYPE_NAME_STATE;
1782 wakaba 1.18
1783     if (@{$self->{char}}) {
1784     $self->{next_input_character} = shift @{$self->{char}};
1785     } else {
1786     $self->{set_next_input_character}->($self);
1787     }
1788    
1789     redo A;
1790 wakaba 1.1 } elsif ($self->{next_input_character} == 0x003E) { # >
1791 wakaba 1.59 $self->{state} = DATA_STATE;
1792 wakaba 1.1
1793     if (@{$self->{char}}) {
1794     $self->{next_input_character} = shift @{$self->{char}};
1795     } else {
1796     $self->{set_next_input_character}->($self);
1797     }
1798    
1799    
1800 wakaba 1.18 return ($self->{current_token}); # DOCTYPE
1801 wakaba 1.1
1802     redo A;
1803 wakaba 1.18 } elsif ($self->{next_input_character} == -1) {
1804     $self->{parse_error}-> (type => 'unclosed DOCTYPE');
1805 wakaba 1.59 $self->{state} = DATA_STATE;
1806 wakaba 1.1 ## reconsume
1807    
1808 wakaba 1.18 delete $self->{current_token}->{correct};
1809     return ($self->{current_token}); # DOCTYPE
1810 wakaba 1.1
1811     redo A;
1812     } else {
1813 wakaba 1.18 $self->{current_token}->{name}
1814     .= chr ($self->{next_input_character}); # DOCTYPE
1815     ## Stay in the state
1816 wakaba 1.1
1817     if (@{$self->{char}}) {
1818     $self->{next_input_character} = shift @{$self->{char}};
1819     } else {
1820     $self->{set_next_input_character}->($self);
1821     }
1822    
1823     redo A;
1824     }
1825 wakaba 1.59 } elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) {
1826 wakaba 1.1 if ($self->{next_input_character} == 0x0009 or # HT
1827     $self->{next_input_character} == 0x000A or # LF
1828     $self->{next_input_character} == 0x000B or # VT
1829     $self->{next_input_character} == 0x000C or # FF
1830     $self->{next_input_character} == 0x0020) { # SP
1831 wakaba 1.18 ## Stay in the state
1832 wakaba 1.1
1833     if (@{$self->{char}}) {
1834     $self->{next_input_character} = shift @{$self->{char}};
1835     } else {
1836     $self->{set_next_input_character}->($self);
1837     }
1838    
1839     redo A;
1840     } elsif ($self->{next_input_character} == 0x003E) { # >
1841 wakaba 1.59 $self->{state} = DATA_STATE;
1842 wakaba 1.1
1843     if (@{$self->{char}}) {
1844     $self->{next_input_character} = shift @{$self->{char}};
1845     } else {
1846     $self->{set_next_input_character}->($self);
1847     }
1848    
1849    
1850     return ($self->{current_token}); # DOCTYPE
1851    
1852     redo A;
1853 wakaba 1.18 } elsif ($self->{next_input_character} == -1) {
1854     $self->{parse_error}-> (type => 'unclosed DOCTYPE');
1855 wakaba 1.59 $self->{state} = DATA_STATE;
1856 wakaba 1.18 ## reconsume
1857    
1858     delete $self->{current_token}->{correct};
1859     return ($self->{current_token}); # DOCTYPE
1860    
1861     redo A;
1862     } elsif ($self->{next_input_character} == 0x0050 or # P
1863     $self->{next_input_character} == 0x0070) { # p
1864    
1865     if (@{$self->{char}}) {
1866     $self->{next_input_character} = shift @{$self->{char}};
1867     } else {
1868     $self->{set_next_input_character}->($self);
1869     }
1870    
1871     if ($self->{next_input_character} == 0x0055 or # U
1872     $self->{next_input_character} == 0x0075) { # u
1873    
1874     if (@{$self->{char}}) {
1875     $self->{next_input_character} = shift @{$self->{char}};
1876     } else {
1877     $self->{set_next_input_character}->($self);
1878     }
1879    
1880     if ($self->{next_input_character} == 0x0042 or # B
1881     $self->{next_input_character} == 0x0062) { # b
1882    
1883     if (@{$self->{char}}) {
1884     $self->{next_input_character} = shift @{$self->{char}};
1885     } else {
1886     $self->{set_next_input_character}->($self);
1887     }
1888    
1889     if ($self->{next_input_character} == 0x004C or # L
1890     $self->{next_input_character} == 0x006C) { # l
1891    
1892     if (@{$self->{char}}) {
1893     $self->{next_input_character} = shift @{$self->{char}};
1894     } else {
1895     $self->{set_next_input_character}->($self);
1896     }
1897    
1898     if ($self->{next_input_character} == 0x0049 or # I
1899     $self->{next_input_character} == 0x0069) { # i
1900    
1901     if (@{$self->{char}}) {
1902     $self->{next_input_character} = shift @{$self->{char}};
1903     } else {
1904     $self->{set_next_input_character}->($self);
1905     }
1906    
1907     if ($self->{next_input_character} == 0x0043 or # C
1908     $self->{next_input_character} == 0x0063) { # c
1909 wakaba 1.59 $self->{state} = BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
1910 wakaba 1.18
1911     if (@{$self->{char}}) {
1912     $self->{next_input_character} = shift @{$self->{char}};
1913     } else {
1914     $self->{set_next_input_character}->($self);
1915     }
1916    
1917     redo A;
1918     }
1919     }
1920     }
1921     }
1922     }
1923    
1924     #
1925     } elsif ($self->{next_input_character} == 0x0053 or # S
1926     $self->{next_input_character} == 0x0073) { # s
1927    
1928     if (@{$self->{char}}) {
1929     $self->{next_input_character} = shift @{$self->{char}};
1930     } else {
1931     $self->{set_next_input_character}->($self);
1932     }
1933    
1934     if ($self->{next_input_character} == 0x0059 or # Y
1935     $self->{next_input_character} == 0x0079) { # y
1936    
1937     if (@{$self->{char}}) {
1938     $self->{next_input_character} = shift @{$self->{char}};
1939     } else {
1940     $self->{set_next_input_character}->($self);
1941     }
1942    
1943     if ($self->{next_input_character} == 0x0053 or # S
1944     $self->{next_input_character} == 0x0073) { # s
1945    
1946     if (@{$self->{char}}) {
1947     $self->{next_input_character} = shift @{$self->{char}};
1948     } else {
1949     $self->{set_next_input_character}->($self);
1950     }
1951    
1952     if ($self->{next_input_character} == 0x0054 or # T
1953     $self->{next_input_character} == 0x0074) { # t
1954    
1955     if (@{$self->{char}}) {
1956     $self->{next_input_character} = shift @{$self->{char}};
1957     } else {
1958     $self->{set_next_input_character}->($self);
1959     }
1960    
1961     if ($self->{next_input_character} == 0x0045 or # E
1962     $self->{next_input_character} == 0x0065) { # e
1963    
1964     if (@{$self->{char}}) {
1965     $self->{next_input_character} = shift @{$self->{char}};
1966     } else {
1967     $self->{set_next_input_character}->($self);
1968     }
1969    
1970     if ($self->{next_input_character} == 0x004D or # M
1971     $self->{next_input_character} == 0x006D) { # m
1972 wakaba 1.59 $self->{state} = BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
1973 wakaba 1.18
1974     if (@{$self->{char}}) {
1975     $self->{next_input_character} = shift @{$self->{char}};
1976     } else {
1977     $self->{set_next_input_character}->($self);
1978     }
1979    
1980     redo A;
1981     }
1982     }
1983     }
1984     }
1985     }
1986    
1987     #
1988     } else {
1989    
1990     if (@{$self->{char}}) {
1991     $self->{next_input_character} = shift @{$self->{char}};
1992     } else {
1993     $self->{set_next_input_character}->($self);
1994     }
1995    
1996     #
1997     }
1998    
1999     $self->{parse_error}-> (type => 'string after DOCTYPE name');
2000 wakaba 1.59 $self->{state} = BOGUS_DOCTYPE_STATE;
2001 wakaba 1.18 # next-input-character is already done
2002     redo A;
2003 wakaba 1.59 } elsif ($self->{state} == BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {
2004 wakaba 1.18 if ({
2005     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2006     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2007     }->{$self->{next_input_character}}) {
2008 wakaba 1.1 ## Stay in the state
2009    
2010     if (@{$self->{char}}) {
2011     $self->{next_input_character} = shift @{$self->{char}};
2012     } else {
2013     $self->{set_next_input_character}->($self);
2014     }
2015    
2016     redo A;
2017 wakaba 1.18 } elsif ($self->{next_input_character} eq 0x0022) { # "
2018     $self->{current_token}->{public_identifier} = ''; # DOCTYPE
2019 wakaba 1.59 $self->{state} = DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE;
2020 wakaba 1.18
2021     if (@{$self->{char}}) {
2022     $self->{next_input_character} = shift @{$self->{char}};
2023     } else {
2024     $self->{set_next_input_character}->($self);
2025     }
2026    
2027     redo A;
2028     } elsif ($self->{next_input_character} eq 0x0027) { # '
2029     $self->{current_token}->{public_identifier} = ''; # DOCTYPE
2030 wakaba 1.59 $self->{state} = DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE;
2031 wakaba 1.18
2032     if (@{$self->{char}}) {
2033     $self->{next_input_character} = shift @{$self->{char}};
2034     } else {
2035     $self->{set_next_input_character}->($self);
2036     }
2037    
2038     redo A;
2039     } elsif ($self->{next_input_character} eq 0x003E) { # >
2040     $self->{parse_error}-> (type => 'no PUBLIC literal');
2041    
2042 wakaba 1.59 $self->{state} = DATA_STATE;
2043 wakaba 1.18
2044     if (@{$self->{char}}) {
2045     $self->{next_input_character} = shift @{$self->{char}};
2046     } else {
2047     $self->{set_next_input_character}->($self);
2048     }
2049    
2050    
2051     delete $self->{current_token}->{correct};
2052     return ($self->{current_token}); # DOCTYPE
2053    
2054     redo A;
2055 wakaba 1.1 } elsif ($self->{next_input_character} == -1) {
2056 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed DOCTYPE');
2057 wakaba 1.18
2058 wakaba 1.59 $self->{state} = DATA_STATE;
2059 wakaba 1.1 ## reconsume
2060    
2061 wakaba 1.18 delete $self->{current_token}->{correct};
2062     return ($self->{current_token}); # DOCTYPE
2063 wakaba 1.1
2064     redo A;
2065     } else {
2066 wakaba 1.18 $self->{parse_error}-> (type => 'string after PUBLIC');
2067 wakaba 1.59 $self->{state} = BOGUS_DOCTYPE_STATE;
2068 wakaba 1.18
2069     if (@{$self->{char}}) {
2070     $self->{next_input_character} = shift @{$self->{char}};
2071     } else {
2072     $self->{set_next_input_character}->($self);
2073     }
2074    
2075     redo A;
2076     }
2077 wakaba 1.59 } elsif ($self->{state} == DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE) {
2078 wakaba 1.18 if ($self->{next_input_character} == 0x0022) { # "
2079 wakaba 1.59 $self->{state} = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
2080 wakaba 1.18
2081     if (@{$self->{char}}) {
2082     $self->{next_input_character} = shift @{$self->{char}};
2083     } else {
2084     $self->{set_next_input_character}->($self);
2085     }
2086    
2087     redo A;
2088     } elsif ($self->{next_input_character} == -1) {
2089     $self->{parse_error}-> (type => 'unclosed PUBLIC literal');
2090    
2091 wakaba 1.59 $self->{state} = DATA_STATE;
2092 wakaba 1.18 ## reconsume
2093    
2094     delete $self->{current_token}->{correct};
2095     return ($self->{current_token}); # DOCTYPE
2096    
2097     redo A;
2098     } else {
2099     $self->{current_token}->{public_identifier} # DOCTYPE
2100     .= chr $self->{next_input_character};
2101     ## Stay in the state
2102    
2103     if (@{$self->{char}}) {
2104     $self->{next_input_character} = shift @{$self->{char}};
2105     } else {
2106     $self->{set_next_input_character}->($self);
2107     }
2108    
2109     redo A;
2110     }
2111 wakaba 1.59 } elsif ($self->{state} == DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE) {
2112 wakaba 1.18 if ($self->{next_input_character} == 0x0027) { # '
2113 wakaba 1.59 $self->{state} = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
2114 wakaba 1.18
2115     if (@{$self->{char}}) {
2116     $self->{next_input_character} = shift @{$self->{char}};
2117     } else {
2118     $self->{set_next_input_character}->($self);
2119     }
2120    
2121     redo A;
2122     } elsif ($self->{next_input_character} == -1) {
2123     $self->{parse_error}-> (type => 'unclosed PUBLIC literal');
2124    
2125 wakaba 1.59 $self->{state} = DATA_STATE;
2126 wakaba 1.18 ## reconsume
2127    
2128     delete $self->{current_token}->{correct};
2129     return ($self->{current_token}); # DOCTYPE
2130    
2131     redo A;
2132     } else {
2133     $self->{current_token}->{public_identifier} # DOCTYPE
2134     .= chr $self->{next_input_character};
2135     ## Stay in the state
2136    
2137     if (@{$self->{char}}) {
2138     $self->{next_input_character} = shift @{$self->{char}};
2139     } else {
2140     $self->{set_next_input_character}->($self);
2141     }
2142    
2143     redo A;
2144     }
2145 wakaba 1.59 } elsif ($self->{state} == AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {
2146 wakaba 1.18 if ({
2147     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2148     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2149     }->{$self->{next_input_character}}) {
2150 wakaba 1.1 ## Stay in the state
2151    
2152     if (@{$self->{char}}) {
2153     $self->{next_input_character} = shift @{$self->{char}};
2154     } else {
2155     $self->{set_next_input_character}->($self);
2156     }
2157    
2158     redo A;
2159 wakaba 1.18 } elsif ($self->{next_input_character} == 0x0022) { # "
2160     $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2161 wakaba 1.59 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
2162 wakaba 1.18
2163     if (@{$self->{char}}) {
2164     $self->{next_input_character} = shift @{$self->{char}};
2165     } else {
2166     $self->{set_next_input_character}->($self);
2167     }
2168    
2169     redo A;
2170     } elsif ($self->{next_input_character} == 0x0027) { # '
2171     $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2172 wakaba 1.59 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
2173 wakaba 1.18
2174     if (@{$self->{char}}) {
2175     $self->{next_input_character} = shift @{$self->{char}};
2176     } else {
2177     $self->{set_next_input_character}->($self);
2178     }
2179    
2180     redo A;
2181     } elsif ($self->{next_input_character} == 0x003E) { # >
2182 wakaba 1.59 $self->{state} = DATA_STATE;
2183 wakaba 1.18
2184     if (@{$self->{char}}) {
2185     $self->{next_input_character} = shift @{$self->{char}};
2186     } else {
2187     $self->{set_next_input_character}->($self);
2188     }
2189    
2190    
2191     return ($self->{current_token}); # DOCTYPE
2192    
2193     redo A;
2194     } elsif ($self->{next_input_character} == -1) {
2195     $self->{parse_error}-> (type => 'unclosed DOCTYPE');
2196    
2197 wakaba 1.59 $self->{state} = DATA_STATE;
2198 wakaba 1.26 ## reconsume
2199 wakaba 1.18
2200     delete $self->{current_token}->{correct};
2201     return ($self->{current_token}); # DOCTYPE
2202    
2203     redo A;
2204     } else {
2205     $self->{parse_error}-> (type => 'string after PUBLIC literal');
2206 wakaba 1.59 $self->{state} = BOGUS_DOCTYPE_STATE;
2207 wakaba 1.18
2208     if (@{$self->{char}}) {
2209     $self->{next_input_character} = shift @{$self->{char}};
2210     } else {
2211     $self->{set_next_input_character}->($self);
2212     }
2213    
2214     redo A;
2215 wakaba 1.1 }
2216 wakaba 1.59 } elsif ($self->{state} == BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
2217 wakaba 1.18 if ({
2218     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2219     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2220     }->{$self->{next_input_character}}) {
2221 wakaba 1.1 ## Stay in the state
2222    
2223     if (@{$self->{char}}) {
2224     $self->{next_input_character} = shift @{$self->{char}};
2225     } else {
2226     $self->{set_next_input_character}->($self);
2227     }
2228    
2229     redo A;
2230 wakaba 1.18 } elsif ($self->{next_input_character} == 0x0022) { # "
2231     $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2232 wakaba 1.59 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
2233 wakaba 1.18
2234     if (@{$self->{char}}) {
2235     $self->{next_input_character} = shift @{$self->{char}};
2236     } else {
2237     $self->{set_next_input_character}->($self);
2238     }
2239    
2240     redo A;
2241     } elsif ($self->{next_input_character} == 0x0027) { # '
2242     $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2243 wakaba 1.59 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
2244 wakaba 1.18
2245     if (@{$self->{char}}) {
2246     $self->{next_input_character} = shift @{$self->{char}};
2247     } else {
2248     $self->{set_next_input_character}->($self);
2249     }
2250    
2251     redo A;
2252 wakaba 1.1 } elsif ($self->{next_input_character} == 0x003E) { # >
2253 wakaba 1.18 $self->{parse_error}-> (type => 'no SYSTEM literal');
2254 wakaba 1.59 $self->{state} = DATA_STATE;
2255 wakaba 1.1
2256     if (@{$self->{char}}) {
2257     $self->{next_input_character} = shift @{$self->{char}};
2258     } else {
2259     $self->{set_next_input_character}->($self);
2260     }
2261    
2262    
2263 wakaba 1.18 delete $self->{current_token}->{correct};
2264 wakaba 1.1 return ($self->{current_token}); # DOCTYPE
2265    
2266     redo A;
2267     } elsif ($self->{next_input_character} == -1) {
2268 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed DOCTYPE');
2269 wakaba 1.18
2270 wakaba 1.59 $self->{state} = DATA_STATE;
2271 wakaba 1.26 ## reconsume
2272 wakaba 1.18
2273     delete $self->{current_token}->{correct};
2274     return ($self->{current_token}); # DOCTYPE
2275    
2276     redo A;
2277     } else {
2278 wakaba 1.30 $self->{parse_error}-> (type => 'string after SYSTEM');
2279 wakaba 1.59 $self->{state} = BOGUS_DOCTYPE_STATE;
2280 wakaba 1.18
2281     if (@{$self->{char}}) {
2282     $self->{next_input_character} = shift @{$self->{char}};
2283     } else {
2284     $self->{set_next_input_character}->($self);
2285     }
2286    
2287     redo A;
2288     }
2289 wakaba 1.59 } elsif ($self->{state} == DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE) {
2290 wakaba 1.18 if ($self->{next_input_character} == 0x0022) { # "
2291 wakaba 1.59 $self->{state} = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
2292 wakaba 1.18
2293     if (@{$self->{char}}) {
2294     $self->{next_input_character} = shift @{$self->{char}};
2295     } else {
2296     $self->{set_next_input_character}->($self);
2297     }
2298    
2299     redo A;
2300     } elsif ($self->{next_input_character} == -1) {
2301     $self->{parse_error}-> (type => 'unclosed SYSTEM literal');
2302    
2303 wakaba 1.59 $self->{state} = DATA_STATE;
2304 wakaba 1.1 ## reconsume
2305    
2306 wakaba 1.18 delete $self->{current_token}->{correct};
2307 wakaba 1.1 return ($self->{current_token}); # DOCTYPE
2308    
2309     redo A;
2310     } else {
2311 wakaba 1.18 $self->{current_token}->{system_identifier} # DOCTYPE
2312     .= chr $self->{next_input_character};
2313     ## Stay in the state
2314    
2315     if (@{$self->{char}}) {
2316     $self->{next_input_character} = shift @{$self->{char}};
2317     } else {
2318     $self->{set_next_input_character}->($self);
2319     }
2320    
2321     redo A;
2322     }
2323 wakaba 1.59 } elsif ($self->{state} == DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE) {
2324 wakaba 1.18 if ($self->{next_input_character} == 0x0027) { # '
2325 wakaba 1.59 $self->{state} = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
2326 wakaba 1.18
2327     if (@{$self->{char}}) {
2328     $self->{next_input_character} = shift @{$self->{char}};
2329     } else {
2330     $self->{set_next_input_character}->($self);
2331     }
2332    
2333     redo A;
2334     } elsif ($self->{next_input_character} == -1) {
2335     $self->{parse_error}-> (type => 'unclosed SYSTEM literal');
2336    
2337 wakaba 1.59 $self->{state} = DATA_STATE;
2338 wakaba 1.18 ## reconsume
2339    
2340     delete $self->{current_token}->{correct};
2341     return ($self->{current_token}); # DOCTYPE
2342    
2343     redo A;
2344     } else {
2345     $self->{current_token}->{system_identifier} # DOCTYPE
2346     .= chr $self->{next_input_character};
2347     ## Stay in the state
2348    
2349     if (@{$self->{char}}) {
2350     $self->{next_input_character} = shift @{$self->{char}};
2351     } else {
2352     $self->{set_next_input_character}->($self);
2353     }
2354    
2355     redo A;
2356     }
2357 wakaba 1.59 } elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
2358 wakaba 1.18 if ({
2359     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2360     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2361     }->{$self->{next_input_character}}) {
2362     ## Stay in the state
2363    
2364     if (@{$self->{char}}) {
2365     $self->{next_input_character} = shift @{$self->{char}};
2366     } else {
2367     $self->{set_next_input_character}->($self);
2368     }
2369    
2370     redo A;
2371     } elsif ($self->{next_input_character} == 0x003E) { # >
2372 wakaba 1.59 $self->{state} = DATA_STATE;
2373 wakaba 1.18
2374     if (@{$self->{char}}) {
2375     $self->{next_input_character} = shift @{$self->{char}};
2376     } else {
2377     $self->{set_next_input_character}->($self);
2378     }
2379    
2380    
2381     return ($self->{current_token}); # DOCTYPE
2382    
2383     redo A;
2384     } elsif ($self->{next_input_character} == -1) {
2385     $self->{parse_error}-> (type => 'unclosed DOCTYPE');
2386    
2387 wakaba 1.59 $self->{state} = DATA_STATE;
2388 wakaba 1.26 ## reconsume
2389 wakaba 1.18
2390     delete $self->{current_token}->{correct};
2391     return ($self->{current_token}); # DOCTYPE
2392    
2393     redo A;
2394     } else {
2395     $self->{parse_error}-> (type => 'string after SYSTEM literal');
2396 wakaba 1.59 $self->{state} = BOGUS_DOCTYPE_STATE;
2397 wakaba 1.1
2398     if (@{$self->{char}}) {
2399     $self->{next_input_character} = shift @{$self->{char}};
2400     } else {
2401     $self->{set_next_input_character}->($self);
2402     }
2403    
2404     redo A;
2405     }
2406 wakaba 1.59 } elsif ($self->{state} == BOGUS_DOCTYPE_STATE) {
2407 wakaba 1.1 if ($self->{next_input_character} == 0x003E) { # >
2408 wakaba 1.59 $self->{state} = DATA_STATE;
2409 wakaba 1.1
2410     if (@{$self->{char}}) {
2411     $self->{next_input_character} = shift @{$self->{char}};
2412     } else {
2413     $self->{set_next_input_character}->($self);
2414     }
2415    
2416    
2417 wakaba 1.18 delete $self->{current_token}->{correct};
2418 wakaba 1.1 return ($self->{current_token}); # DOCTYPE
2419    
2420     redo A;
2421     } elsif ($self->{next_input_character} == -1) {
2422 wakaba 1.3 $self->{parse_error}-> (type => 'unclosed DOCTYPE');
2423 wakaba 1.59 $self->{state} = DATA_STATE;
2424 wakaba 1.1 ## reconsume
2425    
2426 wakaba 1.18 delete $self->{current_token}->{correct};
2427 wakaba 1.1 return ($self->{current_token}); # DOCTYPE
2428    
2429     redo A;
2430     } else {
2431     ## Stay in the state
2432    
2433     if (@{$self->{char}}) {
2434     $self->{next_input_character} = shift @{$self->{char}};
2435     } else {
2436     $self->{set_next_input_character}->($self);
2437     }
2438    
2439     redo A;
2440     }
2441     } else {
2442     die "$0: $self->{state}: Unknown state";
2443     }
2444     } # A
2445    
2446     die "$0: _get_next_token: unexpected case";
2447     } # _get_next_token
2448    
2449 wakaba 1.26 sub _tokenize_attempt_to_consume_an_entity ($$) {
2450     my ($self, $in_attr) = @_;
2451 wakaba 1.20
2452     if ({
2453     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, # HT, LF, VT, FF,
2454     0x0020 => 1, 0x003C => 1, 0x0026 => 1, -1 => 1, # SP, <, & # 0x000D # CR
2455     }->{$self->{next_input_character}}) {
2456     ## Don't consume
2457     ## No error
2458     return undef;
2459     } elsif ($self->{next_input_character} == 0x0023) { # #
2460 wakaba 1.1
2461     if (@{$self->{char}}) {
2462     $self->{next_input_character} = shift @{$self->{char}};
2463     } else {
2464     $self->{set_next_input_character}->($self);
2465     }
2466    
2467     if ($self->{next_input_character} == 0x0078 or # x
2468     $self->{next_input_character} == 0x0058) { # X
2469 wakaba 1.26 my $code;
2470 wakaba 1.1 X: {
2471     my $x_char = $self->{next_input_character};
2472    
2473     if (@{$self->{char}}) {
2474     $self->{next_input_character} = shift @{$self->{char}};
2475     } else {
2476     $self->{set_next_input_character}->($self);
2477     }
2478    
2479     if (0x0030 <= $self->{next_input_character} and
2480     $self->{next_input_character} <= 0x0039) { # 0..9
2481 wakaba 1.26 $code ||= 0;
2482     $code *= 0x10;
2483     $code += $self->{next_input_character} - 0x0030;
2484 wakaba 1.1 redo X;
2485     } elsif (0x0061 <= $self->{next_input_character} and
2486     $self->{next_input_character} <= 0x0066) { # a..f
2487 wakaba 1.26 $code ||= 0;
2488     $code *= 0x10;
2489     $code += $self->{next_input_character} - 0x0060 + 9;
2490 wakaba 1.1 redo X;
2491     } elsif (0x0041 <= $self->{next_input_character} and
2492     $self->{next_input_character} <= 0x0046) { # A..F
2493 wakaba 1.26 $code ||= 0;
2494     $code *= 0x10;
2495     $code += $self->{next_input_character} - 0x0040 + 9;
2496 wakaba 1.1 redo X;
2497 wakaba 1.26 } elsif (not defined $code) { # no hexadecimal digit
2498 wakaba 1.3 $self->{parse_error}-> (type => 'bare hcro');
2499 wakaba 1.37 unshift @{$self->{char}}, ($x_char, $self->{next_input_character});
2500 wakaba 1.1 $self->{next_input_character} = 0x0023; # #
2501     return undef;
2502     } elsif ($self->{next_input_character} == 0x003B) { # ;
2503    
2504     if (@{$self->{char}}) {
2505     $self->{next_input_character} = shift @{$self->{char}};
2506     } else {
2507     $self->{set_next_input_character}->($self);
2508     }
2509    
2510     } else {
2511 wakaba 1.3 $self->{parse_error}-> (type => 'no refc');
2512 wakaba 1.1 }
2513    
2514 wakaba 1.26 if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) {
2515     $self->{parse_error}-> (type => sprintf 'invalid character reference:U+%04X', $code);
2516     $code = 0xFFFD;
2517     } elsif ($code > 0x10FFFF) {
2518     $self->{parse_error}-> (type => sprintf 'invalid character reference:U-%08X', $code);
2519     $code = 0xFFFD;
2520     } elsif ($code == 0x000D) {
2521     $self->{parse_error}-> (type => 'CR character reference');
2522     $code = 0x000A;
2523     } elsif (0x80 <= $code and $code <= 0x9F) {
2524 wakaba 1.30 $self->{parse_error}-> (type => sprintf 'C1 character reference:U+%04X', $code);
2525 wakaba 1.26 $code = $c1_entity_char->{$code};
2526 wakaba 1.1 }
2527    
2528 wakaba 1.57 return {type => CHARACTER_TOKEN, data => chr $code};
2529 wakaba 1.1 } # X
2530     } elsif (0x0030 <= $self->{next_input_character} and
2531     $self->{next_input_character} <= 0x0039) { # 0..9
2532     my $code = $self->{next_input_character} - 0x0030;
2533    
2534     if (@{$self->{char}}) {
2535     $self->{next_input_character} = shift @{$self->{char}};
2536     } else {
2537     $self->{set_next_input_character}->($self);
2538     }
2539    
2540    
2541     while (0x0030 <= $self->{next_input_character} and
2542     $self->{next_input_character} <= 0x0039) { # 0..9
2543     $code *= 10;
2544     $code += $self->{next_input_character} - 0x0030;
2545    
2546    
2547     if (@{$self->{char}}) {
2548     $self->{next_input_character} = shift @{$self->{char}};
2549     } else {
2550     $self->{set_next_input_character}->($self);
2551     }
2552    
2553     }
2554    
2555     if ($self->{next_input_character} == 0x003B) { # ;
2556    
2557     if (@{$self->{char}}) {
2558     $self->{next_input_character} = shift @{$self->{char}};
2559     } else {
2560     $self->{set_next_input_character}->($self);
2561     }
2562    
2563     } else {
2564 wakaba 1.3 $self->{parse_error}-> (type => 'no refc');
2565 wakaba 1.1 }
2566    
2567 wakaba 1.26 if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) {
2568     $self->{parse_error}-> (type => sprintf 'invalid character reference:U+%04X', $code);
2569     $code = 0xFFFD;
2570     } elsif ($code > 0x10FFFF) {
2571     $self->{parse_error}-> (type => sprintf 'invalid character reference:U-%08X', $code);
2572     $code = 0xFFFD;
2573     } elsif ($code == 0x000D) {
2574     $self->{parse_error}-> (type => 'CR character reference');
2575     $code = 0x000A;
2576 wakaba 1.4 } elsif (0x80 <= $code and $code <= 0x9F) {
2577 wakaba 1.30 $self->{parse_error}-> (type => sprintf 'C1 character reference:U+%04X', $code);
2578 wakaba 1.4 $code = $c1_entity_char->{$code};
2579 wakaba 1.1 }
2580    
2581 wakaba 1.57 return {type => CHARACTER_TOKEN, data => chr $code};
2582 wakaba 1.1 } else {
2583 wakaba 1.3 $self->{parse_error}-> (type => 'bare nero');
2584 wakaba 1.1 unshift @{$self->{char}}, ($self->{next_input_character});
2585     $self->{next_input_character} = 0x0023; # #
2586     return undef;
2587     }
2588     } elsif ((0x0041 <= $self->{next_input_character} and
2589     $self->{next_input_character} <= 0x005A) or
2590     (0x0061 <= $self->{next_input_character} and
2591     $self->{next_input_character} <= 0x007A)) {
2592     my $entity_name = chr $self->{next_input_character};
2593    
2594     if (@{$self->{char}}) {
2595     $self->{next_input_character} = shift @{$self->{char}};
2596     } else {
2597     $self->{set_next_input_character}->($self);
2598     }
2599    
2600    
2601     my $value = $entity_name;
2602 wakaba 1.37 my $match = 0;
2603 wakaba 1.16 require Whatpm::_NamedEntityList;
2604     our $EntityChar;
2605 wakaba 1.1
2606     while (length $entity_name < 10 and
2607     ## NOTE: Some number greater than the maximum length of entity name
2608 wakaba 1.16 ((0x0041 <= $self->{next_input_character} and # a
2609     $self->{next_input_character} <= 0x005A) or # x
2610     (0x0061 <= $self->{next_input_character} and # a
2611     $self->{next_input_character} <= 0x007A) or # z
2612     (0x0030 <= $self->{next_input_character} and # 0
2613     $self->{next_input_character} <= 0x0039) or # 9
2614     $self->{next_input_character} == 0x003B)) { # ;
2615 wakaba 1.1 $entity_name .= chr $self->{next_input_character};
2616 wakaba 1.16 if (defined $EntityChar->{$entity_name}) {
2617     if ($self->{next_input_character} == 0x003B) { # ;
2618 wakaba 1.26 $value = $EntityChar->{$entity_name};
2619 wakaba 1.16 $match = 1;
2620    
2621     if (@{$self->{char}}) {
2622     $self->{next_input_character} = shift @{$self->{char}};
2623     } else {
2624     $self->{set_next_input_character}->($self);
2625     }
2626    
2627     last;
2628 wakaba 1.37 } else {
2629 wakaba 1.26 $value = $EntityChar->{$entity_name};
2630     $match = -1;
2631 wakaba 1.37
2632     if (@{$self->{char}}) {
2633     $self->{next_input_character} = shift @{$self->{char}};
2634     } else {
2635     $self->{set_next_input_character}->($self);
2636     }
2637    
2638 wakaba 1.16 }
2639 wakaba 1.1 } else {
2640     $value .= chr $self->{next_input_character};
2641 wakaba 1.37 $match *= 2;
2642    
2643 wakaba 1.1 if (@{$self->{char}}) {
2644     $self->{next_input_character} = shift @{$self->{char}};
2645     } else {
2646     $self->{set_next_input_character}->($self);
2647     }
2648    
2649 wakaba 1.37 }
2650 wakaba 1.1 }
2651    
2652 wakaba 1.16 if ($match > 0) {
2653 wakaba 1.57 return {type => CHARACTER_TOKEN, data => $value};
2654 wakaba 1.16 } elsif ($match < 0) {
2655 wakaba 1.30 $self->{parse_error}-> (type => 'no refc');
2656 wakaba 1.37 if ($in_attr and $match < -1) {
2657 wakaba 1.57 return {type => CHARACTER_TOKEN, data => '&'.$entity_name};
2658 wakaba 1.37 } else {
2659 wakaba 1.57 return {type => CHARACTER_TOKEN, data => $value};
2660 wakaba 1.37 }
2661 wakaba 1.1 } else {
2662 wakaba 1.3 $self->{parse_error}-> (type => 'bare ero');
2663 wakaba 1.1 ## NOTE: No characters are consumed in the spec.
2664 wakaba 1.57 return {type => CHARACTER_TOKEN, data => '&'.$value};
2665 wakaba 1.1 }
2666     } else {
2667     ## no characters are consumed
2668 wakaba 1.3 $self->{parse_error}-> (type => 'bare ero');
2669 wakaba 1.1 return undef;
2670     }
2671     } # _tokenize_attempt_to_consume_an_entity
2672    
## Prepare the document for tree construction.
##
## $self->{document} MUST already be set when this method is called.
## Strict error checking is switched off for the duration of parsing and
## the document is flagged as an HTML document.  The value returned is
## whatever |manakai_is_html| returns.
sub _initialize_tree_constructor ($) {
  my ($self) = @_;
  my $doc = $self->{document};

  ## The parser has to be able to build trees that strict DOM error
  ## checking would reject.
  $doc->strict_error_checking (0);

  ## TODO: Turn mutation events off # MUST
  ## TODO: Turn loose Document option (manakai extension) on

  return $doc->manakai_is_html (1); # MUST
} # _initialize_tree_constructor
2681    
## Undo the set-up done for tree construction: strict error checking is
## switched back on for $self->{document}.  The value returned is
## whatever |strict_error_checking| returns.
sub _terminate_tree_constructor ($) {
  my ($self) = @_;

  ## TODO: Turn mutation events on
  return $self->{document}->strict_error_checking (1);
} # _terminate_tree_constructor
2687    
2688     ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
2689    
2690 wakaba 1.3 { # tree construction stage
2691     my $token;
2692    
## Entry point of the tree construction stage.
##
## Reads the first token into the shared |$token|, resets the parser
## state kept in $self and then runs the three phases in order:
## initial, root element, main.
##
## NOTE: When an interactive UA renders $self->{document} to the user,
## or when it begins accepting user input, is not defined.
##
## NOTE: "Append a character" means: collect the character and all
## subsequent consecutive characters and insert one Text node whose
## data is the concatenation of all those characters. # MUST
sub _construct_tree ($) {
  my $self = shift;

  $token = $self->_get_next_token;

  ## Reset the per-parse state.
  $self->{insertion_mode} = BEFORE_HEAD_IM;
  $self->{open_elements} = [];
  $self->{$_} = undef for qw(form_element head_element inner_html_node);

  $self->_tree_construction_initial; # MUST
  $self->_tree_construction_root_element;
  $self->_tree_construction_main;
} # _construct_tree
2716    
## The "initial" phase of tree construction.
##
## Consumes tokens from the shared |$token| until the document's
## compatibility mode is decided:
##
##   - A DOCTYPE token is turned into a DocumentType node, the compat
##     mode is derived from its name / public identifier / system
##     identifier, and the next token is read.
##   - A start tag, end tag or end-of-file token, or a character token
##     with non-white-space content, is a 'no DOCTYPE' parse error; the
##     document becomes quirks mode and the token is left in |$token| to
##     be reprocessed by the root element phase.
##   - Comments are appended to the document and leading white space is
##     dropped; both keep the parser in this phase.
##
## Dies on a token type it does not know.
sub _tree_construction_initial ($) {
  my $self = shift;

  INITIAL: {
    if ($token->{type} == DOCTYPE_TOKEN) {
      ## NOTE: Conformance checkers MAY, instead of reporting "not HTML5"
      ## error, switch to a conformance checking mode for another
      ## language.
      my $doctype_name = defined $token->{name} ? $token->{name} : '';
      $doctype_name =~ tr/a-z/A-Z/;

      ## Anything other than a bare |<!DOCTYPE HTML>| is reported.
      ## ISSUE: ASCII case-insensitive? (in fact it does not matter)
      if (not defined $token->{name} or # <!DOCTYPE>
          defined $token->{public_identifier} or
          defined $token->{system_identifier} or
          $doctype_name ne 'HTML') {
        $self->{parse_error}-> (type => 'not HTML5');
      }

      my $doctype = $self->{document}->create_document_type_definition
          ($token->{name}); ## ISSUE: If name is missing (e.g. <!DOCTYPE>)?
      $doctype->public_id ($token->{public_identifier})
          if defined $token->{public_identifier};
      $doctype->system_id ($token->{system_identifier})
          if defined $token->{system_identifier};
      ## NOTE: Other DocumentType attributes are null or empty lists.
      ## ISSUE: internalSubset = null??
      $self->{document}->append_child ($doctype);

      if (not $token->{correct} or $doctype_name ne 'HTML') {
        $self->{document}->manakai_compat_mode ('quirks');
      } elsif (defined $token->{public_identifier}) {
        ## Public identifiers are compared case-insensitively: the
        ## identifier is upper-cased here, so every literal it is
        ## compared with below MUST be written in upper case too.
        my $pubid = $token->{public_identifier};
        $pubid =~ tr/a-z/A-Z/;

        my %is_quirks_pubid = map { ($_ => 1) } (
          "+//SILMARIL//DTD HTML PRO V0R11 19970101//EN",
          "-//ADVASOFT LTD//DTD HTML 3.0 ASWEDIT + EXTENSIONS//EN",
          "-//AS//DTD HTML 3.0 ASWEDIT + EXTENSIONS//EN",
          "-//IETF//DTD HTML 2.0 LEVEL 1//EN",
          "-//IETF//DTD HTML 2.0 LEVEL 2//EN",
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 1//EN",
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 2//EN",
          "-//IETF//DTD HTML 2.0 STRICT//EN",
          "-//IETF//DTD HTML 2.0//EN",
          "-//IETF//DTD HTML 2.1E//EN",
          "-//IETF//DTD HTML 3.0//EN",
          "-//IETF//DTD HTML 3.0//EN//",
          "-//IETF//DTD HTML 3.2 FINAL//EN",
          "-//IETF//DTD HTML 3.2//EN",
          "-//IETF//DTD HTML 3//EN",
          "-//IETF//DTD HTML LEVEL 0//EN",
          "-//IETF//DTD HTML LEVEL 0//EN//2.0",
          "-//IETF//DTD HTML LEVEL 1//EN",
          "-//IETF//DTD HTML LEVEL 1//EN//2.0",
          "-//IETF//DTD HTML LEVEL 2//EN",
          "-//IETF//DTD HTML LEVEL 2//EN//2.0",
          "-//IETF//DTD HTML LEVEL 3//EN",
          "-//IETF//DTD HTML LEVEL 3//EN//3.0",
          "-//IETF//DTD HTML STRICT LEVEL 0//EN",
          "-//IETF//DTD HTML STRICT LEVEL 0//EN//2.0",
          "-//IETF//DTD HTML STRICT LEVEL 1//EN",
          "-//IETF//DTD HTML STRICT LEVEL 1//EN//2.0",
          "-//IETF//DTD HTML STRICT LEVEL 2//EN",
          "-//IETF//DTD HTML STRICT LEVEL 2//EN//2.0",
          "-//IETF//DTD HTML STRICT LEVEL 3//EN",
          "-//IETF//DTD HTML STRICT LEVEL 3//EN//3.0",
          "-//IETF//DTD HTML STRICT//EN",
          "-//IETF//DTD HTML STRICT//EN//2.0",
          "-//IETF//DTD HTML STRICT//EN//3.0",
          "-//IETF//DTD HTML//EN",
          "-//IETF//DTD HTML//EN//2.0",
          "-//IETF//DTD HTML//EN//3.0",
          "-//METRIUS//DTD METRIUS PRESENTATIONAL//EN",
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML STRICT//EN",
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML//EN",
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 TABLES//EN",
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML STRICT//EN",
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML//EN",
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 TABLES//EN",
          "-//NETSCAPE COMM. CORP.//DTD HTML//EN",
          "-//NETSCAPE COMM. CORP.//DTD STRICT HTML//EN",
          "-//O'REILLY AND ASSOCIATES//DTD HTML 2.0//EN",
          "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED 1.0//EN",
          "-//SPYGLASS//DTD HTML 2.0 EXTENDED//EN",
          "-//SQ//DTD HTML 2.0 HOTMETAL + EXTENSIONS//EN",
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA HTML//EN",
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA STRICT HTML//EN",
          "-//W3C//DTD HTML 3 1995-03-24//EN",
          "-//W3C//DTD HTML 3.2 DRAFT//EN",
          "-//W3C//DTD HTML 3.2 FINAL//EN",
          "-//W3C//DTD HTML 3.2//EN",
          "-//W3C//DTD HTML 3.2S DRAFT//EN",
          "-//W3C//DTD HTML 4.0 FRAMESET//EN",
          "-//W3C//DTD HTML 4.0 TRANSITIONAL//EN",
          "-//W3C//DTD HTML EXPERIMETNAL 19960712//EN",
          "-//W3C//DTD HTML EXPERIMENTAL 970421//EN",
          "-//W3C//DTD W3 HTML//EN",
          "-//W3O//DTD W3 HTML 3.0//EN",
          "-//W3O//DTD W3 HTML 3.0//EN//",
          "-//W3O//DTD W3 HTML STRICT 3.0//EN//",
          "-//WEBTECHS//DTD MOZILLA HTML 2.0//EN",
          "-//WEBTECHS//DTD MOZILLA HTML//EN",
          "-/W3C/DTD HTML 4.0 TRANSITIONAL/EN",
          "HTML",
        );

        if ($is_quirks_pubid{$pubid}) {
          $self->{document}->manakai_compat_mode ('quirks');
        } elsif ($pubid eq "-//W3C//DTD HTML 4.01 FRAMESET//EN" or
                 $pubid eq "-//W3C//DTD HTML 4.01 TRANSITIONAL//EN") {
          if (defined $token->{system_identifier}) {
            $self->{document}->manakai_compat_mode ('quirks');
          } else {
            $self->{document}->manakai_compat_mode ('limited quirks');
          }
        } elsif ($pubid eq "-//W3C//DTD XHTML 1.0 FRAMESET//EN" or
                 $pubid eq "-//W3C//DTD XHTML 1.0 TRANSITIONAL//EN") {
          ## These literals used to be spelled in mixed case and thus
          ## could never equal the upper-cased |$pubid|.
          $self->{document}->manakai_compat_mode ('limited quirks');
        }
      }

      ## One system identifier forces quirks mode regardless of the
      ## public identifier; it is compared in lower case.
      if (defined $token->{system_identifier}) {
        my $sysid = $token->{system_identifier};
        $sysid =~ tr/A-Z/a-z/;
        if ($sysid eq "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
          $self->{document}->manakai_compat_mode ('quirks');
        }
      }

      ## Go to the root element phase.
      $token = $self->_get_next_token;
      return;
    } elsif ({
              START_TAG_TOKEN, 1,
              END_TAG_TOKEN, 1,
              END_OF_FILE_TOKEN, 1,
             }->{$token->{type}}) {
      $self->{parse_error}-> (type => 'no DOCTYPE');
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the root element phase
      ## reprocess
      return;
    } elsif ($token->{type} == CHARACTER_TOKEN) {
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D
        ## Leading white space is ignored.  If nothing else is left in
        ## the token, stay in this phase.
        unless (length $token->{data}) {
          $token = $self->_get_next_token;
          redo INITIAL;
        }
      }

      $self->{parse_error}-> (type => 'no DOCTYPE');
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the root element phase
      ## reprocess
      return;
    } elsif ($token->{type} == COMMENT_TOKEN) {
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);

      ## Stay in the phase.
      $token = $self->_get_next_token;
      redo INITIAL;
    } else {
      die "$0: $token->{type}: Unknown token type";
    }
  } # INITIAL
} # _tree_construction_initial
2884    
## The "root element" phase of tree construction.
##
## DOCTYPE tokens are reported and dropped, comments are appended to the
## document, and white-space-only character tokens are skipped.  Any
## other token triggers the application cache selection callback (with
## the value of the |manifest| attribute of an |html| start tag, or with
## |undef|), after which an |html| element is created, appended to the
## document and pushed onto the stack of open elements.  The token is
## left in |$token| so that the main phase reprocesses it.
##
## Dies on a token type it does not know.
sub _tree_construction_root_element ($) {
  my ($self) = @_;

  ROOT: {
    my $type = $token->{type};

    if ($type == DOCTYPE_TOKEN) {
      $self->{parse_error}-> (type => 'in html:#DOCTYPE');
      ## Ignore the token and stay in the phase.
      $token = $self->_get_next_token;
      redo ROOT;
    }

    if ($type == COMMENT_TOKEN) {
      $self->{document}->append_child
          ($self->{document}->create_comment ($token->{data}));
      ## Stay in the phase.
      $token = $self->_get_next_token;
      redo ROOT;
    }

    my $manifest; ## Argument for the cache selection callback
    if ($type == CHARACTER_TOKEN) {
      ## Leading white space is dropped; a token consisting of nothing
      ## else is ignored entirely.
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)// and # \x0D
          not length $token->{data}) {
        $token = $self->_get_next_token;
        redo ROOT;
      }
    } elsif ($type == START_TAG_TOKEN) {
      ## ISSUE: Spec spells as "application"
      ## ISSUE: No relative reference resolution?
      ## ISSUE: There is an issue in the spec
      if ($token->{tag_name} eq 'html' and
          $token->{attributes}->{manifest}) {
        $manifest = $token->{attributes}->{manifest}->{value};
      }
    } elsif ($type == END_TAG_TOKEN or $type == END_OF_FILE_TOKEN) {
      ## ISSUE: There is an issue in the spec
    } else {
      die "$0: $token->{type}: Unknown token type";
    }

    $self->{application_cache_selection}->($manifest);

    my $root_element = $self->{document}->create_element_ns
        (q<http://www.w3.org/1999/xhtml>, [undef, 'html']);
    $self->{document}->append_child ($root_element);
    push @{$self->{open_elements}}, [$root_element, 'html'];

    ## The current token is reprocessed in the main phase.
    return;
  } # ROOT
} # _tree_construction_root_element
2950    
## "Reset the insertion mode appropriately": walk the stack of open
## elements from the current node towards the root and set
## $self->{insertion_mode} according to the first element that
## determines a mode.
##
## In the fragment case ($self->{inner_html_node} is defined) the
## context element is examined instead of the bottommost stack entry,
## unless the context element is a |td| or |th| element.
##
## Returns nothing.
sub _reset_insertion_mode ($) {
  my $self = shift;

  ## Steps 4..13: element name => insertion mode.
  my %mode_of = (
    select => IN_SELECT_IM,
    td => IN_CELL_IM,
    th => IN_CELL_IM,
    tr => IN_ROW_IM,
    tbody => IN_TABLE_BODY_IM,
    thead => IN_TABLE_BODY_IM,
    tfoot => IN_TABLE_BODY_IM,
    caption => IN_CAPTION_IM,
    colgroup => IN_COLUMN_GROUP_IM,
    table => IN_TABLE_IM,
    head => IN_BODY_IM, # not in head!
    body => IN_BODY_IM,
    frameset => IN_FRAMESET_IM,
  );

  ## Step 1
  my $last;

  ## Step 2
  my $i = -1;
  my $node = $self->{open_elements}->[$i];

  ## Step 3
  S3: {
    ## ISSUE: Oops! "If node is the first node in the stack of open
    ## elements, then set last to true. If the context element of the
    ## HTML fragment parsing algorithm is neither a td element nor a
    ## th element, then set node to the context element. (fragment case)":
    ## The second "if" is in the scope of the first "if"!?
    if ($self->{open_elements}->[0]->[0] eq $node->[0]) {
      $last = 1;
      if (defined $self->{inner_html_node}) {
        unless ($self->{inner_html_node}->[1] eq 'td' or
                $self->{inner_html_node}->[1] eq 'th') {
          $node = $self->{inner_html_node};
        }
      }
    }

    ## Steps 4..13
    ## NOTE: The assignment and the |return| are separate statements on
    ## purpose: "ASSIGNMENT and return" would silently skip the |return|
    ## if a mode constant were ever false.
    my $new_mode = $mode_of{$node->[1]};
    if (defined $new_mode) {
      $self->{insertion_mode} = $new_mode;
      return;
    }

    ## Step 14
    if ($node->[1] eq 'html') {
      if (defined $self->{head_element}) {
        $self->{insertion_mode} = AFTER_HEAD_IM;
      } else {
        $self->{insertion_mode} = BEFORE_HEAD_IM;
      }
      return;
    }

    ## Step 15
    if ($last) {
      $self->{insertion_mode} = IN_BODY_IM;
      return;
    }

    ## Step 16
    $i--;
    $node = $self->{open_elements}->[$i];

    ## Step 17
    redo S3;
  } # S3
} # _reset_insertion_mode
3019    
3020     sub _tree_construction_main ($) {
3021     my $self = shift;
3022    
3023 wakaba 1.1 my $active_formatting_elements = [];
3024    
## "Reconstruct the active formatting elements". # MUST
##
## Argument: a code reference that inserts a newly created element node
## into the tree.
##
## If the last entry of the list of active formatting elements is
## neither a marker nor on the stack of open elements, the list is
## rewound to just after the closest earlier entry that is a marker or
## is open (or to the first entry).  From there to the end of the list
## each entry is shallow-cloned, the clone is inserted and pushed onto
## the stack of open elements, and the clone replaces the entry.
my $reconstruct_active_formatting_elements = sub {
  my $insert = shift;

  ## True if the node is on the stack of open elements.
  my $is_open = sub {
    my $el = shift;
    for my $open (@{$self->{open_elements}}) {
      return 1 if $el eq $open->[0];
    }
    return 0;
  };

  ## Step 1
  return unless @$active_formatting_elements;

  ## Step 3
  ## NOTE: |$i| is a negative index, i.e. counted from the end.
  my $i = -1;
  my $entry = $active_formatting_elements->[$i];

  ## Step 2
  return if $entry->[0] eq '#marker';
  return if $is_open->($entry->[0]);

  REWIND: while (1) {
    ## Step 4: No earlier entry; create from here.
    last REWIND if $active_formatting_elements->[0]->[0] eq $entry->[0];

    ## Step 5
    $i--;
    $entry = $active_formatting_elements->[$i];

    ## Step 6: Keep rewinding until a marker or an open element is hit.
    next REWIND
        unless $entry->[0] eq '#marker' or $is_open->($entry->[0]);

    ## Step 7
    $i++;
    $entry = $active_formatting_elements->[$i];
    last REWIND;
  } # REWIND

  CREATE: while (1) {
    ## Step 8
    my $clone = [$entry->[0]->clone_node (0), $entry->[1]];

    ## Step 9
    $insert->($clone->[0]);
    push @{$self->{open_elements}}, $clone;

    ## Step 10
    $active_formatting_elements->[$i] = $clone;

    ## Step 11: Done once the last entry of the list has been replaced.
    last CREATE if $clone->[0] eq $active_formatting_elements->[-1]->[0];

    ## Step 7'
    $i++;
    $entry = $active_formatting_elements->[$i];
  } # CREATE
}; # $reconstruct_active_formatting_elements
3095    
## "Clear the list of active formatting elements up to the last
## marker": remove entries from the end of the list until a marker has
## been removed or the list is empty.
##
## NOTE: Formerly the list was left untouched when it contained no
## marker at all; the algorithm in the spec removes every entry in that
## case.
my $clear_up_to_marker = sub {
  while (@$active_formatting_elements) {
    my $entry = pop @$active_formatting_elements;
    last if $entry->[0] eq '#marker';
  }
  return;
}; # $clear_up_to_marker
3104    
## Generic CDATA / RCDATA element parsing.
##
## Arguments: the content model flag to tokenize the element content
## with (CDATA_CONTENT_MODEL or RCDATA_CONTENT_MODEL) and a code
## reference that inserts the new element into the tree.
##
## An element is created for the current start tag token, all
## immediately following character tokens become its text content, and
## the token that ends the run is consumed.  That token is expected to
## be the matching end tag; anything else is a parse error.
my $parse_rcdata = sub ($$) {
  my ($content_model_flag, $insert) = @_;

  ## Step 1
  my $start_tag_name = $token->{tag_name};
  my $el = $self->{document}->create_element_ns
      (q<http://www.w3.org/1999/xhtml>, [undef, $start_tag_name]);
  my $attrs = $token->{attributes};
  for my $name (keys %{$attrs}) {
    $el->set_attribute_ns (undef, [undef, $name], $attrs->{$name}->{value});
  }

  ## Step 2
  $insert->($el); # /context node/->append_child ($el)

  ## Step 3
  $self->{content_model} = $content_model_flag; # CDATA or RCDATA
  delete $self->{escape}; # MUST

  ## Step 4: Collect character tokens (or until stop tokenizing).
  my $data = '';
  for ($token = $self->_get_next_token;
       $token->{type} == CHARACTER_TOKEN;
       $token = $self->_get_next_token) {
    $data .= $token->{data};
  }

  ## Step 5
  if (length $data) {
    $el->append_child ($self->{document}->create_text_node ($data));
  }

  ## Step 6
  $self->{content_model} = PCDATA_CONTENT_MODEL;

  ## Step 7: The matching end tag is silently ignored.
  unless ($token->{type} == END_TAG_TOKEN and
          $token->{tag_name} eq $start_tag_name) {
    if ($content_model_flag == CDATA_CONTENT_MODEL) {
      $self->{parse_error}-> (type => 'in CDATA:#'.$token->{type});
    } elsif ($content_model_flag == RCDATA_CONTENT_MODEL) {
      $self->{parse_error}-> (type => 'in RCDATA:#'.$token->{type});
    } else {
      die "$0: $content_model_flag in parse_rcdata";
    }
  }
  $token = $self->_get_next_token;
}; # $parse_rcdata
3157 wakaba 1.1
## Handling of a |script| start tag.
##
## Argument: a code reference that inserts the new element into the
## tree.
##
## A |script| element is created for the current start tag token, the
## following character tokens are tokenized as CDATA and appended as
## its text, and the token that ends the run is consumed (a parse error
## unless it is the |script| end tag).  The element is inserted only
## when not parsing a fragment ($self->{inner_html_node} is not
## defined).
my $script_start_tag = sub ($) {
  my ($insert) = @_;

  my $script_el = $self->{document}->create_element_ns
      (q<http://www.w3.org/1999/xhtml>, [undef, 'script']);
  my $attrs = $token->{attributes};
  for my $name (keys %{$attrs}) {
    $script_el->set_attribute_ns
        (undef, [undef, $name], $attrs->{$name}->{value});
  }

  ## TODO: mark as "parser-inserted"

  $self->{content_model} = CDATA_CONTENT_MODEL;
  delete $self->{escape}; # MUST

  ## Collect character tokens; stop at a non-character token or when
  ## the tokenizer stops tokenising.
  my $data = '';
  for ($token = $self->_get_next_token;
       $token->{type} == CHARACTER_TOKEN;
       $token = $self->_get_next_token) {
    $data .= $token->{data};
  }
  $script_el->manakai_append_text ($data) if length $data;

  $self->{content_model} = PCDATA_CONTENT_MODEL;

  ## The |script| end tag is silently ignored.
  unless ($token->{type} == END_TAG_TOKEN and
          $token->{tag_name} eq 'script') {
    $self->{parse_error}-> (type => 'in CDATA:#'.$token->{type});
    ## ISSUE: And ignore?
    ## TODO: mark as "already executed"
  }

  unless (defined $self->{inner_html_node}) {
    ## TODO: $old_insertion_point = current insertion point
    ## TODO: insertion point = just before the next input character

    $insert->($script_el);

    ## TODO: insertion point = $old_insertion_point (might be "undefined")

    ## TODO: if there is a script that will execute as soon as the parser resume, then...
  } else {
    ## TODO: mark as "already executed"
  }

  $token = $self->_get_next_token;
}; # $script_start_tag
3211    
## End tag handling for formatting elements (the "adoption agency"
## steps of the spec).
##
## Argument: the tag name of the end tag being processed.
##
## The steps are repeated (|redo FET|) until one of the early exits is
## taken: no matching active formatting element, the element is not in
## scope or not open, or there is no furthest block.  Every exit reads
## the next token into |$token|.
##
## Entries of the list of active formatting elements and of the stack
## of open elements are |[node, tag name]| pairs; markers have
## |'#marker'| in place of the node.
my $formatting_end_tag = sub {
  my $tag_name = shift;

  FET: {
    ## Step 1: Find the last active formatting element with the tag
    ## name that is after the last marker.
    my $formatting_element;
    my $formatting_element_i_in_active;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[1] eq $tag_name) {
        $formatting_element = $active_formatting_elements->[$_];
        $formatting_element_i_in_active = $_;
        last AFE;
      } elsif ($active_formatting_elements->[$_]->[0] eq '#marker') {
        last AFE;
      }
    } # AFE
    unless (defined $formatting_element) {
      $self->{parse_error}-> (type => 'unmatched end tag:'.$tag_name);
      ## Ignore the token
      $token = $self->_get_next_token;
      return;
    }
    ## has an element in scope
    my $in_scope = 1;
    my $formatting_element_i_in_open;
    INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if ($node->[0] eq $formatting_element->[0]) {
        if ($in_scope) {
          $formatting_element_i_in_open = $_;
          last INSCOPE;
        } else { # in open elements but not in scope
          $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
          ## Ignore the token
          $token = $self->_get_next_token;
          return;
        }
      } elsif ({
                table => 1, caption => 1, td => 1, th => 1,
                button => 1, marquee => 1, object => 1, html => 1,
               }->{$node->[1]}) {
        $in_scope = 0;
      }
    } # INSCOPE
    unless (defined $formatting_element_i_in_open) {
      $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
      ## Remove the formatting element itself from the list.  It is not
      ## necessarily the last entry, so |pop| would remove the wrong
      ## one.
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      $token = $self->_get_next_token; ## TODO: ok?
      return;
    }
    if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
      $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
    }

    ## Step 2: The furthest block is the special or scoping element
    ## below the formatting element that is closest to it on the stack.
    my $furthest_block;
    my $furthest_block_i_in_open;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if (not $formatting_category->{$node->[1]} and
          #not $phrasing_category->{$node->[1]} and
          ($special_category->{$node->[1]} or
           $scoping_category->{$node->[1]})) {
        $furthest_block = $node;
        $furthest_block_i_in_open = $_;
      } elsif ($node->[0] eq $formatting_element->[0]) {
        last OE;
      }
    } # OE

    ## Step 3: Without a furthest block, pop the stack up to and
    ## including the formatting element and drop it from the list.
    unless (defined $furthest_block) { # MUST
      splice @{$self->{open_elements}}, $formatting_element_i_in_open;
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      $token = $self->_get_next_token;
      return;
    }

    ## Step 4
    my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1];

    ## Step 5
    my $furthest_block_parent = $furthest_block->[0]->parent_node;
    if (defined $furthest_block_parent) {
      $furthest_block_parent->remove_child ($furthest_block->[0]);
    }

    ## Step 6: The bookmark is kept as the node of the entry that
    ## precedes the formatting element in the list.
    ## NOTE(review): If the formatting element is the first entry, the
    ## index is -1, i.e. the last entry - confirm that this is intended.
    my $bookmark_prev_el
        = $active_formatting_elements->[$formatting_element_i_in_active - 1]
            ->[0];

    ## Step 7
    my $node = $furthest_block;
    my $node_i_in_open = $furthest_block_i_in_open;
    my $last_node = $furthest_block;
    S7: {
      ## Step 1
      $node_i_in_open--;
      $node = $self->{open_elements}->[$node_i_in_open];

      ## Step 2: A node that is not an active formatting element is
      ## removed from the stack and the previous node is tried.
      my $node_i_in_active;
      S7S2: {
        for (reverse 0..$#$active_formatting_elements) {
          if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
            $node_i_in_active = $_;
            last S7S2;
          }
        }
        splice @{$self->{open_elements}}, $node_i_in_open, 1;
        redo S7;
      } # S7S2

      ## Step 3
      last S7 if $node->[0] eq $formatting_element->[0];

      ## Step 4
      if ($last_node->[0] eq $furthest_block->[0]) {
        $bookmark_prev_el = $node->[0];
      }

      ## Step 5: A node that has children is replaced by a shallow
      ## clone, both in the list and on the stack.
      if ($node->[0]->has_child_nodes ()) {
        my $clone = [$node->[0]->clone_node (0), $node->[1]];
        $active_formatting_elements->[$node_i_in_active] = $clone;
        $self->{open_elements}->[$node_i_in_open] = $clone;
        $node = $clone;
      }

      ## Step 6
      $node->[0]->append_child ($last_node->[0]);

      ## Step 7
      $last_node = $node;

      ## Step 8
      redo S7;
    } # S7

    ## Step 8
    $common_ancestor_node->[0]->append_child ($last_node->[0]);

    ## Step 9
    my $clone = [$formatting_element->[0]->clone_node (0),
                 $formatting_element->[1]];

    ## Step 10: Move the children of the furthest block to the clone.
    ## The child list is copied first as it changes while moving.
    my @cn = @{$furthest_block->[0]->child_nodes};
    $clone->[0]->append_child ($_) for @cn;

    ## Step 11
    $furthest_block->[0]->append_child ($clone->[0]);

    ## Step 12: In the list, remove the formatting element and insert
    ## the clone just after the bookmark.  |$i| is the index of the
    ## bookmark entry, decremented if the removal shifted it.
    my $i;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
        splice @$active_formatting_elements, $_, 1;
        $i-- and last AFE if defined $i;
      } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
        $i = $_;
      }
    } # AFE
    splice @$active_formatting_elements, $i + 1, 0, $clone;

    ## Step 13: On the stack, remove the formatting element and insert
    ## the clone immediately after the furthest block.
    undef $i;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
        splice @{$self->{open_elements}}, $_, 1;
        $i-- and last OE if defined $i;
      } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
        $i = $_;
      }
    } # OE
    ## NOTE: The length MUST be zero (insert, as in step 12).  A length
    ## of one would replace, and thus drop from the stack, an element
    ## that is open below the furthest block.
    splice @{$self->{open_elements}}, $i + 1, 0, $clone;

    ## Step 14
    redo FET;
  } # FET
}; # $formatting_end_tag
3394    
my $insert_to_current = sub {
  ## Append the node passed as the first argument to the current node,
  ## i.e. the node held by the last entry of |open_elements| (entries
  ## are [node, tag name] pairs).  Returns whatever |append_child|
  ## returns.
  my ($new_child) = @_;
  my $current_node = $self->{open_elements}->[-1]->[0];
  return $current_node->append_child ($new_child);
}; # $insert_to_current
3398    
my $insert_to_foster = sub {
  ## Insert the node passed as the first argument, foster-parenting it
  ## when the current node is a table-structure element.
  ##
  ## Entries of |open_elements| are [node, tag name] pairs; the last
  ## entry is the current node.  Returns whatever the DOM insertion
  ## method (|append_child| or |insert_before|) returns.
  my ($new_child) = @_;
  my $open = $self->{open_elements};

  my %needs_fostering = map { $_ => 1 } qw(table tbody tfoot thead tr);
  unless ($needs_fostering{$open->[-1]->[1]}) {
    ## Ordinary case: the node simply becomes the last child of the
    ## current node.
    return $open->[-1]->[0]->append_child ($new_child);
  }

  ## Foster parenting: locate the innermost |table| on the stack.
  my $foster_parent;
  my $reference_node; # stays undef unless inserting before the table
  for my $idx (reverse 0 .. $#$open) {
    my $entry = $open->[$idx];
    next unless $entry->[1] eq 'table';

    my $table_parent = $entry->[0]->parent_node;
    if (defined $table_parent and $table_parent->node_type == 1) {
      ## The table has an element (node type 1) as its parent: insert
      ## into that parent, immediately before the table.
      ($foster_parent, $reference_node) = ($table_parent, $entry->[0]);
    } else {
      ## Otherwise use the element just below the table on the stack.
      $foster_parent = $open->[$idx - 1]->[0];
    }
    last;
  }

  ## No usable foster parent found (e.g. no |table| on the stack):
  ## fall back to the first element of the stack.
  $foster_parent = $open->[0]->[0] unless defined $foster_parent;

  return $foster_parent->insert_before ($new_child, $reference_node);
}; # $insert_to_foster
3429    
3430 wakaba 1.54 my $insert;
3431    
3432     B: {
3433 wakaba 1.57 if ($token->{type} == DOCTYPE_TOKEN) {
3434 wakaba 1.54 $self->{parse_error}-> (type => 'DOCTYPE in the middle');
3435     ## Ignore the token
3436     ## Stay in the phase
3437     $token = $self->_get_next_token;
3438     redo B;
3439 wakaba 1.57 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
3440 wakaba 1.58 if ($self->{insertion_mode} & AFTER_HTML_IMS) {
3441 wakaba 1.54 #
3442     } else {
3443     ## Generate implied end tags
3444     if ({
3445     dd => 1, dt => 1, li => 1, p => 1, td => 1, th => 1, tr => 1,
3446     tbody => 1, tfoot=> 1, thead => 1,
3447     }->{$self->{open_elements}->[-1]->[1]}) {
3448     unshift @{$self->{token}}, $token;
3449 wakaba 1.57 $token = {type => END_TAG_TOKEN, tag_name => $self->{open_elements}->[-1]->[1]};
3450 wakaba 1.54 redo B;
3451     }
3452 wakaba 1.1
3453 wakaba 1.54 if (@{$self->{open_elements}} > 2 or
3454     (@{$self->{open_elements}} == 2 and $self->{open_elements}->[1]->[1] ne 'body')) {
3455     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3456     } elsif (defined $self->{inner_html_node} and
3457     @{$self->{open_elements}} > 1 and
3458     $self->{open_elements}->[1]->[1] ne 'body') {
3459     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3460     }
3461    
3462     ## ISSUE: There is an issue in the spec.
3463     }
3464    
3465     ## Stop parsing
3466     last B;
3467 wakaba 1.57 } elsif ($token->{type} == START_TAG_TOKEN and
3468 wakaba 1.54 $token->{tag_name} eq 'html') {
3469 wakaba 1.56 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
3470 wakaba 1.54 ## Turn into the main phase
3471     $self->{parse_error}-> (type => 'after html:html');
3472 wakaba 1.56 $self->{insertion_mode} = AFTER_BODY_IM;
3473     } elsif ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
3474 wakaba 1.54 ## Turn into the main phase
3475     $self->{parse_error}-> (type => 'after html:html');
3476 wakaba 1.56 $self->{insertion_mode} = AFTER_FRAMESET_IM;
3477 wakaba 1.54 }
3478    
3479     ## ISSUE: "aa<html>" is not a parse error.
3480     ## ISSUE: "<html>" in fragment is not a parse error.
3481     unless ($token->{first_start_tag}) {
3482     $self->{parse_error}-> (type => 'not first start tag');
3483     }
3484     my $top_el = $self->{open_elements}->[0]->[0];
3485     for my $attr_name (keys %{$token->{attributes}}) {
3486     unless ($top_el->has_attribute_ns (undef, $attr_name)) {
3487     $top_el->set_attribute_ns
3488     (undef, [undef, $attr_name],
3489     $token->{attributes}->{$attr_name}->{value});
3490     }
3491     }
3492     $token = $self->_get_next_token;
3493     redo B;
3494 wakaba 1.57 } elsif ($token->{type} == COMMENT_TOKEN) {
3495 wakaba 1.54 my $comment = $self->{document}->create_comment ($token->{data});
3496 wakaba 1.58 if ($self->{insertion_mode} & AFTER_HTML_IMS) {
3497 wakaba 1.54 $self->{document}->append_child ($comment);
3498 wakaba 1.56 } elsif ($self->{insertion_mode} == AFTER_BODY_IM) {
3499 wakaba 1.54 $self->{open_elements}->[0]->[0]->append_child ($comment);
3500     } else {
3501     $self->{open_elements}->[-1]->[0]->append_child ($comment);
3502     }
3503     $token = $self->_get_next_token;
3504     redo B;
3505 wakaba 1.58 } elsif ($self->{insertion_mode} & HEAD_IMS) {
3506 wakaba 1.57 if ($token->{type} == CHARACTER_TOKEN) {
3507 wakaba 1.54 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3508     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3509     unless (length $token->{data}) {
3510     $token = $self->_get_next_token;
3511     redo B;
3512     }
3513     }
3514    
3515 wakaba 1.56 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3516 wakaba 1.54 ## As if <head>
3517    
3518     $self->{head_element} = $self->{document}->create_element_ns
3519     (q<http://www.w3.org/1999/xhtml>, [undef, 'head']);
3520    
3521     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3522     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3523    
3524     ## Reprocess in the "in head" insertion mode...
3525     pop @{$self->{open_elements}};
3526    
3527     ## Reprocess in the "after head" insertion mode...
3528 wakaba 1.56 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3529 wakaba 1.54 ## As if </noscript>
3530     pop @{$self->{open_elements}};
3531     $self->{parse_error}-> (type => 'in noscript:#character');
3532    
3533     ## Reprocess in the "in head" insertion mode...
3534     ## As if </head>
3535     pop @{$self->{open_elements}};
3536    
3537     ## Reprocess in the "after head" insertion mode...
3538 wakaba 1.56 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
3539 wakaba 1.54 pop @{$self->{open_elements}};
3540    
3541     ## Reprocess in the "after head" insertion mode...
3542     }
3543    
3544     ## "after head" insertion mode
3545     ## As if <body>
3546    
3547     {
3548     my $el;
3549    
3550     $el = $self->{document}->create_element_ns
3551     (q<http://www.w3.org/1999/xhtml>, [undef, 'body']);
3552    
3553     $self->{open_elements}->[-1]->[0]->append_child ($el);
3554     push @{$self->{open_elements}}, [$el, 'body'];
3555     }
3556    
3557 wakaba 1.56 $self->{insertion_mode} = IN_BODY_IM;
3558 wakaba 1.54 ## reprocess
3559     redo B;
3560 wakaba 1.57 } elsif ($token->{type} == START_TAG_TOKEN) {
3561 wakaba 1.54 if ($token->{tag_name} eq 'head') {
3562 wakaba 1.56 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3563 wakaba 1.54
3564     $self->{head_element} = $self->{document}->create_element_ns
3565     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3566    
3567     for my $attr_name (keys %{ $token->{attributes}}) {
3568     $self->{head_element}->set_attribute_ns (undef, [undef, $attr_name],
3569     $token->{attributes} ->{$attr_name}->{value});
3570     }
3571    
3572     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3573     push @{$self->{open_elements}}, [$self->{head_element}, $token->{tag_name}];
3574 wakaba 1.56 $self->{insertion_mode} = IN_HEAD_IM;
3575 wakaba 1.54 $token = $self->_get_next_token;
3576     redo B;
3577 wakaba 1.56 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
3578     #
3579     } else {
3580 wakaba 1.54 $self->{parse_error}-> (type => 'in head:head'); # or in head noscript
3581     ## Ignore the token
3582     $token = $self->_get_next_token;
3583     redo B;
3584     }
3585 wakaba 1.56 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3586 wakaba 1.54 ## As if <head>
3587    
3588     $self->{head_element} = $self->{document}->create_element_ns
3589     (q<http://www.w3.org/1999/xhtml>, [undef, 'head']);
3590    
3591     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3592     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3593    
3594 wakaba 1.56 $self->{insertion_mode} = IN_HEAD_IM;
3595 wakaba 1.54 ## Reprocess in the "in head" insertion mode...
3596     }
3597    
3598     if ($token->{tag_name} eq 'base') {
3599 wakaba 1.56 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3600 wakaba 1.54 ## As if </noscript>
3601     pop @{$self->{open_elements}};
3602     $self->{parse_error}-> (type => 'in noscript:base');
3603    
3604 wakaba 1.56 $self->{insertion_mode} = IN_HEAD_IM;
3605 wakaba 1.54 ## Reprocess in the "in head" insertion mode...
3606     }
3607    
3608     ## NOTE: There is a "as if in head" code clone.
3609 wakaba 1.56 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
3610 wakaba 1.54 $self->{parse_error}-> (type => 'after head:'.$token->{tag_name});
3611     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3612     }
3613    
3614     {
3615     my $el;
3616    
3617     $el = $self->{document}->create_element_ns
3618     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3619    
3620     for my $attr_name (keys %{ $token->{attributes}}) {
3621     $el->set_attribute_ns (undef, [undef, $attr_name],
3622     $token->{attributes} ->{$attr_name}->{value});
3623     }
3624    
3625     $self->{open_elements}->[-1]->[0]->append_child ($el);
3626     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3627     }
3628    
3629     pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
3630     pop @{$self->{open_elements}}
3631 wakaba 1.56 if $self->{insertion_mode} == AFTER_HEAD_IM;
3632 wakaba 1.54 $token = $self->_get_next_token;
3633     redo B;
3634     } elsif ($token->{tag_name} eq 'link') {
3635     ## NOTE: There is a "as if in head" code clone.
3636 wakaba 1.56 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
3637 wakaba 1.54 $self->{parse_error}-> (type => 'after head:'.$token->{tag_name});
3638     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3639     }
3640    
3641 wakaba 1.25 {
3642     my $el;
3643    
3644 wakaba 1.1 $el = $self->{document}->create_element_ns
3645     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3646    
3647 wakaba 1.25 for my $attr_name (keys %{ $token->{attributes}}) {
3648 wakaba 1.1 $el->set_attribute_ns (undef, [undef, $attr_name],
3649 wakaba 1.25 $token->{attributes} ->{$attr_name}->{value});
3650 wakaba 1.1 }
3651    
3652 wakaba 1.54 $self->{open_elements}->[-1]->[0]->append_child ($el);
3653 wakaba 1.25 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3654     }
3655    
3656 wakaba 1.54 pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
3657     pop @{$self->{open_elements}}
3658 wakaba 1.56 if $self->{insertion_mode} == AFTER_HEAD_IM;
3659 wakaba 1.54 $token = $self->_get_next_token;
3660     redo B;
3661     } elsif ($token->{tag_name} eq 'meta') {
3662     ## NOTE: There is a "as if in head" code clone.
3663 wakaba 1.56 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
3664 wakaba 1.54 $self->{parse_error}-> (type => 'after head:'.$token->{tag_name});
3665     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3666     }
3667    
3668 wakaba 1.34 {
3669     my $el;
3670    
3671     $el = $self->{document}->create_element_ns
3672     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3673    
3674     for my $attr_name (keys %{ $token->{attributes}}) {
3675     $el->set_attribute_ns (undef, [undef, $attr_name],
3676     $token->{attributes} ->{$attr_name}->{value});
3677     }
3678    
3679 wakaba 1.54 $self->{open_elements}->[-1]->[0]->append_child ($el);
3680 wakaba 1.34 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3681     }
3682    
3683 wakaba 1.54 pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
3684 wakaba 1.34
3685 wakaba 1.54 unless ($self->{confident}) {
3686     my $charset;
3687     if ($token->{attributes}->{charset}) { ## TODO: And if supported
3688     $charset = $token->{attributes}->{charset}->{value};
3689     }
3690     if ($token->{attributes}->{'http-equiv'}) {
3691     ## ISSUE: Algorithm name in the spec was incorrect so that not linked to the definition.
3692     if ($token->{attributes}->{'http-equiv'}->{value}
3693     =~ /\A[^;]*;[\x09-\x0D\x20]*charset[\x09-\x0D\x20]*=
3694     [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
3695     ([^"'\x09-\x0D\x20][^\x09-\x0D\x20]*))/x) {
3696     $charset = defined $1 ? $1 : defined $2 ? $2 : $3;
3697     } ## TODO: And if supported
3698     }
3699     ## TODO: Change the encoding
3700     }
3701 wakaba 1.34
3702 wakaba 1.54 ## TODO: Extracting |charset| from |meta|.
3703     pop @{$self->{open_elements}}
3704 wakaba 1.56 if $self->{insertion_mode} == AFTER_HEAD_IM;
3705 wakaba 1.54 $token = $self->_get_next_token;
3706     redo B;
3707     } elsif ($token->{tag_name} eq 'title') {
3708 wakaba 1.56 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3709 wakaba 1.54 ## As if </noscript>
3710     pop @{$self->{open_elements}};
3711     $self->{parse_error}-> (type => 'in noscript:title');
3712 wakaba 1.1
3713 wakaba 1.56 $self->{insertion_mode} = IN_HEAD_IM;
3714 wakaba 1.54 ## Reprocess in the "in head" insertion mode...
3715 wakaba 1.56 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
3716 wakaba 1.54 $self->{parse_error}-> (type => 'after head:'.$token->{tag_name});
3717     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3718     }
3719    
3720     ## NOTE: There is a "as if in head" code clone.
3721     my $parent = defined $self->{head_element} ? $self->{head_element}
3722     : $self->{open_elements}->[-1]->[0];
3723     $parse_rcdata->(RCDATA_CONTENT_MODEL,
3724     sub { $parent->append_child ($_[0]) });
3725     pop @{$self->{open_elements}}
3726 wakaba 1.56 if $self->{insertion_mode} == AFTER_HEAD_IM;
3727 wakaba 1.54 redo B;
3728     } elsif ($token->{tag_name} eq 'style') {
3729     ## NOTE: Or (scripting is enabled and tag_name eq 'noscript' and
3730 wakaba 1.56 ## insertion mode IN_HEAD_IM)
3731 wakaba 1.54 ## NOTE: There is a "as if in head" code clone.
3732 wakaba 1.56 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
3733 wakaba 1.54 $self->{parse_error}-> (type => 'after head:'.$token->{tag_name});
3734     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3735     }
3736     $parse_rcdata->(CDATA_CONTENT_MODEL, $insert_to_current);
3737     pop @{$self->{open_elements}}
3738 wakaba 1.56 if $self->{insertion_mode} == AFTER_HEAD_IM;
3739 wakaba 1.54 redo B;
3740     } elsif ($token->{tag_name} eq 'noscript') {
3741 wakaba 1.56 if ($self->{insertion_mode} == IN_HEAD_IM) {
3742 wakaba 1.54 ## NOTE: and scripting is disalbed
3743    
3744 wakaba 1.1 {
3745     my $el;
3746    
3747     $el = $self->{document}->create_element_ns
3748     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3749    
3750     for my $attr_name (keys %{ $token->{attributes}}) {
3751     $el->set_attribute_ns (undef, [undef, $attr_name],
3752     $token->{attributes} ->{$attr_name}->{value});
3753     }
3754    
3755 wakaba 1.54 $self->{open_elements}->[-1]->[0]->append_child ($el);
3756 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3757 wakaba 1.1 }
3758    
3759 wakaba 1.56 $self->{insertion_mode} = IN_HEAD_NOSCRIPT_IM;
3760 wakaba 1.54 $token = $self->_get_next_token;
3761     redo B;
3762 wakaba 1.56 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3763 wakaba 1.54 $self->{parse_error}-> (type => 'in noscript:noscript');
3764     ## Ignore the token
3765     $token = $self->_get_next_token;
3766     redo B;
3767     } else {
3768     #
3769     }
3770     } elsif ($token->{tag_name} eq 'script') {
3771 wakaba 1.56 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3772 wakaba 1.54 ## As if </noscript>
3773     pop @{$self->{open_elements}};
3774     $self->{parse_error}-> (type => 'in noscript:script');
3775    
3776 wakaba 1.56 $self->{insertion_mode} = IN_HEAD_IM;
3777 wakaba 1.54 ## Reprocess in the "in head" insertion mode...
3778 wakaba 1.56 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
3779 wakaba 1.54 $self->{parse_error}-> (type => 'after head:'.$token->{tag_name});
3780     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3781     }
3782    
3783     ## NOTE: There is a "as if in head" code clone.
3784     $script_start_tag->($insert_to_current);
3785     pop @{$self->{open_elements}}
3786 wakaba 1.56 if $self->{insertion_mode} == AFTER_HEAD_IM;
3787 wakaba 1.54 redo B;
3788     } elsif ($token->{tag_name} eq 'body' or
3789     $token->{tag_name} eq 'frameset') {
3790 wakaba 1.56 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3791 wakaba 1.54 ## As if </noscript>
3792     pop @{$self->{open_elements}};
3793     $self->{parse_error}-> (type => 'in noscript:'.$token->{tag_name});
3794    
3795     ## Reprocess in the "in head" insertion mode...
3796     ## As if </head>
3797     pop @{$self->{open_elements}};
3798    
3799     ## Reprocess in the "after head" insertion mode...
3800 wakaba 1.56 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
3801 wakaba 1.54 pop @{$self->{open_elements}};
3802    
3803     ## Reprocess in the "after head" insertion mode...
3804     }
3805    
3806     ## "after head" insertion mode
3807    
3808 wakaba 1.1 {
3809     my $el;
3810    
3811     $el = $self->{document}->create_element_ns
3812     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
3813    
3814     for my $attr_name (keys %{ $token->{attributes}}) {
3815     $el->set_attribute_ns (undef, [undef, $attr_name],
3816     $token->{attributes} ->{$attr_name}->{value});
3817     }
3818    
3819 wakaba 1.54 $self->{open_elements}->[-1]->[0]->append_child ($el);
3820 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
3821 wakaba 1.1 }
3822    
3823 wakaba 1.56 if ($token->{tag_name} eq 'body') {
3824     $self->{insertion_mode} = IN_BODY_IM;
3825     } elsif ($token->{tag_name} eq 'frameset') {
3826     $self->{insertion_mode} = IN_FRAMESET_IM;
3827     } else {
3828     die "$0: tag name: $self->{tag_name}";
3829     }
3830 wakaba 1.54 $token = $self->_get_next_token;
3831     redo B;
3832     } else {
3833     #
3834 wakaba 1.8 }
3835 wakaba 1.54
3836 wakaba 1.56 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3837 wakaba 1.54 ## As if </noscript>
3838     pop @{$self->{open_elements}};
3839     $self->{parse_error}-> (type => 'in noscript:/'.$token->{tag_name});
3840    
3841     ## Reprocess in the "in head" insertion mode...
3842     ## As if </head>
3843     pop @{$self->{open_elements}};
3844    
3845     ## Reprocess in the "after head" insertion mode...
3846 wakaba 1.56 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
3847 wakaba 1.54 ## As if </head>
3848     pop @{$self->{open_elements}};
3849    
3850     ## Reprocess in the "after head" insertion mode...
3851     }
3852    
3853     ## "after head" insertion mode
3854     ## As if <body>
3855    
3856 wakaba 1.1 {
3857     my $el;
3858    
3859     $el = $self->{document}->create_element_ns
3860 wakaba 1.54 (q<http://www.w3.org/1999/xhtml>, [undef, 'body']);
3861 wakaba 1.1
3862 wakaba 1.54 $self->{open_elements}->[-1]->[0]->append_child ($el);
3863     push @{$self->{open_elements}}, [$el, 'body'];
3864 wakaba 1.1 }
3865    
3866 wakaba 1.56 $self->{insertion_mode} = IN_BODY_IM;
3867 wakaba 1.54 ## reprocess
3868     redo B;
3869 wakaba 1.57 } elsif ($token->{type} == END_TAG_TOKEN) {
3870 wakaba 1.54 if ($token->{tag_name} eq 'head') {
3871 wakaba 1.56 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3872 wakaba 1.54 ## As if <head>
3873    
3874     $self->{head_element} = $self->{document}->create_element_ns
3875     (q<http://www.w3.org/1999/xhtml>, [undef, 'head']);
3876    
3877     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3878     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3879    
3880     ## Reprocess in the "in head" insertion mode...
3881     pop @{$self->{open_elements}};
3882 wakaba 1.56 $self->{insertion_mode} = AFTER_HEAD_IM;
3883 wakaba 1.54 $token = $self->_get_next_token;
3884     redo B;
3885 wakaba 1.56 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3886 wakaba 1.54 ## As if </noscript>
3887     pop @{$self->{open_elements}};
3888     $self->{parse_error}-> (type => 'in noscript:script');
3889    
3890     ## Reprocess in the "in head" insertion mode...
3891     pop @{$self->{open_elements}};
3892 wakaba 1.56 $self->{insertion_mode} = AFTER_HEAD_IM;
3893 wakaba 1.54 $token = $self->_get_next_token;
3894     redo B;
3895 wakaba 1.56 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
3896 wakaba 1.54 pop @{$self->{open_elements}};
3897 wakaba 1.56 $self->{insertion_mode} = AFTER_HEAD_IM;
3898 wakaba 1.54 $token = $self->_get_next_token;
3899     redo B;
3900     } else {
3901     #
3902     }
3903     } elsif ($token->{tag_name} eq 'noscript') {
3904 wakaba 1.56 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3905 wakaba 1.54 pop @{$self->{open_elements}};
3906 wakaba 1.56 $self->{insertion_mode} = IN_HEAD_IM;
3907 wakaba 1.54 $token = $self->_get_next_token;
3908     redo B;
3909 wakaba 1.56 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3910 wakaba 1.54 $self->{parse_error}-> (type => 'unmatched end tag:noscript');
3911     ## Ignore the token ## ISSUE: An issue in the spec.
3912     $token = $self->_get_next_token;
3913     redo B;
3914     } else {
3915     #
3916     }
3917     } elsif ({
3918     body => 1, html => 1,
3919     }->{$token->{tag_name}}) {
3920 wakaba 1.56 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3921 wakaba 1.54 ## As if <head>
3922    
3923     $self->{head_element} = $self->{document}->create_element_ns
3924     (q<http://www.w3.org/1999/xhtml>, [undef, 'head']);
3925    
3926     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3927     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3928    
3929 wakaba 1.56 $self->{insertion_mode} = IN_HEAD_IM;
3930 wakaba 1.54 ## Reprocess in the "in head" insertion mode...
3931 wakaba 1.56 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3932 wakaba 1.54 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
3933     ## Ignore the token
3934     $token = $self->_get_next_token;
3935     redo B;
3936     }
3937    
3938     #
3939     } elsif ({
3940     p => 1, br => 1,
3941     }->{$token->{tag_name}}) {
3942 wakaba 1.56 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3943 wakaba 1.54 ## As if <head>
3944    
3945     $self->{head_element} = $self->{document}->create_element_ns
3946     (q<http://www.w3.org/1999/xhtml>, [undef, 'head']);
3947    
3948     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3949     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3950    
3951 wakaba 1.56 $self->{insertion_mode} = IN_HEAD_IM;
3952 wakaba 1.54 ## Reprocess in the "in head" insertion mode...
3953     }
3954    
3955     #
3956     } else {
3957 wakaba 1.56 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
3958     #
3959     } else {
3960 wakaba 1.54 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
3961     ## Ignore the token
3962     $token = $self->_get_next_token;
3963     redo B;
3964     }
3965     }
3966    
3967 wakaba 1.56 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3968 wakaba 1.54 ## As if </noscript>
3969     pop @{$self->{open_elements}};
3970     $self->{parse_error}-> (type => 'in noscript:/'.$token->{tag_name});
3971    
3972     ## Reprocess in the "in head" insertion mode...
3973     ## As if </head>
3974     pop @{$self->{open_elements}};
3975    
3976     ## Reprocess in the "after head" insertion mode...
3977 wakaba 1.56 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
3978 wakaba 1.54 ## As if </head>
3979     pop @{$self->{open_elements}};
3980    
3981     ## Reprocess in the "after head" insertion mode...
3982 wakaba 1.56 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3983 wakaba 1.54 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
3984     ## Ignore the token ## ISSUE: An issue in the spec.
3985     $token = $self->_get_next_token;
3986     redo B;
3987 wakaba 1.8 }
3988 wakaba 1.54
3989     ## "after head" insertion mode
3990     ## As if <body>
3991    
3992 wakaba 1.1 {
3993     my $el;
3994    
3995     $el = $self->{document}->create_element_ns
3996 wakaba 1.54 (q<http://www.w3.org/1999/xhtml>, [undef, 'body']);
3997 wakaba 1.1
3998 wakaba 1.54 $self->{open_elements}->[-1]->[0]->append_child ($el);
3999     push @{$self->{open_elements}}, [$el, 'body'];
4000 wakaba 1.1 }
4001    
4002 wakaba 1.56 $self->{insertion_mode} = IN_BODY_IM;
4003 wakaba 1.54 ## reprocess
4004     redo B;
4005     } else {
4006     die "$0: $token->{type}: Unknown token type";
4007 wakaba 1.1 }
4008 wakaba 1.54
4009     ## ISSUE: An issue in the spec.
4010 wakaba 1.58 } elsif ($self->{insertion_mode} & BODY_IMS) {
4011 wakaba 1.57 if ($token->{type} == CHARACTER_TOKEN) {
4012 wakaba 1.54 ## NOTE: There is a code clone of "character in body".
4013     $reconstruct_active_formatting_elements->($insert_to_current);
4014    
4015     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4016    
4017     $token = $self->_get_next_token;
4018     redo B;
4019 wakaba 1.57 } elsif ($token->{type} == START_TAG_TOKEN) {
4020 wakaba 1.54 if ({
4021     caption => 1, col => 1, colgroup => 1, tbody => 1,
4022     td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
4023     }->{$token->{tag_name}}) {
4024 wakaba 1.56 if ($self->{insertion_mode} == IN_CELL_IM) {
4025 wakaba 1.54 ## have an element in table scope
4026     my $tn;
4027     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4028     my $node = $self->{open_elements}->[$_];
4029     if ($node->[1] eq 'td' or $node->[1] eq 'th') {
4030     $tn = $node->[1];
4031     last INSCOPE;
4032     } elsif ({
4033     table => 1, html => 1,
4034     }->{$node->[1]}) {
4035     last INSCOPE;
4036     }
4037     } # INSCOPE
4038     unless (defined $tn) {
4039     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4040     ## Ignore the token
4041     $token = $self->_get_next_token;
4042     redo B;
4043     }
4044    
4045     ## Close the cell
4046     unshift @{$self->{token}}, $token; # <?>
4047 wakaba 1.57 $token = {type => END_TAG_TOKEN, tag_name => $tn};
4048 wakaba 1.54 redo B;
4049 wakaba 1.56 } elsif ($self->{insertion_mode} == IN_CAPTION_IM) {
4050 wakaba 1.54 $self->{parse_error}-> (type => 'not closed:caption');
4051    
4052     ## As if </caption>
4053     ## have a table element in table scope
4054     my $i;
4055     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4056     my $node = $self->{open_elements}->[$_];
4057     if ($node->[1] eq 'caption') {
4058     $i = $_;
4059     last INSCOPE;
4060     } elsif ({
4061     table => 1, html => 1,
4062     }->{$node->[1]}) {
4063     last INSCOPE;
4064     }
4065     } # INSCOPE
4066     unless (defined $i) {
4067     $self->{parse_error}-> (type => 'unmatched end tag:caption');
4068     ## Ignore the token
4069     $token = $self->_get_next_token;
4070     redo B;
4071     }
4072    
4073     ## generate implied end tags
4074     if ({
4075     dd => 1, dt => 1, li => 1, p => 1,
4076     td => 1, th => 1, tr => 1,
4077     tbody => 1, tfoot=> 1, thead => 1,
4078     }->{$self->{open_elements}->[-1]->[1]}) {
4079     unshift @{$self->{token}}, $token; # <?>
4080 wakaba 1.57 $token = {type => END_TAG_TOKEN, tag_name => 'caption'};
4081 wakaba 1.54 unshift @{$self->{token}}, $token;
4082 wakaba 1.57 $token = {type => END_TAG_TOKEN,
4083 wakaba 1.54 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4084     redo B;
4085     }
4086    
4087     if ($self->{open_elements}->[-1]->[1] ne 'caption') {
4088     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4089     }
4090    
4091     splice @{$self->{open_elements}}, $i;
4092    
4093     $clear_up_to_marker->();
4094    
4095 wakaba 1.56 $self->{insertion_mode} = IN_TABLE_IM;
4096 wakaba 1.54
4097     ## reprocess
4098     redo B;
4099     } else {
4100     #
4101     }
4102     } else {
4103     #
4104     }
4105 wakaba 1.57 } elsif ($token->{type} == END_TAG_TOKEN) {
4106 wakaba 1.54 if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
4107 wakaba 1.56 if ($self->{insertion_mode} == IN_CELL_IM) {
4108 wakaba 1.54 ## have an element in table scope
4109     my $i;
4110     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4111     my $node = $self->{open_elements}->[$_];
4112     if ($node->[1] eq $token->{tag_name}) {
4113     $i = $_;
4114     last INSCOPE;
4115     } elsif ({
4116     table => 1, html => 1,
4117     }->{$node->[1]}) {
4118     last INSCOPE;
4119     }
4120     } # INSCOPE
4121     unless (defined $i) {
4122     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4123     ## Ignore the token
4124     $token = $self->_get_next_token;
4125     redo B;
4126     }
4127    
4128     ## generate implied end tags
4129     if ({
4130     dd => 1, dt => 1, li => 1, p => 1,
4131     td => ($token->{tag_name} eq 'th'),
4132     th => ($token->{tag_name} eq 'td'),
4133     tr => 1,
4134     tbody => 1, tfoot=> 1, thead => 1,
4135     }->{$self->{open_elements}->[-1]->[1]}) {
4136     unshift @{$self->{token}}, $token;
4137 wakaba 1.57 $token = {type => END_TAG_TOKEN,
4138 wakaba 1.54 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4139     redo B;
4140     }
4141    
4142     if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
4143     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4144     }
4145    
4146     splice @{$self->{open_elements}}, $i;
4147    
4148     $clear_up_to_marker->();
4149    
4150 wakaba 1.56 $self->{insertion_mode} = IN_ROW_IM;
4151 wakaba 1.54
4152     $token = $self->_get_next_token;
4153     redo B;
4154 wakaba 1.56 } elsif ($self->{insertion_mode} == IN_CAPTION_IM) {
4155 wakaba 1.54 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4156     ## Ignore the token
4157     $token = $self->_get_next_token;
4158     redo B;
4159     } else {
4160     #
4161     }
4162     } elsif ($token->{tag_name} eq 'caption') {
4163 wakaba 1.56 if ($self->{insertion_mode} == IN_CAPTION_IM) {
4164 wakaba 1.54 ## have a table element in table scope
4165     my $i;
4166     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4167     my $node = $self->{open_elements}->[$_];
4168     if ($node->[1] eq $token->{tag_name}) {
4169     $i = $_;
4170     last INSCOPE;
4171     } elsif ({
4172     table => 1, html => 1,
4173     }->{$node->[1]}) {
4174     last INSCOPE;
4175     }
4176     } # INSCOPE
4177     unless (defined $i) {
4178     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4179     ## Ignore the token
4180     $token = $self->_get_next_token;
4181     redo B;
4182     }
4183    
4184     ## generate implied end tags
4185     if ({
4186     dd => 1, dt => 1, li => 1, p => 1,
4187     td => 1, th => 1, tr => 1,
4188     tbody => 1, tfoot=> 1, thead => 1,
4189     }->{$self->{open_elements}->[-1]->[1]}) {
4190     unshift @{$self->{token}}, $token;
4191 wakaba 1.57 $token = {type => END_TAG_TOKEN,
4192 wakaba 1.54 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4193     redo B;
4194     }
4195    
4196     if ($self->{open_elements}->[-1]->[1] ne 'caption') {
4197     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4198     }
4199    
4200     splice @{$self->{open_elements}}, $i;
4201    
4202     $clear_up_to_marker->();
4203    
4204 wakaba 1.56 $self->{insertion_mode} = IN_TABLE_IM;
4205 wakaba 1.54
4206     $token = $self->_get_next_token;
4207     redo B;
4208 wakaba 1.56 } elsif ($self->{insertion_mode} == IN_CELL_IM) {
4209 wakaba 1.54 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4210     ## Ignore the token
4211     $token = $self->_get_next_token;
4212     redo B;
4213     } else {
4214     #
4215 wakaba 1.1 }
4216 wakaba 1.54 } elsif ({
4217     table => 1, tbody => 1, tfoot => 1,
4218     thead => 1, tr => 1,
4219     }->{$token->{tag_name}} and
4220 wakaba 1.56 $self->{insertion_mode} == IN_CELL_IM) {
4221 wakaba 1.54 ## have an element in table scope
4222     my $i;
4223     my $tn;
4224     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4225     my $node = $self->{open_elements}->[$_];
4226     if ($node->[1] eq $token->{tag_name}) {
4227     $i = $_;
4228     last INSCOPE;
4229     } elsif ($node->[1] eq 'td' or $node->[1] eq 'th') {
4230     $tn = $node->[1];
4231     ## NOTE: There is exactly one |td| or |th| element
4232     ## in scope in the stack of open elements by definition.
4233     } elsif ({
4234     table => 1, html => 1,
4235     }->{$node->[1]}) {
4236     last INSCOPE;
4237     }
4238     } # INSCOPE
4239     unless (defined $i) {
4240     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4241     ## Ignore the token
4242     $token = $self->_get_next_token;
4243     redo B;
4244 wakaba 1.1 }
4245    
4246 wakaba 1.54 ## Close the cell
4247     unshift @{$self->{token}}, $token; # </?>
4248 wakaba 1.57 $token = {type => END_TAG_TOKEN, tag_name => $tn};
4249 wakaba 1.54 redo B;
4250     } elsif ($token->{tag_name} eq 'table' and
4251 wakaba 1.56 $self->{insertion_mode} == IN_CAPTION_IM) {
4252 wakaba 1.54 $self->{parse_error}-> (type => 'not closed:caption');
4253 wakaba 1.31
4254 wakaba 1.54 ## As if </caption>
4255     ## have a table element in table scope
4256     my $i;
4257     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4258     my $node = $self->{open_elements}->[$_];
4259     if ($node->[1] eq 'caption') {
4260     $i = $_;
4261     last INSCOPE;
4262     } elsif ({
4263     table => 1, html => 1,
4264     }->{$node->[1]}) {
4265     last INSCOPE;
4266     }
4267     } # INSCOPE
4268     unless (defined $i) {
4269     $self->{parse_error}-> (type => 'unmatched end tag:caption');
4270     ## Ignore the token
4271     $token = $self->_get_next_token;
4272     redo B;
4273     }
4274    
4275     ## generate implied end tags
4276     if ({
4277     dd => 1, dt => 1, li => 1, p => 1,
4278     td => 1, th => 1, tr => 1,
4279     tbody => 1, tfoot=> 1, thead => 1,
4280     }->{$self->{open_elements}->[-1]->[1]}) {
4281     unshift @{$self->{token}}, $token; # </table>
4282 wakaba 1.57 $token = {type => END_TAG_TOKEN, tag_name => 'caption'};
4283 wakaba 1.54 unshift @{$self->{token}}, $token;
4284 wakaba 1.57 $token = {type => END_TAG_TOKEN,
4285 wakaba 1.54 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4286     redo B;
4287     }
4288 wakaba 1.20
4289 wakaba 1.54 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
4290     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4291     }
4292 wakaba 1.12
4293 wakaba 1.54 splice @{$self->{open_elements}}, $i;
4294 wakaba 1.31
4295 wakaba 1.54 $clear_up_to_marker->();
4296 wakaba 1.1
4297 wakaba 1.56 $self->{insertion_mode} = IN_TABLE_IM;
4298 wakaba 1.3
4299 wakaba 1.54 ## reprocess
4300     redo B;
4301     } elsif ({
4302     body => 1, col => 1, colgroup => 1, html => 1,
4303     }->{$token->{tag_name}}) {
4304 wakaba 1.58 if ($self->{insertion_mode} & BODY_TABLE_IMS) {
4305 wakaba 1.54 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4306     ## Ignore the token
4307     $token = $self->_get_next_token;
4308     redo B;
4309     } else {
4310     #
4311     }
4312     } elsif ({
4313     tbody => 1, tfoot => 1,
4314     thead => 1, tr => 1,
4315     }->{$token->{tag_name}} and
4316 wakaba 1.56 $self->{insertion_mode} == IN_CAPTION_IM) {
4317 wakaba 1.25 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4318 wakaba 1.1 ## Ignore the token
4319     $token = $self->_get_next_token;
4320 wakaba 1.54 redo B;
4321     } else {
4322     #
4323 wakaba 1.1 }
4324 wakaba 1.54 } else {
4325     die "$0: $token->{type}: Unknown token type";
4326 wakaba 1.1 }
4327    
4328 wakaba 1.54 $insert = $insert_to_current;
4329     #
4330 wakaba 1.58 } elsif ($self->{insertion_mode} & TABLE_IMS) {
4331 wakaba 1.60 if ($token->{type} == CHARACTER_TOKEN) {
4332 wakaba 1.54 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4333     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4334    
4335     unless (length $token->{data}) {
4336     $token = $self->_get_next_token;
4337     redo B;
4338     }
4339     }
4340 wakaba 1.1
4341 wakaba 1.54 $self->{parse_error}-> (type => 'in table:#character');
4342 wakaba 1.1
4343 wakaba 1.54 ## As if in body, but insert into foster parent element
4344     ## ISSUE: The spec says "whenever a node would be inserted
4345     ## into the current node", but appending characters might not
4346     ## result in a new Text node.
4347     $reconstruct_active_formatting_elements->($insert_to_foster);
4348    
4349     if ({
4350     table => 1, tbody => 1, tfoot => 1,
4351     thead => 1, tr => 1,
4352     }->{$self->{open_elements}->[-1]->[1]}) {
4353     # MUST
4354     my $foster_parent_element;
4355     my $next_sibling;
4356     my $prev_sibling;
4357     OE: for (reverse 0..$#{$self->{open_elements}}) {
4358     if ($self->{open_elements}->[$_]->[1] eq 'table') {
4359     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
4360     if (defined $parent and $parent->node_type == 1) {
4361     $foster_parent_element = $parent;
4362     $next_sibling = $self->{open_elements}->[$_]->[0];
4363     $prev_sibling = $next_sibling->previous_sibling;
4364     } else {
4365     $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
4366     $prev_sibling = $foster_parent_element->last_child;
4367     }
4368     last OE;
4369     }
4370     } # OE
4371     $foster_parent_element = $self->{open_elements}->[0]->[0] and
4372     $prev_sibling = $foster_parent_element->last_child
4373     unless defined $foster_parent_element;
4374     if (defined $prev_sibling and
4375     $prev_sibling->node_type == 3) {
4376     $prev_sibling->manakai_append_text ($token->{data});
4377     } else {
4378     $foster_parent_element->insert_before
4379     ($self->{document}->create_text_node ($token->{data}),
4380     $next_sibling);
4381     }
4382     } else {
4383     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4384     }
4385    
4386 wakaba 1.52 $token = $self->_get_next_token;
4387 wakaba 1.1 redo B;
4388 wakaba 1.60 } elsif ($token->{type} == START_TAG_TOKEN) {
4389 wakaba 1.54 if ({
4390 wakaba 1.56 tr => ($self->{insertion_mode} != IN_ROW_IM),
4391 wakaba 1.54 th => 1, td => 1,
4392     }->{$token->{tag_name}}) {
4393 wakaba 1.56 if ($self->{insertion_mode} == IN_TABLE_IM) {
4394 wakaba 1.54 ## Clear back to table context
4395     while ($self->{open_elements}->[-1]->[1] ne 'table' and
4396     $self->{open_elements}->[-1]->[1] ne 'html') {
4397     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4398     pop @{$self->{open_elements}};
4399     }
4400    
4401    
4402     {
4403     my $el;
4404    
4405     $el = $self->{document}->create_element_ns
4406     (q<http://www.w3.org/1999/xhtml>, [undef, 'tbody']);
4407    
4408     $self->{open_elements}->[-1]->[0]->append_child ($el);
4409     push @{$self->{open_elements}}, [$el, 'tbody'];
4410     }
4411    
4412 wakaba 1.56 $self->{insertion_mode} = IN_TABLE_BODY_IM;
4413 wakaba 1.54 ## reprocess in the "in table body" insertion mode...
4414     }
4415    
4416 wakaba 1.56 if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
4417 wakaba 1.54 unless ($token->{tag_name} eq 'tr') {
4418     $self->{parse_error}-> (type => 'missing start tag:tr');
4419     }
4420    
4421     ## Clear back to table body context
4422     while (not {
4423     tbody => 1, tfoot => 1, thead => 1, html => 1,
4424     }->{$self->{open_elements}->[-1]->[1]}) {
4425     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4426     pop @{$self->{open_elements}};
4427     }
4428    
4429 wakaba 1.56 $self->{insertion_mode} = IN_ROW_IM;
4430 wakaba 1.54 if ($token->{tag_name} eq 'tr') {
4431    
4432     {
4433     my $el;
4434    
4435     $el = $self->{document}->create_element_ns
4436     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
4437    
4438     for my $attr_name (keys %{ $token->{attributes}}) {
4439     $el->set_attribute_ns (undef, [undef, $attr_name],
4440     $token->{attributes} ->{$attr_name}->{value});
4441 wakaba 1.1 }
4442 wakaba 1.54
4443     $self->{open_elements}->[-1]->[0]->append_child ($el);
4444     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
4445     }
4446    
4447     $token = $self->_get_next_token;
4448     redo B;
4449     } else {
4450    
4451     {
4452     my $el;
4453    
4454     $el = $self->{document}->create_element_ns
4455     (q<http://www.w3.org/1999/xhtml>, [undef, 'tr']);
4456    
4457     $self->{open_elements}->[-1]->[0]->append_child ($el);
4458     push @{$self->{open_elements}}, [$el, 'tr'];
4459     }
4460    
4461     ## reprocess in the "in row" insertion mode
4462     }
4463     }
4464 wakaba 1.52
4465 wakaba 1.54 ## Clear back to table row context
4466     while (not {
4467     tr => 1, html => 1,
4468     }->{$self->{open_elements}->[-1]->[1]}) {
4469     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4470     pop @{$self->{open_elements}};
4471     }
4472    
4473    
4474     {
4475     my $el;
4476    
4477     $el = $self->{document}->create_element_ns
4478     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
4479 wakaba 1.1
4480 wakaba 1.54 for my $attr_name (keys %{ $token->{attributes}}) {
4481     $el->set_attribute_ns (undef, [undef, $attr_name],
4482     $token->{attributes} ->{$attr_name}->{value});
4483     }
4484    
4485     $self->{open_elements}->[-1]->[0]->append_child ($el);
4486     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
4487     }
4488    
4489 wakaba 1.56 $self->{insertion_mode} = IN_CELL_IM;
4490 wakaba 1.54
4491     push @$active_formatting_elements, ['#marker', ''];
4492    
4493     $token = $self->_get_next_token;
4494     redo B;
4495     } elsif ({
4496     caption => 1, col => 1, colgroup => 1,
4497     tbody => 1, tfoot => 1, thead => 1,
4498 wakaba 1.56 tr => 1, # $self->{insertion_mode} == IN_ROW_IM
4499 wakaba 1.54 }->{$token->{tag_name}}) {
4500 wakaba 1.56 if ($self->{insertion_mode} == IN_ROW_IM) {
4501 wakaba 1.54 ## As if </tr>
4502     ## have an element in table scope
4503     my $i;
4504     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4505     my $node = $self->{open_elements}->[$_];
4506     if ($node->[1] eq 'tr') {
4507     $i = $_;
4508     last INSCOPE;
4509     } elsif ({
4510     table => 1, html => 1,
4511     }->{$node->[1]}) {
4512     last INSCOPE;
4513     }
4514     } # INSCOPE
4515     unless (defined $i) {
4516     $self->{parse_error}-> (type => 'unmacthed end tag:'.$token->{tag_name});
4517     ## Ignore the token
4518     $token = $self->_get_next_token;
4519     redo B;
4520     }
4521    
4522     ## Clear back to table row context
4523     while (not {
4524     tr => 1, html => 1,
4525     }->{$self->{open_elements}->[-1]->[1]}) {
4526     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4527     pop @{$self->{open_elements}};
4528     }
4529    
4530     pop @{$self->{open_elements}}; # tr
4531 wakaba 1.56 $self->{insertion_mode} = IN_TABLE_BODY_IM;
4532 wakaba 1.54 if ($token->{tag_name} eq 'tr') {
4533     ## reprocess
4534     redo B;
4535     } else {
4536     ## reprocess in the "in table body" insertion mode...
4537     }
4538     }
4539 wakaba 1.52
4540 wakaba 1.56 if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
4541 wakaba 1.54 ## have an element in table scope
4542     my $i;
4543     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4544     my $node = $self->{open_elements}->[$_];
4545     if ({
4546     tbody => 1, thead => 1, tfoot => 1,
4547     }->{$node->[1]}) {
4548     $i = $_;
4549     last INSCOPE;
4550     } elsif ({
4551     table => 1, html => 1,
4552     }->{$node->[1]}) {
4553     last INSCOPE;
4554     }
4555     } # INSCOPE
4556     unless (defined $i) {
4557     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4558     ## Ignore the token
4559     $token = $self->_get_next_token;
4560     redo B;
4561     }
4562 wakaba 1.52
4563 wakaba 1.54 ## Clear back to table body context
4564     while (not {
4565     tbody => 1, tfoot => 1, thead => 1, html => 1,
4566     }->{$self->{open_elements}->[-1]->[1]}) {
4567     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4568     pop @{$self->{open_elements}};
4569     }
4570    
4571     ## As if </{current node}>
4572     ## have an element in table scope
4573     ## true by definition
4574    
4575     ## Clear back to table body context
4576     ## nop by definition
4577    
4578     pop @{$self->{open_elements}};
4579 wakaba 1.56 $self->{insertion_mode} = IN_TABLE_IM;
4580 wakaba 1.54 ## reprocess in "in table" insertion mode...
4581     }
4582 wakaba 1.51
4583 wakaba 1.54 if ($token->{tag_name} eq 'col') {
4584     ## Clear back to table context
4585     while ($self->{open_elements}->[-1]->[1] ne 'table' and
4586     $self->{open_elements}->[-1]->[1] ne 'html') {
4587     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4588     pop @{$self->{open_elements}};
4589     }
4590    
4591    
4592 wakaba 1.51 {
4593     my $el;
4594    
4595     $el = $self->{document}->create_element_ns
4596 wakaba 1.54 (q<http://www.w3.org/1999/xhtml>, [undef, 'colgroup']);
4597 wakaba 1.51
4598     $self->{open_elements}->[-1]->[0]->append_child ($el);
4599 wakaba 1.54 push @{$self->{open_elements}}, [$el, 'colgroup'];
4600 wakaba 1.51 }
4601    
4602 wakaba 1.56 $self->{insertion_mode} = IN_COLUMN_GROUP_IM;
4603 wakaba 1.54 ## reprocess
4604     redo B;
4605     } elsif ({
4606     caption => 1,
4607     colgroup => 1,
4608     tbody => 1, tfoot => 1, thead => 1,
4609     }->{$token->{tag_name}}) {
4610     ## Clear back to table context
4611     while ($self->{open_elements}->[-1]->[1] ne 'table' and
4612     $self->{open_elements}->[-1]->[1] ne 'html') {
4613     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4614     pop @{$self->{open_elements}};
4615     }
4616    
4617     push @$active_formatting_elements, ['#marker', '']
4618     if $token->{tag_name} eq 'caption';
4619    
4620 wakaba 1.52
4621 wakaba 1.54 {
4622     my $el;
4623    
4624     $el = $self->{document}->create_element_ns
4625 wakaba 1.52 (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
4626    
4627 wakaba 1.54 for my $attr_name (keys %{ $token->{attributes}}) {
4628     $el->set_attribute_ns (undef, [undef, $attr_name],
4629     $token->{attributes} ->{$attr_name}->{value});
4630 wakaba 1.52 }
4631    
4632 wakaba 1.54 $self->{open_elements}->[-1]->[0]->append_child ($el);
4633     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
4634     }
4635    
4636     $self->{insertion_mode} = {
4637 wakaba 1.56 caption => IN_CAPTION_IM,
4638     colgroup => IN_COLUMN_GROUP_IM,
4639     tbody => IN_TABLE_BODY_IM,
4640     tfoot => IN_TABLE_BODY_IM,
4641     thead => IN_TABLE_BODY_IM,
4642 wakaba 1.54 }->{$token->{tag_name}};
4643 wakaba 1.52 $token = $self->_get_next_token;
4644     redo B;
4645 wakaba 1.54 } else {
4646     die "$0: in table: <>: $token->{tag_name}";
4647     }
4648     } elsif ($token->{tag_name} eq 'table') {
4649     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4650    
4651     ## As if </table>
4652     ## have a table element in table scope
4653     my $i;
4654     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4655     my $node = $self->{open_elements}->[$_];
4656     if ($node->[1] eq 'table') {
4657     $i = $_;
4658     last INSCOPE;
4659     } elsif ({
4660     table => 1, html => 1,
4661     }->{$node->[1]}) {
4662     last INSCOPE;
4663     }
4664     } # INSCOPE
4665     unless (defined $i) {
4666     $self->{parse_error}-> (type => 'unmatched end tag:table');
4667     ## Ignore tokens </table><table>
4668 wakaba 1.52 $token = $self->_get_next_token;
4669     redo B;
4670     }
4671    
4672 wakaba 1.54 ## generate implied end tags
4673     if ({
4674     dd => 1, dt => 1, li => 1, p => 1,
4675     td => 1, th => 1, tr => 1,
4676     tbody => 1, tfoot=> 1, thead => 1,
4677     }->{$self->{open_elements}->[-1]->[1]}) {
4678     unshift @{$self->{token}}, $token; # <table>
4679 wakaba 1.57 $token = {type => END_TAG_TOKEN, tag_name => 'table'};
4680 wakaba 1.54 unshift @{$self->{token}}, $token;
4681 wakaba 1.57 $token = {type => END_TAG_TOKEN,
4682 wakaba 1.54 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4683     redo B;
4684     }
4685    
4686     if ($self->{open_elements}->[-1]->[1] ne 'table') {
4687     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4688     }
4689    
4690     splice @{$self->{open_elements}}, $i;
4691    
4692     $self->_reset_insertion_mode;
4693 wakaba 1.52
4694 wakaba 1.54 ## reprocess
4695     redo B;
4696 wakaba 1.60 } else {
4697     $self->{parse_error}-> (type => 'in table:'.$token->{tag_name});
4698    
4699     $insert = $insert_to_foster;
4700     #
4701     }
4702     } elsif ($token->{type} == END_TAG_TOKEN) {
4703 wakaba 1.54 if ($token->{tag_name} eq 'tr' and
4704 wakaba 1.56 $self->{insertion_mode} == IN_ROW_IM) {
4705 wakaba 1.54 ## have an element in table scope
4706     my $i;
4707     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4708     my $node = $self->{open_elements}->[$_];
4709     if ($node->[1] eq $token->{tag_name}) {
4710     $i = $_;
4711     last INSCOPE;
4712     } elsif ({
4713     table => 1, html => 1,
4714     }->{$node->[1]}) {
4715     last INSCOPE;
4716     }
4717     } # INSCOPE
4718     unless (defined $i) {
4719     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4720     ## Ignore the token
4721     $token = $self->_get_next_token;
4722     redo B;
4723     }
4724    
4725     ## Clear back to table row context
4726     while (not {
4727     tr => 1, html => 1,
4728     }->{$self->{open_elements}->[-1]->[1]}) {
4729     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4730     pop @{$self->{open_elements}};
4731     }
4732    
4733     pop @{$self->{open_elements}}; # tr
4734 wakaba 1.56 $self->{insertion_mode} = IN_TABLE_BODY_IM;
4735 wakaba 1.54 $token = $self->_get_next_token;
4736     redo B;
4737     } elsif ($token->{tag_name} eq 'table') {
4738 wakaba 1.56 if ($self->{insertion_mode} == IN_ROW_IM) {
4739 wakaba 1.54 ## As if </tr>
4740     ## have an element in table scope
4741     my $i;
4742     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4743     my $node = $self->{open_elements}->[$_];
4744     if ($node->[1] eq 'tr') {
4745     $i = $_;
4746     last INSCOPE;
4747     } elsif ({
4748     table => 1, html => 1,
4749     }->{$node->[1]}) {
4750     last INSCOPE;
4751     }
4752     } # INSCOPE
4753     unless (defined $i) {
4754     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{type});
4755     ## Ignore the token
4756     $token = $self->_get_next_token;
4757     redo B;
4758     }
4759    
4760     ## Clear back to table row context
4761     while (not {
4762     tr => 1, html => 1,
4763     }->{$self->{open_elements}->[-1]->[1]}) {
4764     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4765     pop @{$self->{open_elements}};
4766     }
4767    
4768     pop @{$self->{open_elements}}; # tr
4769 wakaba 1.56 $self->{insertion_mode} = IN_TABLE_BODY_IM;
4770 wakaba 1.54 ## reprocess in the "in table body" insertion mode...
4771     }
4772    
4773 wakaba 1.56 if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
4774 wakaba 1.54 ## have an element in table scope
4775     my $i;
4776     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4777     my $node = $self->{open_elements}->[$_];
4778     if ({
4779     tbody => 1, thead => 1, tfoot => 1,
4780     }->{$node->[1]}) {
4781     $i = $_;
4782     last INSCOPE;
4783     } elsif ({
4784     table => 1, html => 1,
4785     }->{$node->[1]}) {
4786     last INSCOPE;
4787     }
4788     } # INSCOPE
4789     unless (defined $i) {
4790     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4791     ## Ignore the token
4792     $token = $self->_get_next_token;
4793     redo B;
4794     }
4795    
4796     ## Clear back to table body context
4797     while (not {
4798     tbody => 1, tfoot => 1, thead => 1, html => 1,
4799     }->{$self->{open_elements}->[-1]->[1]}) {
4800     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4801     pop @{$self->{open_elements}};
4802     }
4803    
4804     ## As if </{current node}>
4805     ## have an element in table scope
4806     ## true by definition
4807    
4808     ## Clear back to table body context
4809     ## nop by definition
4810    
4811     pop @{$self->{open_elements}};
4812 wakaba 1.56 $self->{insertion_mode} = IN_TABLE_IM;
4813 wakaba 1.54 ## reprocess in the "in table" insertion mode...
4814     }
4815 wakaba 1.52
4816 wakaba 1.54 ## have a table element in table scope
4817     my $i;
4818     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4819     my $node = $self->{open_elements}->[$_];
4820     if ($node->[1] eq $token->{tag_name}) {
4821     $i = $_;
4822     last INSCOPE;
4823     } elsif ({
4824     table => 1, html => 1,
4825     }->{$node->[1]}) {
4826     last INSCOPE;
4827     }
4828     } # INSCOPE
4829     unless (defined $i) {
4830     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4831     ## Ignore the token
4832     $token = $self->_get_next_token;
4833     redo B;
4834 wakaba 1.51 }
4835    
4836 wakaba 1.54 ## generate implied end tags
4837     if ({
4838     dd => 1, dt => 1, li => 1, p => 1,
4839     td => 1, th => 1, tr => 1,
4840     tbody => 1, tfoot=> 1, thead => 1,
4841     }->{$self->{open_elements}->[-1]->[1]}) {
4842     unshift @{$self->{token}}, $token;
4843 wakaba 1.57 $token = {type => END_TAG_TOKEN,
4844 wakaba 1.54 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4845     redo B;
4846 wakaba 1.51 }
4847    
4848 wakaba 1.54 if ($self->{open_elements}->[-1]->[1] ne 'table') {
4849     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4850 wakaba 1.25 }
4851 wakaba 1.54
4852     splice @{$self->{open_elements}}, $i;
4853    
4854     $self->_reset_insertion_mode;
4855 wakaba 1.1
4856     $token = $self->_get_next_token;
4857 wakaba 1.25 redo B;
4858 wakaba 1.54 } elsif ({
4859     tbody => 1, tfoot => 1, thead => 1,
4860     }->{$token->{tag_name}} and
4861 wakaba 1.58 $self->{insertion_mode} & ROW_IMS) {
4862 wakaba 1.56 if ($self->{insertion_mode} == IN_ROW_IM) {
4863 wakaba 1.54 ## have an element in table scope
4864     my $i;
4865     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4866     my $node = $self->{open_elements}->[$_];
4867     if ($node->[1] eq $token->{tag_name}) {
4868     $i = $_;
4869     last INSCOPE;
4870     } elsif ({
4871     table => 1, html => 1,
4872     }->{$node->[1]}) {
4873     last INSCOPE;
4874     }
4875     } # INSCOPE
4876     unless (defined $i) {
4877     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4878     ## Ignore the token
4879     $token = $self->_get_next_token;
4880     redo B;
4881     }
4882    
4883     ## As if </tr>
4884     ## have an element in table scope
4885     my $i;
4886     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4887     my $node = $self->{open_elements}->[$_];
4888     if ($node->[1] eq 'tr') {
4889     $i = $_;
4890     last INSCOPE;
4891     } elsif ({
4892     table => 1, html => 1,
4893     }->{$node->[1]}) {
4894     last INSCOPE;
4895     }
4896     } # INSCOPE
4897     unless (defined $i) {
4898     $self->{parse_error}-> (type => 'unmatched end tag:tr');
4899     ## Ignore the token
4900     $token = $self->_get_next_token;
4901     redo B;
4902     }
4903    
4904     ## Clear back to table row context
4905     while (not {
4906     tr => 1, html => 1,
4907     }->{$self->{open_elements}->[-1]->[1]}) {
4908     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4909     pop @{$self->{open_elements}};
4910     }
4911    
4912     pop @{$self->{open_elements}}; # tr
4913 wakaba 1.56 $self->{insertion_mode} = IN_TABLE_BODY_IM;
4914 wakaba 1.54 ## reprocess in the "in table body" insertion mode...
4915 wakaba 1.34 }
4916    
4917 wakaba 1.54 ## have an element in table scope
4918     my $i;
4919     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4920     my $node = $self->{open_elements}->[$_];
4921     if ($node->[1] eq $token->{tag_name}) {
4922     $i = $_;
4923     last INSCOPE;
4924     } elsif ({
4925     table => 1, html => 1,
4926     }->{$node->[1]}) {
4927     last INSCOPE;
4928 wakaba 1.34 }
4929 wakaba 1.54 } # INSCOPE
4930     unless (defined $i) {
4931     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4932     ## Ignore the token
4933     $token = $self->_get_next_token;
4934     redo B;
4935 wakaba 1.34 }
4936    
4937 wakaba 1.54 ## Clear back to table body context
4938     while (not {
4939     tbody => 1, tfoot => 1, thead => 1, html => 1,
4940     }->{$self->{open_elements}->[-1]->[1]}) {
4941     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4942 wakaba 1.51 pop @{$self->{open_elements}};
4943 wakaba 1.25 }
4944 wakaba 1.51
4945 wakaba 1.54 pop @{$self->{open_elements}};
4946 wakaba 1.56 $self->{insertion_mode} = IN_TABLE_IM;
4947 wakaba 1.54 $token = $self->_get_next_token;
4948     redo B;
4949     } elsif ({
4950     body => 1, caption => 1, col => 1, colgroup => 1,
4951     html => 1, td => 1, th => 1,
4952 wakaba 1.56 tr => 1, # $self->{insertion_mode} == IN_ROW_IM
4953     tbody => 1, tfoot => 1, thead => 1, # $self->{insertion_mode} == IN_TABLE_IM
4954 wakaba 1.54 }->{$token->{tag_name}}) {
4955     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
4956     ## Ignore the token
4957     $token = $self->_get_next_token;
4958 wakaba 1.1 redo B;
4959 wakaba 1.60 } else {
4960     $self->{parse_error}-> (type => 'in table:/'.$token->{tag_name});
4961 wakaba 1.54
4962 wakaba 1.60 $insert = $insert_to_foster;
4963     #
4964     }
4965     } else {
4966     die "$0: $token->{type}: Unknown token type";
4967     }
4968 wakaba 1.56 } elsif ($self->{insertion_mode} == IN_COLUMN_GROUP_IM) {
4969 wakaba 1.57 if ($token->{type} == CHARACTER_TOKEN) {
4970 wakaba 1.54 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4971     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4972     unless (length $token->{data}) {
4973     $token = $self->_get_next_token;
4974     redo B;
4975 wakaba 1.25 }
4976 wakaba 1.54 }
4977    
4978     #
4979 wakaba 1.57 } elsif ($token->{type} == START_TAG_TOKEN) {
4980 wakaba 1.54 if ($token->{tag_name} eq 'col') {
4981    
4982 wakaba 1.25 {
4983     my $el;
4984    
4985 wakaba 1.1 $el = $self->{document}->create_element_ns
4986     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
4987    
4988 wakaba 1.25 for my $attr_name (keys %{ $token->{attributes}}) {
4989 wakaba 1.1 $el->set_attribute_ns (undef, [undef, $attr_name],
4990 wakaba 1.25 $token->{attributes} ->{$attr_name}->{value});
4991 wakaba 1.1 }
4992    
4993 wakaba 1.25 $self->{open_elements}->[-1]->[0]->append_child ($el);
4994     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
4995     }
4996    
4997 wakaba 1.54 pop @{$self->{open_elements}};
4998     $token = $self->_get_next_token;
4999     redo B;
5000     } else {
5001     #
5002     }
5003 wakaba 1.57 } elsif ($token->{type} == END_TAG_TOKEN) {
5004 wakaba 1.54 if ($token->{tag_name} eq 'colgroup') {
5005     if ($self->{open_elements}->[-1]->[1] eq 'html') {
5006     $self->{parse_error}-> (type => 'unmatched end tag:colgroup');
5007 wakaba 1.25 ## Ignore the token
5008 wakaba 1.42 $token = $self->_get_next_token;
5009 wakaba 1.25 redo B;
5010 wakaba 1.24 } else {
5011 wakaba 1.54 pop @{$self->{open_elements}}; # colgroup
5012 wakaba 1.56 $self->{insertion_mode} = IN_TABLE_IM;
5013 wakaba 1.54 $token = $self->_get_next_token;
5014     redo B;
5015 wakaba 1.25 }
5016 wakaba 1.54 } elsif ($token->{tag_name} eq 'col') {
5017     $self->{parse_error}-> (type => 'unmatched end tag:col');
5018     ## Ignore the token
5019     $token = $self->_get_next_token;
5020     redo B;
5021     } else {
5022     #
5023     }
5024     } else {
5025     #
5026     }
5027 wakaba 1.51
5028 wakaba 1.54 ## As if </colgroup>
5029     if ($self->{open_elements}->[-1]->[1] eq 'html') {
5030     $self->{parse_error}-> (type => 'unmatched end tag:colgroup');
5031     ## Ignore the token
5032     $token = $self->_get_next_token;
5033     redo B;
5034     } else {
5035     pop @{$self->{open_elements}}; # colgroup
5036 wakaba 1.56 $self->{insertion_mode} = IN_TABLE_IM;
5037 wakaba 1.54 ## reprocess
5038     redo B;
5039     }
5040 wakaba 1.56 } elsif ($self->{insertion_mode} == IN_SELECT_IM) {
5041 wakaba 1.60 if ($token->{type} == CHARACTER_TOKEN) {
5042     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
5043     $token = $self->_get_next_token;
5044     redo B;
5045     } elsif ($token->{type} == START_TAG_TOKEN) {
5046 wakaba 1.54 if ($token->{tag_name} eq 'option') {
5047     if ($self->{open_elements}->[-1]->[1] eq 'option') {
5048     ## As if </option>
5049 wakaba 1.51 pop @{$self->{open_elements}};
5050     }
5051    
5052 wakaba 1.1
5053     {
5054     my $el;
5055    
5056     $el = $self->{document}->create_element_ns
5057 wakaba 1.51 (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
5058 wakaba 1.1
5059     for my $attr_name (keys %{ $token->{attributes}}) {
5060     $el->set_attribute_ns (undef, [undef, $attr_name],
5061     $token->{attributes} ->{$attr_name}->{value});
5062     }
5063    
5064 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
5065 wakaba 1.51 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
5066 wakaba 1.1 }
5067    
5068     $token = $self->_get_next_token;
5069     redo B;
5070 wakaba 1.54 } elsif ($token->{tag_name} eq 'optgroup') {
5071     if ($self->{open_elements}->[-1]->[1] eq 'option') {
5072     ## As if </option>
5073 wakaba 1.51 pop @{$self->{open_elements}};
5074     }
5075 wakaba 1.54
5076     if ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
5077     ## As if </optgroup>
5078 wakaba 1.51 pop @{$self->{open_elements}};
5079     }
5080    
5081    
5082 wakaba 1.1 {
5083     my $el;
5084    
5085     $el = $self->{document}->create_element_ns
5086 wakaba 1.54 (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
5087 wakaba 1.1
5088 wakaba 1.54 for my $attr_name (keys %{ $token->{attributes}}) {
5089     $el->set_attribute_ns (undef, [undef, $attr_name],
5090     $token->{attributes} ->{$attr_name}->{value});
5091     }
5092    
5093 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($el);
5094 wakaba 1.54 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
5095 wakaba 1.1 }
5096    
5097 wakaba 1.54 $token = $self->_get_next_token;
5098     redo B;
5099     } elsif ($token->{tag_name} eq 'select') {
5100     $self->{parse_error}-> (type => 'not closed:select');
5101     ## As if </select> instead
5102     ## have an element in table scope
5103     my $i;
5104     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5105     my $node = $self->{open_elements}->[$_];
5106     if ($node->[1] eq $token->{tag_name}) {
5107     $i = $_;
5108     last INSCOPE;
5109     } elsif ({
5110     table => 1, html => 1,
5111     }->{$node->[1]}) {
5112     last INSCOPE;
5113 wakaba 1.44 }
5114 wakaba 1.54 } # INSCOPE
5115     unless (defined $i) {
5116     $self->{parse_error}-> (type => 'unmatched end tag:select');
5117     ## Ignore the token
5118     $token = $self->_get_next_token;
5119 wakaba 1.44 redo B;
5120     }
5121 wakaba 1.54
5122     splice @{$self->{open_elements}}, $i;
5123    
5124     $self->_reset_insertion_mode;
5125    
5126     $token = $self->_get_next_token;
5127     redo B;
5128 wakaba 1.60 } else {
5129     $self->{parse_error}-> (type => 'in select:'.$token->{tag_name});
5130     ## Ignore the token
5131     $token = $self->_get_next_token;
5132     redo B;
5133     }
5134     } elsif ($token->{type} == END_TAG_TOKEN) {
5135 wakaba 1.54 if ($token->{tag_name} eq 'optgroup') {
5136     if ($self->{open_elements}->[-1]->[1] eq 'option' and
5137     $self->{open_elements}->[-2]->[1] eq 'optgroup') {
5138     ## As if </option>
5139     splice @{$self->{open_elements}}, -2;
5140     } elsif ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
5141     pop @{$self->{open_elements}};
5142     } else {
5143 wakaba 1.44 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5144 wakaba 1.43 ## Ignore the token
5145 wakaba 1.54 }
5146     $token = $self->_get_next_token;
5147     redo B;
5148     } elsif ($token->{tag_name} eq 'option') {
5149     if ($self->{open_elements}->[-1]->[1] eq 'option') {
5150     pop @{$self->{open_elements}};
5151 wakaba 1.44 } else {
5152 wakaba 1.54 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5153     ## Ignore the token
5154 wakaba 1.43 }
5155 wakaba 1.54 $token = $self->_get_next_token;
5156     redo B;
5157     } elsif ($token->{tag_name} eq 'select') {
5158     ## have an element in table scope
5159     my $i;
5160     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5161     my $node = $self->{open_elements}->[$_];
5162     if ($node->[1] eq $token->{tag_name}) {
5163     $i = $_;
5164     last INSCOPE;
5165     } elsif ({
5166     table => 1, html => 1,
5167     }->{$node->[1]}) {
5168     last INSCOPE;
5169 wakaba 1.44 }
5170 wakaba 1.54 } # INSCOPE
5171     unless (defined $i) {
5172 wakaba 1.44 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5173     ## Ignore the token
5174     $token = $self->_get_next_token;
5175 wakaba 1.43 redo B;
5176     }
5177 wakaba 1.54
5178     splice @{$self->{open_elements}}, $i;
5179    
5180     $self->_reset_insertion_mode;
5181    
5182     $token = $self->_get_next_token;
5183     redo B;
5184 wakaba 1.44 } elsif ({
5185 wakaba 1.54 caption => 1, table => 1, tbody => 1,
5186     tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
5187     }->{$token->{tag_name}}) {
5188     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5189    
5190 wakaba 1.44 ## have an element in table scope
5191 wakaba 1.43 my $i;
5192     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5193     my $node = $self->{open_elements}->[$_];
5194     if ($node->[1] eq $token->{tag_name}) {
5195     $i = $_;
5196     last INSCOPE;
5197     } elsif ({
5198     table => 1, html => 1,
5199     }->{$node->[1]}) {
5200     last INSCOPE;
5201     }
5202     } # INSCOPE
5203     unless (defined $i) {
5204     ## Ignore the token
5205     $token = $self->_get_next_token;
5206     redo B;
5207     }
5208 wakaba 1.54
5209     ## As if </select>
5210     ## have an element in table scope
5211     undef $i;
5212 wakaba 1.43 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5213     my $node = $self->{open_elements}->[$_];
5214 wakaba 1.54 if ($node->[1] eq 'select') {
5215 wakaba 1.43 $i = $_;
5216     last INSCOPE;
5217     } elsif ({
5218     table => 1, html => 1,
5219     }->{$node->[1]}) {
5220     last INSCOPE;
5221     }
5222     } # INSCOPE
5223     unless (defined $i) {
5224 wakaba 1.54 $self->{parse_error}-> (type => 'unmatched end tag:select');
5225     ## Ignore the </select> token
5226     $token = $self->_get_next_token; ## TODO: ok?
5227 wakaba 1.43 redo B;
5228     }
5229 wakaba 1.54
5230     splice @{$self->{open_elements}}, $i;
5231    
5232     $self->_reset_insertion_mode;
5233    
5234     ## reprocess
5235     redo B;
5236 wakaba 1.60 } else {
5237     $self->{parse_error}-> (type => 'in select:/'.$token->{tag_name});
5238 wakaba 1.54 ## Ignore the token
5239     $token = $self->_get_next_token;
5240     redo B;
5241 wakaba 1.60 }
5242     } else {
5243     die "$0: $token->{type}: Unknown token type";
5244     }
5245 wakaba 1.58 } elsif ($self->{insertion_mode} & BODY_AFTER_IMS) {
5246 wakaba 1.57 if ($token->{type} == CHARACTER_TOKEN) {
5247 wakaba 1.54 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5248     my $data = $1;
5249     ## As if in body
5250     $reconstruct_active_formatting_elements->($insert_to_current);
5251    
5252     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5253    
5254     unless (length $token->{data}) {
5255     $token = $self->_get_next_token;
5256     redo B;
5257     }
5258     }
5259    
5260 wakaba 1.56 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
5261 wakaba 1.54 $self->{parse_error}-> (type => 'after html:#character');
5262    
5263     ## Reprocess in the "main" phase, "after body" insertion mode...
5264     }
5265    
5266     ## "after body" insertion mode
5267     $self->{parse_error}-> (type => 'after body:#character');
5268    
5269 wakaba 1.56 $self->{insertion_mode} = IN_BODY_IM;
5270 wakaba 1.54 ## reprocess
5271     redo B;
5272 wakaba 1.57 } elsif ($token->{type} == START_TAG_TOKEN) {
5273 wakaba 1.56 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
5274 wakaba 1.54 $self->{parse_error}-> (type => 'after html:'.$token->{tag_name});
5275    
5276     ## Reprocess in the "main" phase, "after body" insertion mode...
5277     }
5278    
5279     ## "after body" insertion mode
5280     $self->{parse_error}-> (type => 'after body:'.$token->{tag_name});
5281    
5282 wakaba 1.56 $self->{insertion_mode} = IN_BODY_IM;
5283 wakaba 1.54 ## reprocess
5284     redo B;
5285 wakaba 1.57 } elsif ($token->{type} == END_TAG_TOKEN) {
5286 wakaba 1.56 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
5287 wakaba 1.54 $self->{parse_error}-> (type => 'after html:/'.$token->{tag_name});
5288    
5289 wakaba 1.56 $self->{insertion_mode} = AFTER_BODY_IM;
5290 wakaba 1.54 ## Reprocess in the "main" phase, "after body" insertion mode...
5291     }
5292    
5293     ## "after body" insertion mode
5294     if ($token->{tag_name} eq 'html') {
5295     if (defined $self->{inner_html_node}) {
5296     $self->{parse_error}-> (type => 'unmatched end tag:html');
5297     ## Ignore the token
5298     $token = $self->_get_next_token;
5299     redo B;
5300     } else {
5301 wakaba 1.56 $self->{insertion_mode} = AFTER_HTML_BODY_IM;
5302 wakaba 1.54 $token = $self->_get_next_token;
5303     redo B;
5304     }
5305     } else {
5306     $self->{parse_error}-> (type => 'after body:/'.$token->{tag_name});
5307    
5308 wakaba 1.56 $self->{insertion_mode} = IN_BODY_IM;
5309 wakaba 1.54 ## reprocess
5310     redo B;
5311     }
5312     } else {
5313     die "$0: $token->{type}: Unknown token type";
5314     }
5315 wakaba 1.58 } elsif ($self->{insertion_mode} & FRAME_IMS) {
5316 wakaba 1.57 if ($token->{type} == CHARACTER_TOKEN) {
5317 wakaba 1.54 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5318     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5319    
5320     unless (length $token->{data}) {
5321     $token = $self->_get_next_token;
5322     redo B;
5323     }
5324     }
5325    
5326     if ($token->{data} =~ s/^[^\x09\x0A\x0B\x0C\x20]+//) {
5327 wakaba 1.56 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
5328 wakaba 1.54 $self->{parse_error}-> (type => 'in frameset:#character');
5329 wakaba 1.56 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
5330 wakaba 1.54 $self->{parse_error}-> (type => 'after frameset:#character');
5331     } else { # "after html frameset"
5332     $self->{parse_error}-> (type => 'after html:#character');
5333    
5334 wakaba 1.56 $self->{insertion_mode} = AFTER_FRAMESET_IM;
5335 wakaba 1.54 ## Reprocess in the "main" phase, "after frameset"...
5336     $self->{parse_error}-> (type => 'after frameset:#character');
5337     }
5338    
5339     ## Ignore the token.
5340     if (length $token->{data}) {
5341     ## reprocess the rest of characters
5342     } else {
5343     $token = $self->_get_next_token;
5344     }
5345     redo B;
5346     }
5347    
5348     die qq[$0: Character "$token->{data}"];
5349 wakaba 1.57 } elsif ($token->{type} == START_TAG_TOKEN) {
5350 wakaba 1.56 if ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
5351 wakaba 1.54 $self->{parse_error}-> (type => 'after html:'.$token->{tag_name});
5352    
5353 wakaba 1.56 $self->{insertion_mode} = AFTER_FRAMESET_IM;
5354 wakaba 1.54 ## Process in the "main" phase, "after frameset" insertion mode...
5355     }
5356    
5357     if ($token->{tag_name} eq 'frameset' and
5358 wakaba 1.56 $self->{insertion_mode} == IN_FRAMESET_IM) {
5359 wakaba 1.54
5360     {
5361     my $el;
5362    
5363     $el = $self->{document}->create_element_ns
5364     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
5365    
5366     for my $attr_name (keys %{ $token->{attributes}}) {
5367     $el->set_attribute_ns (undef, [undef, $attr_name],
5368     $token->{attributes} ->{$attr_name}->{value});
5369     }
5370    
5371     $self->{open_elements}->[-1]->[0]->append_child ($el);
5372     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
5373     }
5374    
5375     $token = $self->_get_next_token;
5376     redo B;
5377     } elsif ($token->{tag_name} eq 'frame' and
5378 wakaba 1.56 $self->{insertion_mode} == IN_FRAMESET_IM) {
5379 wakaba 1.54
5380     {
5381     my $el;
5382    
5383     $el = $self->{document}->create_element_ns
5384     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
5385    
5386     for my $attr_name (keys %{ $token->{attributes}}) {
5387     $el->set_attribute_ns (undef, [undef, $attr_name],
5388     $token->{attributes} ->{$attr_name}->{value});
5389     }
5390    
5391     $self->{open_elements}->[-1]->[0]->append_child ($el);
5392     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
5393     }
5394    
5395     pop @{$self->{open_elements}};
5396     $token = $self->_get_next_token;
5397     redo B;
5398     } elsif ($token->{tag_name} eq 'noframes') {
5399     ## NOTE: As if in body.
5400     $parse_rcdata->(CDATA_CONTENT_MODEL, $insert_to_current);
5401     redo B;
5402     } else {
5403 wakaba 1.56 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
5404 wakaba 1.54 $self->{parse_error}-> (type => 'in frameset:'.$token->{tag_name});
5405     } else {
5406     $self->{parse_error}-> (type => 'after frameset:'.$token->{tag_name});
5407     }
5408     ## Ignore the token
5409     $token = $self->_get_next_token;
5410     redo B;
5411     }
5412 wakaba 1.57 } elsif ($token->{type} == END_TAG_TOKEN) {
5413 wakaba 1.56 if ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
5414 wakaba 1.54 $self->{parse_error}-> (type => 'after html:/'.$token->{tag_name});
5415    
5416 wakaba 1.56 $self->{insertion_mode} = AFTER_FRAMESET_IM;
5417 wakaba 1.54 ## Process in the "main" phase, "after frameset" insertion mode...
5418     }
5419    
5420     if ($token->{tag_name} eq 'frameset' and
5421 wakaba 1.56 $self->{insertion_mode} == IN_FRAMESET_IM) {
5422 wakaba 1.54 if ($self->{open_elements}->[-1]->[1] eq 'html' and
5423     @{$self->{open_elements}} == 1) {
5424     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
5425     ## Ignore the token
5426     $token = $self->_get_next_token;
5427     } else {
5428     pop @{$self->{open_elements}};
5429     $token = $self->_get_next_token;
5430     }
5431 wakaba 1.43
5432 wakaba 1.54 if (not defined $self->{inner_html_node} and
5433     $self->{open_elements}->[-1]->[1] ne 'frameset') {
5434 wakaba 1.56 $self->{insertion_mode} = AFTER_FRAMESET_IM;
5435 wakaba 1.54 }
5436     redo B;
5437     } elsif ($token->{tag_name} eq 'html' and
5438 wakaba 1.56 $self->{insertion_mode} == AFTER_FRAMESET_IM) {
5439     $self->{insertion_mode} = AFTER_HTML_FRAMESET_IM;
5440 wakaba 1.54 $token = $self->_get_next_token;
5441     redo B;
5442     } else {
5443 wakaba 1.56 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
5444 wakaba 1.54 $self->{parse_error}-> (type => 'in frameset:/'.$token->{tag_name});
5445     } else {
5446     $self->{parse_error}-> (type => 'after frameset:/'.$token->{tag_name});
5447     }
5448     ## Ignore the token
5449     $token = $self->_get_next_token;
5450     redo B;
5451     }
5452     } else {
5453     die "$0: $token->{type}: Unknown token type";
5454     }
5455 wakaba 1.43
5456 wakaba 1.54 ## ISSUE: An issue in spec here
5457     } else {
5458     die "$0: $self->{insertion_mode}: Unknown insertion mode";
5459     }
5460 wakaba 1.43
5461 wakaba 1.54 ## "in body" insertion mode
5462 wakaba 1.57 if ($token->{type} == START_TAG_TOKEN) {
5463 wakaba 1.54 if ($token->{tag_name} eq 'script') {
5464     ## NOTE: This is an "as if in head" code clone
5465     $script_start_tag->($insert);
5466 wakaba 1.55 redo B;
5467 wakaba 1.54 } elsif ($token->{tag_name} eq 'style') {
5468     ## NOTE: This is an "as if in head" code clone
5469     $parse_rcdata->(CDATA_CONTENT_MODEL, $insert);
5470 wakaba 1.55 redo B;
5471 wakaba 1.54 } elsif ({
5472     base => 1, link => 1,
5473     }->{$token->{tag_name}}) {
5474     ## NOTE: This is an "as if in head" code clone, only "-t" differs
5475    
5476     {
5477     my $el;
5478    
5479     $el = $self->{document}->create_element_ns
5480     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
5481    
5482     for my $attr_name (keys %{ $token->{attributes}}) {
5483     $el->set_attribute_ns (undef, [undef, $attr_name],
5484     $token->{attributes} ->{$attr_name}->{value});
5485     }
5486    
5487     $insert->($el);
5488     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
5489     }
5490    
5491     pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
5492     $token = $self->_get_next_token;
5493 wakaba 1.55 redo B;
5494 wakaba 1.54 } elsif ($token->{tag_name} eq 'meta') {
5495     ## NOTE: This is an "as if in head" code clone, only "-t" differs
5496    
5497     {
5498     my $el;
5499    
5500     $el = $self->{document}->create_element_ns
5501     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
5502    
5503     for my $attr_name (keys %{ $token->{attributes}}) {
5504     $el->set_attribute_ns (undef, [undef, $attr_name],
5505     $token->{attributes} ->{$attr_name}->{value});
5506     }
5507    
5508     $insert->($el);
5509     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
5510     }
5511    
5512     pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
5513 wakaba 1.43
5514 wakaba 1.54 unless ($self->{confident}) {
5515     my $charset;
5516     if ($token->{attributes}->{charset}) { ## TODO: And if supported
5517     $charset = $token->{attributes}->{charset}->{value};
5518     }
5519     if ($token->{attributes}->{'http-equiv'}) {
5520     ## ISSUE: Algorithm name in the spec was incorrect so that not linked to the definition.
5521     if ($token->{attributes}->{'http-equiv'}->{value}
5522     =~ /\A[^;]*;[\x09-\x0D\x20]*charset[\x09-\x0D\x20]*=
5523     [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
5524     ([^"'\x09-\x0D\x20][^\x09-\x0D\x20]*))/x) {
5525     $charset = defined $1 ? $1 : defined $2 ? $2 : $3;
5526     } ## TODO: And if supported
5527     }
5528     ## TODO: Change the encoding
5529     }
5530 wakaba 1.43
5531 wakaba 1.54 $token = $self->_get_next_token;
5532 wakaba 1.55 redo B;
5533 wakaba 1.54 } elsif ($token->{tag_name} eq 'title') {
5534     $self->{parse_error}-> (type => 'in body:title');
5535     ## NOTE: This is an "as if in head" code clone
5536     $parse_rcdata->(RCDATA_CONTENT_MODEL, sub {
5537     if (defined $self->{head_element}) {
5538     $self->{head_element}->append_child ($_[0]);
5539 wakaba 1.1 } else {
5540 wakaba 1.54 $insert->($_[0]);
5541 wakaba 1.1 }
5542 wakaba 1.54 });
5543 wakaba 1.55 redo B;
5544 wakaba 1.54 } elsif ($token->{tag_name} eq 'body') {
5545     $self->{parse_error}-> (type => 'in body:body');
5546 wakaba 1.1
5547 wakaba 1.54 if (@{$self->{open_elements}} == 1 or
5548     $self->{open_elements}->[1]->[1] ne 'body') {
5549     ## Ignore the token
5550     } else {
5551     my $body_el = $self->{open_elements}->[1]->[0];
5552     for my $attr_name (keys %{$token->{attributes}}) {
5553     unless ($body_el->has_attribute_ns (undef, $attr_name)) {
5554     $body_el->set_attribute_ns
5555     (undef, [undef, $attr_name],
5556     $token->{attributes}->{$attr_name}->{value});
5557 wakaba 1.1 }
5558 wakaba 1.54 }
5559     }
5560     $token = $self->_get_next_token;
5561 wakaba 1.55 redo B;
5562 wakaba 1.54 } elsif ({
5563     address => 1, blockquote => 1, center => 1, dir => 1,
5564     div => 1, dl => 1, fieldset => 1, listing => 1,
5565     menu => 1, ol => 1, p => 1, ul => 1,
5566     pre => 1,
5567     }->{$token->{tag_name}}) {
5568     ## has a p element in scope
5569     INSCOPE: for (reverse @{$self->{open_elements}}) {
5570     if ($_->[1] eq 'p') {
5571     unshift @{$self->{token}}, $token;
5572 wakaba 1.57 $token = {type => END_TAG_TOKEN, tag_name => 'p'};
5573 wakaba 1.55 redo B;
5574 wakaba 1.54 } elsif ({
5575     table => 1, caption => 1, td => 1, th => 1,
5576     button => 1, marquee => 1, object => 1, html => 1,
5577     }->{$_->[1]}) {
5578     last INSCOPE;
5579     }
5580     } # INSCOPE
5581    
5582    
5583 wakaba 1.48 {
5584     my $el;
5585    
5586     $el = $self->{document}->create_element_ns
5587 wakaba 1.54 (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
5588 wakaba 1.48
5589 wakaba 1.54 for my $attr_name (keys %{ $token->{attributes}}) {
5590     $el->set_attribute_ns (undef, [undef, $attr_name],
5591     $token->{attributes} ->{$attr_name}->{value});
5592     }
5593    
5594     $insert->($el);
5595     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
5596 wakaba 1.48 }
5597    
5598 wakaba 1.54 if ($token->{tag_name} eq 'pre') {
5599     $token = $self->_get_next_token;
5600 wakaba 1.57 if ($token->{type} == CHARACTER_TOKEN) {
5601 wakaba 1.54 $token->{data} =~ s/^\x0A//;
5602     unless (length $token->{data}) {
5603     $token = $self->_get_next_token;
5604     }
5605     }
5606     } else {
5607     $token = $self->_get_next_token;
5608     }
5609 wakaba 1.55 redo B;
5610 wakaba 1.54 } elsif ($token->{tag_name} eq 'form') {
5611     if (defined $self->{form_element}) {
5612     $self->{parse_error}-> (type => 'in form:form');
5613     ## Ignore the token
5614     $token = $self->_get_next_token;
5615 wakaba 1.55 redo B;
5616 wakaba 1.54 } else {
5617     ## has a p element in scope
5618     INSCOPE: for (reverse @{$self->{open_elements}}) {
5619     if ($_->[1] eq 'p') {
5620     unshift @{$self->{token}}, $token;
5621 wakaba 1.57 $token = {type => END_TAG_TOKEN, tag_name => 'p'};
5622 wakaba 1.55 redo B;
5623 wakaba 1.54 } elsif ({
5624     table => 1, caption => 1, td => 1, th => 1,
5625     button => 1, marquee => 1, object => 1, html => 1,
5626     }->{$_->[1]}) {
5627     last INSCOPE;
5628     }
5629     } # INSCOPE
5630    
5631    
5632 wakaba 1.1 {
5633     my $el;
5634    
5635     $el = $self->{document}->create_element_ns
5636     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
5637    
5638     for my $attr_name (keys %{ $token->{attributes}}) {
5639     $el->set_attribute_ns (undef, [undef, $attr_name],
5640     $token->{attributes} ->{$attr_name}->{value});
5641     }
5642    
5643 wakaba 1.54 $insert->($el);
5644 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
5645 wakaba 1.1 }
5646    
5647 wakaba 1.54 $self->{form_element} = $self->{open_elements}->[-1]->[0];
5648     $token = $self->_get_next_token;
5649 wakaba 1.55 redo B;
5650 wakaba 1.54 }
5651     } elsif ($token->{tag_name} eq 'li') {
5652     ## has a p element in scope
5653     INSCOPE: for (reverse @{$self->{open_elements}}) {
5654     if ($_->[1] eq 'p') {
5655     unshift @{$self->{token}}, $token;
5656 wakaba 1.57 $token = {type => END_TAG_TOKEN, tag_name => 'p'};
5657 wakaba 1.55 redo B;
5658 wakaba 1.54 } elsif ({
5659     table => 1, caption => 1, td => 1, th => 1,
5660     button => 1, marquee => 1, object => 1, html => 1,
5661     }->{$_->[1]}) {
5662     last INSCOPE;
5663     }
5664     } # INSCOPE
5665    
5666     ## Step 1
5667     my $i = -1;
5668     my $node = $self->{open_elements}->[$i];
5669     LI: {
5670     ## Step 2
5671     if ($node->[1] eq 'li') {
5672     if ($i != -1) {
5673     $self->{parse_error}-> (type => 'end tag missing:'.
5674     $self->{open_elements}->[-1]->[1]);
5675     }
5676     splice @{$self->{open_elements}}, $i;
5677     last LI;
5678     }
5679    
5680     ## Step 3
5681     if (not $formatting_category->{$node->[1]} and
5682     #not $phrasing_category->{$node->[1]} and
5683     ($special_category->{$node->[1]} or
5684     $scoping_category->{$node->[1]}) and
5685     $node->[1] ne 'address' and $node->[1] ne 'div') {
5686     last LI;
5687     }
5688    
5689     ## Step 4
5690     $i--;
5691     $node = $self->{open_elements}->[$i];
5692     redo LI;
5693     } # LI
5694    
5695    
5696 wakaba 1.48 {
5697     my $el;
5698    
5699     $el = $self->{document}->create_element_ns
5700 wakaba 1.54 (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
5701 wakaba 1.48
5702 wakaba 1.54 for my $attr_name (keys %{ $token->{attributes}}) {
5703     $el->set_attribute_ns (undef, [undef, $attr_name],
5704     $token->{attributes} ->{$attr_name}->{value});
5705     }
5706    
5707     $insert->($el);
5708     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
5709 wakaba 1.48 }
5710    
5711 wakaba 1.54 $token = $self->_get_next_token;
5712 wakaba 1.55 redo B;
5713 wakaba 1.54 } elsif ($token->{tag_name} eq 'dd' or $token->{tag_name} eq 'dt') {
5714     ## has a p element in scope
5715     INSCOPE: for (reverse @{$self->{open_elements}}) {
5716     if ($_->[1] eq 'p') {
5717     unshift @{$self->{token}}, $token;
5718 wakaba 1.57 $token = {type => END_TAG_TOKEN, tag_name => 'p'};
5719 wakaba 1.55 redo B;
5720 wakaba 1.54 } elsif ({
5721     table => 1, caption => 1, td => 1, th => 1,
5722     button => 1, marquee => 1, object => 1, html => 1,
5723     }->{$_->[1]}) {
5724     last INSCOPE;
5725     }
5726     } # INSCOPE
5727    
5728     ## Step 1
5729     my $i = -1;
5730     my $node = $self->{open_elements}->[$i];
5731     LI: {
5732     ## Step 2
5733     if ($node->[1] eq 'dt' or $node->[1] eq 'dd') {
5734     if ($i != -1) {
5735     $self->{parse_error}-> (type => 'end tag missing:'.
5736     $self->{open_elements}->[-1]->[1]);
5737     }
5738     splice @{$self->{open_elements}}, $i;
5739     last LI;
5740     }
5741    
5742     ## Step 3
5743     if (not $formatting_category->{$node->[1]} and
5744     #not $phrasing_category->{$node->[1]} and
5745     ($special_category->{$node->[1]} or
5746     $scoping_category->{$node->[1]}) and
5747     $node->[1] ne 'address' and $node->[1] ne 'div') {
5748     last LI;
5749     }
5750    
5751     ## Step 4
5752     $i--;
5753     $node = $self->{open_elements}->[$i];
5754     redo LI;
5755     } # LI
5756    
5757    
5758 wakaba 1.49 {
5759     my $el;
5760    
5761     $el = $self->{document}->create_element_ns
5762     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
5763    
5764     for my $attr_name (keys %{ $token->{attributes}}) {
5765     $el->set_attribute_ns (undef, [undef, $attr_name],
5766     $token->{attributes} ->{$attr_name}->{value});
5767     }
5768    
5769 wakaba 1.54 $insert->($el);
5770 wakaba 1.49 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
5771     }
5772    
5773 wakaba 1.54 $token = $self->_get_next_token;
5774 wakaba 1.55 redo B;
5775 wakaba 1.54 } elsif ($token->{tag_name} eq 'plaintext') {
5776     ## has a p element in scope
5777     INSCOPE: for (reverse @{$self->{open_elements}}) {
5778     if ($_->[1] eq 'p') {
5779     unshift @{$self->{token}}, $token;
5780 wakaba 1.57 $token = {type => END_TAG_TOKEN, tag_name => 'p'};
5781 wakaba 1.55 redo B;
5782 wakaba 1.54 } elsif ({
5783     table => 1, caption => 1, td => 1, th => 1,
5784     button => 1, marquee => 1, object => 1, html => 1,
5785     }->{$_->[1]}) {
5786     last INSCOPE;
5787     }
5788     } # INSCOPE
5789    
5790    
5791 wakaba 1.1 {
5792     my $el;
5793    
5794     $el = $self->{document}->create_element_ns
5795 wakaba 1.54 (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
5796 wakaba 1.1
5797 wakaba 1.54 for my $attr_name (keys %{ $token->{attributes}}) {
5798     $el->set_attribute_ns (undef, [undef, $attr_name],
5799     $token->{attributes} ->{$attr_name}->{value});
5800     }
5801    
5802     $insert->($el);
5803     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
5804 wakaba 1.1 }
5805    
5806 wakaba 1.54
5807     $self->{content_model} = PLAINTEXT_CONTENT_MODEL;
5808    
5809     $token = $self->_get_next_token;
5810 wakaba 1.55 redo B;
5811 wakaba 1.54 } elsif ({
5812     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
5813     }->{$token->{tag_name}}) {
5814     ## has a p element in scope
5815     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5816     my $node = $self->{open_elements}->[$_];
5817     if ($node->[1] eq 'p') {
5818     unshift @{$self->{token}}, $token;
5819 wakaba 1.57 $token = {type => END_TAG_TOKEN, tag_name => 'p'};
5820 wakaba 1.55 redo B;
5821 wakaba 1.54 } elsif ({
5822     table => 1, caption => 1, td => 1, th => 1,
5823     button => 1, marquee => 1, object => 1, html => 1,
5824     }->{$node->[1]}) {
5825     last INSCOPE;
5826     }
5827     } # INSCOPE
5828    
5829     ## NOTE: See <http://html5.org/tools/web-apps-tracker?from=925&to=926>
5830     ## has an element in scope
5831     #my $i;
5832     #INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5833     # my $node = $self->{open_elements}->[$_];
5834     # if ({
5835     # h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
5836     # }->{$node->[1]}) {
5837     # $i = $_;
5838     # last INSCOPE;
5839     # } elsif ({
5840     # table => 1, caption => 1, td => 1, th => 1,
5841     # button => 1, marquee => 1, object => 1, html => 1,
5842     # }->{$node->[1]}) {
5843     # last INSCOPE;
5844     # }
5845     #} # INSCOPE
5846     #
5847     #if (defined $i) {
5848     # !!! parse-error (type => 'in hn:hn');
5849     # splice @{$self->{open_elements}}, $i;
5850     #}
5851    
5852    
5853 wakaba 1.48 {
5854     my $el;
5855    
5856     $el = $self->{document}->create_element_ns
5857     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
5858    
5859     for my $attr_name (keys %{ $token->{attributes}}) {
5860     $el->set_attribute_ns (undef, [undef, $attr_name],
5861     $token->{attributes} ->{$attr_name}->{value});
5862     }
5863    
5864 wakaba 1.54 $insert->($el);
5865 wakaba 1.48 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
5866     }
5867    
5868 wakaba 1.54
5869     $token = $self->_get_next_token;
5870 wakaba 1.55 redo B;
5871 wakaba 1.54 } elsif ($token->{tag_name} eq 'a') {
5872     AFE: for my $i (reverse 0..$#$active_formatting_elements) {
5873     my $node = $active_formatting_elements->[$i];
5874     if ($node->[1] eq 'a') {
5875     $self->{parse_error}-> (type => 'in a:a');
5876    
5877     unshift @{$self->{token}}, $token;
5878 wakaba 1.57 $token = {type => END_TAG_TOKEN, tag_name => 'a'};
5879 wakaba 1.54 $formatting_end_tag->($token->{tag_name});
5880    
5881     AFE2: for (reverse 0..$#$active_formatting_elements) {
5882     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
5883     splice @$active_formatting_elements, $_, 1;
5884     last AFE2;
5885 wakaba 1.49 }
5886 wakaba 1.54 } # AFE2
5887     OE: for (reverse 0..$#{$self->{open_elements}}) {
5888     if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
5889     splice @{$self->{open_elements}}, $_, 1;
5890     last OE;
5891 wakaba 1.49 }
5892 wakaba 1.54 } # OE
5893     last AFE;
5894     } elsif ($node->[0] eq '#marker') {
5895     last AFE;
5896     }
5897     } # AFE
5898    
5899     $reconstruct_active_formatting_elements->($insert_to_current);
5900 wakaba 1.49
5901 wakaba 1.54
5902     {
5903     my $el;
5904    
5905     $el = $self->{document}->create_element_ns
5906     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
5907    
5908     for my $attr_name (keys %{ $token->{attributes}}) {
5909     $el->set_attribute_ns (undef, [undef, $attr_name],
5910     $token->{attributes} ->{$attr_name}->{value});
5911     }
5912    
5913     $insert->($el);
5914     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
5915     }
5916    
5917     push @$active_formatting_elements, $self->{open_elements}->[-1];
5918 wakaba 1.48
5919 wakaba 1.54 $token = $self->_get_next_token;
5920 wakaba 1.55 redo B;
5921 wakaba 1.54 } elsif ({ ## Start tags handled as formatting elements
5922     b => 1, big => 1, em => 1, font => 1, i => 1,
5923     s => 1, small => 1, strike => 1,
5924     strong => 1, tt => 1, u => 1,
5925     }->{$token->{tag_name}}) {
5926     $reconstruct_active_formatting_elements->($insert_to_current);
5927     ## Create the element from the token, copy its attributes, insert it,
5928     ## and push it onto the stack of open elements.
5929     {
5930     my $el;
5931    
5932     $el = $self->{document}->create_element_ns
5933     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
5934    
5935     for my $attr_name (keys %{ $token->{attributes}}) {
5936     $el->set_attribute_ns (undef, [undef, $attr_name],
5937     $token->{attributes} ->{$attr_name}->{value});
5938     }
5939    
5940     $insert->($el);
5941     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
5942     }
5943     ## Also record the new element in $active_formatting_elements.
5944     push @$active_formatting_elements, $self->{open_elements}->[-1];
5945    
5946     $token = $self->_get_next_token;
5947 wakaba 1.55 redo B;
5948 wakaba 1.54 } elsif ($token->{tag_name} eq 'nobr') {
5949     $reconstruct_active_formatting_elements->($insert_to_current);
5950 wakaba 1.1
5951 wakaba 1.54 ## has a |nobr| element in scope
5952     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5953     my $node = $self->{open_elements}->[$_];
5954     if ($node->[1] eq 'nobr') {
5955 wakaba 1.60 $self->{parse_error}-> (type => 'in nobr:nobr');
5956 wakaba 1.54 unshift @{$self->{token}}, $token;
5957 wakaba 1.57 $token = {type => END_TAG_TOKEN, tag_name => 'nobr'};
5958 wakaba 1.55 redo B;
5959 wakaba 1.54 } elsif ({
5960     table => 1, caption => 1, td => 1, th => 1,
5961     button => 1, marquee => 1, object => 1, html => 1,
5962     }->{$node->[1]}) {
5963     last INSCOPE;
5964     }
5965     } # INSCOPE
5966    
5967    
5968     {
5969     my $el;
5970    
5971     $el = $self->{document}->create_element_ns
5972     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
5973    
5974     for my $attr_name (keys %{ $token->{attributes}}) {
5975     $el->set_attribute_ns (undef, [undef, $attr_name],
5976     $token->{attributes} ->{$attr_name}->{value});
5977     }
5978    
5979     $insert->($el);
5980     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
5981     }
5982    
5983     push @$active_formatting_elements, $self->{open_elements}->[-1];
5984    
5985     $token = $self->_get_next_token;
5986 wakaba 1.55 redo B;
5987 wakaba 1.54 } elsif ($token->{tag_name} eq 'button') {
5988     ## has a button element in scope
5989     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5990     my $node = $self->{open_elements}->[$_];
5991     if ($node->[1] eq 'button') {
5992     $self->{parse_error}-> (type => 'in button:button');
5993     unshift @{$self->{token}}, $token;
5994 wakaba 1.57 $token = {type => END_TAG_TOKEN, tag_name => 'button'};
5995 wakaba 1.55 redo B;
5996 wakaba 1.54 } elsif ({
5997     table => 1, caption => 1, td => 1, th => 1,
5998     button => 1, marquee => 1, object => 1, html => 1,
5999     }->{$node->[1]}) {
6000     last INSCOPE;
6001 wakaba 1.1 }
6002 wakaba 1.54 } # INSCOPE
6003    
6004     $reconstruct_active_formatting_elements->($insert_to_current);
6005    
6006    
6007     {
6008     my $el;
6009    
6010     $el = $self->{document}->create_element_ns
6011     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
6012    
6013     for my $attr_name (keys %{ $token->{attributes}}) {
6014     $el->set_attribute_ns (undef, [undef, $attr_name],
6015     $token->{attributes} ->{$attr_name}->{value});
6016     }
6017    
6018     $insert->($el);
6019     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
6020     }
6021    
6022     push @$active_formatting_elements, ['#marker', ''];
6023 wakaba 1.1
6024 wakaba 1.54 $token = $self->_get_next_token;
6025 wakaba 1.55 redo B;
6026 wakaba 1.54 } elsif ($token->{tag_name} eq 'marquee' or
6027     $token->{tag_name} eq 'object') {
6028     $reconstruct_active_formatting_elements->($insert_to_current);
6029    
6030    
6031 wakaba 1.1 {
6032     my $el;
6033    
6034     $el = $self->{document}->create_element_ns
6035     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
6036    
6037     for my $attr_name (keys %{ $token->{attributes}}) {
6038     $el->set_attribute_ns (undef, [undef, $attr_name],
6039     $token->{attributes} ->{$attr_name}->{value});
6040     }
6041    
6042 wakaba 1.54 $insert->($el);
6043 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
6044 wakaba 1.1 }
6045    
6046 wakaba 1.54 push @$active_formatting_elements, ['#marker', ''];
6047    
6048     $token = $self->_get_next_token;
6049 wakaba 1.55 redo B;
6050 wakaba 1.54 } elsif ($token->{tag_name} eq 'xmp') {
6051     $reconstruct_active_formatting_elements->($insert_to_current);
6052     $parse_rcdata->(CDATA_CONTENT_MODEL, $insert);
6053 wakaba 1.55 redo B;
6054 wakaba 1.54 } elsif ($token->{tag_name} eq 'table') {
6055     ## has a p element in scope
6056     INSCOPE: for (reverse @{$self->{open_elements}}) {
6057     if ($_->[1] eq 'p') {
6058     unshift @{$self->{token}}, $token;
6059 wakaba 1.57 $token = {type => END_TAG_TOKEN, tag_name => 'p'};
6060 wakaba 1.55 redo B;
6061 wakaba 1.54 } elsif ({
6062     table => 1, caption => 1, td => 1, th => 1,
6063     button => 1, marquee => 1, object => 1, html => 1,
6064     }->{$_->[1]}) {
6065     last INSCOPE;
6066 wakaba 1.1 }
6067 wakaba 1.54 } # INSCOPE
6068    
6069    
6070     {
6071     my $el;
6072    
6073     $el = $self->{document}->create_element_ns
6074     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
6075    
6076     for my $attr_name (keys %{ $token->{attributes}}) {
6077     $el->set_attribute_ns (undef, [undef, $attr_name],
6078     $token->{attributes} ->{$attr_name}->{value});
6079     }
6080    
6081     $insert->($el);
6082     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
6083     }
6084    
6085    
6086 wakaba 1.56 $self->{insertion_mode} = IN_TABLE_IM;
6087 wakaba 1.54
6088     $token = $self->_get_next_token;
6089 wakaba 1.55 redo B;
6090 wakaba 1.54 } elsif ({
6091     area => 1, basefont => 1, bgsound => 1, br => 1,
6092     embed => 1, img => 1, param => 1, spacer => 1, wbr => 1,
6093     image => 1,
6094     }->{$token->{tag_name}}) {
6095     if ($token->{tag_name} eq 'image') {
6096     $self->{parse_error}-> (type => 'image');
6097     $token->{tag_name} = 'img';
6098     }
6099 wakaba 1.1
6100 wakaba 1.54 ## NOTE: There is an "as if <br>" code clone.
6101     $reconstruct_active_formatting_elements->($insert_to_current);
6102    
6103    
6104     {
6105     my $el;
6106    
6107     $el = $self->{document}->create_element_ns
6108     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
6109    
6110     for my $attr_name (keys %{ $token->{attributes}}) {
6111     $el->set_attribute_ns (undef, [undef, $attr_name],
6112     $token->{attributes} ->{$attr_name}->{value});
6113     }
6114    
6115     $insert->($el);
6116     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
6117     }
6118    
6119     pop @{$self->{open_elements}};
6120    
6121     $token = $self->_get_next_token;
6122 wakaba 1.55 redo B;
6123 wakaba 1.54 } elsif ($token->{tag_name} eq 'hr') {
6124     ## has a p element in scope
6125     INSCOPE: for (reverse @{$self->{open_elements}}) {
6126     if ($_->[1] eq 'p') {
6127     unshift @{$self->{token}}, $token;
6128 wakaba 1.57 $token = {type => END_TAG_TOKEN, tag_name => 'p'};
6129 wakaba 1.55 redo B;
6130 wakaba 1.54 } elsif ({
6131     table => 1, caption => 1, td => 1, th => 1,
6132     button => 1, marquee => 1, object => 1, html => 1,
6133     }->{$_->[1]}) {
6134     last INSCOPE;
6135 wakaba 1.1 }
6136 wakaba 1.54 } # INSCOPE
6137    
6138    
6139 wakaba 1.1 {
6140     my $el;
6141    
6142     $el = $self->{document}->create_element_ns
6143     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
6144    
6145     for my $attr_name (keys %{ $token->{attributes}}) {
6146     $el->set_attribute_ns (undef, [undef, $attr_name],
6147     $token->{attributes} ->{$attr_name}->{value});
6148     }
6149    
6150 wakaba 1.54 $insert->($el);
6151 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
6152 wakaba 1.1 }
6153    
6154 wakaba 1.54 pop @{$self->{open_elements}};
6155    
6156     $token = $self->_get_next_token;
6157 wakaba 1.55 redo B;
6158 wakaba 1.54 } elsif ($token->{tag_name} eq 'input') {
6159     $reconstruct_active_formatting_elements->($insert_to_current);
6160    
6161    
6162 wakaba 1.1 {
6163     my $el;
6164    
6165     $el = $self->{document}->create_element_ns
6166     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
6167    
6168     for my $attr_name (keys %{ $token->{attributes}}) {
6169     $el->set_attribute_ns (undef, [undef, $attr_name],
6170     $token->{attributes} ->{$attr_name}->{value});
6171     }
6172    
6173 wakaba 1.54 $insert->($el);
6174 wakaba 1.3 push @{$self->{open_elements}}, [$el, $token->{tag_name}];
6175 wakaba 1.1 }
6176    
6177 wakaba 1.54 ## TODO: associate with $self->{form_element} if defined
6178     pop @{$self->{open_elements}};
6179    
6180     $token = $self->_get_next_token;
6181 wakaba 1.55 redo B;
6182 wakaba 1.54 } elsif ($token->{tag_name} eq 'isindex') {
6183     $self->{parse_error}-> (type => 'isindex');
6184    
6185     if (defined $self->{form_element}) {
6186     ## Ignore the token
6187     $token = $self->_get_next_token;
6188 wakaba 1.55 redo B;
6189 wakaba 1.54 } else {
6190     my $at = $token->{attributes};
6191     my $form_attrs;
6192     $form_attrs->{action} = $at->{action} if $at->{action};
6193     my $prompt_attr = $at->{prompt};
6194     $at->{name} = {name => 'name', value => 'isindex'};
6195     delete $at->{action};
6196     delete $at->{prompt};
6197     my @tokens = (
6198 wakaba 1.57 {type => START_TAG_TOKEN, tag_name => 'form',
6199 wakaba 1.54 attributes => $form_attrs},
6200 wakaba 1.57 {type => START_TAG_TOKEN, tag_name => 'hr'},
6201     {type => START_TAG_TOKEN, tag_name => 'p'},
6202     {type => START_TAG_TOKEN, tag_name => 'label'},
6203 wakaba 1.54 );
6204     if ($prompt_attr) {
6205 wakaba 1.57 push @tokens, {type => CHARACTER_TOKEN, data => $prompt_attr->{value}};
6206 wakaba 1.1 } else {
6207 wakaba 1.57 push @tokens, {type => CHARACTER_TOKEN,
6208 wakaba 1.54 data => 'This is a searchable index. Insert your search keywords here: '}; # SHOULD
6209     ## TODO: make this configurable
6210 wakaba 1.1 }
6211 wakaba 1.54 push @tokens,
6212 wakaba 1.57 {type => START_TAG_TOKEN, tag_name => 'input', attributes => $at},
6213     #{type => CHARACTER_TOKEN, data => ''}, # SHOULD
6214     {type => END_TAG_TOKEN, tag_name => 'label'},
6215     {type => END_TAG_TOKEN, tag_name => 'p'},
6216     {type => START_TAG_TOKEN, tag_name => 'hr'},
6217     {type => END_TAG_TOKEN, tag_name => 'form'};
6218 wakaba 1.54 $token = shift @tokens;
6219     unshift @{$self->{token}}, (@tokens);
6220 wakaba 1.55 redo B;
6221 wakaba 1.54 }
6222     } elsif ($token->{tag_name} eq 'textarea') {
6223     my $tag_name = $token->{tag_name};
6224     my $el;
6225    
6226     $el = $self->{document}->create_element_ns
6227     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
6228    
6229     for my $attr_name (keys %{ $token->{attributes}}) {
6230     $el->set_attribute_ns (undef, [undef, $attr_name],
6231     $token->{attributes} ->{$attr_name}->{value});
6232     }
6233    
6234    
6235     ## TODO: $self->{form_element} if defined
6236     $self->{content_model} = RCDATA_CONTENT_MODEL;
6237     delete $self->{escape}; # MUST
6238    
6239     $insert->($el);
6240    
6241     my $text = '';
6242     $token = $self->_get_next_token;
6243 wakaba 1.57 if ($token->{type} == CHARACTER_TOKEN) {
6244 wakaba 1.54 $token->{data} =~ s/^\x0A//;
6245 wakaba 1.53 unless (length $token->{data}) {
6246     $token = $self->_get_next_token;
6247     }
6248     }
6249 wakaba 1.57 while ($token->{type} == CHARACTER_TOKEN) {
6250 wakaba 1.54 $text .= $token->{data};
6251     $token = $self->_get_next_token;
6252     }
6253     if (length $text) {
6254     $el->manakai_append_text ($text);
6255     }
6256    
6257     $self->{content_model} = PCDATA_CONTENT_MODEL;
6258    
6259 wakaba 1.57 if ($token->{type} == END_TAG_TOKEN and
6260 wakaba 1.54 $token->{tag_name} eq $tag_name) {
6261     ## Ignore the token
6262     } else {
6263     $self->{parse_error}-> (type => 'in RCDATA:#'.$token->{type});
6264     }
6265     $token = $self->_get_next_token;
6266 wakaba 1.55 redo B;
6267 wakaba 1.54 } elsif ({
6268     iframe => 1,
6269     noembed => 1,
6270     noframes => 1,
6271     noscript => 0, ## TODO: 1 if scripting is enabled
6272     }->{$token->{tag_name}}) {
6273 wakaba 1.60 ## NOTE: There is an "as if in body" code clone.
6274 wakaba 1.54 $parse_rcdata->(CDATA_CONTENT_MODEL, $insert);
6275 wakaba 1.55 redo B;
6276 wakaba 1.54 } elsif ($token->{tag_name} eq 'select') {
6277     $reconstruct_active_formatting_elements->($insert_to_current);
6278    
6279    
6280     {
6281     my $el;
6282    
6283     $el = $self->{document}->create_element_ns
6284     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
6285    
6286     for my $attr_name (keys %{ $token->{attributes}}) {
6287     $el->set_attribute_ns (undef, [undef, $attr_name],
6288     $token->{attributes} ->{$attr_name}->{value});
6289     }
6290    
6291     $insert->($el);
6292     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
6293     }
6294    
6295    
6296 wakaba 1.56 $self->{insertion_mode} = IN_SELECT_IM;
6297 wakaba 1.54 $token = $self->_get_next_token;
6298 wakaba 1.55 redo B;
6299 wakaba 1.54 } elsif ({
6300     caption => 1, col => 1, colgroup => 1, frame => 1,
6301     frameset => 1, head => 1, option => 1, optgroup => 1,
6302     tbody => 1, td => 1, tfoot => 1, th => 1,
6303     thead => 1, tr => 1,
6304     }->{$token->{tag_name}}) {
6305     $self->{parse_error}-> (type => 'in body:'.$token->{tag_name});
6306     ## Ignore the token
6307     $token = $self->_get_next_token;
6308 wakaba 1.55 redo B;
6309 wakaba 1.54
6310     ## ISSUE: An issue on HTML5 new elements in the spec.
6311     } else {
6312     $reconstruct_active_formatting_elements->($insert_to_current);
6313 wakaba 1.53
6314 wakaba 1.54
6315     {
6316     my $el;
6317    
6318     $el = $self->{document}->create_element_ns
6319     (q<http://www.w3.org/1999/xhtml>, [undef, $token->{tag_name}]);
6320    
6321     for my $attr_name (keys %{ $token->{attributes}}) {
6322     $el->set_attribute_ns (undef, [undef, $attr_name],
6323     $token->{attributes} ->{$attr_name}->{value});
6324 wakaba 1.53 }
6325 wakaba 1.54
6326     $insert->($el);
6327     push @{$self->{open_elements}}, [$el, $token->{tag_name}];
6328     }
6329    
6330 wakaba 1.53
6331 wakaba 1.54 $token = $self->_get_next_token;
6332 wakaba 1.55 redo B;
6333 wakaba 1.54 }
6334 wakaba 1.57 } elsif ($token->{type} == END_TAG_TOKEN) {
6335 wakaba 1.54 if ($token->{tag_name} eq 'body') {
6336     if (@{$self->{open_elements}} > 1 and
6337     $self->{open_elements}->[1]->[1] eq 'body') {
6338     for (@{$self->{open_elements}}) {
6339     unless ({
6340     dd => 1, dt => 1, li => 1, p => 1, td => 1,
6341     th => 1, tr => 1, body => 1, html => 1,
6342     tbody => 1, tfoot => 1, thead => 1,
6343     }->{$_->[1]}) {
6344     $self->{parse_error}-> (type => 'not closed:'.$_->[1]);
6345     }
6346     }
6347 wakaba 1.53
6348 wakaba 1.56 $self->{insertion_mode} = AFTER_BODY_IM;
6349 wakaba 1.54 $token = $self->_get_next_token;
6350 wakaba 1.55 redo B;
6351 wakaba 1.54 } else {
6352     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
6353     ## Ignore the token
6354     $token = $self->_get_next_token;
6355 wakaba 1.55 redo B;
6356 wakaba 1.53 }
6357 wakaba 1.54 } elsif ($token->{tag_name} eq 'html') {
6358     if (@{$self->{open_elements}} > 1 and $self->{open_elements}->[1]->[1] eq 'body') {
6359     ## ISSUE: There is an issue in the spec.
6360     if ($self->{open_elements}->[-1]->[1] ne 'body') {
6361     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[1]->[1]);
6362     }
6363 wakaba 1.56 $self->{insertion_mode} = AFTER_BODY_IM;
6364 wakaba 1.54 ## reprocess
6365 wakaba 1.55 redo B;
6366 wakaba 1.54 } else {
6367     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
6368     ## Ignore the token
6369     $token = $self->_get_next_token;
6370 wakaba 1.55 redo B;
6371 wakaba 1.53 }
6372 wakaba 1.54 } elsif ({
6373     address => 1, blockquote => 1, center => 1, dir => 1,
6374     div => 1, dl => 1, fieldset => 1, listing => 1,
6375     menu => 1, ol => 1, pre => 1, ul => 1,
6376     p => 1,
6377     dd => 1, dt => 1, li => 1,
6378     button => 1, marquee => 1, object => 1,
6379     }->{$token->{tag_name}}) {
6380     ## has an element in scope
6381     my $i;
6382     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6383     my $node = $self->{open_elements}->[$_];
6384     if ($node->[1] eq $token->{tag_name}) {
6385     ## generate implied end tags
6386     if ({
6387     dd => ($token->{tag_name} ne 'dd'),
6388     dt => ($token->{tag_name} ne 'dt'),
6389     li => ($token->{tag_name} ne 'li'),
6390     p => ($token->{tag_name} ne 'p'),
6391     td => 1, th => 1, tr => 1,
6392     tbody => 1, tfoot=> 1, thead => 1,
6393     }->{$self->{open_elements}->[-1]->[1]}) {
6394     unshift @{$self->{token}}, $token;
6395 wakaba 1.57 $token = {type => END_TAG_TOKEN,
6396 wakaba 1.54 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
6397 wakaba 1.55 redo B;
6398 wakaba 1.54 }
6399     $i = $_;
6400     last INSCOPE unless $token->{tag_name} eq 'p';
6401     } elsif ({
6402     table => 1, caption => 1, td => 1, th => 1,
6403     button => 1, marquee => 1, object => 1, html => 1,
6404     }->{$node->[1]}) {
6405     last INSCOPE;
6406     }
6407     } # INSCOPE
6408    
6409     if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
6410     if (defined $i) {
6411     $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
6412 wakaba 1.1 } else {
6413 wakaba 1.54 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
6414 wakaba 1.1 }
6415 wakaba 1.53 }
6416 wakaba 1.54
6417     if (defined $i) {
6418     splice @{$self->{open_elements}}, $i;
6419     } elsif ($token->{tag_name} eq 'p') {
6420     ## As if <p>, then reprocess the current token
6421     my $el;
6422 wakaba 1.53
6423 wakaba 1.54 $el = $self->{document}->create_element_ns
6424     (q<http://www.w3.org/1999/xhtml>, [undef, 'p']);
6425    
6426     $insert->($el);
6427     }
6428     $clear_up_to_marker->()
6429     if {
6430     button => 1, marquee => 1, object => 1,
6431     }->{$token->{tag_name}};
6432     $token = $self->_get_next_token;
6433 wakaba 1.55 redo B;
6434 wakaba 1.54 } elsif ($token->{tag_name} eq 'form') {
6435     ## has an element in scope
6436     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6437     my $node = $self->{open_elements}->[$_];
6438     if ($node->[1] eq $token->{tag_name}) {
6439     ## generate implied end tags
6440     if ({
6441     dd => 1, dt => 1, li => 1, p => 1,
6442     td => 1, th => 1, tr => 1,
6443     tbody => 1, tfoot=> 1, thead => 1,
6444     }->{$self->{open_elements}->[-1]->[1]}) {
6445     unshift @{$self->{token}}, $token;
6446 wakaba 1.57 $token = {type => END_TAG_TOKEN,
6447 wakaba 1.54 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
6448 wakaba 1.55 redo B;
6449 wakaba 1.54 }
6450     last INSCOPE;
6451     } elsif ({
6452     table => 1, caption => 1, td => 1, th => 1,
6453     button => 1, marquee => 1, object => 1, html => 1,
6454     }->{$node->[1]}) {
6455     last INSCOPE;
6456 wakaba 1.36 }
6457 wakaba 1.54 } # INSCOPE
6458    
6459     if ($self->{open_elements}->[-1]->[1] eq $token->{tag_name}) {
6460     pop @{$self->{open_elements}};
6461     } else {
6462 wakaba 1.60 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
6463 wakaba 1.36 }
6464    
6465 wakaba 1.54 undef $self->{form_element};
6466     $token = $self->_get_next_token;
6467 wakaba 1.55 redo B;
6468 wakaba 1.54 } elsif ({
6469     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
6470     }->{$token->{tag_name}}) {
6471     ## has an element in scope
6472     my $i;
6473     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6474     my $node = $self->{open_elements}->[$_];
6475     if ({
6476     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
6477     }->{$node->[1]}) {
6478     ## generate implied end tags
6479     if ({
6480     dd => 1, dt => 1, li => 1, p => 1,
6481     td => 1, th => 1, tr => 1,
6482     tbody => 1, tfoot=> 1, thead => 1,
6483     }->{$self->{open_elements}->[-1]->[1]}) {
6484     unshift @{$self->{token}}, $token;
6485 wakaba 1.57 $token = {type => END_TAG_TOKEN,
6486 wakaba 1.54 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
6487 wakaba 1.55 redo B;
6488 wakaba 1.54 }
6489     $i = $_;
6490     last INSCOPE;
6491     } elsif ({
6492     table => 1, caption => 1, td => 1, th => 1,
6493     button => 1, marquee => 1, object => 1, html => 1,
6494     }->{$node->[1]}) {
6495     last INSCOPE;
6496 wakaba 1.53 }
6497 wakaba 1.54 } # INSCOPE
6498    
6499     if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
6500 wakaba 1.60 $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
6501 wakaba 1.53 }
6502    
6503 wakaba 1.54 splice @{$self->{open_elements}}, $i if defined $i;
6504     $token = $self->_get_next_token;
6505 wakaba 1.55 redo B;
6506 wakaba 1.54 } elsif ({
6507     a => 1,
6508     b => 1, big => 1, em => 1, font => 1, i => 1,
6509     nobr => 1, s => 1, small => 1, strile => 1,
6510     strong => 1, tt => 1, u => 1,
6511     }->{$token->{tag_name}}) {
6512     $formatting_end_tag->($token->{tag_name});
6513 wakaba 1.55 redo B;
6514 wakaba 1.54 } elsif ($token->{tag_name} eq 'br') {
6515     $self->{parse_error}-> (type => 'unmatched end tag:br');
6516 wakaba 1.53
6517 wakaba 1.54 ## As if <br>
6518     $reconstruct_active_formatting_elements->($insert_to_current);
6519    
6520     my $el;
6521    
6522 wakaba 1.1 $el = $self->{document}->create_element_ns
6523 wakaba 1.54 (q<http://www.w3.org/1999/xhtml>, [undef, 'br']);
6524 wakaba 1.1
6525 wakaba 1.54 $insert->($el);
6526    
6527     ## Ignore the token.
6528     $token = $self->_get_next_token;
6529 wakaba 1.55 redo B;
6530 wakaba 1.54 } elsif ({
6531     caption => 1, col => 1, colgroup => 1, frame => 1,
6532     frameset => 1, head => 1, option => 1, optgroup => 1,
6533     tbody => 1, td => 1, tfoot => 1, th => 1,
6534     thead => 1, tr => 1,
6535     area => 1, basefont => 1, bgsound => 1,
6536     embed => 1, hr => 1, iframe => 1, image => 1,
6537     img => 1, input => 1, isindex => 1, noembed => 1,
6538     noframes => 1, param => 1, select => 1, spacer => 1,
6539     table => 1, textarea => 1, wbr => 1,
6540     noscript => 0, ## TODO: if scripting is enabled
6541     }->{$token->{tag_name}}) {
6542     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
6543     ## Ignore the token
6544     $token = $self->_get_next_token;
6545 wakaba 1.55 redo B;
6546 wakaba 1.54
6547     ## ISSUE: Issue on HTML5 new elements in spec
6548    
6549     } else {
6550     ## Step 1
6551     my $node_i = -1;
6552     my $node = $self->{open_elements}->[$node_i];
6553    
6554     ## Step 2
6555     S2: {
6556     if ($node->[1] eq $token->{tag_name}) {
6557     ## Step 1
6558     ## generate implied end tags
6559     if ({
6560     dd => 1, dt => 1, li => 1, p => 1,
6561     td => 1, th => 1, tr => 1,
6562 wakaba 1.57 tbody => 1, tfoot => 1, thead => 1,
6563 wakaba 1.54 }->{$self->{open_elements}->[-1]->[1]}) {
6564     unshift @{$self->{token}}, $token;
6565 wakaba 1.57 $token = {type => END_TAG_TOKEN,
6566 wakaba 1.54 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
6567 wakaba 1.55 redo B;
6568 wakaba 1.54 }
6569    
6570     ## Step 2
6571     if ($token->{tag_name} ne $self->{open_elements}->[-1]->[1]) {
6572 wakaba 1.60 ## NOTE: <x><y></x>
6573 wakaba 1.54 $self->{parse_error}-> (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
6574     }
6575    
6576     ## Step 3
6577     splice @{$self->{open_elements}}, $node_i;
6578 wakaba 1.53
6579 wakaba 1.36 $token = $self->_get_next_token;
6580 wakaba 1.54 last S2;
6581 wakaba 1.1 } else {
6582 wakaba 1.54 ## Step 3
6583     if (not $formatting_category->{$node->[1]} and
6584     #not $phrasing_category->{$node->[1]} and
6585     ($special_category->{$node->[1]} or
6586     $scoping_category->{$node->[1]})) {
6587     $self->{parse_error}-> (type => 'unmatched end tag:'.$token->{tag_name});
6588     ## Ignore the token
6589     $token = $self->_get_next_token;
6590     last S2;
6591     }
6592 wakaba 1.1 }
6593 wakaba 1.54
6594     ## Step 4
6595     $node_i--;
6596     $node = $self->{open_elements}->[$node_i];
6597    
6598     ## Step 5;
6599     redo S2;
6600     } # S2
6601 wakaba 1.55 redo B;
6602 wakaba 1.1 }
6603     }
6604 wakaba 1.54 redo B;
6605 wakaba 1.1 } # B
6606    
6607 wakaba 1.53 ## NOTE: The "trailing end" phase in HTML5 is split into
6608     ## two insertion modes: "after html body" and "after html frameset".
6609     ## NOTE: States in the main stage is preserved while
6610     ## the parser stays in the trailing end phase. # MUST
6611    
6612 wakaba 1.1 ## Stop parsing # MUST
6613    
6614     ## TODO: script stuffs
6615 wakaba 1.3 } # _tree_construct_main
6616    
## set_inner_html ($class, $node, $string, $onerror)
##
## Implementation of the |innerHTML| setter.
##
##   $node    - A Document (node type 9) or Element (node type 1) node
##              whose children are replaced.
##   $string  - The markup to parse.  It is accessed through a
##              reference to the caller's argument and is never copied.
##   $onerror - Optional code reference that receives parse errors as
##              a key/value list (|type|, |line|, |column|).  When it
##              is false, errors are reported by |warn|.
##
## For a Document, every child is removed and the string is handed to
## |parse_string|.  For an Element, the string is parsed by a fresh
## parser instance into a scratch document; the resulting nodes are
## then adopted into the owner document of |$node| and appended to it.
## Any other node type raises an exception.
sub set_inner_html ($$$) {
  my ($class, $node) = (shift, shift);
  my $s = \$_[0];
  my $onerror = $_[1];

  my $nt = $node->node_type;

  if ($nt == 9) { # Document
    # MUST

    ## Step 1 # MUST
    ## TODO: If the document has an active parser, ...
    ## ISSUE: There is an issue in the spec.

    ## Step 2 # MUST
    ## NOTE: The child list is copied first, then emptied.
    my @old_children = @{$node->child_nodes};
    for my $old_child (@old_children) {
      $node->remove_child ($old_child);
    }

    ## Step 3, 4, 5 # MUST
    return $class->parse_string ($$s => $node, $onerror);
  }

  die "$0: |set_inner_html| is not defined for node of type $nt"
      unless $nt == 1;

  ## Element
  ## TODO: If non-html element

  ## NOTE: Most of this code is copied from |parse_string|

  ## Step 1 # MUST
  my $this_doc = $node->owner_document;
  my $scratch_doc = $this_doc->implementation->create_document;
  $scratch_doc->manakai_is_html (1);
  my $parser = $class->new;
  $parser->{document} = $scratch_doc;

  ## Step 9 # MUST
  my $pos = 0;
  my $line = 1;
  my $column = 0;
  $parser->{set_next_input_character} = sub {
    my $self = shift;

    ## Shift the history of previously read characters by one.
    pop @{$self->{prev_input_character}};
    unshift @{$self->{prev_input_character}}, $self->{next_input_character};

    ## End of input is represented by -1.
    if ($pos >= length $$s) {
      $self->{next_input_character} = -1;
      return;
    }

    my $c = $self->{next_input_character} = ord substr $$s, $pos++, 1;
    $column++;

    if ($c == 0x000A) { # LF
      $line++;
      $column = 0;
    } elsif ($c == 0x000D) { # CR
      ## CR and CR LF are both reported as a single LF.
      $pos++ if substr ($$s, $pos, 1) eq "\x0A";
      $self->{next_input_character} = 0x000A; # LF # MUST
      $line++;
      $column = 0;
    } elsif ($c > 0x10FFFF) {
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    } elsif ($c == 0x0000) { # NULL
      $self->{parse_error}-> (type => 'NULL');
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    }
  };
  $parser->{prev_input_character} = [-1, -1, -1];
  $parser->{next_input_character} = -1;

  ## Errors are reported together with the current input position.
  my $report = $onerror || sub {
    my (%opt) = @_;
    warn "Parse error ($opt{type}) at line $opt{line} column $opt{column}\n";
  };
  $parser->{parse_error} = sub {
    $report->(@_, line => $line, column => $column);
  };

  $parser->_initialize_tokenizer;
  $parser->_initialize_tree_constructor;

  ## Step 2
  ## The initial content model depends on the context element.
  my $node_ln = $node->local_name;
  my %content_model_of = (
    title => RCDATA_CONTENT_MODEL,
    textarea => RCDATA_CONTENT_MODEL,
    style => CDATA_CONTENT_MODEL,
    script => CDATA_CONTENT_MODEL,
    xmp => CDATA_CONTENT_MODEL,
    iframe => CDATA_CONTENT_MODEL,
    noembed => CDATA_CONTENT_MODEL,
    noframes => CDATA_CONTENT_MODEL,
    noscript => CDATA_CONTENT_MODEL,
    plaintext => PLAINTEXT_CONTENT_MODEL,
  );
  my $initial_model = $content_model_of{$node_ln};
  $parser->{content_model}
      = defined $initial_model ? $initial_model : PCDATA_CONTENT_MODEL;
  ## ISSUE: What is "the name of the element"? local name?

  $parser->{inner_html_node} = [$node, $node_ln];

  ## Step 4
  my $root = $scratch_doc->create_element_ns
      ('http://www.w3.org/1999/xhtml', [undef, 'html']);

  ## Step 5 # MUST
  $scratch_doc->append_child ($root);

  ## Step 6 # MUST
  push @{$parser->{open_elements}}, [$root, 'html'];

  undef $parser->{head_element};

  ## Step 7 # MUST
  $parser->_reset_insertion_mode;

  ## Step 8 # MUST
  ## The nearest XHTML |form| element among the context node and its
  ## ancestors, if any, becomes the form element pointer.
  ANCESTOR: for (my $anc = $node; defined $anc; $anc = $anc->parent_node) {
    next ANCESTOR unless $anc->node_type == 1;
    my $nsuri = $anc->namespace_uri;
    next ANCESTOR
        unless defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml';
    next ANCESTOR unless $anc->local_name eq 'form'; ## TODO: case?
    $parser->{form_element} = $anc;
    last ANCESTOR;
  } # ANCESTOR

  ## Step 3 # MUST
  ## Step 10 # MUST
  ## NOTE: |$token| is not declared in this subroutine; it is the
  ## variable of the enclosing scope.
  $token = $parser->_get_next_token;
  $parser->_tree_construction_main;

  ## Step 11 # MUST
  my @old_children = @{$node->child_nodes};
  for my $old_child (@old_children) {
    $node->remove_child ($old_child);
  }
  ## ISSUE: mutation events? read-only?

  ## Step 12 # MUST
  my @new_children = @{$root->child_nodes};
  for my $new_child (@new_children) {
    $this_doc->adopt_node ($new_child);
    $node->append_child ($new_child);
  }
  ## ISSUE: mutation events?

  return $parser->_terminate_tree_constructor;
} # set_inner_html
6772    
6773     } # tree construction stage
6774 wakaba 1.1
## get_inner_html ($class, $node, $on_error)
##
## Implementation of the |innerHTML| getter: serializes the children
## of |$node| (not |$node| itself) as an HTML fragment.
##
##   $class    - Ignored.
##   $node     - The node whose children are serialized.
##   $on_error - Optional code reference.  It is invoked with each
##               descendant node whose node type is not handled here.
##
## Returns a reference to the serialized string.
sub get_inner_html ($$$) {
  my (undef, $node, $on_error) = @_;

  ## Elements whose text content is emitted literally, without
  ## escaping.  One table is used for both the ancestor scan and the
  ## per-element check so that the two cannot disagree.
  my $raw_text_element = {
    style => 1, script => 1, xmp => 1, iframe => 1,
    noembed => 1, noframes => 1, noscript => 1,
    plaintext => 1,
  };

  ## Step 1
  my $s = '';

  ## Text is emitted literally when |$node| itself or one of its
  ## ancestors is an XHTML raw text element.
  my $in_cdata;
  my $parent = $node;
  ANCESTOR: while (defined $parent) {
    if ($parent->node_type == 1) {
      ## NOTE: The namespace URI is undefined for null-namespace
      ## elements, so it has to be tested before it is compared.
      my $nsuri = $parent->namespace_uri;
      if (defined $nsuri and
          $nsuri eq 'http://www.w3.org/1999/xhtml' and
          $raw_text_element->{$parent->local_name}) { ## TODO: case thingy
        $in_cdata = 1;
        last ANCESTOR;
      }
    }
    $parent = $parent->parent_node;
  } # ANCESTOR

  ## Step 2
  ## |@node| is a work queue in document order.  Besides node objects
  ## it holds two kinds of plain strings: end tags to be appended
  ## as is, and the marker |cdata-out|, which ends literal text mode.
  my @node = @{$node->child_nodes};
  C: while (@node) {
    my $child = shift @node;
    unless (ref $child) {
      if ($child eq 'cdata-out') {
        $in_cdata = 0;
      } else {
        $s .= $child; # end tag
      }
      next C;
    }

    my $nt = $child->node_type;
    if ($nt == 1) { # Element
      my $tag_name = $child->tag_name; ## TODO: manakai_tag_name
      $s .= '<' . $tag_name;
      ## NOTE: Non-HTML case:
      ## <http://permalink.gmane.org/gmane.org.w3c.whatwg.discuss/11191>

      my @attrs = @{$child->attributes}; # sort order MUST be stable
      for my $attr (@attrs) { # order is implementation dependent
        my $attr_name = $attr->name; ## TODO: manakai_name
        $s .= ' ' . $attr_name . '="';
        my $attr_value = $attr->value;
        ## escape
        $attr_value =~ s/&/&amp;/g;
        $attr_value =~ s/</&lt;/g;
        $attr_value =~ s/>/&gt;/g;
        $attr_value =~ s/"/&quot;/g;
        $s .= $attr_value . '"';
      }
      $s .= '>';

      ## These elements get neither content nor an end tag.
      next C if {
        area => 1, base => 1, basefont => 1, bgsound => 1,
        br => 1, col => 1, embed => 1, frame => 1, hr => 1,
        img => 1, input => 1, link => 1, meta => 1, param => 1,
        spacer => 1, wbr => 1,
      }->{$tag_name};

      $s .= "\x0A" if $tag_name eq 'pre' or $tag_name eq 'textarea';

      ## Enter literal text mode for the content of this element.  The
      ## marker is queued first so that it ends up after the end tag.
      if (not $in_cdata and $raw_text_element->{$tag_name}) {
        unshift @node, 'cdata-out';
        $in_cdata = 1;
      }

      unshift @node, @{$child->child_nodes}, '</' . $tag_name . '>';
    } elsif ($nt == 3 or $nt == 4) { # Text or CDATASection
      if ($in_cdata) {
        $s .= $child->data;
      } else {
        my $value = $child->data;
        $value =~ s/&/&amp;/g;
        $value =~ s/</&lt;/g;
        $value =~ s/>/&gt;/g;
        $value =~ s/"/&quot;/g;
        $s .= $value;
      }
    } elsif ($nt == 8) { # Comment
      $s .= '<!--' . $child->data . '-->';
    } elsif ($nt == 10) { # DocumentType
      $s .= '<!DOCTYPE ' . $child->name . '>';
    } elsif ($nt == 5) { # entrefs
      ## The children are put at the front of the queue, such that
      ## they are serialized where the entity reference occurs.
      ## Appending them to the end would emit them after all following
      ## siblings and after the end tags of the enclosing elements.
      unshift @node, @{$child->child_nodes};
    } else {
      $on_error->($child) if defined $on_error;
    }
    ## ISSUE: This code does not support PIs.
  } # C

  ## Step 3
  return \$s;
} # get_inner_html
6874    
6875     1;
6876 wakaba 1.61 # $Date: 2007/10/14 09:21:46 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24