/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.40 - (hide annotations) (download)
Sat Feb 23 10:35:00 2008 UTC (17 years, 5 months ago) by wakaba
Branch: MAIN
Changes since 1.39: +1147 -1569 lines
++ whatpm/t/ChangeLog	23 Feb 2008 10:34:52 -0000
2008-02-23  Wakaba  <wakaba@suika.fam.cx>

	* content-model-1.dat, content-model-2.dat: Wrong "significant"
	errors are removed.

++ whatpm/Whatpm/ChangeLog	23 Feb 2008 10:33:46 -0000
2008-02-23  Wakaba  <wakaba@suika.fam.cx>

	* ContentChecker.pm (check_element): The way to traverse
	the tree is entirely revised to make it easier to track
	the state of ancestors/descendants.  As a result of this
	revision (which rewrites almost all of Whatpm::ContentChecker::HTML),
	support for content model checking for HTML elements |figure|,
	|object|, |video|, and |audio| and checking for XML elements (and
	some XMLNS checkings) are dropped for now.  They will be
	reimplemented in due cource.

++ whatpm/Whatpm/ContentChecker/ChangeLog	23 Feb 2008 10:34:01 -0000
2008-02-23  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm: Revised.

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
6    
7 wakaba 1.29 ## December 2007 HTML5 Classification
8    
9     my $HTMLMetadataContent = {
10     $HTML_NS => {
11     title => 1, base => 1, link => 1, style => 1, script => 1, noscript => 1,
12     'event-source' => 1, command => 1, datatemplate => 1,
13     ## NOTE: A |meta| with no |name| element is not allowed as
14     ## a metadata content other than |head| element.
15     meta => 1,
16     },
17     ## NOTE: RDF is mentioned in the HTML5 spec.
18     ## TODO: Other RDF elements?
19     q<http://www.w3.org/1999/02/22-rdf-syntax-ns#> => {RDF => 1},
20     };
21    
22     my $HTMLProseContent = {
23     $HTML_NS => {
24     section => 1, nav => 1, article => 1, blockquote => 1, aside => 1,
25     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1, header => 1,
26     footer => 1, address => 1, p => 1, hr => 1, dialog => 1, pre => 1,
27     ol => 1, ul => 1, dl => 1, figure => 1, map => 1, table => 1,
28     details => 1, ## ISSUE: "Prose element" in spec.
29     datagrid => 1, ## ISSUE: "Prose element" in spec.
30     datatemplate => 1,
31     div => 1, ## ISSUE: No category in spec.
32     ## NOTE: |style| is only allowed if |scoped| attribute is specified.
33     ## Additionally, it must be before any other element or
34     ## non-inter-element-whitespace text node.
35     style => 1,
36    
37 wakaba 1.38 br => 1, q => 1, cite => 1, em => 1, strong => 1, small => 1, mark => 1,
38 wakaba 1.29 dfn => 1, abbr => 1, time => 1, progress => 1, meter => 1, code => 1,
39     var => 1, samp => 1, kbd => 1, sub => 1, sup => 1, span => 1, i => 1,
40     b => 1, bdo => 1, script => 1, noscript => 1, 'event-source' => 1,
41     command => 1, font => 1,
42     a => 1,
43     datagrid => 1, ## ISSUE: "Interactive element" in the spec.
44     ## NOTE: |area| is allowed only as a descendant of |map|.
45     area => 1,
46    
47     ins => 1, del => 1,
48    
49     ## NOTE: If there is a |menu| ancestor, phrasing. Otherwise, prose.
50     menu => 1,
51    
52     img => 1, iframe => 1, embed => 1, object => 1, video => 1, audio => 1,
53     canvas => 1,
54     },
55    
56     ## NOTE: Embedded
57     q<http://www.w3.org/1998/Math/MathML> => {math => 1},
58     q<http://www.w3.org/2000/svg> => {svg => 1},
59     };
60    
61     my $HTMLSectioningContent = {
62     $HTML_NS => {
63     section => 1, nav => 1, article => 1, blockquote => 1, aside => 1,
64     ## NOTE: |body| is only allowed in |html| element.
65     body => 1,
66     },
67     };
68    
69     my $HTMLHeadingContent = {
70     $HTML_NS => {
71     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1, header => 1,
72     },
73     };
74    
75     my $HTMLPhrasingContent = {
76     ## NOTE: All phrasing content is also prose content.
77     $HTML_NS => {
78 wakaba 1.38 br => 1, q => 1, cite => 1, em => 1, strong => 1, small => 1, mark => 1,
79 wakaba 1.29 dfn => 1, abbr => 1, time => 1, progress => 1, meter => 1, code => 1,
80     var => 1, samp => 1, kbd => 1, sub => 1, sup => 1, span => 1, i => 1,
81     b => 1, bdo => 1, script => 1, noscript => 1, 'event-source' => 1,
82     command => 1, font => 1,
83     a => 1,
84     datagrid => 1, ## ISSUE: "Interactive element" in the spec.
85     ## NOTE: |area| is allowed only as a descendant of |map|.
86     area => 1,
87    
88     ## NOTE: Transparent.
89     ins => 1, del => 1,
90    
91     ## NOTE: If there is a |menu| ancestor, phrasing. Otherwise, prose.
92     menu => 1,
93    
94     img => 1, iframe => 1, embed => 1, object => 1, video => 1, audio => 1,
95     canvas => 1,
96     },
97    
98     ## NOTE: Embedded
99     q<http://www.w3.org/1998/Math/MathML> => {math => 1},
100     q<http://www.w3.org/2000/svg> => {svg => 1},
101    
102     ## NOTE: And non-inter-element-whitespace text nodes.
103     };
104    
105 wakaba 1.40 ## $HTMLEmbeddedContent: See Whatpm::ContentChecker.
106 wakaba 1.29
107     my $HTMLInteractiveContent = {
108     $HTML_NS => {
109     a => 1,
110 wakaba 1.36 datagrid => 1, ## ISSUE: Categorized as "Inetractive element"
111 wakaba 1.29 },
112     };
113    
114 wakaba 1.36 ## NOTE: $HTMLTransparentElements: See Whatpm::ContentChecker.
115     ## NOTE: Semi-transparent elements: See Whatpm::ContentChecker.
116    
117     ## -- Common attribute syntacx checkers
118    
119 wakaba 1.1 our $AttrChecker;
120    
121     my $GetHTMLEnumeratedAttrChecker = sub {
122     my $states = shift; # {value => conforming ? 1 : -1}
123     return sub {
124     my ($self, $attr) = @_;
125     my $value = lc $attr->value; ## TODO: ASCII case insensitibility?
126     if ($states->{$value} > 0) {
127     #
128     } elsif ($states->{$value}) {
129     $self->{onerror}->(node => $attr, type => 'enumerated:non-conforming');
130     } else {
131     $self->{onerror}->(node => $attr, type => 'enumerated:invalid');
132     }
133     };
134     }; # $GetHTMLEnumeratedAttrChecker
135    
136     my $GetHTMLBooleanAttrChecker = sub {
137     my $local_name = shift;
138     return sub {
139     my ($self, $attr) = @_;
140     my $value = $attr->value;
141     unless ($value eq $local_name or $value eq '') {
142     $self->{onerror}->(node => $attr, type => 'boolean:invalid');
143     }
144     };
145     }; # $GetHTMLBooleanAttrChecker
146    
147 wakaba 1.8 ## Unordered set of space-separated tokens
148 wakaba 1.18 my $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker = sub {
149 wakaba 1.8 my ($self, $attr) = @_;
150     my %word;
151     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
152     unless ($word{$word}) {
153     $word{$word} = 1;
154     } else {
155     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
156     }
157     }
158 wakaba 1.18 }; # $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker
159 wakaba 1.8
160 wakaba 1.1 ## |rel| attribute (unordered set of space separated tokens,
161     ## whose allowed values are defined by the section on link types)
162     my $HTMLLinkTypesAttrChecker = sub {
163 wakaba 1.4 my ($a_or_area, $todo, $self, $attr) = @_;
164 wakaba 1.1 my %word;
165     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
166     unless ($word{$word}) {
167     $word{$word} = 1;
168 wakaba 1.18 } elsif ($word eq 'up') {
169     #
170 wakaba 1.1 } else {
171     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
172     }
173     }
174     ## NOTE: Case sensitive match (since HTML5 spec does not say link
175     ## types are case-insensitive and it says "The value should not
176     ## be confusingly similar to any other defined value (e.g.
177     ## differing only in case).").
178     ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
179     ## "MAY"s and "only ... MAY" restrict non-standard non-registered
180     ## values to be used conformingly.
181     require Whatpm::_LinkTypeList;
182     our $LinkType;
183     for my $word (keys %word) {
184     my $def = $LinkType->{$word};
185     if (defined $def) {
186     if ($def->{status} eq 'accepted') {
187     if (defined $def->{effect}->[$a_or_area]) {
188     #
189     } else {
190     $self->{onerror}->(node => $attr,
191     type => 'link type:bad context:'.$word);
192     }
193     } elsif ($def->{status} eq 'proposal') {
194     $self->{onerror}->(node => $attr, level => 's',
195     type => 'link type:proposed:'.$word);
196 wakaba 1.20 if (defined $def->{effect}->[$a_or_area]) {
197     #
198     } else {
199     $self->{onerror}->(node => $attr,
200     type => 'link type:bad context:'.$word);
201     }
202 wakaba 1.1 } else { # rejected or synonym
203     $self->{onerror}->(node => $attr,
204     type => 'link type:non-conforming:'.$word);
205     }
206 wakaba 1.4 if (defined $def->{effect}->[$a_or_area]) {
207     if ($word eq 'alternate') {
208     #
209     } elsif ($def->{effect}->[$a_or_area] eq 'hyperlink') {
210     $todo->{has_hyperlink_link_type} = 1;
211     }
212     }
213 wakaba 1.1 if ($def->{unique}) {
214     unless ($self->{has_link_type}->{$word}) {
215     $self->{has_link_type}->{$word} = 1;
216     } else {
217     $self->{onerror}->(node => $attr,
218     type => 'link type:duplicate:'.$word);
219     }
220     }
221     } else {
222     $self->{onerror}->(node => $attr, level => 'unsupported',
223     type => 'link type:'.$word);
224     }
225     }
226 wakaba 1.4 $todo->{has_hyperlink_link_type} = 1
227     if $word{alternate} and not $word{stylesheet};
228 wakaba 1.1 ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
229     ## says that using both X-Pingback: header field and HTML
230     ## <link rel=pingback> is deprecated and if both appears they
231     ## SHOULD contain exactly the same value.
232     ## ISSUE: Pingback 1.0 specification defines the exact representation
233     ## of its link element, which cannot be tested by the current arch.
234     ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
235     ## include any string that matches to the pattern for the rel=pingback link,
236     ## which again inpossible to test.
237     ## ISSUE: rel=pingback href MUST NOT include entities other than predefined 4.
238 wakaba 1.12
239     ## NOTE: <link rel="up index"><link rel="up up index"> is not an error.
240 wakaba 1.17 ## NOTE: We can't check "If the page is part of multiple hierarchies,
241     ## then they SHOULD be described in different paragraphs.".
242 wakaba 1.1 }; # $HTMLLinkTypesAttrChecker
243 wakaba 1.20
244     ## TODO: "When an author uses a new type not defined by either this specification or the Wiki page, conformance checkers should offer to add the value to the Wiki, with the details described above, with the "proposal" status."
245 wakaba 1.1
246     ## URI (or IRI)
247     my $HTMLURIAttrChecker = sub {
248     my ($self, $attr) = @_;
249     ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
250     my $value = $attr->value;
251     Whatpm::URIChecker->check_iri_reference ($value, sub {
252     my %opt = @_;
253     $self->{onerror}->(node => $attr, level => $opt{level},
254     type => 'URI::'.$opt{type}.
255     (defined $opt{position} ? ':'.$opt{position} : ''));
256     });
257 wakaba 1.17 $self->{has_uri_attr} = 1; ## TODO: <html manifest>
258 wakaba 1.1 }; # $HTMLURIAttrChecker
259    
260     ## A space separated list of one or more URIs (or IRIs)
261     my $HTMLSpaceURIsAttrChecker = sub {
262     my ($self, $attr) = @_;
263     my $i = 0;
264     for my $value (split /[\x09-\x0D\x20]+/, $attr->value) {
265     Whatpm::URIChecker->check_iri_reference ($value, sub {
266     my %opt = @_;
267     $self->{onerror}->(node => $attr, level => $opt{level},
268 wakaba 1.2 type => 'URIs:'.':'.
269     $opt{type}.':'.$i.
270 wakaba 1.1 (defined $opt{position} ? ':'.$opt{position} : ''));
271     });
272     $i++;
273     }
274     ## ISSUE: Relative references?
275     ## ISSUE: Leading or trailing white spaces are conformant?
276     ## ISSUE: A sequence of white space characters are conformant?
277     ## ISSUE: A zero-length string is conformant? (It does contain a relative reference, i.e. same as base URI.)
278     ## NOTE: Duplication seems not an error.
279 wakaba 1.4 $self->{has_uri_attr} = 1;
280 wakaba 1.1 }; # $HTMLSpaceURIsAttrChecker
281    
282     my $HTMLDatetimeAttrChecker = sub {
283     my ($self, $attr) = @_;
284     my $value = $attr->value;
285     ## ISSUE: "space", not "space character" (in parsing algorihtm, "space character")
286     if ($value =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})(?>[\x09-\x0D\x20]+(?>T[\x09-\x0D\x20]*)?|T[\x09-\x0D\x20]*)([0-9]{2}):([0-9]{2})(?>:([0-9]{2}))?(?>\.([0-9]+))?[\x09-\x0D\x20]*(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
287     my ($y, $M, $d, $h, $m, $s, $f, $zh, $zm)
288     = ($1, $2, $3, $4, $5, $6, $7, $8, $9);
289     if (0 < $M and $M < 13) { ## ISSUE: This is not explicitly specified (though in parsing algorithm)
290     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
291     if $d < 1 or
292     $d > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$M];
293     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
294     if $M == 2 and $d == 29 and
295     not ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
296     } else {
297     $self->{onerror}->(node => $attr, type => 'datetime:bad month');
298     }
299     $self->{onerror}->(node => $attr, type => 'datetime:bad hour') if $h > 23;
300     $self->{onerror}->(node => $attr, type => 'datetime:bad minute') if $m > 59;
301     $self->{onerror}->(node => $attr, type => 'datetime:bad second')
302     if defined $s and $s > 59;
303     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone hour')
304     if $zh > 23;
305     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone minute')
306     if $zm > 59;
307     ## ISSUE: Maybe timezone -00:00 should have same semantics as in RFC 3339.
308     } else {
309     $self->{onerror}->(node => $attr, type => 'datetime:syntax error');
310     }
311     }; # $HTMLDatetimeAttrChecker
312    
313     my $HTMLIntegerAttrChecker = sub {
314     my ($self, $attr) = @_;
315     my $value = $attr->value;
316     unless ($value =~ /\A-?[0-9]+\z/) {
317     $self->{onerror}->(node => $attr, type => 'integer:syntax error');
318     }
319     }; # $HTMLIntegerAttrChecker
320    
321     my $GetHTMLNonNegativeIntegerAttrChecker = sub {
322     my $range_check = shift;
323     return sub {
324     my ($self, $attr) = @_;
325     my $value = $attr->value;
326     if ($value =~ /\A[0-9]+\z/) {
327     unless ($range_check->($value + 0)) {
328     $self->{onerror}->(node => $attr, type => 'nninteger:out of range');
329     }
330     } else {
331     $self->{onerror}->(node => $attr,
332     type => 'nninteger:syntax error');
333     }
334     };
335     }; # $GetHTMLNonNegativeIntegerAttrChecker
336    
337     my $GetHTMLFloatingPointNumberAttrChecker = sub {
338     my $range_check = shift;
339     return sub {
340     my ($self, $attr) = @_;
341     my $value = $attr->value;
342     if ($value =~ /\A-?[0-9.]+\z/ and $value =~ /[0-9]/) {
343     unless ($range_check->($value + 0)) {
344     $self->{onerror}->(node => $attr, type => 'float:out of range');
345     }
346     } else {
347     $self->{onerror}->(node => $attr,
348     type => 'float:syntax error');
349     }
350     };
351     }; # $GetHTMLFloatingPointNumberAttrChecker
352    
353     ## "A valid MIME type, optionally with parameters. [RFC 2046]"
354     ## ISSUE: RFC 2046 does not define syntax of media types.
355     ## ISSUE: The definition of "a valid MIME type" is unknown.
356     ## Syntactical correctness?
357     my $HTMLIMTAttrChecker = sub {
358     my ($self, $attr) = @_;
359     my $value = $attr->value;
360     ## ISSUE: RFC 2045 Content-Type header field allows insertion
361     ## of LWS/comments between tokens. Is it allowed in HTML? Maybe no.
362     ## ISSUE: RFC 2231 extension? Maybe no.
363     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
364     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
365     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
366     if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
367     my @type = ($1, $2);
368     my $param = $3;
369     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
370     if (defined $2) {
371     push @type, $1 => $2;
372     } else {
373     my $n = $1;
374     my $v = $2;
375     $v =~ s/\\(.)/$1/gs;
376     push @type, $n => $v;
377     }
378     }
379     require Whatpm::IMTChecker;
380     Whatpm::IMTChecker->check_imt (sub {
381     my %opt = @_;
382     $self->{onerror}->(node => $attr, level => $opt{level},
383     type => 'IMT:'.$opt{type});
384     }, @type);
385     } else {
386     $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
387     }
388     }; # $HTMLIMTAttrChecker
389    
390     my $HTMLLanguageTagAttrChecker = sub {
391 wakaba 1.7 ## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm.
392    
393 wakaba 1.1 my ($self, $attr) = @_;
394 wakaba 1.6 my $value = $attr->value;
395     require Whatpm::LangTag;
396     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
397     my %opt = @_;
398     my $type = 'LangTag:'.$opt{type};
399     $type .= ':' . $opt{subtag} if defined $opt{subtag};
400     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
401     level => $opt{level});
402     });
403 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
404 wakaba 1.6
405     ## TODO: testdata
406 wakaba 1.1 }; # $HTMLLanguageTagAttrChecker
407    
408     ## "A valid media query [MQ]"
409     my $HTMLMQAttrChecker = sub {
410     my ($self, $attr) = @_;
411     $self->{onerror}->(node => $attr, level => 'unsupported',
412     type => 'media query');
413     ## ISSUE: What is "a valid media query"?
414     }; # $HTMLMQAttrChecker
415    
416     my $HTMLEventHandlerAttrChecker = sub {
417     my ($self, $attr) = @_;
418     $self->{onerror}->(node => $attr, level => 'unsupported',
419     type => 'event handler');
420     ## TODO: MUST contain valid ECMAScript code matching the
421     ## ECMAScript |FunctionBody| production. [ECMA262]
422     ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
423     ## ISSUE: Automatic semicolon insertion does not apply?
424     ## ISSUE: Other script languages?
425     }; # $HTMLEventHandlerAttrChecker
426    
427     my $HTMLUsemapAttrChecker = sub {
428     my ($self, $attr) = @_;
429     ## MUST be a valid hashed ID reference to a |map| element
430     my $value = $attr->value;
431     if ($value =~ s/^#//) {
432     ## ISSUE: Is |usemap="#"| conformant? (c.f. |id=""| is non-conformant.)
433     push @{$self->{usemap}}, [$value => $attr];
434     } else {
435     $self->{onerror}->(node => $attr, type => '#idref:syntax error');
436     }
437     ## NOTE: Space characters in hashed ID references are conforming.
438     ## ISSUE: UA algorithm for matching is case-insensitive; IDs only different in cases should be reported
439     }; # $HTMLUsemapAttrChecker
440    
441     my $HTMLTargetAttrChecker = sub {
442     my ($self, $attr) = @_;
443     my $value = $attr->value;
444     if ($value =~ /^_/) {
445     $value = lc $value; ## ISSUE: ASCII case-insentitive?
446     unless ({
447     _self => 1, _parent => 1, _top => 1,
448     }->{$value}) {
449     $self->{onerror}->(node => $attr,
450     type => 'reserved browsing context name');
451     }
452     } else {
453 wakaba 1.29 ## NOTE: An empty string is a valid browsing context name (same as _self).
454 wakaba 1.1 }
455     }; # $HTMLTargetAttrChecker
456    
457 wakaba 1.23 my $HTMLSelectorsAttrChecker = sub {
458     my ($self, $attr) = @_;
459    
460     ## ISSUE: Namespace resolution?
461    
462     my $value = $attr->value;
463    
464     require Whatpm::CSS::SelectorsParser;
465     my $p = Whatpm::CSS::SelectorsParser->new;
466     $p->{pseudo_class}->{$_} = 1 for qw/
467     active checked disabled empty enabled first-child first-of-type
468     focus hover indeterminate last-child last-of-type link only-child
469     only-of-type root target visited
470     lang nth-child nth-last-child nth-of-type nth-last-of-type not
471     -manakai-contains -manakai-current
472     /;
473    
474     $p->{pseudo_element}->{$_} = 1 for qw/
475     after before first-letter first-line
476     /;
477    
478     $p->{must_level} = $self->{must_level};
479     $p->{onerror} = sub {
480     my %opt = @_;
481     $opt{type} = 'selectors:'.$opt{type};
482     $self->{onerror}->(%opt, node => $attr);
483     };
484     $p->parse_string ($value);
485     }; # $HTMLSelectorsAttrChecker
486    
487 wakaba 1.1 my $HTMLAttrChecker = {
488     id => sub {
489     ## NOTE: |map| has its own variant of |id=""| checker
490     my ($self, $attr) = @_;
491     my $value = $attr->value;
492     if (length $value > 0) {
493     if ($self->{id}->{$value}) {
494     $self->{onerror}->(node => $attr, type => 'duplicate ID');
495     push @{$self->{id}->{$value}}, $attr;
496     } else {
497     $self->{id}->{$value} = [$attr];
498     }
499     if ($value =~ /[\x09-\x0D\x20]/) {
500     $self->{onerror}->(node => $attr, type => 'space in ID');
501     }
502     } else {
503     ## NOTE: MUST contain at least one character
504     $self->{onerror}->(node => $attr, type => 'empty attribute value');
505     }
506     },
507     title => sub {}, ## NOTE: No conformance creteria
508     lang => sub {
509     my ($self, $attr) = @_;
510 wakaba 1.6 my $value = $attr->value;
511     if ($value eq '') {
512     #
513     } else {
514     require Whatpm::LangTag;
515     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
516     my %opt = @_;
517     my $type = 'LangTag:'.$opt{type};
518     $type .= ':' . $opt{subtag} if defined $opt{subtag};
519     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
520     level => $opt{level});
521     });
522     }
523 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
524     unless ($attr->owner_document->manakai_is_html) {
525     $self->{onerror}->(node => $attr, type => 'in XML:lang');
526     }
527 wakaba 1.6
528     ## TODO: test data
529 wakaba 1.1 },
530     dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
531     class => sub {
532     my ($self, $attr) = @_;
533     my %word;
534     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
535     unless ($word{$word}) {
536     $word{$word} = 1;
537     push @{$self->{return}->{class}->{$word}||=[]}, $attr;
538     } else {
539     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
540     }
541     }
542     },
543     contextmenu => sub {
544     my ($self, $attr) = @_;
545     my $value = $attr->value;
546     push @{$self->{contextmenu}}, [$value => $attr];
547     ## ISSUE: "The value must be the ID of a menu element in the DOM."
548     ## What is "in the DOM"? A menu Element node that is not part
549     ## of the Document tree is in the DOM? A menu Element node that
550     ## belong to another Document tree is in the DOM?
551     },
552     irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
553 wakaba 1.8 tabindex => $HTMLIntegerAttrChecker
554     ## TODO: ref, template, registrationmark
555 wakaba 1.1 };
556    
557     for (qw/
558     onabort onbeforeunload onblur onchange onclick oncontextmenu
559     ondblclick ondrag ondragend ondragenter ondragleave ondragover
560     ondragstart ondrop onerror onfocus onkeydown onkeypress
561     onkeyup onload onmessage onmousedown onmousemove onmouseout
562     onmouseover onmouseup onmousewheel onresize onscroll onselect
563     onsubmit onunload
564     /) {
565     $HTMLAttrChecker->{$_} = $HTMLEventHandlerAttrChecker;
566     }
567    
568     my $GetHTMLAttrsChecker = sub {
569     my $element_specific_checker = shift;
570     return sub {
571 wakaba 1.40 my ($self, $item, $element_state) = @_;
572     for my $attr (@{$item->{node}->attributes}) {
573 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
574     $attr_ns = '' unless defined $attr_ns;
575     my $attr_ln = $attr->manakai_local_name;
576     my $checker;
577     if ($attr_ns eq '') {
578     $checker = $element_specific_checker->{$attr_ln}
579 wakaba 1.40 || $HTMLAttrChecker->{$attr_ln};
580 wakaba 1.1 }
581     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
582 wakaba 1.40 || $AttrChecker->{$attr_ns}->{''};
583 wakaba 1.1 if ($checker) {
584 wakaba 1.40 $checker->($self, $attr, $item);
585 wakaba 1.1 } else {
586     $self->{onerror}->(node => $attr, level => 'unsupported',
587     type => 'attribute');
588     ## ISSUE: No comformance createria for unknown attributes in the spec
589     }
590     }
591     };
592     }; # $GetHTMLAttrsChecker
593    
594 wakaba 1.40 my %HTMLChecker = (
595     %Whatpm::ContentChecker::AnyChecker,
596     check_attrs => $GetHTMLAttrsChecker->({}),
597     );
598    
599     my %HTMLEmptyChecker = (
600     %HTMLChecker,
601     check_child_element => sub {
602     my ($self, $item, $child_el, $child_nsuri, $child_ln,
603     $child_is_transparent, $element_state) = @_;
604     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
605     $self->{onerror}->(node => $child_el,
606     type => 'element not allowed:minus',
607     level => $self->{must_level});
608     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
609     #
610     } else {
611     $self->{onerror}->(node => $child_el,
612     type => 'element not allowed:empty',
613     level => $self->{must_level});
614     }
615     },
616     check_child_text => sub {
617     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
618     if ($has_significant) {
619     $self->{onerror}->(node => $child_node,
620     type => 'character not allowed:empty',
621     level => $self->{must_level});
622     }
623     },
624     );
625    
626     my %HTMLTextChecker = (
627     %HTMLChecker,
628     check_child_element => sub {
629     my ($self, $item, $child_el, $child_nsuri, $child_ln,
630     $child_is_transparent, $element_state) = @_;
631     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
632     $self->{onerror}->(node => $child_el,
633     type => 'element not allowed:minus',
634     level => $self->{must_level});
635     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
636     #
637     } else {
638     $self->{onerror}->(node => $child_el, type => 'element not allowed');
639     }
640     },
641     );
642    
643     my %HTMLProseContentChecker = (
644     %HTMLChecker,
645     check_child_element => sub {
646     my ($self, $item, $child_el, $child_nsuri, $child_ln,
647     $child_is_transparent, $element_state) = @_;
648     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
649     $self->{onerror}->(node => $child_el,
650     type => 'element not allowed:minus',
651     level => $self->{must_level});
652     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
653     #
654     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
655     if ($element_state->{has_non_style} or
656     not $child_el->has_attribute_ns (undef, 'scoped')) {
657     $self->{onerror}->(node => $child_el,
658     type => 'element not allowed:prose style',
659     level => $self->{must_level});
660     }
661     } elsif ($HTMLProseContent->{$child_nsuri}->{$child_ln}) {
662     $element_state->{has_non_style} = 1;
663     } else {
664     $element_state->{has_non_style} = 1;
665     $self->{onerror}->(node => $child_el,
666     type => 'element not allowed:prose',
667     level => $self->{must_level})
668     }
669     },
670     check_child_text => sub {
671     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
672     if ($has_significant) {
673     $element_state->{has_non_style} = 1;
674     }
675     },
676     check_end => sub {
677     my ($self, $item, $element_state) = @_;
678     if ($element_state->{has_significant}) {
679     $item->{parent_state}->{has_significant} = 1;
680     } elsif ($item->{transparent}) {
681     #
682     } else {
683     $self->{onerror}->(node => $item->{node},
684     level => $self->{should_level},
685     type => 'no significant content');
686     }
687     },
688     );
689    
690     my %HTMLPhrasingContentChecker = (
691     %HTMLChecker,
692     check_child_element => sub {
693     my ($self, $item, $child_el, $child_nsuri, $child_ln,
694     $child_is_transparent, $element_state) = @_;
695     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
696     $self->{onerror}->(node => $child_el,
697     type => 'element not allowed:minus',
698     level => $self->{must_level});
699     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
700     #
701     } elsif ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
702     #
703     } else {
704     $self->{onerror}->(node => $child_el,
705     type => 'element not allowed:phrasing',
706     level => $self->{must_level});
707     }
708     },
709     check_end => $HTMLProseContentChecker{check_end},
710     ## NOTE: The definition for |li| assumes that the only differences
711     ## between prose and phrasing content checkers are |check_child_element|
712     ## and |check_child_text|.
713     );
714    
715     my %HTMLTransparentChecker = %HTMLProseContentChecker;
716     ## ISSUE: Significant content rule should be applied to transparent element
717     ## with parent? Currently, applied to |video| but not to others.
718    
719 wakaba 1.1 our $Element;
720     our $ElementDefault;
721    
722     $Element->{$HTML_NS}->{''} = {
723 wakaba 1.40 %HTMLChecker,
724     check_start => $ElementDefault->{check_start},
725 wakaba 1.1 };
726    
727     $Element->{$HTML_NS}->{html} = {
728     is_root => 1,
729 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
730 wakaba 1.16 manifest => $HTMLURIAttrChecker,
731 wakaba 1.1 xmlns => sub {
732     my ($self, $attr) = @_;
733     my $value = $attr->value;
734     unless ($value eq $HTML_NS) {
735     $self->{onerror}->(node => $attr, type => 'invalid attribute value');
736     }
737     unless ($attr->owner_document->manakai_is_html) {
738     $self->{onerror}->(node => $attr, type => 'in XML:xmlns');
739     ## TODO: Test
740     }
741     },
742     }),
743 wakaba 1.40 check_start => sub {
744     my ($self, $item, $element_state) = @_;
745     $element_state->{phase} = 'before head';
746     },
747     check_child_element => sub {
748     my ($self, $item, $child_el, $child_nsuri, $child_ln,
749     $child_is_transparent, $element_state) = @_;
750     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
751     $self->{onerror}->(node => $child_el,
752     type => 'element not allowed:minus',
753     level => $self->{must_level});
754     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
755     #
756     } elsif ($element_state->{phase} eq 'before head') {
757     if ($child_nsuri eq $HTML_NS and $child_ln eq 'head') {
758     $element_state->{phase} = 'after head';
759     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'body') {
760     $self->{onerror}->(node => $child_el,
761     type => 'ps element missing:head');
762     $element_state->{phase} = 'after body';
763     } else {
764     $self->{onerror}->(node => $child_el,
765     type => 'element not allowed');
766     }
767     } elsif ($element_state->{phase} eq 'after head') {
768     if ($child_nsuri eq $HTML_NS and $child_ln eq 'body') {
769     $element_state->{phase} = 'after body';
770     } else {
771     $self->{onerror}->(node => $child_el,
772     type => 'element not allowed');
773     }
774     } elsif ($element_state->{phase} eq 'after body') {
775     $self->{onerror}->(node => $child_el,
776     type => 'element not allowed');
777     } else {
778     die "check_child_element: Bad |html| phase: $element_state->{phase}";
779     }
780     },
781     check_child_text => sub {
782     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
783     if ($has_significant) {
784     $self->{onerror}->(node => $child_node,
785     type => 'character not allowed');
786     }
787     },
788     check_end => sub {
789     my ($self, $item, $element_state) = @_;
790     if ($element_state->{phase} eq 'after body') {
791     #
792     } elsif ($element_state->{phase} eq 'before head') {
793     $self->{onerror}->(node => $item->{node},
794     type => 'child element missing:head');
795     $self->{onerror}->(node => $item->{node},
796     type => 'child element missing:body');
797     } elsif ($element_state->{phase} eq 'after head') {
798     $self->{onerror}->(node => $item->{node},
799     type => 'child element missing:body');
800     } else {
801     die "check_end: Bad |html| phase: $element_state->{phase}";
802     }
803 wakaba 1.1
804 wakaba 1.40 $HTMLChecker{check_end}->(@_);
805     },
806     };
807 wakaba 1.25
808 wakaba 1.40 $Element->{$HTML_NS}->{head} = {
809     check_attrs => $GetHTMLAttrsChecker->({}),
810     check_child_element => sub {
811     my ($self, $item, $child_el, $child_nsuri, $child_ln,
812     $child_is_transparent, $element_state) = @_;
813     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
814     $self->{onerror}->(node => $child_el,
815     type => 'element not allowed:minus',
816     level => $self->{must_level});
817     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
818     #
819     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'title') {
820     unless ($element_state->{has_title}) {
821     $element_state->{has_title} = 1;
822     } else {
823     $self->{onerror}->(node => $child_el,
824     type => 'element not allowed:head title',
825     level => $self->{must_level});
826     }
827     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
828     if ($child_el->has_attribute_ns (undef, 'scoped')) {
829     $self->{onerror}->(node => $child_el,
830     type => 'element not allowed:head style',
831     level => $self->{must_level});
832 wakaba 1.1 }
833 wakaba 1.40 } elsif ($HTMLMetadataContent->{$child_nsuri}->{$child_ln}) {
834     #
835    
836     ## NOTE: |meta| is a metadata content. However, strictly speaking,
837     ## a |meta| element with none of |charset|, |name|,
838     ## or |http-equiv| attribute is not allowed. It is non-conforming
839     ## anyway.
840     } else {
841     $self->{onerror}->(node => $child_el,
842     type => 'element not allowed:metadata',
843     level => $self->{must_level});
844     }
845     $element_state->{in_head_original} = $self->{flag}->{in_head};
846     $self->{flag}->{in_head} = 1;
847     },
848     check_child_text => sub {
849     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
850     if ($has_significant) {
851     $self->{onerror}->(node => $child_node, type => 'character not allowed');
852 wakaba 1.1 }
853 wakaba 1.40 },
854     check_end => sub {
855     my ($self, $item, $element_state) = @_;
856     unless ($element_state->{has_title}) {
857     $self->{onerror}->(node => $item->{node},
858     type => 'child element missing:title');
859 wakaba 1.1 }
860 wakaba 1.40 $self->{flag}->{in_head} = $element_state->{in_head_original};
861 wakaba 1.1
862 wakaba 1.40 $HTMLChecker{check_end}->(@_);
863 wakaba 1.1 },
864     };
865    
866 wakaba 1.40 $Element->{$HTML_NS}->{title} = {
867     %HTMLTextChecker,
868     };
869 wakaba 1.1
870 wakaba 1.40 $Element->{$HTML_NS}->{base} = {
871     %HTMLEmptyChecker,
872     check_attrs => sub {
873     my ($self, $item, $element_state) = @_;
874 wakaba 1.1
875 wakaba 1.40 if ($self->{has_base}) {
876     $self->{onerror}->(node => $item->{node},
877     type => 'element not allowed:base');
878     } else {
879     $self->{has_base} = 1;
880 wakaba 1.29 }
881    
882 wakaba 1.40 my $has_href = $item->{node}->has_attribute_ns (undef, 'href');
883     my $has_target = $item->{node}->has_attribute_ns (undef, 'target');
884 wakaba 1.14
885     if ($self->{has_uri_attr} and $has_href) {
886 wakaba 1.4 ## ISSUE: Are these examples conforming?
887     ## <head profile="a b c"><base href> (except for |profile|'s
888     ## non-conformance)
889     ## <title xml:base="relative"/><base href/> (maybe it should be)
890     ## <unknown xmlns="relative"/><base href/> (assuming that
891     ## |{relative}:unknown| is allowed before XHTML |base| (unlikely, though))
892     ## <style>@import 'relative';</style><base href>
893     ## <script>location.href = 'relative';</script><base href>
894 wakaba 1.14 ## NOTE: <html manifest=".."><head><base href=""/> is conforming as
895     ## an exception.
896 wakaba 1.40 $self->{onerror}->(node => $item->{node},
897 wakaba 1.4 type => 'basehref after URI attribute');
898     }
899 wakaba 1.14 if ($self->{has_hyperlink_element} and $has_target) {
900 wakaba 1.4 ## ISSUE: Are these examples conforming?
901     ## <head><title xlink:href=""/><base target="name"/></head>
902     ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
903     ## (assuming that |xbl:xbl| is allowed before |base|)
904     ## NOTE: These are non-conformant anyway because of |head|'s content model:
905     ## <link href=""/><base target="name"/>
906     ## <link rel=unknown href=""><base target=name>
907 wakaba 1.40 $self->{onerror}->(node => $item->{node},
908 wakaba 1.4 type => 'basetarget after hyperlink');
909     }
910    
911 wakaba 1.14 if (not $has_href and not $has_target) {
912 wakaba 1.40 $self->{onerror}->(node => $item->{node},
913 wakaba 1.14 type => 'attribute missing:href|target');
914     }
915    
916 wakaba 1.4 return $GetHTMLAttrsChecker->({
917     href => $HTMLURIAttrChecker,
918     target => $HTMLTargetAttrChecker,
919 wakaba 1.40 })->($self, $item, $element_state);
920 wakaba 1.4 },
921 wakaba 1.1 };
922    
923     $Element->{$HTML_NS}->{link} = {
924 wakaba 1.40 %HTMLEmptyChecker,
925     check_attrs => sub {
926     my ($self, $item, $element_state) = @_;
927 wakaba 1.1 $GetHTMLAttrsChecker->({
928     href => $HTMLURIAttrChecker,
929 wakaba 1.40 rel => sub { $HTMLLinkTypesAttrChecker->(0, $item, @_) },
930 wakaba 1.1 media => $HTMLMQAttrChecker,
931     hreflang => $HTMLLanguageTagAttrChecker,
932     type => $HTMLIMTAttrChecker,
933     ## NOTE: Though |title| has special semantics,
934     ## syntactically same as the |title| as global attribute.
935 wakaba 1.40 })->($self, $item, $element_state);
936     if ($item->{node}->has_attribute_ns (undef, 'href')) {
937     $self->{has_hyperlink_element} = 1 if $item->{has_hyperlink_link_type};
938 wakaba 1.4 } else {
939 wakaba 1.40 $self->{onerror}->(node => $item->{node},
940 wakaba 1.1 type => 'attribute missing:href');
941     }
942 wakaba 1.40 unless ($item->{node}->has_attribute_ns (undef, 'rel')) {
943     $self->{onerror}->(node => $item->{node},
944 wakaba 1.1 type => 'attribute missing:rel');
945     }
946     },
947     };
948    
949     $Element->{$HTML_NS}->{meta} = {
950 wakaba 1.40 %HTMLEmptyChecker,
951     check_attrs => sub {
952     my ($self, $item, $element_state) = @_;
953 wakaba 1.1 my $name_attr;
954     my $http_equiv_attr;
955     my $charset_attr;
956     my $content_attr;
957 wakaba 1.40 for my $attr (@{$item->{node}->attributes}) {
958 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
959     $attr_ns = '' unless defined $attr_ns;
960     my $attr_ln = $attr->manakai_local_name;
961     my $checker;
962     if ($attr_ns eq '') {
963     if ($attr_ln eq 'content') {
964     $content_attr = $attr;
965     $checker = 1;
966     } elsif ($attr_ln eq 'name') {
967     $name_attr = $attr;
968     $checker = 1;
969     } elsif ($attr_ln eq 'http-equiv') {
970     $http_equiv_attr = $attr;
971     $checker = 1;
972     } elsif ($attr_ln eq 'charset') {
973     $charset_attr = $attr;
974     $checker = 1;
975     } else {
976     $checker = $HTMLAttrChecker->{$attr_ln}
977     || $AttrChecker->{$attr_ns}->{$attr_ln}
978     || $AttrChecker->{$attr_ns}->{''};
979     }
980     } else {
981     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
982     || $AttrChecker->{$attr_ns}->{''};
983     }
984     if ($checker) {
985     $checker->($self, $attr) if ref $checker;
986     } else {
987     $self->{onerror}->(node => $attr, level => 'unsupported',
988     type => 'attribute');
989     ## ISSUE: No comformance createria for unknown attributes in the spec
990     }
991     }
992    
993     if (defined $name_attr) {
994     if (defined $http_equiv_attr) {
995     $self->{onerror}->(node => $http_equiv_attr,
996     type => 'attribute not allowed');
997     } elsif (defined $charset_attr) {
998     $self->{onerror}->(node => $charset_attr,
999     type => 'attribute not allowed');
1000     }
1001     my $metadata_name = $name_attr->value;
1002     my $metadata_value;
1003     if (defined $content_attr) {
1004     $metadata_value = $content_attr->value;
1005     } else {
1006 wakaba 1.40 $self->{onerror}->(node => $item->{node},
1007 wakaba 1.1 type => 'attribute missing:content');
1008     $metadata_value = '';
1009     }
1010     } elsif (defined $http_equiv_attr) {
1011     if (defined $charset_attr) {
1012     $self->{onerror}->(node => $charset_attr,
1013     type => 'attribute not allowed');
1014     }
1015     unless (defined $content_attr) {
1016 wakaba 1.40 $self->{onerror}->(node => $item->{node},
1017 wakaba 1.1 type => 'attribute missing:content');
1018     }
1019     } elsif (defined $charset_attr) {
1020     if (defined $content_attr) {
1021     $self->{onerror}->(node => $content_attr,
1022     type => 'attribute not allowed');
1023     }
1024     } else {
1025     if (defined $content_attr) {
1026     $self->{onerror}->(node => $content_attr,
1027     type => 'attribute not allowed');
1028 wakaba 1.40 $self->{onerror}->(node => $item->{node},
1029 wakaba 1.1 type => 'attribute missing:name|http-equiv');
1030     } else {
1031 wakaba 1.40 $self->{onerror}->(node => $item->{node},
1032 wakaba 1.1 type => 'attribute missing:name|http-equiv|charset');
1033     }
1034     }
1035    
1036 wakaba 1.32 my $check_charset_decl = sub () {
1037 wakaba 1.40 my $parent = $item->{node}->manakai_parent_element;
1038 wakaba 1.29 if ($parent and $parent eq $parent->owner_document->manakai_head) {
1039     for my $el (@{$parent->child_nodes}) {
1040     next unless $el->node_type == 1; # ELEMENT_NODE
1041 wakaba 1.40 unless ($el eq $item->{node}) {
1042 wakaba 1.29 ## NOTE: Not the first child element.
1043 wakaba 1.40 $self->{onerror}->(node => $item->{node},
1044 wakaba 1.32 type => 'element not allowed:meta charset',
1045     level => $self->{must_level});
1046 wakaba 1.29 }
1047     last;
1048     ## NOTE: Entity references are not supported.
1049     }
1050     } else {
1051 wakaba 1.40 $self->{onerror}->(node => $item->{node},
1052 wakaba 1.32 type => 'element not allowed:meta charset',
1053     level => $self->{must_level});
1054 wakaba 1.29 }
1055    
1056 wakaba 1.40 unless ($item->{node}->owner_document->manakai_is_html) {
1057     $self->{onerror}->(node => $item->{node},
1058 wakaba 1.32 type => 'in XML:charset',
1059     level => $self->{must_level});
1060 wakaba 1.1 }
1061 wakaba 1.32 }; # $check_charset_decl
1062 wakaba 1.21
1063 wakaba 1.32 my $check_charset = sub ($$) {
1064     my ($attr, $charset_value) = @_;
1065 wakaba 1.21 ## NOTE: Though the case-sensitivility of |charset| attribute value
1066     ## is not explicitly spelled in the HTML5 spec, the Character Set
1067     ## registry of IANA, which is referenced from HTML5 spec, says that
1068     ## charset name is case-insensitive.
1069     $charset_value =~ tr/A-Z/a-z/; ## NOTE: ASCII Case-insensitive.
1070    
1071     require Message::Charset::Info;
1072     my $charset = $Message::Charset::Info::IANACharset->{$charset_value};
1073 wakaba 1.40 my $ic = $item->{node}->owner_document->input_encoding;
1074 wakaba 1.21 if (defined $ic) {
1075     ## TODO: Test for this case
1076     my $ic_charset = $Message::Charset::Info::IANACharset->{$ic};
1077     if ($charset ne $ic_charset) {
1078 wakaba 1.32 $self->{onerror}->(node => $attr,
1079 wakaba 1.21 type => 'mismatched charset name:'.$ic.
1080 wakaba 1.32 ':'.$charset_value, ## TODO: This should be a |value| value.
1081     level => $self->{must_level});
1082 wakaba 1.21 }
1083     } else {
1084     ## NOTE: MUST, but not checkable, since the document is not originally
1085     ## in serialized form (or the parser does not preserve the input
1086     ## encoding information).
1087 wakaba 1.32 $self->{onerror}->(node => $attr,
1088     type => 'mismatched charset name::'.$charset_value, ## TODO: |value|
1089 wakaba 1.21 level => 'unsupported');
1090     }
1091    
1092     ## ISSUE: What is "valid character encoding name"? Syntactically valid?
1093     ## Syntactically valid and registered? What about x-charset names?
1094     unless (Message::Charset::Info::is_syntactically_valid_iana_charset_name
1095     ($charset_value)) {
1096 wakaba 1.32 $self->{onerror}->(node => $attr,
1097     type => 'charset:syntax error:'.$charset_value, ## TODO
1098     level => $self->{must_level});
1099 wakaba 1.21 }
1100    
1101     if ($charset) {
1102     ## ISSUE: What is "the preferred name for that encoding" (for a charset
1103     ## with no "preferred MIME name" label)?
1104     my $charset_status = $charset->{iana_names}->{$charset_value} || 0;
1105     if (($charset_status &
1106     Message::Charset::Info::PREFERRED_CHARSET_NAME ())
1107     != Message::Charset::Info::PREFERRED_CHARSET_NAME ()) {
1108 wakaba 1.32 $self->{onerror}->(node => $attr,
1109 wakaba 1.21 type => 'charset:not preferred:'.
1110 wakaba 1.32 $charset_value, ## TODO
1111     level => $self->{must_level});
1112 wakaba 1.21 }
1113     if (($charset_status &
1114     Message::Charset::Info::REGISTERED_CHARSET_NAME ())
1115     != Message::Charset::Info::REGISTERED_CHARSET_NAME ()) {
1116     if ($charset_value =~ /^x-/) {
1117 wakaba 1.32 $self->{onerror}->(node => $attr,
1118     type => 'charset:private:'.$charset_value, ## TODO
1119 wakaba 1.21 level => $self->{good_level});
1120     } else {
1121 wakaba 1.32 $self->{onerror}->(node => $attr,
1122 wakaba 1.21 type => 'charset:not registered:'.
1123 wakaba 1.32 $charset_value, ## TODO
1124 wakaba 1.21 level => $self->{good_level});
1125     }
1126     }
1127     } elsif ($charset_value =~ /^x-/) {
1128 wakaba 1.32 $self->{onerror}->(node => $attr,
1129     type => 'charset:private:'.$charset_value, ## TODO
1130 wakaba 1.21 level => $self->{good_level});
1131     } else {
1132 wakaba 1.32 $self->{onerror}->(node => $attr,
1133     type => 'charset:not registered:'.$charset_value, ## TODO
1134 wakaba 1.21 level => $self->{good_level});
1135     }
1136    
1137 wakaba 1.32 if ($attr->get_user_data ('manakai_has_reference')) {
1138     $self->{onerror}->(node => $attr,
1139 wakaba 1.22 type => 'character reference in charset',
1140     level => $self->{must_level});
1141     }
1142 wakaba 1.32 }; # $check_charset
1143    
1144     ## TODO: metadata conformance
1145    
1146     ## TODO: pragma conformance
1147     if (defined $http_equiv_attr) { ## An enumerated attribute
1148     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
1149     if ({
1150     'refresh' => 1,
1151     'default-style' => 1,
1152     }->{$keyword}) {
1153     #
1154 wakaba 1.33
1155     ## TODO: More than one occurence is a MUST-level error (revision 1180).
1156 wakaba 1.32 } elsif ($keyword eq 'content-type') {
1157 wakaba 1.33 ## ISSUE: Though it is renamed as "Encoding declaration" state in rev
1158     ## 1221, there are still many occurence of "Content-Type" state in
1159     ## the spec.
1160    
1161 wakaba 1.32 $check_charset_decl->();
1162     if ($content_attr) {
1163     my $content = $content_attr->value;
1164     if ($content =~ m!^text/html;\x20?charset=(.+)\z!s) {
1165     $check_charset->($content_attr, $1);
1166     } else {
1167     $self->{onerror}->(node => $content_attr,
1168     type => 'meta content-type syntax error',
1169     level => $self->{must_level});
1170     }
1171     }
1172     } else {
1173     $self->{onerror}->(node => $http_equiv_attr,
1174     type => 'enumerated:invalid');
1175     }
1176     }
1177    
1178     if (defined $charset_attr) {
1179     $check_charset_decl->();
1180     $check_charset->($charset_attr, $charset_attr->value);
1181 wakaba 1.1 }
1182     },
1183     };
1184    
1185     $Element->{$HTML_NS}->{style} = {
1186 wakaba 1.40 %HTMLChecker,
1187     check_attrs => $GetHTMLAttrsChecker->({
1188 wakaba 1.1 type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
1189     media => $HTMLMQAttrChecker,
1190     scoped => $GetHTMLBooleanAttrChecker->('scoped'),
1191     ## NOTE: |title| has special semantics for |style|s, but is syntactically
1192     ## not different
1193     }),
1194 wakaba 1.40 check_start => sub {
1195     my ($self, $item, $element_state) = @_;
1196    
1197 wakaba 1.27 ## NOTE: |html:style| itself has no conformance creteria on content model.
1198 wakaba 1.40 my $type = $item->{node}->get_attribute_ns (undef, 'type');
1199 wakaba 1.27 if (not defined $type or
1200     $type =~ m[\A(?>(?>\x0D\x0A)?[\x09\x20])*[Tt][Ee][Xx][Tt](?>(?>\x0D\x0A)?[\x09\x20])*/(?>(?>\x0D\x0A)?[\x09\x20])*[Cc][Ss][Ss](?>(?>\x0D\x0A)?[\x09\x20])*\z]) {
1201 wakaba 1.40 $element_state->{allow_element} = 0;
1202     $element_state->{style_type} = 'text/css';
1203     } else {
1204     $element_state->{allow_element} = 1; # unknown
1205     $element_state->{style_type} = $type; ## TODO: $type normalization
1206     }
1207     },
1208     check_child_element => sub {
1209     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1210     $child_is_transparent, $element_state) = @_;
1211     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
1212     $self->{onerror}->(node => $child_el,
1213     type => 'element not allowed:minus',
1214     level => $self->{must_level});
1215     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1216     #
1217     } elsif ($element_state->{allow_element}) {
1218     #
1219     } else {
1220     $self->{onerror}->(node => $child_el, type => 'element not allowed');
1221     }
1222     },
1223     check_child_text => sub {
1224     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1225     $element_state->{text} .= $child_node->text_content;
1226     },
1227     check_end => sub {
1228     my ($self, $item, $element_state) = @_;
1229     if ($element_state->{style_type} eq 'text/css') {
1230     $self->{onsubdoc}->({s => $element_state->{text},
1231     container_node => $item->{node},
1232 wakaba 1.28 media_type => 'text/css', is_char_string => 1});
1233 wakaba 1.27 } else {
1234 wakaba 1.40 $self->{onerror}->(node => $item->{node}, level => 'unsupported',
1235     type => 'style:'.$element_state->{style_type});
1236 wakaba 1.27 }
1237 wakaba 1.40
1238     $HTMLChecker{check_end}->(@_);
1239 wakaba 1.1 },
1240     };
1241 wakaba 1.25 ## ISSUE: Relationship to significant content check?
1242 wakaba 1.1
1243     $Element->{$HTML_NS}->{body} = {
1244 wakaba 1.40 %HTMLProseContentChecker,
1245 wakaba 1.1 };
1246    
1247     $Element->{$HTML_NS}->{section} = {
1248 wakaba 1.40 %HTMLProseContentChecker,
1249 wakaba 1.1 };
1250    
1251     $Element->{$HTML_NS}->{nav} = {
1252 wakaba 1.40 %HTMLProseContentChecker,
1253 wakaba 1.1 };
1254    
1255     $Element->{$HTML_NS}->{article} = {
1256 wakaba 1.40 %HTMLProseContentChecker,
1257 wakaba 1.1 };
1258    
1259     $Element->{$HTML_NS}->{blockquote} = {
1260 wakaba 1.40 %HTMLProseContentChecker,
1261     check_attrs => $GetHTMLAttrsChecker->({
1262 wakaba 1.1 cite => $HTMLURIAttrChecker,
1263     }),
1264     };
1265    
1266     $Element->{$HTML_NS}->{aside} = {
1267 wakaba 1.40 %HTMLProseContentChecker,
1268 wakaba 1.1 };
1269    
1270     $Element->{$HTML_NS}->{h1} = {
1271 wakaba 1.40 %HTMLPhrasingContentChecker,
1272     check_start => sub {
1273     my ($self, $item, $element_state) = @_;
1274     $self->{flag}->{has_hn} = 1;
1275 wakaba 1.1 },
1276     };
1277    
1278 wakaba 1.40 $Element->{$HTML_NS}->{h2} = {%{$Element->{$HTML_NS}->{h1}}};
1279 wakaba 1.1
1280 wakaba 1.40 $Element->{$HTML_NS}->{h3} = {%{$Element->{$HTML_NS}->{h1}}};
1281 wakaba 1.1
1282 wakaba 1.40 $Element->{$HTML_NS}->{h4} = {%{$Element->{$HTML_NS}->{h1}}};
1283 wakaba 1.1
1284 wakaba 1.40 $Element->{$HTML_NS}->{h5} = {%{$Element->{$HTML_NS}->{h1}}};
1285 wakaba 1.1
1286 wakaba 1.40 $Element->{$HTML_NS}->{h6} = {%{$Element->{$HTML_NS}->{h1}}};
1287 wakaba 1.1
1288 wakaba 1.29 ## TODO: Explicit sectioning is "encouraged".
1289    
1290 wakaba 1.1 $Element->{$HTML_NS}->{header} = {
1291 wakaba 1.40 %HTMLProseContentChecker,
1292     check_start => sub {
1293     my ($self, $item, $element_state) = @_;
1294     $self->_add_minus_elements ($element_state,
1295     {$HTML_NS => {qw/header 1 footer 1/}},
1296     $HTMLSectioningContent);
1297     $element_state->{has_hn_original} = $self->{flag}->{has_hn};
1298     $self->{flag}->{has_hn} = 0;
1299     },
1300     check_end => sub {
1301     my ($self, $item, $element_state) = @_;
1302     $self->_remove_minus_elements ($element_state);
1303     unless ($self->{flag}->{has_hn}) {
1304     $self->{onerror}->(node => $item->{node},
1305     type => 'element missing:hn');
1306     }
1307     $self->{flag}->{has_hn} ||= $element_state->{has_hn_original};
1308 wakaba 1.1
1309 wakaba 1.40 $HTMLProseContentChecker{check_end}->(@_);
1310 wakaba 1.1 },
1311 wakaba 1.40 ## ISSUE: <header><del><h1>...</h1></del></header> is conforming?
1312 wakaba 1.1 };
1313    
1314     $Element->{$HTML_NS}->{footer} = {
1315 wakaba 1.40 %HTMLProseContentChecker,
1316     check_start => sub {
1317     my ($self, $item, $element_state) = @_;
1318     $self->_add_minus_elements ($element_state,
1319     {$HTML_NS => {footer => 1}},
1320     $HTMLSectioningContent, $HTMLHeadingContent);
1321     },
1322     check_end => sub {
1323     my ($self, $item, $element_state) = @_;
1324     $self->_remove_minus_elements ($element_state);
1325 wakaba 1.1
1326 wakaba 1.40 $HTMLProseContentChecker{check_end}->(@_);
1327 wakaba 1.1 },
1328     };
1329    
1330     $Element->{$HTML_NS}->{address} = {
1331 wakaba 1.40 %HTMLProseContentChecker,
1332     check_start => sub {
1333     my ($self, $item, $element_state) = @_;
1334     $self->_add_minus_elements ($element_state,
1335     {$HTML_NS => {footer => 1, address => 1}},
1336     $HTMLSectioningContent, $HTMLHeadingContent);
1337     },
1338     check_end => sub {
1339     my ($self, $item, $element_state) = @_;
1340     $self->_remove_minus_elements ($element_state);
1341 wakaba 1.29
1342 wakaba 1.40 $HTMLProseContentChecker{check_end}->(@_);
1343 wakaba 1.29 },
1344 wakaba 1.1 };
1345    
1346     $Element->{$HTML_NS}->{p} = {
1347 wakaba 1.40 %HTMLPhrasingContentChecker,
1348 wakaba 1.1 };
1349    
1350     $Element->{$HTML_NS}->{hr} = {
1351 wakaba 1.40 %HTMLEmptyChecker,
1352 wakaba 1.1 };
1353    
1354     $Element->{$HTML_NS}->{br} = {
1355 wakaba 1.40 %HTMLEmptyChecker,
1356 wakaba 1.29 ## NOTE: Blank line MUST NOT be used for presentation purpose.
1357     ## (This requirement is semantic so that we cannot check.)
1358 wakaba 1.1 };
1359    
1360     $Element->{$HTML_NS}->{dialog} = {
1361 wakaba 1.40 %HTMLChecker,
1362     check_start => sub {
1363     my ($self, $item, $element_state) = @_;
1364     $element_state->{phase} = 'before dt';
1365     },
1366     check_child_element => sub {
1367     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1368     $child_is_transparent, $element_state) = @_;
1369     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
1370     $self->{onerror}->(node => $child_el,
1371     type => 'element not allowed:minus',
1372     level => $self->{must_level});
1373     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1374     #
1375     } elsif ($element_state->{phase} eq 'before dt') {
1376     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
1377     $element_state->{phase} = 'before dd';
1378     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
1379     $self->{onerror}
1380     ->(node => $child_el, type => 'ps element missing:dt');
1381     $element_state->{phase} = 'before dt';
1382     } else {
1383     $self->{onerror}->(node => $child_el, type => 'element not allowed');
1384     }
1385     } elsif ($element_state->{phase} eq 'before dd') {
1386     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
1387     $element_state->{phase} = 'before dt';
1388     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
1389     $self->{onerror}
1390     ->(node => $child_el, type => 'ps element missing:dd');
1391     $element_state->{phase} = 'before dd';
1392     } else {
1393     $self->{onerror}->(node => $child_el, type => 'element not allowed');
1394 wakaba 1.1 }
1395 wakaba 1.40 } else {
1396     die "check_child_element: Bad |dialog| phase: $element_state->{phase}";
1397     }
1398     },
1399     check_child_text => sub {
1400     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1401     if ($has_significant) {
1402     $self->{onerror}->(node => $child_node, type => 'character not allowed');
1403 wakaba 1.1 }
1404 wakaba 1.40 },
1405     check_end => sub {
1406     my ($self, $item, $element_state) = @_;
1407     if ($element_state->{phase} eq 'before dd') {
1408     $self->{onerror}->(node => $item->{node},
1409     type => 'child element missing:dd');
1410 wakaba 1.1 }
1411 wakaba 1.40
1412     $HTMLChecker{check_end}->(@_);
1413 wakaba 1.1 },
1414     };
1415    
1416     $Element->{$HTML_NS}->{pre} = {
1417 wakaba 1.40 %HTMLPhrasingContentChecker,
1418 wakaba 1.1 };
1419    
1420     $Element->{$HTML_NS}->{ol} = {
1421 wakaba 1.40 %HTMLChecker,
1422     check_attrs => $GetHTMLAttrsChecker->({
1423 wakaba 1.1 start => $HTMLIntegerAttrChecker,
1424     }),
1425 wakaba 1.40 check_child_element => sub {
1426     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1427     $child_is_transparent, $element_state) = @_;
1428     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
1429     $self->{onerror}->(node => $child_el,
1430     type => 'element not allowed:minus',
1431     level => $self->{must_level});
1432     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1433     #
1434     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'li') {
1435     #
1436     } else {
1437     $self->{onerror}->(node => $child_el, type => 'element not allowed');
1438 wakaba 1.1 }
1439 wakaba 1.40 },
1440     check_child_text => sub {
1441     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1442     if ($has_significant) {
1443     $self->{onerror}->(node => $child_node, type => 'character not allowed');
1444 wakaba 1.1 }
1445     },
1446     };
1447    
1448     $Element->{$HTML_NS}->{ul} = {
1449 wakaba 1.40 %{$Element->{$HTML_NS}->{ol}},
1450 wakaba 1.1 };
1451    
1452     $Element->{$HTML_NS}->{li} = {
1453 wakaba 1.40 %HTMLProseContentChecker,
1454     check_attrs => $GetHTMLAttrsChecker->({
1455 wakaba 1.1 start => sub {
1456     my ($self, $attr) = @_;
1457     my $parent = $attr->owner_element->manakai_parent_element;
1458     if (defined $parent) {
1459     my $parent_ns = $parent->namespace_uri;
1460     $parent_ns = '' unless defined $parent_ns;
1461     my $parent_ln = $parent->manakai_local_name;
1462     unless ($parent_ns eq $HTML_NS and $parent_ln eq 'ol') {
1463     $self->{onerror}->(node => $attr, level => 'unsupported',
1464     type => 'attribute');
1465     }
1466     }
1467     $HTMLIntegerAttrChecker->($self, $attr);
1468     },
1469     }),
1470 wakaba 1.40 check_child_element => sub {
1471     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1472     $child_is_transparent, $element_state) = @_;
1473     if ($self->{flag}->{in_menu}) {
1474     $HTMLPhrasingContentChecker{check_child_element}->(@_);
1475     } else {
1476     $HTMLProseContentChecker{check_child_element}->(@_);
1477     }
1478     },
1479     check_child_text => sub {
1480     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1481     if ($self->{flag}->{in_menu}) {
1482     $HTMLPhrasingContentChecker{check_child_text}->(@_);
1483 wakaba 1.1 } else {
1484 wakaba 1.40 $HTMLProseContentChecker{check_child_text}->(@_);
1485 wakaba 1.1 }
1486     },
1487     };
1488    
1489     $Element->{$HTML_NS}->{dl} = {
1490 wakaba 1.40 %HTMLChecker,
1491     check_start => sub {
1492     my ($self, $item, $element_state) = @_;
1493     $element_state->{phase} = 'before dt';
1494     },
1495     check_child_element => sub {
1496     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1497     $child_is_transparent, $element_state) = @_;
1498     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
1499     $self->{onerror}->(node => $child_el,
1500     type => 'element not allowed:minus',
1501     level => $self->{must_level});
1502     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1503     #
1504     } elsif ($element_state->{phase} eq 'in dds') {
1505     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
1506     #$element_state->{phase} = 'in dds';
1507     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
1508     $element_state->{phase} = 'in dts';
1509     } else {
1510     $self->{onerror}->(node => $child_el, type => 'element not allowed');
1511     }
1512     } elsif ($element_state->{phase} eq 'in dts') {
1513     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
1514     #$element_state->{phase} = 'in dts';
1515     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
1516     $element_state->{phase} = 'in dds';
1517     } else {
1518     $self->{onerror}->(node => $child_el, type => 'element not allowed');
1519     }
1520     } elsif ($element_state->{phase} eq 'before dt') {
1521     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
1522     $element_state->{phase} = 'in dts';
1523     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
1524     $self->{onerror}
1525     ->(node => $child_el, type => 'ps element missing:dt');
1526     $element_state->{phase} = 'in dds';
1527     } else {
1528     $self->{onerror}->(node => $child_el, type => 'element not allowed');
1529 wakaba 1.1 }
1530 wakaba 1.40 } else {
1531     die "check_child_element: Bad |dl| phase: $element_state->{phase}";
1532 wakaba 1.1 }
1533 wakaba 1.40 },
1534     check_child_text => sub {
1535     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1536     if ($has_significant) {
1537     $self->{onerror}->(node => $child_node, type => 'character not allowed');
1538     }
1539     },
1540     check_end => sub {
1541     my ($self, $item, $element_state) = @_;
1542     if ($element_state->{phase} eq 'in dts') {
1543     $self->{onerror}->(node => $item->{node},
1544     type => 'child element missing:dd');
1545 wakaba 1.1 }
1546    
1547 wakaba 1.40 $HTMLChecker{check_end}->(@_);
1548 wakaba 1.1 },
1549     };
1550    
1551     $Element->{$HTML_NS}->{dt} = {
1552 wakaba 1.40 %HTMLPhrasingContentChecker,
1553 wakaba 1.1 };
1554    
1555     $Element->{$HTML_NS}->{dd} = {
1556 wakaba 1.40 %HTMLProseContentChecker,
1557 wakaba 1.1 };
1558    
1559     $Element->{$HTML_NS}->{a} = {
1560 wakaba 1.40 %HTMLPhrasingContentChecker,
1561     check_attrs => sub {
1562     my ($self, $item, $element_state) = @_;
1563 wakaba 1.1 my %attr;
1564 wakaba 1.40 for my $attr (@{$item->{node}->attributes}) {
1565 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
1566     $attr_ns = '' unless defined $attr_ns;
1567     my $attr_ln = $attr->manakai_local_name;
1568     my $checker;
1569     if ($attr_ns eq '') {
1570     $checker = {
1571     target => $HTMLTargetAttrChecker,
1572     href => $HTMLURIAttrChecker,
1573     ping => $HTMLSpaceURIsAttrChecker,
1574 wakaba 1.40 rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
1575 wakaba 1.1 media => $HTMLMQAttrChecker,
1576     hreflang => $HTMLLanguageTagAttrChecker,
1577     type => $HTMLIMTAttrChecker,
1578     }->{$attr_ln};
1579     if ($checker) {
1580     $attr{$attr_ln} = $attr;
1581     } else {
1582     $checker = $HTMLAttrChecker->{$attr_ln};
1583     }
1584     }
1585     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1586     || $AttrChecker->{$attr_ns}->{''};
1587     if ($checker) {
1588     $checker->($self, $attr) if ref $checker;
1589     } else {
1590     $self->{onerror}->(node => $attr, level => 'unsupported',
1591     type => 'attribute');
1592     ## ISSUE: No comformance createria for unknown attributes in the spec
1593     }
1594     }
1595    
1596 wakaba 1.40 $element_state->{in_a_href_original} = $self->{flag}->{in_a_href};
1597 wakaba 1.4 if (defined $attr{href}) {
1598     $self->{has_hyperlink_element} = 1;
1599 wakaba 1.40 $self->{flag}->{in_a_href} = 1;
1600 wakaba 1.4 } else {
1601 wakaba 1.1 for (qw/target ping rel media hreflang type/) {
1602     if (defined $attr{$_}) {
1603     $self->{onerror}->(node => $attr{$_},
1604     type => 'attribute not allowed');
1605     }
1606     }
1607     }
1608     },
1609 wakaba 1.40 check_start => sub {
1610     my ($self, $item, $element_state) = @_;
1611     $self->_add_minus_elements ($element_state, $HTMLInteractiveContent);
1612     },
1613     check_end => sub {
1614     my ($self, $item, $element_state) = @_;
1615     $self->_remove_minus_elements ($element_state);
1616 wakaba 1.1
1617 wakaba 1.40 $HTMLPhrasingContentChecker{check_end}->(@_);
1618 wakaba 1.1 },
1619     };
1620    
1621     $Element->{$HTML_NS}->{q} = {
1622 wakaba 1.40 %HTMLPhrasingContentChecker,
1623     check_attrs => $GetHTMLAttrsChecker->({
1624 wakaba 1.1 cite => $HTMLURIAttrChecker,
1625     }),
1626     };
1627    
1628     $Element->{$HTML_NS}->{cite} = {
1629 wakaba 1.40 %HTMLPhrasingContentChecker,
1630 wakaba 1.1 };
1631    
1632     $Element->{$HTML_NS}->{em} = {
1633 wakaba 1.40 %HTMLPhrasingContentChecker,
1634 wakaba 1.1 };
1635    
1636     $Element->{$HTML_NS}->{strong} = {
1637 wakaba 1.40 %HTMLPhrasingContentChecker,
1638 wakaba 1.1 };
1639    
1640     $Element->{$HTML_NS}->{small} = {
1641 wakaba 1.40 %HTMLPhrasingContentChecker,
1642 wakaba 1.1 };
1643    
1644 wakaba 1.38 $Element->{$HTML_NS}->{mark} = {
1645 wakaba 1.40 %HTMLPhrasingContentChecker,
1646 wakaba 1.1 };
1647    
1648     $Element->{$HTML_NS}->{dfn} = {
1649 wakaba 1.40 %HTMLPhrasingContentChecker,
1650     check_start => sub {
1651     my ($self, $item, $element_state) = @_;
1652     $self->_add_minus_elements ($element_state, {$HTML_NS => {dfn => 1}});
1653 wakaba 1.1
1654 wakaba 1.40 my $node = $item->{node};
1655 wakaba 1.1 my $term = $node->get_attribute_ns (undef, 'title');
1656     unless (defined $term) {
1657     for my $child (@{$node->child_nodes}) {
1658     if ($child->node_type == 1) { # ELEMENT_NODE
1659     if (defined $term) {
1660     undef $term;
1661     last;
1662     } elsif ($child->manakai_local_name eq 'abbr') {
1663     my $nsuri = $child->namespace_uri;
1664     if (defined $nsuri and $nsuri eq $HTML_NS) {
1665     my $attr = $child->get_attribute_node_ns (undef, 'title');
1666     if ($attr) {
1667     $term = $attr->value;
1668     }
1669     }
1670     }
1671     } elsif ($child->node_type == 3 or $child->node_type == 4) {
1672     ## TEXT_NODE or CDATA_SECTION_NODE
1673     if ($child->data =~ /\A[\x09-\x0D\x20]+\z/) { # Inter-element whitespace
1674     next;
1675     }
1676     undef $term;
1677     last;
1678     }
1679     }
1680     unless (defined $term) {
1681     $term = $node->text_content;
1682     }
1683     }
1684     if ($self->{term}->{$term}) {
1685     $self->{onerror}->(node => $node, type => 'duplicate term');
1686     push @{$self->{term}->{$term}}, $node;
1687     } else {
1688     $self->{term}->{$term} = [$node];
1689     }
1690     ## ISSUE: The HTML5 algorithm does not work with |ruby| unless |dfn|
1691     ## has |title|.
1692 wakaba 1.40 },
1693     check_end => sub {
1694     my ($self, $item, $element_state) = @_;
1695     $self->_remove_minus_elements ($element_state);
1696 wakaba 1.1
1697 wakaba 1.40 $HTMLPhrasingContentChecker{check_end}->(@_);
1698 wakaba 1.1 },
1699     };
1700    
1701     $Element->{$HTML_NS}->{abbr} = {
1702 wakaba 1.40 %HTMLPhrasingContentChecker,
1703 wakaba 1.1 };
1704    
1705     $Element->{$HTML_NS}->{time} = {
1706 wakaba 1.40 %HTMLPhrasingContentChecker,
1707     check_attrs => $GetHTMLAttrsChecker->({
1708 wakaba 1.1 datetime => sub { 1 }, # checked in |checker|
1709     }),
1710     ## TODO: Write tests
1711 wakaba 1.40 check_end => sub {
1712     my ($self, $item, $element_state) = @_;
1713 wakaba 1.1
1714 wakaba 1.40 my $attr = $item->{node}->get_attribute_node_ns (undef, 'datetime');
1715 wakaba 1.1 my $input;
1716     my $reg_sp;
1717     my $input_node;
1718     if ($attr) {
1719     $input = $attr->value;
1720     $reg_sp = qr/[\x09-\x0D\x20]*/;
1721     $input_node = $attr;
1722     } else {
1723 wakaba 1.40 $input = $item->{node}->text_content;
1724 wakaba 1.1 $reg_sp = qr/\p{Zs}*/;
1725 wakaba 1.40 $input_node = $item->{node};
1726 wakaba 1.1
1727     ## ISSUE: What is the definition for "successfully extracts a date
1728     ## or time"? If the algorithm says the string is invalid but
1729     ## return some date or time, is it "successfully"?
1730     }
1731    
1732     my $hour;
1733     my $minute;
1734     my $second;
1735     if ($input =~ /
1736     \A
1737     [\x09-\x0D\x20]*
1738     ([0-9]+) # 1
1739     (?>
1740     -([0-9]+) # 2
1741     -([0-9]+) # 3
1742     [\x09-\x0D\x20]*
1743     (?>
1744     T
1745     [\x09-\x0D\x20]*
1746     )?
1747     ([0-9]+) # 4
1748     :([0-9]+) # 5
1749     (?>
1750     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6
1751     )?
1752     [\x09-\x0D\x20]*
1753     (?>
1754     Z
1755     [\x09-\x0D\x20]*
1756     |
1757     [+-]([0-9]+):([0-9]+) # 7, 8
1758     [\x09-\x0D\x20]*
1759     )?
1760     \z
1761     |
1762     :([0-9]+) # 9
1763     (?>
1764     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10
1765     )?
1766     [\x09-\x0D\x20]*\z
1767     )
1768     /x) {
1769     if (defined $2) { ## YYYY-MM-DD T? hh:mm
1770     if (length $1 != 4 or length $2 != 2 or length $3 != 2 or
1771     length $4 != 2 or length $5 != 2) {
1772     $self->{onerror}->(node => $input_node,
1773     type => 'dateortime:syntax error');
1774     }
1775    
1776     if (1 <= $2 and $2 <= 12) {
1777     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
1778     if $3 < 1 or
1779     $3 > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$2];
1780     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
1781     if $2 == 2 and $3 == 29 and
1782     not ($1 % 400 == 0 or ($1 % 4 == 0 and $1 % 100 != 0));
1783     } else {
1784     $self->{onerror}->(node => $input_node,
1785     type => 'datetime:bad month');
1786     }
1787    
1788     ($hour, $minute, $second) = ($4, $5, $6);
1789    
1790     if (defined $7) { ## [+-]hh:mm
1791     if (length $7 != 2 or length $8 != 2) {
1792     $self->{onerror}->(node => $input_node,
1793     type => 'dateortime:syntax error');
1794     }
1795    
1796     $self->{onerror}->(node => $input_node,
1797     type => 'datetime:bad timezone hour')
1798     if $7 > 23;
1799     $self->{onerror}->(node => $input_node,
1800     type => 'datetime:bad timezone minute')
1801     if $8 > 59;
1802     }
1803     } else { ## hh:mm
1804     if (length $1 != 2 or length $9 != 2) {
1805     $self->{onerror}->(node => $input_node,
1806     type => qq'dateortime:syntax error');
1807     }
1808    
1809     ($hour, $minute, $second) = ($1, $9, $10);
1810     }
1811    
1812     $self->{onerror}->(node => $input_node, type => 'datetime:bad hour')
1813     if $hour > 23;
1814     $self->{onerror}->(node => $input_node, type => 'datetime:bad minute')
1815     if $minute > 59;
1816    
1817     if (defined $second) { ## s
1818     ## NOTE: Integer part of second don't have to have length of two.
1819    
1820     if (substr ($second, 0, 1) eq '.') {
1821     $self->{onerror}->(node => $input_node,
1822     type => 'dateortime:syntax error');
1823     }
1824    
1825     $self->{onerror}->(node => $input_node, type => 'datetime:bad second')
1826     if $second >= 60;
1827     }
1828     } else {
1829     $self->{onerror}->(node => $input_node,
1830     type => 'dateortime:syntax error');
1831     }
1832    
1833 wakaba 1.40 $HTMLPhrasingContentChecker{check_end}->(@_);
1834 wakaba 1.1 },
1835     };
1836    
1837     $Element->{$HTML_NS}->{meter} = { ## TODO: "The recommended way of giving the value is to include it as contents of the element"
1838 wakaba 1.40 %HTMLPhrasingContentChecker,
1839     check_attrs => $GetHTMLAttrsChecker->({
1840 wakaba 1.1 value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1841     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1842     low => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1843     high => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1844     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1845     optimum => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1846     }),
1847     };
1848    
1849     $Element->{$HTML_NS}->{progress} = { ## TODO: recommended to use content
1850 wakaba 1.40 %HTMLPhrasingContentChecker,
1851     check_attrs => $GetHTMLAttrsChecker->({
1852 wakaba 1.1 value => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift >= 0 }),
1853     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift > 0 }),
1854     }),
1855     };
1856    
1857     $Element->{$HTML_NS}->{code} = {
1858 wakaba 1.40 %HTMLPhrasingContentChecker,
1859 wakaba 1.1 };
1860    
1861     $Element->{$HTML_NS}->{var} = {
1862 wakaba 1.40 %HTMLPhrasingContentChecker,
1863 wakaba 1.1 };
1864    
1865     $Element->{$HTML_NS}->{samp} = {
1866 wakaba 1.40 %HTMLPhrasingContentChecker,
1867 wakaba 1.1 };
1868    
1869     $Element->{$HTML_NS}->{kbd} = {
1870 wakaba 1.40 %HTMLPhrasingContentChecker,
1871 wakaba 1.1 };
1872    
1873     $Element->{$HTML_NS}->{sub} = {
1874 wakaba 1.40 %HTMLPhrasingContentChecker,
1875 wakaba 1.1 };
1876    
1877     $Element->{$HTML_NS}->{sup} = {
1878 wakaba 1.40 %HTMLPhrasingContentChecker,
1879 wakaba 1.1 };
1880    
1881     $Element->{$HTML_NS}->{span} = {
1882 wakaba 1.40 %HTMLPhrasingContentChecker,
1883 wakaba 1.1 };
1884    
1885     $Element->{$HTML_NS}->{i} = {
1886 wakaba 1.40 %HTMLPhrasingContentChecker,
1887 wakaba 1.1 };
1888    
1889     $Element->{$HTML_NS}->{b} = {
1890 wakaba 1.40 %HTMLPhrasingContentChecker,
1891 wakaba 1.1 };
1892    
1893     $Element->{$HTML_NS}->{bdo} = {
1894 wakaba 1.40 %HTMLPhrasingContentChecker,
1895     check_attrs => sub {
1896     my ($self, $item, $element_state) = @_;
1897     $GetHTMLAttrsChecker->({})->($self, $item, $element_state);
1898     unless ($item->{node}->has_attribute_ns (undef, 'dir')) {
1899     $self->{onerror}->(node => $item->{node},
1900     type => 'attribute missing:dir');
1901 wakaba 1.1 }
1902     },
1903     ## ISSUE: The spec does not directly say that |dir| is a enumerated attr.
1904     };
1905    
1906 wakaba 1.29 =pod
1907    
1908     ## TODO:
1909    
1910     +
1911     + <p>Partly because of the confusion described above, authors are
1912     + strongly recommended to always mark up all paragraphs with the
1913     + <code>p</code> element, and to not have any <code>ins</code> or
1914     + <code>del</code> elements that cross across any <span
1915     + title="paragraph">implied paragraphs</span>.</p>
1916     +
1917     (An informative note)
1918    
1919     <p><code>ins</code> elements should not cross <span
1920     + title="paragraph">implied paragraph</span> boundaries.</p>
1921     (normative)
1922    
1923     + <p><code>del</code> elements should not cross <span
1924     + title="paragraph">implied paragraph</span> boundaries.</p>
1925     (normative)
1926    
1927     =cut
1928    
1929 wakaba 1.1 $Element->{$HTML_NS}->{ins} = {
1930 wakaba 1.40 %HTMLTransparentChecker,
1931     check_attrs => $GetHTMLAttrsChecker->({
1932 wakaba 1.1 cite => $HTMLURIAttrChecker,
1933     datetime => $HTMLDatetimeAttrChecker,
1934     }),
1935     };
1936    
1937     $Element->{$HTML_NS}->{del} = {
1938 wakaba 1.40 %HTMLTransparentChecker,
1939     check_attrs => $GetHTMLAttrsChecker->({
1940 wakaba 1.1 cite => $HTMLURIAttrChecker,
1941     datetime => $HTMLDatetimeAttrChecker,
1942     }),
1943 wakaba 1.40 check_end => sub {
1944     my ($self, $item, $element_state) = @_;
1945     if ($element_state->{has_significant}) {
1946     ## NOTE: Significantness flag does not propagate.
1947     } elsif ($item->{transparent}) {
1948     #
1949     } else {
1950     $self->{onerror}->(node => $item->{node},
1951     level => $self->{should_level},
1952     type => 'no significant content');
1953     }
1954 wakaba 1.1 },
1955     };
1956    
1957 wakaba 1.35 $Element->{$HTML_NS}->{figure} = {
1958 wakaba 1.40 %HTMLProseContentChecker,
1959    
1960     ## TODO: Reimplement
1961 wakaba 1.35 checker => sub {
1962 wakaba 1.39 ## NOTE: legend, Prose | Prose, legend
1963    
1964 wakaba 1.35 my ($self, $todo) = @_;
1965     my $el = $todo->{node};
1966     my $new_todos = [];
1967     my @nodes = (@{$el->child_nodes});
1968    
1969     my $old_values = {significant =>
1970     $todo->{flag}->{has_descendant}->{significant}};
1971     $todo->{flag}->{has_descendant}->{significant} = 0;
1972    
1973     my $has_legend;
1974     my $has_non_legend;
1975     my $has_non_style;
1976     while (@nodes) {
1977     my $node = shift @nodes;
1978     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1979    
1980     my $nt = $node->node_type;
1981     if ($nt == 1) {
1982     my $node_ns = $node->namespace_uri;
1983     $node_ns = '' unless defined $node_ns;
1984     my $node_ln = $node->manakai_local_name;
1985     if ($self->{minuses}->{$node_ns}->{$node_ln}) {
1986     $has_non_legend = 1;
1987     $self->{onerror}->(node => $node,
1988     type => 'element not allowed:minus',
1989     level => $self->{must_level});
1990     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'legend') {
1991     if ($has_legend) {
1992     if (ref $has_legend) {
1993     $self->{onerror}->(node => $has_legend,
1994     type => 'element not allowed:figure legend',
1995     level => $self->{must_level});
1996     $has_legend = $node;
1997     } else {
1998     ## NOTE: The first child element was a |legend|.
1999     $self->{onerror}->(node => $node,
2000     type => 'element not allowed:figure legend',
2001     level => $self->{must_level});
2002     }
2003     } elsif ($has_non_legend) {
2004     undef $has_non_legend;
2005     $has_legend = $node;
2006     } else {
2007     $has_legend = 1;
2008     }
2009     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'style') {
2010     $has_non_legend = 1;
2011     if ($has_non_style or
2012     not $node->has_attribute_ns (undef, 'scoped')) {
2013     $self->{onerror}->(node => $node,
2014     type => 'element not allowed:prose style',
2015     level => $self->{must_level});
2016     }
2017     } elsif ($HTMLProseContent->{$node_ns}->{$node_ln}) {
2018     $has_non_style = 1;
2019     $has_non_legend = 1;
2020     } elsif ($self->{pluses}->{$node_ns}->{$node_ln}) {
2021     #
2022     } else {
2023     $has_non_style = 1;
2024     $has_non_legend = 1;
2025     $self->{onerror}->(node => $node,
2026     type => 'element not allowed:prose',
2027     level => $self->{must_level})
2028     }
2029    
2030     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2031     unshift @nodes, @$sib;
2032     push @$new_todos, @$ch;
2033     } elsif ($nt == 3 or $nt == 4) {
2034     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2035     $has_non_style = 1;
2036     $has_non_legend = 1;
2037     $todo->{flag}->{has_descendant}->{significant} = 1;
2038     }
2039     } elsif ($nt == 5) {
2040     unshift @nodes, @{$node->child_nodes};
2041     }
2042     }
2043    
2044     if ($has_legend) {
2045     if (ref $has_legend and $has_non_legend) {
2046     $self->{onerror}->(node => $has_legend,
2047     type => 'element not allowed:figure legend',
2048     level => $self->{must_level});
2049     }
2050     } else {
2051     $self->{onerror}->(node => $todo->{node},
2052     type => 'element missing:legend',
2053     level => $self->{must_level});
2054     }
2055    
2056     push @$new_todos, {
2057     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
2058     old_values => $old_values,
2059 wakaba 1.40 # errors => $HTMLSignificantContentErrors,
2060 wakaba 1.35 };
2061    
2062     return ($new_todos);
2063     },
2064     };
2065 wakaba 1.8 ## TODO: Test for <nest/> in <figure/>
2066 wakaba 1.1
2067     $Element->{$HTML_NS}->{img} = {
2068 wakaba 1.40 %HTMLEmptyChecker,
2069     check_attrs => sub {
2070     my ($self, $item, $element_state) = @_;
2071 wakaba 1.1 $GetHTMLAttrsChecker->({
2072     alt => sub { }, ## NOTE: No syntactical requirement
2073     src => $HTMLURIAttrChecker,
2074     usemap => $HTMLUsemapAttrChecker,
2075     ismap => sub {
2076 wakaba 1.40 my ($self, $attr, $parent_item) = @_;
2077     if (not $self->{flag}->{in_a_href}) {
2078 wakaba 1.15 $self->{onerror}->(node => $attr,
2079     type => 'attribute not allowed:ismap');
2080 wakaba 1.1 }
2081 wakaba 1.40 $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_item);
2082 wakaba 1.1 },
2083     ## TODO: height
2084     ## TODO: width
2085 wakaba 1.40 })->($self, $item);
2086     unless ($item->{node}->has_attribute_ns (undef, 'alt')) {
2087     $self->{onerror}->(node => $item->{node},
2088 wakaba 1.37 type => 'attribute missing:alt',
2089     level => $self->{should_level});
2090 wakaba 1.1 }
2091 wakaba 1.40 unless ($item->{node}->has_attribute_ns (undef, 'src')) {
2092     $self->{onerror}->(node => $item->{node},
2093     type => 'attribute missing:src');
2094 wakaba 1.1 }
2095     },
2096     };
2097    
2098     $Element->{$HTML_NS}->{iframe} = {
2099 wakaba 1.40 %HTMLTextChecker,
2100     check_attrs => $GetHTMLAttrsChecker->({
2101 wakaba 1.1 src => $HTMLURIAttrChecker,
2102     }),
2103 wakaba 1.40 };
2104    
2105 wakaba 1.1 $Element->{$HTML_NS}->{embed} = {
2106 wakaba 1.40 %HTMLEmptyChecker,
2107     check_attrs => sub {
2108     my ($self, $item, $element_state) = @_;
2109 wakaba 1.1 my $has_src;
2110 wakaba 1.40 for my $attr (@{$item->{node}->attributes}) {
2111 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
2112     $attr_ns = '' unless defined $attr_ns;
2113     my $attr_ln = $attr->manakai_local_name;
2114     my $checker;
2115     if ($attr_ns eq '') {
2116     if ($attr_ln eq 'src') {
2117     $checker = $HTMLURIAttrChecker;
2118     $has_src = 1;
2119     } elsif ($attr_ln eq 'type') {
2120     $checker = $HTMLIMTAttrChecker;
2121     } else {
2122     ## TODO: height
2123     ## TODO: width
2124     $checker = $HTMLAttrChecker->{$attr_ln}
2125     || sub { }; ## NOTE: Any local attribute is ok.
2126     }
2127     }
2128     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2129     || $AttrChecker->{$attr_ns}->{''};
2130     if ($checker) {
2131     $checker->($self, $attr);
2132     } else {
2133     $self->{onerror}->(node => $attr, level => 'unsupported',
2134     type => 'attribute');
2135     ## ISSUE: No comformance createria for global attributes in the spec
2136     }
2137     }
2138    
2139     unless ($has_src) {
2140 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2141 wakaba 1.1 type => 'attribute missing:src');
2142     }
2143     },
2144     };
2145    
2146     $Element->{$HTML_NS}->{object} = {
2147 wakaba 1.40 %HTMLTransparentChecker,
2148     check_attrs => sub {
2149     my ($self, $item, $element_state) = @_;
2150 wakaba 1.1 $GetHTMLAttrsChecker->({
2151     data => $HTMLURIAttrChecker,
2152     type => $HTMLIMTAttrChecker,
2153     usemap => $HTMLUsemapAttrChecker,
2154     ## TODO: width
2155     ## TODO: height
2156 wakaba 1.40 })->($self, $item);
2157     unless ($item->{node}->has_attribute_ns (undef, 'data')) {
2158     unless ($item->{node}->has_attribute_ns (undef, 'type')) {
2159     $self->{onerror}->(node => $item->{node},
2160 wakaba 1.1 type => 'attribute missing:data|type');
2161     }
2162     }
2163     },
2164 wakaba 1.40
2165     ## TODO: reimplement
2166 wakaba 1.25 checker => sub {
2167 wakaba 1.39 ## NOTE: param*, transparent (Prose)
2168    
2169 wakaba 1.25 my ($self, $todo) = @_;
2170 wakaba 1.39 my $el = $todo->{node};
2171     my $new_todos = [];
2172     my @nodes = (@{$el->child_nodes});
2173    
2174     $todo->{flag}->{has_descendant}->{significant} = 0;
2175    
2176     my $has_non_param;
2177     my $has_non_style;
2178     while (@nodes) {
2179     my $node = shift @nodes;
2180     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
2181    
2182     my $nt = $node->node_type;
2183     if ($nt == 1) {
2184     my $node_ns = $node->namespace_uri;
2185     $node_ns = '' unless defined $node_ns;
2186     my $node_ln = $node->manakai_local_name;
2187     if ($self->{minuses}->{$node_ns}->{$node_ln}) {
2188     $has_non_param = 1;
2189     $self->{onerror}->(node => $node,
2190     type => 'element not allowed:minus',
2191     level => $self->{must_level});
2192     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'param') {
2193     if ($has_non_param) {
2194     $self->{onerror}->(node => $node,
2195     type => 'element not allowed:prose',
2196     level => $self->{must_level});
2197     }
2198     } elsif ($node_ns eq $HTML_NS and $node_ln eq 'style') {
2199     $has_non_param = 1;
2200     if ($has_non_style or
2201     not $node->has_attribute_ns (undef, 'scoped')) {
2202     $self->{onerror}->(node => $node,
2203     type => 'element not allowed:prose style',
2204     level => $self->{must_level});
2205     }
2206     } elsif ($HTMLProseContent->{$node_ns}->{$node_ln}) {
2207     $has_non_style = 1;
2208     $has_non_param = 1;
2209     } elsif ($self->{pluses}->{$node_ns}->{$node_ln}) {
2210     #
2211     } else {
2212     $has_non_style = 1;
2213     $has_non_param = 1;
2214     $self->{onerror}->(node => $node,
2215     type => 'element not allowed:prose',
2216     level => $self->{must_level})
2217     }
2218    
2219     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
2220     unshift @nodes, @$sib;
2221     push @$new_todos, @$ch;
2222     } elsif ($nt == 3 or $nt == 4) {
2223     if ($node->data =~ /[^\x09-\x0D\x20]/) {
2224     $has_non_style = 1;
2225     $has_non_param = 1;
2226     $todo->{flag}->{has_descendant}->{significant} = 1;
2227     }
2228     } elsif ($nt == 5) {
2229     unshift @nodes, @{$node->child_nodes};
2230     }
2231     }
2232    
2233     push @$new_todos, {
2234     type => 'descendant', node => $todo->{node}, flag => $todo->{flag},
2235     old_values => {significant => 1}, # |object| itself
2236 wakaba 1.40 # errors => $HTMLSignificantContentErrors,
2237 wakaba 1.39 };
2238    
2239     return ($new_todos);
2240 wakaba 1.25 },
2241 wakaba 1.8 ## TODO: Tests for <nest/> in <object/>
2242 wakaba 1.1 };
2243    
2244     $Element->{$HTML_NS}->{param} = {
2245 wakaba 1.40 %HTMLEmptyChecker,
2246     check_attrs => sub {
2247     my ($self, $item, $element_state) = @_;
2248 wakaba 1.1 $GetHTMLAttrsChecker->({
2249     name => sub { },
2250     value => sub { },
2251 wakaba 1.40 })->($self, $item);
2252     unless ($item->{node}->has_attribute_ns (undef, 'name')) {
2253     $self->{onerror}->(node => $item->{node},
2254 wakaba 1.1 type => 'attribute missing:name');
2255     }
2256 wakaba 1.40 unless ($item->{node}->has_attribute_ns (undef, 'value')) {
2257     $self->{onerror}->(node => $item->{node},
2258 wakaba 1.1 type => 'attribute missing:value');
2259     }
2260     },
2261     };
2262    
2263     $Element->{$HTML_NS}->{video} = {
2264 wakaba 1.40 %HTMLTransparentChecker,
2265     check_attrs => $GetHTMLAttrsChecker->({
2266 wakaba 1.1 src => $HTMLURIAttrChecker,
2267     ## TODO: start, loopstart, loopend, end
2268     ## ISSUE: they MUST be "value time offset"s. Value?
2269 wakaba 1.11 ## ISSUE: playcount has no conformance creteria
2270 wakaba 1.1 autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
2271     controls => $GetHTMLBooleanAttrChecker->('controls'),
2272 wakaba 1.11 poster => $HTMLURIAttrChecker, ## TODO: not for audio!
2273     ## TODO: width, height (not for audio!)
2274 wakaba 1.1 }),
2275 wakaba 1.40
2276     ## TODO: reimplement
2277 wakaba 1.1 checker => sub {
2278     my ($self, $todo) = @_;
2279 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
2280 wakaba 1.1
2281 wakaba 1.29 ## TODO:
2282 wakaba 1.1 if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2283 wakaba 1.40 # return $HTMLProseContentChecker->($self, $todo);
2284 wakaba 1.1 } else {
2285 wakaba 1.40 # return $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'source')
2286     # ->($self, $todo);
2287 wakaba 1.1 }
2288     },
2289     };
2290    
2291     $Element->{$HTML_NS}->{audio} = {
2292 wakaba 1.40 %{$Element->{$HTML_NS}->{video}},
2293     ## TODO: Is there audio-only attribute?
2294 wakaba 1.1 };
2295    
2296     $Element->{$HTML_NS}->{source} = {
2297 wakaba 1.40 %HTMLEmptyChecker,
2298     check_attrs => sub {
2299     my ($self, $item, $element_state) = @_;
2300 wakaba 1.1 $GetHTMLAttrsChecker->({
2301     src => $HTMLURIAttrChecker,
2302     type => $HTMLIMTAttrChecker,
2303     media => $HTMLMQAttrChecker,
2304 wakaba 1.40 })->($self, $item, $element_state);
2305     unless ($item->{node}->has_attribute_ns (undef, 'src')) {
2306     $self->{onerror}->(node => $item->{node},
2307 wakaba 1.1 type => 'attribute missing:src');
2308     }
2309     },
2310     };
2311    
2312     $Element->{$HTML_NS}->{canvas} = {
2313 wakaba 1.40 %HTMLTransparentChecker,
2314     check_attrs => $GetHTMLAttrsChecker->({
2315 wakaba 1.1 height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2316     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2317     }),
2318     };
2319    
2320     $Element->{$HTML_NS}->{map} = {
2321 wakaba 1.40 %HTMLProseContentChecker,
2322     check_attrs => sub {
2323     my ($self, $item, $element_state) = @_;
2324 wakaba 1.4 my $has_id;
2325     $GetHTMLAttrsChecker->({
2326     id => sub {
2327     ## NOTE: same as global |id=""|, with |$self->{map}| registeration
2328     my ($self, $attr) = @_;
2329     my $value = $attr->value;
2330     if (length $value > 0) {
2331     if ($self->{id}->{$value}) {
2332     $self->{onerror}->(node => $attr, type => 'duplicate ID');
2333     push @{$self->{id}->{$value}}, $attr;
2334     } else {
2335     $self->{id}->{$value} = [$attr];
2336     }
2337 wakaba 1.1 } else {
2338 wakaba 1.4 ## NOTE: MUST contain at least one character
2339     $self->{onerror}->(node => $attr, type => 'empty attribute value');
2340 wakaba 1.1 }
2341 wakaba 1.4 if ($value =~ /[\x09-\x0D\x20]/) {
2342     $self->{onerror}->(node => $attr, type => 'space in ID');
2343     }
2344     $self->{map}->{$value} ||= $attr;
2345     $has_id = 1;
2346     },
2347 wakaba 1.40 })->($self, $item, $element_state);
2348     $self->{onerror}->(node => $item->{node}, type => 'attribute missing:id')
2349 wakaba 1.4 unless $has_id;
2350     },
2351 wakaba 1.1 };
2352    
2353     $Element->{$HTML_NS}->{area} = {
2354 wakaba 1.40 %HTMLEmptyChecker,
2355     check_attrs => sub {
2356     my ($self, $item, $element_state) = @_;
2357 wakaba 1.1 my %attr;
2358     my $coords;
2359 wakaba 1.40 for my $attr (@{$item->{node}->attributes}) {
2360 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
2361     $attr_ns = '' unless defined $attr_ns;
2362     my $attr_ln = $attr->manakai_local_name;
2363     my $checker;
2364     if ($attr_ns eq '') {
2365     $checker = {
2366     alt => sub { },
2367     ## NOTE: |alt| value has no conformance creteria.
2368     shape => $GetHTMLEnumeratedAttrChecker->({
2369     circ => -1, circle => 1,
2370     default => 1,
2371     poly => 1, polygon => -1,
2372     rect => 1, rectangle => -1,
2373     }),
2374     coords => sub {
2375     my ($self, $attr) = @_;
2376     my $value = $attr->value;
2377     if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
2378     $coords = [split /,/, $value];
2379     } else {
2380     $self->{onerror}->(node => $attr,
2381     type => 'coords:syntax error');
2382     }
2383     },
2384     target => $HTMLTargetAttrChecker,
2385     href => $HTMLURIAttrChecker,
2386     ping => $HTMLSpaceURIsAttrChecker,
2387 wakaba 1.40 rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
2388 wakaba 1.1 media => $HTMLMQAttrChecker,
2389     hreflang => $HTMLLanguageTagAttrChecker,
2390     type => $HTMLIMTAttrChecker,
2391     }->{$attr_ln};
2392     if ($checker) {
2393     $attr{$attr_ln} = $attr;
2394     } else {
2395     $checker = $HTMLAttrChecker->{$attr_ln};
2396     }
2397     }
2398     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2399     || $AttrChecker->{$attr_ns}->{''};
2400     if ($checker) {
2401     $checker->($self, $attr) if ref $checker;
2402     } else {
2403     $self->{onerror}->(node => $attr, level => 'unsupported',
2404     type => 'attribute');
2405     ## ISSUE: No comformance createria for unknown attributes in the spec
2406     }
2407     }
2408    
2409     if (defined $attr{href}) {
2410 wakaba 1.4 $self->{has_hyperlink_element} = 1;
2411 wakaba 1.1 unless (defined $attr{alt}) {
2412 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2413 wakaba 1.1 type => 'attribute missing:alt');
2414     }
2415     } else {
2416     for (qw/target ping rel media hreflang type alt/) {
2417     if (defined $attr{$_}) {
2418     $self->{onerror}->(node => $attr{$_},
2419     type => 'attribute not allowed');
2420     }
2421     }
2422     }
2423    
2424     my $shape = 'rectangle';
2425     if (defined $attr{shape}) {
2426     $shape = {
2427     circ => 'circle', circle => 'circle',
2428     default => 'default',
2429     poly => 'polygon', polygon => 'polygon',
2430     rect => 'rectangle', rectangle => 'rectangle',
2431     }->{lc $attr{shape}->value} || 'rectangle';
2432     ## TODO: ASCII lowercase?
2433     }
2434    
2435     if ($shape eq 'circle') {
2436     if (defined $attr{coords}) {
2437     if (defined $coords) {
2438     if (@$coords == 3) {
2439     if ($coords->[2] < 0) {
2440     $self->{onerror}->(node => $attr{coords},
2441     type => 'coords:out of range:2');
2442     }
2443     } else {
2444     $self->{onerror}->(node => $attr{coords},
2445     type => 'coords:number:3:'.@$coords);
2446     }
2447     } else {
2448     ## NOTE: A syntax error has been reported.
2449     }
2450     } else {
2451 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2452 wakaba 1.1 type => 'attribute missing:coords');
2453     }
2454     } elsif ($shape eq 'default') {
2455     if (defined $attr{coords}) {
2456     $self->{onerror}->(node => $attr{coords},
2457     type => 'attribute not allowed');
2458     }
2459     } elsif ($shape eq 'polygon') {
2460     if (defined $attr{coords}) {
2461     if (defined $coords) {
2462     if (@$coords >= 6) {
2463     unless (@$coords % 2 == 0) {
2464     $self->{onerror}->(node => $attr{coords},
2465     type => 'coords:number:even:'.@$coords);
2466     }
2467     } else {
2468     $self->{onerror}->(node => $attr{coords},
2469     type => 'coords:number:>=6:'.@$coords);
2470     }
2471     } else {
2472     ## NOTE: A syntax error has been reported.
2473     }
2474     } else {
2475 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2476 wakaba 1.1 type => 'attribute missing:coords');
2477     }
2478     } elsif ($shape eq 'rectangle') {
2479     if (defined $attr{coords}) {
2480     if (defined $coords) {
2481     if (@$coords == 4) {
2482     unless ($coords->[0] < $coords->[2]) {
2483     $self->{onerror}->(node => $attr{coords},
2484     type => 'coords:out of range:0');
2485     }
2486     unless ($coords->[1] < $coords->[3]) {
2487     $self->{onerror}->(node => $attr{coords},
2488     type => 'coords:out of range:1');
2489     }
2490     } else {
2491     $self->{onerror}->(node => $attr{coords},
2492     type => 'coords:number:4:'.@$coords);
2493     }
2494     } else {
2495     ## NOTE: A syntax error has been reported.
2496     }
2497     } else {
2498 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2499 wakaba 1.1 type => 'attribute missing:coords');
2500     }
2501     }
2502     },
2503     };
2504     ## TODO: only in map
2505    
2506     $Element->{$HTML_NS}->{table} = {
2507 wakaba 1.40 %HTMLChecker,
2508     check_start => sub {
2509     my ($self, $item, $element_state) = @_;
2510     $element_state->{phase} = 'before caption';
2511     },
2512     check_child_element => sub {
2513     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2514     $child_is_transparent, $element_state) = @_;
2515     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
2516     $self->{onerror}->(node => $child_el,
2517     type => 'element not allowed:minus',
2518     level => $self->{must_level});
2519     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2520     #
2521     } elsif ($element_state->{phase} eq 'in tbodys') {
2522     if ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
2523     #$element_state->{phase} = 'in tbodys';
2524     } elsif (not $element_state->{has_tfoot} and
2525     $child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
2526     $element_state->{phase} = 'after tfoot';
2527     $element_state->{has_tfoot} = 1;
2528     } else {
2529     $self->{onerror}->(node => $child_el, type => 'element not allowed');
2530     }
2531     } elsif ($element_state->{phase} eq 'in trs') {
2532     if ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
2533     #$element_state->{phase} = 'in trs';
2534     } elsif (not $element_state->{has_tfoot} and
2535     $child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
2536     $element_state->{phase} = 'after tfoot';
2537     $element_state->{has_tfoot} = 1;
2538     } else {
2539     $self->{onerror}->(node => $child_el, type => 'element not allowed');
2540     }
2541     } elsif ($element_state->{phase} eq 'after thead') {
2542     if ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
2543     $element_state->{phase} = 'in tbodys';
2544     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
2545     $element_state->{phase} = 'in trs';
2546     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
2547     $element_state->{phase} = 'in tbodys';
2548     $element_state->{has_tfoot} = 1;
2549     } else {
2550     $self->{onerror}->(node => $child_el, type => 'element not allowed');
2551     }
2552     } elsif ($element_state->{phase} eq 'in colgroup') {
2553     if ($child_nsuri eq $HTML_NS and $child_ln eq 'colgroup') {
2554     $element_state->{phase} = 'in colgroup';
2555     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'thead') {
2556     $element_state->{phase} = 'after thead';
2557     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
2558     $element_state->{phase} = 'in tbodys';
2559     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
2560     $element_state->{phase} = 'in trs';
2561     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
2562     $element_state->{phase} = 'in tbodys';
2563     $element_state->{has_tfoot} = 1;
2564     } else {
2565     $self->{onerror}->(node => $child_el, type => 'element not allowed');
2566     }
2567     } elsif ($element_state->{phase} eq 'before caption') {
2568     if ($child_nsuri eq $HTML_NS and $child_ln eq 'caption') {
2569     $element_state->{phase} = 'in colgroup';
2570     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'colgroup') {
2571     $element_state->{phase} = 'in colgroup';
2572     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'thead') {
2573     $element_state->{phase} = 'after thead';
2574     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
2575     $element_state->{phase} = 'in tbodys';
2576     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
2577     $element_state->{phase} = 'in trs';
2578     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
2579     $element_state->{phase} = 'in tbodys';
2580     $element_state->{has_tfoot} = 1;
2581     } else {
2582     $self->{onerror}->(node => $child_el, type => 'element not allowed');
2583     }
2584     } elsif ($element_state->{phase} eq 'after tfoot') {
2585     $self->{onerror}->(node => $child_el, type => 'element not allowed');
2586     } else {
2587     die "check_child_element: Bad |table| phase: $element_state->{phase}";
2588     }
2589     },
2590     check_child_text => sub {
2591     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2592     if ($has_significant) {
2593     $self->{onerror}->(node => $child_node, type => 'character not allowed');
2594 wakaba 1.1 }
2595 wakaba 1.40 },
2596     check_end => sub {
2597     my ($self, $item, $element_state) = @_;
2598 wakaba 1.1
2599     ## Table model errors
2600     require Whatpm::HTMLTable;
2601 wakaba 1.40 Whatpm::HTMLTable->form_table ($item->{node}, sub {
2602 wakaba 1.1 my %opt = @_;
2603     $self->{onerror}->(type => 'table:'.$opt{type}, node => $opt{node});
2604     });
2605 wakaba 1.40 push @{$self->{return}->{table}}, $item->{node};
2606 wakaba 1.1
2607 wakaba 1.40 $HTMLChecker{check_end}->(@_);
2608 wakaba 1.1 },
2609     };
2610    
2611     $Element->{$HTML_NS}->{caption} = {
2612 wakaba 1.40 %HTMLPhrasingContentChecker,
2613 wakaba 1.1 };
2614    
2615     $Element->{$HTML_NS}->{colgroup} = {
2616 wakaba 1.40 %HTMLEmptyChecker,
2617     check_attrs => $GetHTMLAttrsChecker->({
2618 wakaba 1.1 span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2619     ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
2620     ## TODO: "attribute not supported" if |col|.
2621     ## ISSUE: MUST NOT if any |col|?
2622     ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
2623     }),
2624 wakaba 1.40 check_child_element => sub {
2625     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2626     $child_is_transparent, $element_state) = @_;
2627     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
2628     $self->{onerror}->(node => $child_el,
2629     type => 'element not allowed:minus',
2630     level => $self->{must_level});
2631     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2632     #
2633     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'col') {
2634     #
2635     } else {
2636     $self->{onerror}->(node => $child_el, type => 'element not allowed');
2637     }
2638     },
2639     check_child_text => sub {
2640     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2641     if ($has_significant) {
2642     $self->{onerror}->(node => $child_node, type => 'character not allowed');
2643 wakaba 1.1 }
2644     },
2645     };
2646    
2647     $Element->{$HTML_NS}->{col} = {
2648 wakaba 1.40 %HTMLEmptyChecker,
2649     check_attrs => $GetHTMLAttrsChecker->({
2650 wakaba 1.1 span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2651     }),
2652     };
2653    
2654     $Element->{$HTML_NS}->{tbody} = {
2655 wakaba 1.40 %HTMLChecker,
2656     check_child_element => sub {
2657     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2658     $child_is_transparent, $element_state) = @_;
2659     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
2660     $self->{onerror}->(node => $child_el,
2661     type => 'element not allowed:minus',
2662     level => $self->{must_level});
2663     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2664     #
2665     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
2666     $element_state->{has_tr} = 1;
2667     } else {
2668     $self->{onerror}->(node => $child_el, type => 'element not allowed');
2669     }
2670     },
2671     check_child_text => sub {
2672     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2673     if ($has_significant) {
2674     $self->{onerror}->(node => $child_node, type => 'character not allowed');
2675 wakaba 1.1 }
2676 wakaba 1.40 },
2677     check_end => sub {
2678     my ($self, $item, $element_state) = @_;
2679     unless ($element_state->{has_tr}) {
2680     $self->{onerror}->(node => $item->{node},
2681     type => 'child element missing:tr');
2682 wakaba 1.1 }
2683 wakaba 1.40
2684     $HTMLChecker{check_end}->(@_);
2685 wakaba 1.1 },
2686     };
2687    
2688     $Element->{$HTML_NS}->{thead} = {
2689 wakaba 1.40 %{$Element->{$HTML_NS}->{tbody}},
2690 wakaba 1.1 };
2691    
2692     $Element->{$HTML_NS}->{tfoot} = {
2693 wakaba 1.40 %{$Element->{$HTML_NS}->{tbody}},
2694 wakaba 1.1 };
2695    
2696     $Element->{$HTML_NS}->{tr} = {
2697 wakaba 1.40 %HTMLChecker,
2698     check_child_element => sub {
2699     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2700     $child_is_transparent, $element_state) = @_;
2701     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
2702     $self->{onerror}->(node => $child_el,
2703     type => 'element not allowed:minus',
2704     level => $self->{must_level});
2705     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2706     #
2707     } elsif ($child_nsuri eq $HTML_NS and
2708     ($child_ln eq 'td' or $child_ln eq 'th')) {
2709     $element_state->{has_cell} = 1;
2710     } else {
2711     $self->{onerror}->(node => $child_el, type => 'element not allowed');
2712     }
2713     },
2714     check_child_text => sub {
2715     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2716     if ($has_significant) {
2717     $self->{onerror}->(node => $child_node, type => 'character not allowed');
2718 wakaba 1.1 }
2719 wakaba 1.40 },
2720     check_end => sub {
2721     my ($self, $item, $element_state) = @_;
2722     unless ($element_state->{has_cell}) {
2723     $self->{onerror}->(node => $item->{node},
2724     type => 'child element missing:td|th');
2725 wakaba 1.1 }
2726 wakaba 1.40
2727     $HTMLChecker{check_end}->(@_);
2728 wakaba 1.1 },
2729     };
2730    
2731     $Element->{$HTML_NS}->{td} = {
2732 wakaba 1.40 %HTMLProseContentChecker,
2733     check_attrs => $GetHTMLAttrsChecker->({
2734 wakaba 1.1 colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2735     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2736     }),
2737     };
2738    
2739     $Element->{$HTML_NS}->{th} = {
2740 wakaba 1.40 %HTMLPhrasingContentChecker,
2741     check_attrs => $GetHTMLAttrsChecker->({
2742 wakaba 1.1 colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2743     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2744     scope => $GetHTMLEnumeratedAttrChecker
2745     ->({row => 1, col => 1, rowgroup => 1, colgroup => 1}),
2746     }),
2747     };
2748    
2749     ## TODO: forms
2750 wakaba 1.8 ## TODO: Tests for <nest/> in form elements
2751 wakaba 1.1
2752     $Element->{$HTML_NS}->{script} = {
2753 wakaba 1.40 %HTMLChecker,
2754     check_attrs => $GetHTMLAttrsChecker->({
2755 wakaba 1.1 src => $HTMLURIAttrChecker,
2756     defer => $GetHTMLBooleanAttrChecker->('defer'),
2757     async => $GetHTMLBooleanAttrChecker->('async'),
2758     type => $HTMLIMTAttrChecker,
2759 wakaba 1.9 }),
2760 wakaba 1.40 check_start => sub {
2761     my ($self, $item, $element_state) = @_;
2762 wakaba 1.1
2763 wakaba 1.40 if ($item->{node}->has_attribute_ns (undef, 'src')) {
2764     $element_state->{must_be_empty} = 1;
2765 wakaba 1.1 } else {
2766     ## NOTE: No content model conformance in HTML5 spec.
2767 wakaba 1.40 my $type = $item->{node}->get_attribute_ns (undef, 'type');
2768     my $language = $item->{node}->get_attribute_ns (undef, 'language');
2769 wakaba 1.1 if ((defined $type and $type eq '') or
2770     (defined $language and $language eq '')) {
2771     $type = 'text/javascript';
2772     } elsif (defined $type) {
2773     #
2774     } elsif (defined $language) {
2775     $type = 'text/' . $language;
2776     } else {
2777     $type = 'text/javascript';
2778     }
2779 wakaba 1.40 $element_state->{script_type} = $type; ## TODO: $type normalization
2780     }
2781     },
2782     check_child_element => sub {
2783     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2784     $child_is_transparent, $element_state) = @_;
2785     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
2786     $self->{onerror}->(node => $child_el,
2787     type => 'element not allowed:minus',
2788     level => $self->{must_level});
2789     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2790     #
2791     } else {
2792     if ($element_state->{must_be_empty}) {
2793     $self->{onerror}->(node => $child_el,
2794     type => 'element not allowed');
2795     }
2796     }
2797     },
2798     check_child_text => sub {
2799     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2800     if ($has_significant and
2801     $element_state->{must_be_empty}) {
2802     $self->{onerror}->(node => $child_node,
2803     type => 'character not allowed');
2804     }
2805     },
2806     check_end => sub {
2807     my ($self, $item, $element_state) = @_;
2808     unless ($element_state->{must_be_empty}) {
2809     $self->{onerror}->(node => $item->{node}, level => 'unsupported',
2810     type => 'script:'.$element_state->{script_type});
2811     ## TODO: text/javascript support
2812    
2813     $HTMLChecker{check_end}->(@_);
2814 wakaba 1.1 }
2815     },
2816     };
2817 wakaba 1.25 ## ISSUE: Significant check and text child node
2818 wakaba 1.1
2819     ## NOTE: When script is disabled.
2820     $Element->{$HTML_NS}->{noscript} = {
2821 wakaba 1.40 %HTMLTransparentChecker,
2822     check_start => sub {
2823     my ($self, $item, $element_state) = @_;
2824 wakaba 1.3
2825 wakaba 1.40 unless ($item->{node}->owner_document->manakai_is_html) {
2826     $self->{onerror}->(node => $item->{node}, type => 'in XML:noscript');
2827 wakaba 1.3 }
2828    
2829 wakaba 1.40 unless ($self->{flag}->{in_head}) {
2830     $self->_add_minus_elements ($element_state,
2831     {$HTML_NS => {noscript => 1}});
2832     }
2833 wakaba 1.3 },
2834 wakaba 1.40 check_child_element => sub {
2835     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2836     $child_is_transparent, $element_state) = @_;
2837     if ($self->{flag}->{in_head}) {
2838     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
2839     $self->{onerror}->(node => $child_el,
2840     type => 'element not allowed:minus',
2841     level => $self->{must_level});
2842     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2843     #
2844     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'link') {
2845     #
2846     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
2847     if ($child_el->has_attribute_ns (undef, 'scoped')) {
2848     $self->{onerror}->(node => $child_el,
2849     type => 'element not allowed:head noscript',
2850     level => $self->{must_level});
2851     }
2852     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'meta') {
2853     if ($child_el->has_attribute_ns (undef, 'charset')) {
2854     ## NOTE: Non-conforming. An error is raised by
2855     ## |meta|'s checker.
2856     } else {
2857     my $http_equiv_attr
2858     = $child_el->get_attribute_node_ns (undef, 'http-equiv');
2859     if ($http_equiv_attr) {
2860     ## TODO: case
2861     if (lc $http_equiv_attr->value eq 'content-type') {
2862     ## NOTE: Non-conforming. An error is raised by
2863     ## |meta|'s checker.
2864     } else {
2865 wakaba 1.3 #
2866     }
2867     } else {
2868 wakaba 1.40 $self->{onerror}->(node => $child_el,
2869 wakaba 1.34 type => 'element not allowed:head noscript',
2870     level => $self->{must_level});
2871 wakaba 1.3 }
2872     }
2873 wakaba 1.40 } else {
2874     $self->{onerror}->(node => $child_el,
2875     type => 'element not allowed:head noscript',
2876     level => $self->{must_level});
2877     }
2878     } else {
2879     $HTMLTransparentChecker{check_child_element}->(@_);
2880     }
2881     },
2882     check_child_text => sub {
2883     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2884     if ($self->{flag}->{in_head}) {
2885     if ($has_significant) {
2886     $self->{onerror}->(node => $child_node,
2887     type => 'character not allowed');
2888 wakaba 1.3 }
2889     } else {
2890 wakaba 1.40 $HTMLTransparentChecker{check_child_text}->(@_);
2891     }
2892     },
2893     check_end => sub {
2894     my ($self, $item, $element_state) = @_;
2895     $self->_remove_minus_elements ($element_state);
2896     if ($self->{flag}->{in_head}) {
2897     $HTMLChecker{check_end}->(@_);
2898     } else {
2899     $HTMLPhrasingContentChecker{check_end}->(@_);
2900 wakaba 1.3 }
2901 wakaba 1.1 },
2902     };
2903 wakaba 1.3 ## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
2904 wakaba 1.1
2905     $Element->{$HTML_NS}->{'event-source'} = {
2906 wakaba 1.40 %HTMLEmptyChecker,
2907     check_attrs => $GetHTMLAttrsChecker->({
2908 wakaba 1.1 src => $HTMLURIAttrChecker,
2909     }),
2910     };
2911    
2912     $Element->{$HTML_NS}->{details} = {
2913 wakaba 1.40 %HTMLProseContentChecker,
2914     check_attrs => $GetHTMLAttrsChecker->({
2915 wakaba 1.1 open => $GetHTMLBooleanAttrChecker->('open'),
2916     }),
2917     checker => sub {
2918     my ($self, $todo) = @_;
2919    
2920 wakaba 1.29 ## TODO:
2921 wakaba 1.40 # my ($sib, $ch)
2922     # = $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'legend')
2923     # ->($self, $todo);
2924     # return ($sib, $ch);
2925 wakaba 1.1 },
2926     };
2927    
2928     $Element->{$HTML_NS}->{datagrid} = {
2929 wakaba 1.40 %HTMLProseContentChecker,
2930     check_attrs => $GetHTMLAttrsChecker->({
2931 wakaba 1.1 disabled => $GetHTMLBooleanAttrChecker->('disabled'),
2932     multiple => $GetHTMLBooleanAttrChecker->('multiple'),
2933     }),
2934 wakaba 1.40 check_start => sub {
2935     my ($self, $item, $element_state) = @_;
2936 wakaba 1.1
2937 wakaba 1.40 $self->_add_minus_elements ($element_state,
2938     {$HTML_NS => {a => 1, datagrid => 1}});
2939     $element_state->{phase} = 'any';
2940     },
2941     ## Prose -(text* table Prose*) | table | select | datalist | Empty
2942     check_child_element => sub {
2943     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2944     $child_is_transparent, $element_state) = @_;
2945     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
2946     $self->{onerror}->(node => $child_el,
2947     type => 'element not allowed:minus',
2948     level => $self->{must_level});
2949     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2950     #
2951     } elsif ($element_state->{phase} eq 'prose') {
2952     if ($HTMLProseContent->{$child_nsuri}->{$child_ln}) {
2953     if ($element_state->{has_element} and
2954     $child_nsuri eq $HTML_NS and
2955     $child_ln eq 'table') {
2956     $self->{onerror}->(node => $child_el,
2957     type => 'element not allowed');
2958     } else {
2959 wakaba 1.8 #
2960 wakaba 1.1 }
2961 wakaba 1.40 } else {
2962     $self->{onerror}->(node => $child_el,
2963     type => 'element not allowed');
2964     }
2965     } elsif ($element_state->{phase} eq 'any') {
2966     if ($child_nsuri eq $HTML_NS and
2967     {table => 1, select => 1, datalist => 1}->{$child_ln}) {
2968     $element_state->{phase} = 'none';
2969     } elsif ($HTMLProseContent->{$child_nsuri}->{$child_ln}) {
2970     $element_state->{has_element} = 1;
2971     $element_state->{phase} = 'prose';
2972     } else {
2973     $self->{onerror}->(node => $child_el,
2974     type => 'element not allowed');
2975     }
2976     } elsif ($element_state->{phase} eq 'none') {
2977     $self->{onerror}->(node => $child_el,
2978     type => 'element not allowed');
2979     } else {
2980     die "check_child_element: Bad |datagrid| phase: $element_state->{phase}";
2981     }
2982     },
2983     check_child_text => sub {
2984     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2985     if ($has_significant) {
2986     if ($element_state->{phase} eq 'prose') {
2987     #
2988     } elsif ($element_state->{phase} eq 'any') {
2989     $element_state->{phase} = 'prose';
2990     } else {
2991     $self->{onerror}->(node => $child_node,
2992     type => 'character not allowed');
2993 wakaba 1.1 }
2994     }
2995 wakaba 1.40 },
2996     check_end => sub {
2997     my ($self, $item, $element_state) = @_;
2998     $self->_remove_minus_elements ($element_state);
2999 wakaba 1.1
3000 wakaba 1.40 if ($element_state->{phase} eq 'none') {
3001     $HTMLChecker{check_end}->(@_);
3002     } else {
3003     $HTMLPhrasingContentChecker{check_end}->(@_);
3004     }
3005     },
3006 wakaba 1.29 ## ISSUE: "xxx<table/>" is disallowed; "<select/>aaa" and "<datalist/>aa"
3007     ## are not disallowed (assuming that form control contents are also
3008     ## prose content).
3009 wakaba 1.1 };
3010    
3011     $Element->{$HTML_NS}->{command} = {
3012 wakaba 1.40 %HTMLEmptyChecker,
3013     check_attrs => $GetHTMLAttrsChecker->({
3014 wakaba 1.1 checked => $GetHTMLBooleanAttrChecker->('checked'),
3015     default => $GetHTMLBooleanAttrChecker->('default'),
3016     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
3017     hidden => $GetHTMLBooleanAttrChecker->('hidden'),
3018     icon => $HTMLURIAttrChecker,
3019     label => sub { }, ## NOTE: No conformance creteria
3020     radiogroup => sub { }, ## NOTE: No conformance creteria
3021     ## NOTE: |title| has special semantics, but no syntactical difference
3022     type => sub {
3023     my ($self, $attr) = @_;
3024     my $value = $attr->value;
3025     unless ({command => 1, checkbox => 1, radio => 1}->{$value}) {
3026     $self->{onerror}->(node => $attr, type => 'attribute value not allowed');
3027     }
3028     },
3029     }),
3030     };
3031    
3032     $Element->{$HTML_NS}->{menu} = {
3033 wakaba 1.40 %HTMLPhrasingContentChecker,
3034     check_attrs => $GetHTMLAttrsChecker->({
3035 wakaba 1.1 autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
3036     id => sub {
3037     ## NOTE: same as global |id=""|, with |$self->{menu}| registeration
3038     my ($self, $attr) = @_;
3039     my $value = $attr->value;
3040     if (length $value > 0) {
3041     if ($self->{id}->{$value}) {
3042     $self->{onerror}->(node => $attr, type => 'duplicate ID');
3043     push @{$self->{id}->{$value}}, $attr;
3044     } else {
3045     $self->{id}->{$value} = [$attr];
3046     }
3047     } else {
3048     ## NOTE: MUST contain at least one character
3049     $self->{onerror}->(node => $attr, type => 'empty attribute value');
3050     }
3051     if ($value =~ /[\x09-\x0D\x20]/) {
3052     $self->{onerror}->(node => $attr, type => 'space in ID');
3053     }
3054     $self->{menu}->{$value} ||= $attr;
3055     ## ISSUE: <menu id=""><p contextmenu=""> match?
3056     },
3057     label => sub { }, ## NOTE: No conformance creteria
3058     type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
3059     }),
3060 wakaba 1.40 check_start => sub {
3061     my ($self, $item, $element_state) = @_;
3062     $element_state->{phase} = 'li or phrasing';
3063     $element_state->{in_menu_original} = $self->{flag}->{in_menu};
3064     $self->{flag}->{in_menu} = 1;
3065     },
3066     check_child_element => sub {
3067     my ($self, $item, $child_el, $child_nsuri, $child_ln,
3068     $child_is_transparent, $element_state) = @_;
3069     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
3070     $self->{onerror}->(node => $child_el,
3071     type => 'element not allowed:minus',
3072     level => $self->{must_level});
3073     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
3074     #
3075     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'li') {
3076     if ($element_state->{phase} eq 'li') {
3077     #
3078     } elsif ($element_state->{phase} eq 'li or phrasing') {
3079     $element_state->{phase} = 'li';
3080     } else {
3081     $self->{onerror}->(node => $child_el, type => 'element not allowed');
3082     }
3083     } elsif ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3084     if ($element_state->{phase} eq 'phrasing') {
3085     #
3086     } elsif ($element_state->{phase} eq 'li or phrasing') {
3087     $element_state->{phase} = 'phrasing';
3088     } else {
3089     $self->{onerror}->(node => $child_el, type => 'element not allowed');
3090     }
3091     } else {
3092     $self->{onerror}->(node => $child_el, type => 'element not allowed');
3093     }
3094     },
3095     check_child_text => sub {
3096     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
3097     if ($has_significant) {
3098     if ($element_state->{phase} eq 'phrasing') {
3099     #
3100     } elsif ($element_state->{phase} eq 'li or phrasing') {
3101     $element_state->{phase} = 'phrasing';
3102     } else {
3103     $self->{onerror}->(node => $child_node,
3104     type => 'character not allowed');
3105 wakaba 1.1 }
3106     }
3107 wakaba 1.40 },
3108     check_end => sub {
3109     my ($self, $item, $element_state) = @_;
3110     delete $self->{flag}->{in_menu} unless $element_state->{in_menu_original};
3111    
3112     if ($element_state->{phase} eq 'li') {
3113     $HTMLChecker{check_end}->(@_);
3114     } else { # 'phrasing' or 'li or phrasing'
3115     $HTMLPhrasingContentChecker{check_end}->(@_);
3116 wakaba 1.1 }
3117     },
3118 wakaba 1.8 };
3119    
3120     $Element->{$HTML_NS}->{datatemplate} = {
3121 wakaba 1.40 %HTMLChecker,
3122     check_child_element => sub {
3123     my ($self, $item, $child_el, $child_nsuri, $child_ln,
3124     $child_is_transparent, $element_state) = @_;
3125     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
3126     $self->{onerror}->(node => $child_el,
3127     type => 'element not allowed:minus',
3128     level => $self->{must_level});
3129     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
3130     #
3131     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'rule') {
3132     #
3133     } else {
3134     $self->{onerror}->(node => $child_el,
3135     type => 'element not allowed:datatemplate');
3136     }
3137     },
3138     check_child_text => sub {
3139     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
3140     if ($has_significant) {
3141     $self->{onerror}->(node => $child_node, type => 'character not allowed');
3142 wakaba 1.8 }
3143     },
3144     is_xml_root => 1,
3145     };
3146    
3147     $Element->{$HTML_NS}->{rule} = {
3148 wakaba 1.40 %HTMLChecker,
3149     check_attrs => $GetHTMLAttrsChecker->({
3150 wakaba 1.23 condition => $HTMLSelectorsAttrChecker,
3151 wakaba 1.18 mode => $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker,
3152 wakaba 1.8 }),
3153 wakaba 1.40 check_start => sub {
3154     my ($self, $item, $element_state) = @_;
3155     $self->_add_plus_elements ($element_state, {$HTML_NS => {nest => 1}});
3156     },
3157     check_child_element => sub { },
3158     check_child_text => sub { },
3159     check_end => sub {
3160     my ($self, $item, $element_state) = @_;
3161     $self->_remove_plus_elements ($element_state);
3162     $HTMLChecker{check_end}->(@_);
3163 wakaba 1.8 },
3164     ## NOTE: "MAY be anything that, when the parent |datatemplate|
3165     ## is applied to some conforming data, results in a conforming DOM tree.":
3166     ## We don't check against this.
3167     };
3168    
3169     $Element->{$HTML_NS}->{nest} = {
3170 wakaba 1.40 %HTMLEmptyChecker,
3171     check_attrs => $GetHTMLAttrsChecker->({
3172 wakaba 1.23 filter => $HTMLSelectorsAttrChecker,
3173     mode => sub {
3174     my ($self, $attr) = @_;
3175     my $value = $attr->value;
3176     if ($value !~ /\A[^\x09-\x0D\x20]+\z/) {
3177     $self->{onerror}->(node => $attr, type => 'mode:syntax error');
3178     }
3179     },
3180 wakaba 1.8 }),
3181 wakaba 1.1 };
3182    
3183     $Element->{$HTML_NS}->{legend} = {
3184 wakaba 1.40 %HTMLPhrasingContentChecker,
3185 wakaba 1.1 };
3186    
3187     $Element->{$HTML_NS}->{div} = {
3188 wakaba 1.40 %HTMLProseContentChecker,
3189 wakaba 1.1 };
3190    
3191     $Element->{$HTML_NS}->{font} = {
3192 wakaba 1.40 %HTMLTransparentChecker,
3193     check_attrs => $GetHTMLAttrsChecker->({}), ## TODO
3194 wakaba 1.1 };
3195    
3196     $Whatpm::ContentChecker::Namespace->{$HTML_NS}->{loaded} = 1;
3197    
3198     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24