/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.42 - (hide annotations) (download)
Sat Feb 23 14:37:09 2008 UTC (17 years, 4 months ago) by wakaba
Branch: MAIN
Changes since 1.41: +66 -14 lines
++ whatpm/t/ChangeLog	23 Feb 2008 14:37:03 -0000
	* content-model-1.dat: Some test results are fixed.
	New tests for |video| and |audio| are added.

2008-02-23  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	23 Feb 2008 14:35:33 -0000
	* ContentChecker.pm (check_element): Support for |video|
	and |audio| as semi-transparent elements.

2008-02-23  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ContentChecker/ChangeLog	23 Feb 2008 14:36:23 -0000
	* HTML.pm (object check_end): Don't check significant content
	if the element is used as a transparent element.
	(video check): Reimplemented.

2008-02-23  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
6    
7 wakaba 1.29 ## December 2007 HTML5 Classification
8    
9     my $HTMLMetadataContent = {
10     $HTML_NS => {
11     title => 1, base => 1, link => 1, style => 1, script => 1, noscript => 1,
12     'event-source' => 1, command => 1, datatemplate => 1,
13     ## NOTE: A |meta| with no |name| element is not allowed as
14     ## a metadata content other than |head| element.
15     meta => 1,
16     },
17     ## NOTE: RDF is mentioned in the HTML5 spec.
18     ## TODO: Other RDF elements?
19     q<http://www.w3.org/1999/02/22-rdf-syntax-ns#> => {RDF => 1},
20     };
21    
22     my $HTMLProseContent = {
23     $HTML_NS => {
24     section => 1, nav => 1, article => 1, blockquote => 1, aside => 1,
25     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1, header => 1,
26     footer => 1, address => 1, p => 1, hr => 1, dialog => 1, pre => 1,
27     ol => 1, ul => 1, dl => 1, figure => 1, map => 1, table => 1,
28     details => 1, ## ISSUE: "Prose element" in spec.
29     datagrid => 1, ## ISSUE: "Prose element" in spec.
30     datatemplate => 1,
31     div => 1, ## ISSUE: No category in spec.
32     ## NOTE: |style| is only allowed if |scoped| attribute is specified.
33     ## Additionally, it must be before any other element or
34     ## non-inter-element-whitespace text node.
35     style => 1,
36    
37 wakaba 1.38 br => 1, q => 1, cite => 1, em => 1, strong => 1, small => 1, mark => 1,
38 wakaba 1.29 dfn => 1, abbr => 1, time => 1, progress => 1, meter => 1, code => 1,
39     var => 1, samp => 1, kbd => 1, sub => 1, sup => 1, span => 1, i => 1,
40     b => 1, bdo => 1, script => 1, noscript => 1, 'event-source' => 1,
41     command => 1, font => 1,
42     a => 1,
43     datagrid => 1, ## ISSUE: "Interactive element" in the spec.
44     ## NOTE: |area| is allowed only as a descendant of |map|.
45     area => 1,
46    
47     ins => 1, del => 1,
48    
49     ## NOTE: If there is a |menu| ancestor, phrasing. Otherwise, prose.
50     menu => 1,
51    
52     img => 1, iframe => 1, embed => 1, object => 1, video => 1, audio => 1,
53     canvas => 1,
54     },
55    
56     ## NOTE: Embedded
57     q<http://www.w3.org/1998/Math/MathML> => {math => 1},
58     q<http://www.w3.org/2000/svg> => {svg => 1},
59     };
60    
61     my $HTMLSectioningContent = {
62     $HTML_NS => {
63     section => 1, nav => 1, article => 1, blockquote => 1, aside => 1,
64     ## NOTE: |body| is only allowed in |html| element.
65     body => 1,
66     },
67     };
68    
69     my $HTMLHeadingContent = {
70     $HTML_NS => {
71     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1, header => 1,
72     },
73     };
74    
75     my $HTMLPhrasingContent = {
76     ## NOTE: All phrasing content is also prose content.
77     $HTML_NS => {
78 wakaba 1.38 br => 1, q => 1, cite => 1, em => 1, strong => 1, small => 1, mark => 1,
79 wakaba 1.29 dfn => 1, abbr => 1, time => 1, progress => 1, meter => 1, code => 1,
80     var => 1, samp => 1, kbd => 1, sub => 1, sup => 1, span => 1, i => 1,
81     b => 1, bdo => 1, script => 1, noscript => 1, 'event-source' => 1,
82     command => 1, font => 1,
83     a => 1,
84     datagrid => 1, ## ISSUE: "Interactive element" in the spec.
85     ## NOTE: |area| is allowed only as a descendant of |map|.
86     area => 1,
87    
88     ## NOTE: Transparent.
89     ins => 1, del => 1,
90    
91     ## NOTE: If there is a |menu| ancestor, phrasing. Otherwise, prose.
92     menu => 1,
93    
94     img => 1, iframe => 1, embed => 1, object => 1, video => 1, audio => 1,
95     canvas => 1,
96     },
97    
98     ## NOTE: Embedded
99     q<http://www.w3.org/1998/Math/MathML> => {math => 1},
100     q<http://www.w3.org/2000/svg> => {svg => 1},
101    
102     ## NOTE: And non-inter-element-whitespace text nodes.
103     };
104    
105 wakaba 1.40 ## $HTMLEmbeddedContent: See Whatpm::ContentChecker.
106 wakaba 1.29
107     my $HTMLInteractiveContent = {
108     $HTML_NS => {
109     a => 1,
110 wakaba 1.36 datagrid => 1, ## ISSUE: Categorized as "Inetractive element"
111 wakaba 1.29 },
112     };
113    
114 wakaba 1.36 ## NOTE: $HTMLTransparentElements: See Whatpm::ContentChecker.
115     ## NOTE: Semi-transparent elements: See Whatpm::ContentChecker.
116    
117     ## -- Common attribute syntacx checkers
118    
119 wakaba 1.1 our $AttrChecker;
120    
121     my $GetHTMLEnumeratedAttrChecker = sub {
122     my $states = shift; # {value => conforming ? 1 : -1}
123     return sub {
124     my ($self, $attr) = @_;
125     my $value = lc $attr->value; ## TODO: ASCII case insensitibility?
126     if ($states->{$value} > 0) {
127     #
128     } elsif ($states->{$value}) {
129     $self->{onerror}->(node => $attr, type => 'enumerated:non-conforming');
130     } else {
131     $self->{onerror}->(node => $attr, type => 'enumerated:invalid');
132     }
133     };
134     }; # $GetHTMLEnumeratedAttrChecker
135    
136     my $GetHTMLBooleanAttrChecker = sub {
137     my $local_name = shift;
138     return sub {
139     my ($self, $attr) = @_;
140     my $value = $attr->value;
141     unless ($value eq $local_name or $value eq '') {
142     $self->{onerror}->(node => $attr, type => 'boolean:invalid');
143     }
144     };
145     }; # $GetHTMLBooleanAttrChecker
146    
147 wakaba 1.8 ## Unordered set of space-separated tokens
148 wakaba 1.18 my $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker = sub {
149 wakaba 1.8 my ($self, $attr) = @_;
150     my %word;
151     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
152     unless ($word{$word}) {
153     $word{$word} = 1;
154     } else {
155     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
156     }
157     }
158 wakaba 1.18 }; # $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker
159 wakaba 1.8
160 wakaba 1.1 ## |rel| attribute (unordered set of space separated tokens,
161     ## whose allowed values are defined by the section on link types)
162     my $HTMLLinkTypesAttrChecker = sub {
163 wakaba 1.4 my ($a_or_area, $todo, $self, $attr) = @_;
164 wakaba 1.1 my %word;
165     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
166     unless ($word{$word}) {
167     $word{$word} = 1;
168 wakaba 1.18 } elsif ($word eq 'up') {
169     #
170 wakaba 1.1 } else {
171     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
172     }
173     }
174     ## NOTE: Case sensitive match (since HTML5 spec does not say link
175     ## types are case-insensitive and it says "The value should not
176     ## be confusingly similar to any other defined value (e.g.
177     ## differing only in case).").
178     ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
179     ## "MAY"s and "only ... MAY" restrict non-standard non-registered
180     ## values to be used conformingly.
181     require Whatpm::_LinkTypeList;
182     our $LinkType;
183     for my $word (keys %word) {
184     my $def = $LinkType->{$word};
185     if (defined $def) {
186     if ($def->{status} eq 'accepted') {
187     if (defined $def->{effect}->[$a_or_area]) {
188     #
189     } else {
190     $self->{onerror}->(node => $attr,
191     type => 'link type:bad context:'.$word);
192     }
193     } elsif ($def->{status} eq 'proposal') {
194     $self->{onerror}->(node => $attr, level => 's',
195     type => 'link type:proposed:'.$word);
196 wakaba 1.20 if (defined $def->{effect}->[$a_or_area]) {
197     #
198     } else {
199     $self->{onerror}->(node => $attr,
200     type => 'link type:bad context:'.$word);
201     }
202 wakaba 1.1 } else { # rejected or synonym
203     $self->{onerror}->(node => $attr,
204     type => 'link type:non-conforming:'.$word);
205     }
206 wakaba 1.4 if (defined $def->{effect}->[$a_or_area]) {
207     if ($word eq 'alternate') {
208     #
209     } elsif ($def->{effect}->[$a_or_area] eq 'hyperlink') {
210     $todo->{has_hyperlink_link_type} = 1;
211     }
212     }
213 wakaba 1.1 if ($def->{unique}) {
214     unless ($self->{has_link_type}->{$word}) {
215     $self->{has_link_type}->{$word} = 1;
216     } else {
217     $self->{onerror}->(node => $attr,
218     type => 'link type:duplicate:'.$word);
219     }
220     }
221     } else {
222     $self->{onerror}->(node => $attr, level => 'unsupported',
223     type => 'link type:'.$word);
224     }
225     }
226 wakaba 1.4 $todo->{has_hyperlink_link_type} = 1
227     if $word{alternate} and not $word{stylesheet};
228 wakaba 1.1 ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
229     ## says that using both X-Pingback: header field and HTML
230     ## <link rel=pingback> is deprecated and if both appears they
231     ## SHOULD contain exactly the same value.
232     ## ISSUE: Pingback 1.0 specification defines the exact representation
233     ## of its link element, which cannot be tested by the current arch.
234     ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
235     ## include any string that matches to the pattern for the rel=pingback link,
236     ## which again inpossible to test.
237     ## ISSUE: rel=pingback href MUST NOT include entities other than predefined 4.
238 wakaba 1.12
239     ## NOTE: <link rel="up index"><link rel="up up index"> is not an error.
240 wakaba 1.17 ## NOTE: We can't check "If the page is part of multiple hierarchies,
241     ## then they SHOULD be described in different paragraphs.".
242 wakaba 1.1 }; # $HTMLLinkTypesAttrChecker
243 wakaba 1.20
244     ## TODO: "When an author uses a new type not defined by either this specification or the Wiki page, conformance checkers should offer to add the value to the Wiki, with the details described above, with the "proposal" status."
245 wakaba 1.1
246     ## URI (or IRI)
247     my $HTMLURIAttrChecker = sub {
248     my ($self, $attr) = @_;
249     ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
250     my $value = $attr->value;
251     Whatpm::URIChecker->check_iri_reference ($value, sub {
252     my %opt = @_;
253     $self->{onerror}->(node => $attr, level => $opt{level},
254     type => 'URI::'.$opt{type}.
255     (defined $opt{position} ? ':'.$opt{position} : ''));
256     });
257 wakaba 1.17 $self->{has_uri_attr} = 1; ## TODO: <html manifest>
258 wakaba 1.1 }; # $HTMLURIAttrChecker
259    
260     ## A space separated list of one or more URIs (or IRIs)
261     my $HTMLSpaceURIsAttrChecker = sub {
262     my ($self, $attr) = @_;
263     my $i = 0;
264     for my $value (split /[\x09-\x0D\x20]+/, $attr->value) {
265     Whatpm::URIChecker->check_iri_reference ($value, sub {
266     my %opt = @_;
267     $self->{onerror}->(node => $attr, level => $opt{level},
268 wakaba 1.2 type => 'URIs:'.':'.
269     $opt{type}.':'.$i.
270 wakaba 1.1 (defined $opt{position} ? ':'.$opt{position} : ''));
271     });
272     $i++;
273     }
274     ## ISSUE: Relative references?
275     ## ISSUE: Leading or trailing white spaces are conformant?
276     ## ISSUE: A sequence of white space characters are conformant?
277     ## ISSUE: A zero-length string is conformant? (It does contain a relative reference, i.e. same as base URI.)
278     ## NOTE: Duplication seems not an error.
279 wakaba 1.4 $self->{has_uri_attr} = 1;
280 wakaba 1.1 }; # $HTMLSpaceURIsAttrChecker
281    
282     my $HTMLDatetimeAttrChecker = sub {
283     my ($self, $attr) = @_;
284     my $value = $attr->value;
285     ## ISSUE: "space", not "space character" (in parsing algorihtm, "space character")
286     if ($value =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})(?>[\x09-\x0D\x20]+(?>T[\x09-\x0D\x20]*)?|T[\x09-\x0D\x20]*)([0-9]{2}):([0-9]{2})(?>:([0-9]{2}))?(?>\.([0-9]+))?[\x09-\x0D\x20]*(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
287     my ($y, $M, $d, $h, $m, $s, $f, $zh, $zm)
288     = ($1, $2, $3, $4, $5, $6, $7, $8, $9);
289     if (0 < $M and $M < 13) { ## ISSUE: This is not explicitly specified (though in parsing algorithm)
290     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
291     if $d < 1 or
292     $d > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$M];
293     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
294     if $M == 2 and $d == 29 and
295     not ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
296     } else {
297     $self->{onerror}->(node => $attr, type => 'datetime:bad month');
298     }
299     $self->{onerror}->(node => $attr, type => 'datetime:bad hour') if $h > 23;
300     $self->{onerror}->(node => $attr, type => 'datetime:bad minute') if $m > 59;
301     $self->{onerror}->(node => $attr, type => 'datetime:bad second')
302     if defined $s and $s > 59;
303     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone hour')
304     if $zh > 23;
305     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone minute')
306     if $zm > 59;
307     ## ISSUE: Maybe timezone -00:00 should have same semantics as in RFC 3339.
308     } else {
309     $self->{onerror}->(node => $attr, type => 'datetime:syntax error');
310     }
311     }; # $HTMLDatetimeAttrChecker
312    
313     my $HTMLIntegerAttrChecker = sub {
314     my ($self, $attr) = @_;
315     my $value = $attr->value;
316     unless ($value =~ /\A-?[0-9]+\z/) {
317     $self->{onerror}->(node => $attr, type => 'integer:syntax error');
318     }
319     }; # $HTMLIntegerAttrChecker
320    
321     my $GetHTMLNonNegativeIntegerAttrChecker = sub {
322     my $range_check = shift;
323     return sub {
324     my ($self, $attr) = @_;
325     my $value = $attr->value;
326     if ($value =~ /\A[0-9]+\z/) {
327     unless ($range_check->($value + 0)) {
328     $self->{onerror}->(node => $attr, type => 'nninteger:out of range');
329     }
330     } else {
331     $self->{onerror}->(node => $attr,
332     type => 'nninteger:syntax error');
333     }
334     };
335     }; # $GetHTMLNonNegativeIntegerAttrChecker
336    
337     my $GetHTMLFloatingPointNumberAttrChecker = sub {
338     my $range_check = shift;
339     return sub {
340     my ($self, $attr) = @_;
341     my $value = $attr->value;
342     if ($value =~ /\A-?[0-9.]+\z/ and $value =~ /[0-9]/) {
343     unless ($range_check->($value + 0)) {
344     $self->{onerror}->(node => $attr, type => 'float:out of range');
345     }
346     } else {
347     $self->{onerror}->(node => $attr,
348     type => 'float:syntax error');
349     }
350     };
351     }; # $GetHTMLFloatingPointNumberAttrChecker
352    
353     ## "A valid MIME type, optionally with parameters. [RFC 2046]"
354     ## ISSUE: RFC 2046 does not define syntax of media types.
355     ## ISSUE: The definition of "a valid MIME type" is unknown.
356     ## Syntactical correctness?
357     my $HTMLIMTAttrChecker = sub {
358     my ($self, $attr) = @_;
359     my $value = $attr->value;
360     ## ISSUE: RFC 2045 Content-Type header field allows insertion
361     ## of LWS/comments between tokens. Is it allowed in HTML? Maybe no.
362     ## ISSUE: RFC 2231 extension? Maybe no.
363     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
364     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
365     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
366     if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
367     my @type = ($1, $2);
368     my $param = $3;
369     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
370     if (defined $2) {
371     push @type, $1 => $2;
372     } else {
373     my $n = $1;
374     my $v = $2;
375     $v =~ s/\\(.)/$1/gs;
376     push @type, $n => $v;
377     }
378     }
379     require Whatpm::IMTChecker;
380     Whatpm::IMTChecker->check_imt (sub {
381     my %opt = @_;
382     $self->{onerror}->(node => $attr, level => $opt{level},
383     type => 'IMT:'.$opt{type});
384     }, @type);
385     } else {
386     $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
387     }
388     }; # $HTMLIMTAttrChecker
389    
390     my $HTMLLanguageTagAttrChecker = sub {
391 wakaba 1.7 ## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm.
392    
393 wakaba 1.1 my ($self, $attr) = @_;
394 wakaba 1.6 my $value = $attr->value;
395     require Whatpm::LangTag;
396     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
397     my %opt = @_;
398     my $type = 'LangTag:'.$opt{type};
399     $type .= ':' . $opt{subtag} if defined $opt{subtag};
400     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
401     level => $opt{level});
402     });
403 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
404 wakaba 1.6
405     ## TODO: testdata
406 wakaba 1.1 }; # $HTMLLanguageTagAttrChecker
407    
408     ## "A valid media query [MQ]"
409     my $HTMLMQAttrChecker = sub {
410     my ($self, $attr) = @_;
411     $self->{onerror}->(node => $attr, level => 'unsupported',
412     type => 'media query');
413     ## ISSUE: What is "a valid media query"?
414     }; # $HTMLMQAttrChecker
415    
416     my $HTMLEventHandlerAttrChecker = sub {
417     my ($self, $attr) = @_;
418     $self->{onerror}->(node => $attr, level => 'unsupported',
419     type => 'event handler');
420     ## TODO: MUST contain valid ECMAScript code matching the
421     ## ECMAScript |FunctionBody| production. [ECMA262]
422     ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
423     ## ISSUE: Automatic semicolon insertion does not apply?
424     ## ISSUE: Other script languages?
425     }; # $HTMLEventHandlerAttrChecker
426    
427     my $HTMLUsemapAttrChecker = sub {
428     my ($self, $attr) = @_;
429     ## MUST be a valid hashed ID reference to a |map| element
430     my $value = $attr->value;
431     if ($value =~ s/^#//) {
432     ## ISSUE: Is |usemap="#"| conformant? (c.f. |id=""| is non-conformant.)
433     push @{$self->{usemap}}, [$value => $attr];
434     } else {
435     $self->{onerror}->(node => $attr, type => '#idref:syntax error');
436     }
437     ## NOTE: Space characters in hashed ID references are conforming.
438     ## ISSUE: UA algorithm for matching is case-insensitive; IDs only different in cases should be reported
439     }; # $HTMLUsemapAttrChecker
440    
441     my $HTMLTargetAttrChecker = sub {
442     my ($self, $attr) = @_;
443     my $value = $attr->value;
444     if ($value =~ /^_/) {
445     $value = lc $value; ## ISSUE: ASCII case-insentitive?
446     unless ({
447     _self => 1, _parent => 1, _top => 1,
448     }->{$value}) {
449     $self->{onerror}->(node => $attr,
450     type => 'reserved browsing context name');
451     }
452     } else {
453 wakaba 1.29 ## NOTE: An empty string is a valid browsing context name (same as _self).
454 wakaba 1.1 }
455     }; # $HTMLTargetAttrChecker
456    
457 wakaba 1.23 my $HTMLSelectorsAttrChecker = sub {
458     my ($self, $attr) = @_;
459    
460     ## ISSUE: Namespace resolution?
461    
462     my $value = $attr->value;
463    
464     require Whatpm::CSS::SelectorsParser;
465     my $p = Whatpm::CSS::SelectorsParser->new;
466     $p->{pseudo_class}->{$_} = 1 for qw/
467     active checked disabled empty enabled first-child first-of-type
468     focus hover indeterminate last-child last-of-type link only-child
469     only-of-type root target visited
470     lang nth-child nth-last-child nth-of-type nth-last-of-type not
471     -manakai-contains -manakai-current
472     /;
473    
474     $p->{pseudo_element}->{$_} = 1 for qw/
475     after before first-letter first-line
476     /;
477    
478     $p->{must_level} = $self->{must_level};
479     $p->{onerror} = sub {
480     my %opt = @_;
481     $opt{type} = 'selectors:'.$opt{type};
482     $self->{onerror}->(%opt, node => $attr);
483     };
484     $p->parse_string ($value);
485     }; # $HTMLSelectorsAttrChecker
486    
487 wakaba 1.1 my $HTMLAttrChecker = {
488     id => sub {
489     ## NOTE: |map| has its own variant of |id=""| checker
490     my ($self, $attr) = @_;
491     my $value = $attr->value;
492     if (length $value > 0) {
493     if ($self->{id}->{$value}) {
494     $self->{onerror}->(node => $attr, type => 'duplicate ID');
495     push @{$self->{id}->{$value}}, $attr;
496     } else {
497     $self->{id}->{$value} = [$attr];
498     }
499     if ($value =~ /[\x09-\x0D\x20]/) {
500     $self->{onerror}->(node => $attr, type => 'space in ID');
501     }
502     } else {
503     ## NOTE: MUST contain at least one character
504     $self->{onerror}->(node => $attr, type => 'empty attribute value');
505     }
506     },
507     title => sub {}, ## NOTE: No conformance creteria
508     lang => sub {
509     my ($self, $attr) = @_;
510 wakaba 1.6 my $value = $attr->value;
511     if ($value eq '') {
512     #
513     } else {
514     require Whatpm::LangTag;
515     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
516     my %opt = @_;
517     my $type = 'LangTag:'.$opt{type};
518     $type .= ':' . $opt{subtag} if defined $opt{subtag};
519     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
520     level => $opt{level});
521     });
522     }
523 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
524     unless ($attr->owner_document->manakai_is_html) {
525     $self->{onerror}->(node => $attr, type => 'in XML:lang');
526     }
527 wakaba 1.6
528     ## TODO: test data
529 wakaba 1.1 },
530     dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
531     class => sub {
532     my ($self, $attr) = @_;
533     my %word;
534     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
535     unless ($word{$word}) {
536     $word{$word} = 1;
537     push @{$self->{return}->{class}->{$word}||=[]}, $attr;
538     } else {
539     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
540     }
541     }
542     },
543     contextmenu => sub {
544     my ($self, $attr) = @_;
545     my $value = $attr->value;
546     push @{$self->{contextmenu}}, [$value => $attr];
547     ## ISSUE: "The value must be the ID of a menu element in the DOM."
548     ## What is "in the DOM"? A menu Element node that is not part
549     ## of the Document tree is in the DOM? A menu Element node that
550     ## belong to another Document tree is in the DOM?
551     },
552     irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
553 wakaba 1.8 tabindex => $HTMLIntegerAttrChecker
554     ## TODO: ref, template, registrationmark
555 wakaba 1.1 };
556    
557     for (qw/
558     onabort onbeforeunload onblur onchange onclick oncontextmenu
559     ondblclick ondrag ondragend ondragenter ondragleave ondragover
560     ondragstart ondrop onerror onfocus onkeydown onkeypress
561     onkeyup onload onmessage onmousedown onmousemove onmouseout
562     onmouseover onmouseup onmousewheel onresize onscroll onselect
563     onsubmit onunload
564     /) {
565     $HTMLAttrChecker->{$_} = $HTMLEventHandlerAttrChecker;
566     }
567    
568     my $GetHTMLAttrsChecker = sub {
569     my $element_specific_checker = shift;
570     return sub {
571 wakaba 1.40 my ($self, $item, $element_state) = @_;
572     for my $attr (@{$item->{node}->attributes}) {
573 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
574     $attr_ns = '' unless defined $attr_ns;
575     my $attr_ln = $attr->manakai_local_name;
576     my $checker;
577     if ($attr_ns eq '') {
578     $checker = $element_specific_checker->{$attr_ln}
579 wakaba 1.40 || $HTMLAttrChecker->{$attr_ln};
580 wakaba 1.1 }
581     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
582 wakaba 1.40 || $AttrChecker->{$attr_ns}->{''};
583 wakaba 1.1 if ($checker) {
584 wakaba 1.40 $checker->($self, $attr, $item);
585 wakaba 1.1 } else {
586     $self->{onerror}->(node => $attr, level => 'unsupported',
587     type => 'attribute');
588     ## ISSUE: No comformance createria for unknown attributes in the spec
589     }
590     }
591     };
592     }; # $GetHTMLAttrsChecker
593    
594 wakaba 1.40 my %HTMLChecker = (
595     %Whatpm::ContentChecker::AnyChecker,
596     check_attrs => $GetHTMLAttrsChecker->({}),
597     );
598    
599     my %HTMLEmptyChecker = (
600     %HTMLChecker,
601     check_child_element => sub {
602     my ($self, $item, $child_el, $child_nsuri, $child_ln,
603     $child_is_transparent, $element_state) = @_;
604     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
605     $self->{onerror}->(node => $child_el,
606     type => 'element not allowed:minus',
607     level => $self->{must_level});
608     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
609     #
610     } else {
611     $self->{onerror}->(node => $child_el,
612     type => 'element not allowed:empty',
613     level => $self->{must_level});
614     }
615     },
616     check_child_text => sub {
617     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
618     if ($has_significant) {
619     $self->{onerror}->(node => $child_node,
620     type => 'character not allowed:empty',
621     level => $self->{must_level});
622     }
623     },
624     );
625    
626     my %HTMLTextChecker = (
627     %HTMLChecker,
628     check_child_element => sub {
629     my ($self, $item, $child_el, $child_nsuri, $child_ln,
630     $child_is_transparent, $element_state) = @_;
631     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
632     $self->{onerror}->(node => $child_el,
633     type => 'element not allowed:minus',
634     level => $self->{must_level});
635     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
636     #
637     } else {
638     $self->{onerror}->(node => $child_el, type => 'element not allowed');
639     }
640     },
641     );
642    
643     my %HTMLProseContentChecker = (
644     %HTMLChecker,
645     check_child_element => sub {
646     my ($self, $item, $child_el, $child_nsuri, $child_ln,
647     $child_is_transparent, $element_state) = @_;
648     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
649     $self->{onerror}->(node => $child_el,
650     type => 'element not allowed:minus',
651     level => $self->{must_level});
652     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
653     #
654     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
655     if ($element_state->{has_non_style} or
656     not $child_el->has_attribute_ns (undef, 'scoped')) {
657     $self->{onerror}->(node => $child_el,
658     type => 'element not allowed:prose style',
659     level => $self->{must_level});
660     }
661     } elsif ($HTMLProseContent->{$child_nsuri}->{$child_ln}) {
662     $element_state->{has_non_style} = 1;
663     } else {
664     $element_state->{has_non_style} = 1;
665     $self->{onerror}->(node => $child_el,
666     type => 'element not allowed:prose',
667     level => $self->{must_level})
668     }
669     },
670     check_child_text => sub {
671     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
672     if ($has_significant) {
673     $element_state->{has_non_style} = 1;
674     }
675     },
676     check_end => sub {
677     my ($self, $item, $element_state) = @_;
678     if ($element_state->{has_significant}) {
679     $item->{parent_state}->{has_significant} = 1;
680     } elsif ($item->{transparent}) {
681     #
682     } else {
683     $self->{onerror}->(node => $item->{node},
684     level => $self->{should_level},
685     type => 'no significant content');
686     }
687     },
688     );
689    
690     my %HTMLPhrasingContentChecker = (
691     %HTMLChecker,
692     check_child_element => sub {
693     my ($self, $item, $child_el, $child_nsuri, $child_ln,
694     $child_is_transparent, $element_state) = @_;
695     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
696     $self->{onerror}->(node => $child_el,
697     type => 'element not allowed:minus',
698     level => $self->{must_level});
699     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
700     #
701     } elsif ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
702     #
703     } else {
704     $self->{onerror}->(node => $child_el,
705     type => 'element not allowed:phrasing',
706     level => $self->{must_level});
707     }
708     },
709     check_end => $HTMLProseContentChecker{check_end},
710     ## NOTE: The definition for |li| assumes that the only differences
711     ## between prose and phrasing content checkers are |check_child_element|
712     ## and |check_child_text|.
713     );
714    
715     my %HTMLTransparentChecker = %HTMLProseContentChecker;
716     ## ISSUE: Significant content rule should be applied to transparent element
717     ## with parent? Currently, applied to |video| but not to others.
718    
719 wakaba 1.1 our $Element;
720     our $ElementDefault;
721    
722     $Element->{$HTML_NS}->{''} = {
723 wakaba 1.40 %HTMLChecker,
724     check_start => $ElementDefault->{check_start},
725 wakaba 1.1 };
726    
727     $Element->{$HTML_NS}->{html} = {
728     is_root => 1,
729 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
730 wakaba 1.16 manifest => $HTMLURIAttrChecker,
731 wakaba 1.1 xmlns => sub {
732     my ($self, $attr) = @_;
733     my $value = $attr->value;
734     unless ($value eq $HTML_NS) {
735     $self->{onerror}->(node => $attr, type => 'invalid attribute value');
736     }
737     unless ($attr->owner_document->manakai_is_html) {
738     $self->{onerror}->(node => $attr, type => 'in XML:xmlns');
739     ## TODO: Test
740     }
741     },
742     }),
743 wakaba 1.40 check_start => sub {
744     my ($self, $item, $element_state) = @_;
745     $element_state->{phase} = 'before head';
746     },
747     check_child_element => sub {
748     my ($self, $item, $child_el, $child_nsuri, $child_ln,
749     $child_is_transparent, $element_state) = @_;
750     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
751     $self->{onerror}->(node => $child_el,
752     type => 'element not allowed:minus',
753     level => $self->{must_level});
754     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
755     #
756     } elsif ($element_state->{phase} eq 'before head') {
757     if ($child_nsuri eq $HTML_NS and $child_ln eq 'head') {
758     $element_state->{phase} = 'after head';
759     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'body') {
760     $self->{onerror}->(node => $child_el,
761     type => 'ps element missing:head');
762     $element_state->{phase} = 'after body';
763     } else {
764     $self->{onerror}->(node => $child_el,
765     type => 'element not allowed');
766     }
767     } elsif ($element_state->{phase} eq 'after head') {
768     if ($child_nsuri eq $HTML_NS and $child_ln eq 'body') {
769     $element_state->{phase} = 'after body';
770     } else {
771     $self->{onerror}->(node => $child_el,
772     type => 'element not allowed');
773     }
774     } elsif ($element_state->{phase} eq 'after body') {
775     $self->{onerror}->(node => $child_el,
776     type => 'element not allowed');
777     } else {
778     die "check_child_element: Bad |html| phase: $element_state->{phase}";
779     }
780     },
781     check_child_text => sub {
782     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
783     if ($has_significant) {
784     $self->{onerror}->(node => $child_node,
785     type => 'character not allowed');
786     }
787     },
788     check_end => sub {
789     my ($self, $item, $element_state) = @_;
790     if ($element_state->{phase} eq 'after body') {
791     #
792     } elsif ($element_state->{phase} eq 'before head') {
793     $self->{onerror}->(node => $item->{node},
794     type => 'child element missing:head');
795     $self->{onerror}->(node => $item->{node},
796     type => 'child element missing:body');
797     } elsif ($element_state->{phase} eq 'after head') {
798     $self->{onerror}->(node => $item->{node},
799     type => 'child element missing:body');
800     } else {
801     die "check_end: Bad |html| phase: $element_state->{phase}";
802     }
803 wakaba 1.1
804 wakaba 1.40 $HTMLChecker{check_end}->(@_);
805     },
806     };
807 wakaba 1.25
808 wakaba 1.40 $Element->{$HTML_NS}->{head} = {
809     check_attrs => $GetHTMLAttrsChecker->({}),
810     check_child_element => sub {
811     my ($self, $item, $child_el, $child_nsuri, $child_ln,
812     $child_is_transparent, $element_state) = @_;
813     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
814     $self->{onerror}->(node => $child_el,
815     type => 'element not allowed:minus',
816     level => $self->{must_level});
817     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
818     #
819     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'title') {
820     unless ($element_state->{has_title}) {
821     $element_state->{has_title} = 1;
822     } else {
823     $self->{onerror}->(node => $child_el,
824     type => 'element not allowed:head title',
825     level => $self->{must_level});
826     }
827     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
828     if ($child_el->has_attribute_ns (undef, 'scoped')) {
829     $self->{onerror}->(node => $child_el,
830     type => 'element not allowed:head style',
831     level => $self->{must_level});
832 wakaba 1.1 }
833 wakaba 1.40 } elsif ($HTMLMetadataContent->{$child_nsuri}->{$child_ln}) {
834     #
835    
836     ## NOTE: |meta| is a metadata content. However, strictly speaking,
837     ## a |meta| element with none of |charset|, |name|,
838     ## or |http-equiv| attribute is not allowed. It is non-conforming
839     ## anyway.
840     } else {
841     $self->{onerror}->(node => $child_el,
842     type => 'element not allowed:metadata',
843     level => $self->{must_level});
844     }
845     $element_state->{in_head_original} = $self->{flag}->{in_head};
846     $self->{flag}->{in_head} = 1;
847     },
848     check_child_text => sub {
849     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
850     if ($has_significant) {
851     $self->{onerror}->(node => $child_node, type => 'character not allowed');
852 wakaba 1.1 }
853 wakaba 1.40 },
854     check_end => sub {
855     my ($self, $item, $element_state) = @_;
856     unless ($element_state->{has_title}) {
857     $self->{onerror}->(node => $item->{node},
858     type => 'child element missing:title');
859 wakaba 1.1 }
860 wakaba 1.40 $self->{flag}->{in_head} = $element_state->{in_head_original};
861 wakaba 1.1
862 wakaba 1.40 $HTMLChecker{check_end}->(@_);
863 wakaba 1.1 },
864     };
865    
866 wakaba 1.40 $Element->{$HTML_NS}->{title} = {
867     %HTMLTextChecker,
868     };
869 wakaba 1.1
870 wakaba 1.40 $Element->{$HTML_NS}->{base} = {
871     %HTMLEmptyChecker,
872     check_attrs => sub {
873     my ($self, $item, $element_state) = @_;
874 wakaba 1.1
875 wakaba 1.40 if ($self->{has_base}) {
876     $self->{onerror}->(node => $item->{node},
877     type => 'element not allowed:base');
878     } else {
879     $self->{has_base} = 1;
880 wakaba 1.29 }
881    
882 wakaba 1.40 my $has_href = $item->{node}->has_attribute_ns (undef, 'href');
883     my $has_target = $item->{node}->has_attribute_ns (undef, 'target');
884 wakaba 1.14
885     if ($self->{has_uri_attr} and $has_href) {
886 wakaba 1.4 ## ISSUE: Are these examples conforming?
887     ## <head profile="a b c"><base href> (except for |profile|'s
888     ## non-conformance)
889     ## <title xml:base="relative"/><base href/> (maybe it should be)
890     ## <unknown xmlns="relative"/><base href/> (assuming that
891     ## |{relative}:unknown| is allowed before XHTML |base| (unlikely, though))
892     ## <style>@import 'relative';</style><base href>
893     ## <script>location.href = 'relative';</script><base href>
894 wakaba 1.14 ## NOTE: <html manifest=".."><head><base href=""/> is conforming as
895     ## an exception.
896 wakaba 1.40 $self->{onerror}->(node => $item->{node},
897 wakaba 1.4 type => 'basehref after URI attribute');
898     }
899 wakaba 1.14 if ($self->{has_hyperlink_element} and $has_target) {
900 wakaba 1.4 ## ISSUE: Are these examples conforming?
901     ## <head><title xlink:href=""/><base target="name"/></head>
902     ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
903     ## (assuming that |xbl:xbl| is allowed before |base|)
904     ## NOTE: These are non-conformant anyway because of |head|'s content model:
905     ## <link href=""/><base target="name"/>
906     ## <link rel=unknown href=""><base target=name>
907 wakaba 1.40 $self->{onerror}->(node => $item->{node},
908 wakaba 1.4 type => 'basetarget after hyperlink');
909     }
910    
911 wakaba 1.14 if (not $has_href and not $has_target) {
912 wakaba 1.40 $self->{onerror}->(node => $item->{node},
913 wakaba 1.14 type => 'attribute missing:href|target');
914     }
915    
916 wakaba 1.4 return $GetHTMLAttrsChecker->({
917     href => $HTMLURIAttrChecker,
918     target => $HTMLTargetAttrChecker,
919 wakaba 1.40 })->($self, $item, $element_state);
920 wakaba 1.4 },
921 wakaba 1.1 };
922    
923     $Element->{$HTML_NS}->{link} = {
924 wakaba 1.40 %HTMLEmptyChecker,
925     check_attrs => sub {
926     my ($self, $item, $element_state) = @_;
927 wakaba 1.1 $GetHTMLAttrsChecker->({
928     href => $HTMLURIAttrChecker,
929 wakaba 1.40 rel => sub { $HTMLLinkTypesAttrChecker->(0, $item, @_) },
930 wakaba 1.1 media => $HTMLMQAttrChecker,
931     hreflang => $HTMLLanguageTagAttrChecker,
932     type => $HTMLIMTAttrChecker,
933     ## NOTE: Though |title| has special semantics,
934     ## syntactically same as the |title| as global attribute.
935 wakaba 1.40 })->($self, $item, $element_state);
936     if ($item->{node}->has_attribute_ns (undef, 'href')) {
937     $self->{has_hyperlink_element} = 1 if $item->{has_hyperlink_link_type};
938 wakaba 1.4 } else {
939 wakaba 1.40 $self->{onerror}->(node => $item->{node},
940 wakaba 1.1 type => 'attribute missing:href');
941     }
942 wakaba 1.40 unless ($item->{node}->has_attribute_ns (undef, 'rel')) {
943     $self->{onerror}->(node => $item->{node},
944 wakaba 1.1 type => 'attribute missing:rel');
945     }
946     },
947     };
948    
949     $Element->{$HTML_NS}->{meta} = {
950 wakaba 1.40 %HTMLEmptyChecker,
951     check_attrs => sub {
952     my ($self, $item, $element_state) = @_;
953 wakaba 1.1 my $name_attr;
954     my $http_equiv_attr;
955     my $charset_attr;
956     my $content_attr;
957 wakaba 1.40 for my $attr (@{$item->{node}->attributes}) {
958 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
959     $attr_ns = '' unless defined $attr_ns;
960     my $attr_ln = $attr->manakai_local_name;
961     my $checker;
962     if ($attr_ns eq '') {
963     if ($attr_ln eq 'content') {
964     $content_attr = $attr;
965     $checker = 1;
966     } elsif ($attr_ln eq 'name') {
967     $name_attr = $attr;
968     $checker = 1;
969     } elsif ($attr_ln eq 'http-equiv') {
970     $http_equiv_attr = $attr;
971     $checker = 1;
972     } elsif ($attr_ln eq 'charset') {
973     $charset_attr = $attr;
974     $checker = 1;
975     } else {
976     $checker = $HTMLAttrChecker->{$attr_ln}
977     || $AttrChecker->{$attr_ns}->{$attr_ln}
978     || $AttrChecker->{$attr_ns}->{''};
979     }
980     } else {
981     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
982     || $AttrChecker->{$attr_ns}->{''};
983     }
984     if ($checker) {
985     $checker->($self, $attr) if ref $checker;
986     } else {
987     $self->{onerror}->(node => $attr, level => 'unsupported',
988     type => 'attribute');
989     ## ISSUE: No comformance createria for unknown attributes in the spec
990     }
991     }
992    
993     if (defined $name_attr) {
994     if (defined $http_equiv_attr) {
995     $self->{onerror}->(node => $http_equiv_attr,
996     type => 'attribute not allowed');
997     } elsif (defined $charset_attr) {
998     $self->{onerror}->(node => $charset_attr,
999     type => 'attribute not allowed');
1000     }
1001     my $metadata_name = $name_attr->value;
1002     my $metadata_value;
1003     if (defined $content_attr) {
1004     $metadata_value = $content_attr->value;
1005     } else {
1006 wakaba 1.40 $self->{onerror}->(node => $item->{node},
1007 wakaba 1.1 type => 'attribute missing:content');
1008     $metadata_value = '';
1009     }
1010     } elsif (defined $http_equiv_attr) {
1011     if (defined $charset_attr) {
1012     $self->{onerror}->(node => $charset_attr,
1013     type => 'attribute not allowed');
1014     }
1015     unless (defined $content_attr) {
1016 wakaba 1.40 $self->{onerror}->(node => $item->{node},
1017 wakaba 1.1 type => 'attribute missing:content');
1018     }
1019     } elsif (defined $charset_attr) {
1020     if (defined $content_attr) {
1021     $self->{onerror}->(node => $content_attr,
1022     type => 'attribute not allowed');
1023     }
1024     } else {
1025     if (defined $content_attr) {
1026     $self->{onerror}->(node => $content_attr,
1027     type => 'attribute not allowed');
1028 wakaba 1.40 $self->{onerror}->(node => $item->{node},
1029 wakaba 1.1 type => 'attribute missing:name|http-equiv');
1030     } else {
1031 wakaba 1.40 $self->{onerror}->(node => $item->{node},
1032 wakaba 1.1 type => 'attribute missing:name|http-equiv|charset');
1033     }
1034     }
1035    
1036 wakaba 1.32 my $check_charset_decl = sub () {
1037 wakaba 1.40 my $parent = $item->{node}->manakai_parent_element;
1038 wakaba 1.29 if ($parent and $parent eq $parent->owner_document->manakai_head) {
1039     for my $el (@{$parent->child_nodes}) {
1040     next unless $el->node_type == 1; # ELEMENT_NODE
1041 wakaba 1.40 unless ($el eq $item->{node}) {
1042 wakaba 1.29 ## NOTE: Not the first child element.
1043 wakaba 1.40 $self->{onerror}->(node => $item->{node},
1044 wakaba 1.32 type => 'element not allowed:meta charset',
1045     level => $self->{must_level});
1046 wakaba 1.29 }
1047     last;
1048     ## NOTE: Entity references are not supported.
1049     }
1050     } else {
1051 wakaba 1.40 $self->{onerror}->(node => $item->{node},
1052 wakaba 1.32 type => 'element not allowed:meta charset',
1053     level => $self->{must_level});
1054 wakaba 1.29 }
1055    
1056 wakaba 1.40 unless ($item->{node}->owner_document->manakai_is_html) {
1057     $self->{onerror}->(node => $item->{node},
1058 wakaba 1.32 type => 'in XML:charset',
1059     level => $self->{must_level});
1060 wakaba 1.1 }
1061 wakaba 1.32 }; # $check_charset_decl
1062 wakaba 1.21
1063 wakaba 1.32 my $check_charset = sub ($$) {
1064     my ($attr, $charset_value) = @_;
1065 wakaba 1.21 ## NOTE: Though the case-sensitivility of |charset| attribute value
1066     ## is not explicitly spelled in the HTML5 spec, the Character Set
1067     ## registry of IANA, which is referenced from HTML5 spec, says that
1068     ## charset name is case-insensitive.
1069     $charset_value =~ tr/A-Z/a-z/; ## NOTE: ASCII Case-insensitive.
1070    
1071     require Message::Charset::Info;
1072     my $charset = $Message::Charset::Info::IANACharset->{$charset_value};
1073 wakaba 1.40 my $ic = $item->{node}->owner_document->input_encoding;
1074 wakaba 1.21 if (defined $ic) {
1075     ## TODO: Test for this case
1076     my $ic_charset = $Message::Charset::Info::IANACharset->{$ic};
1077     if ($charset ne $ic_charset) {
1078 wakaba 1.32 $self->{onerror}->(node => $attr,
1079 wakaba 1.21 type => 'mismatched charset name:'.$ic.
1080 wakaba 1.32 ':'.$charset_value, ## TODO: This should be a |value| value.
1081     level => $self->{must_level});
1082 wakaba 1.21 }
1083     } else {
1084     ## NOTE: MUST, but not checkable, since the document is not originally
1085     ## in serialized form (or the parser does not preserve the input
1086     ## encoding information).
1087 wakaba 1.32 $self->{onerror}->(node => $attr,
1088     type => 'mismatched charset name::'.$charset_value, ## TODO: |value|
1089 wakaba 1.21 level => 'unsupported');
1090     }
1091    
1092     ## ISSUE: What is "valid character encoding name"? Syntactically valid?
1093     ## Syntactically valid and registered? What about x-charset names?
1094     unless (Message::Charset::Info::is_syntactically_valid_iana_charset_name
1095     ($charset_value)) {
1096 wakaba 1.32 $self->{onerror}->(node => $attr,
1097     type => 'charset:syntax error:'.$charset_value, ## TODO
1098     level => $self->{must_level});
1099 wakaba 1.21 }
1100    
1101     if ($charset) {
1102     ## ISSUE: What is "the preferred name for that encoding" (for a charset
1103     ## with no "preferred MIME name" label)?
1104     my $charset_status = $charset->{iana_names}->{$charset_value} || 0;
1105     if (($charset_status &
1106     Message::Charset::Info::PREFERRED_CHARSET_NAME ())
1107     != Message::Charset::Info::PREFERRED_CHARSET_NAME ()) {
1108 wakaba 1.32 $self->{onerror}->(node => $attr,
1109 wakaba 1.21 type => 'charset:not preferred:'.
1110 wakaba 1.32 $charset_value, ## TODO
1111     level => $self->{must_level});
1112 wakaba 1.21 }
1113     if (($charset_status &
1114     Message::Charset::Info::REGISTERED_CHARSET_NAME ())
1115     != Message::Charset::Info::REGISTERED_CHARSET_NAME ()) {
1116     if ($charset_value =~ /^x-/) {
1117 wakaba 1.32 $self->{onerror}->(node => $attr,
1118     type => 'charset:private:'.$charset_value, ## TODO
1119 wakaba 1.21 level => $self->{good_level});
1120     } else {
1121 wakaba 1.32 $self->{onerror}->(node => $attr,
1122 wakaba 1.21 type => 'charset:not registered:'.
1123 wakaba 1.32 $charset_value, ## TODO
1124 wakaba 1.21 level => $self->{good_level});
1125     }
1126     }
1127     } elsif ($charset_value =~ /^x-/) {
1128 wakaba 1.32 $self->{onerror}->(node => $attr,
1129     type => 'charset:private:'.$charset_value, ## TODO
1130 wakaba 1.21 level => $self->{good_level});
1131     } else {
1132 wakaba 1.32 $self->{onerror}->(node => $attr,
1133     type => 'charset:not registered:'.$charset_value, ## TODO
1134 wakaba 1.21 level => $self->{good_level});
1135     }
1136    
1137 wakaba 1.32 if ($attr->get_user_data ('manakai_has_reference')) {
1138     $self->{onerror}->(node => $attr,
1139 wakaba 1.22 type => 'character reference in charset',
1140     level => $self->{must_level});
1141     }
1142 wakaba 1.32 }; # $check_charset
1143    
1144     ## TODO: metadata conformance
1145    
1146     ## TODO: pragma conformance
1147     if (defined $http_equiv_attr) { ## An enumerated attribute
1148     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
1149     if ({
1150     'refresh' => 1,
1151     'default-style' => 1,
1152     }->{$keyword}) {
1153     #
1154 wakaba 1.33
1155     ## TODO: More than one occurence is a MUST-level error (revision 1180).
1156 wakaba 1.32 } elsif ($keyword eq 'content-type') {
1157 wakaba 1.33 ## ISSUE: Though it is renamed as "Encoding declaration" state in rev
1158     ## 1221, there are still many occurence of "Content-Type" state in
1159     ## the spec.
1160    
1161 wakaba 1.32 $check_charset_decl->();
1162     if ($content_attr) {
1163     my $content = $content_attr->value;
1164     if ($content =~ m!^text/html;\x20?charset=(.+)\z!s) {
1165     $check_charset->($content_attr, $1);
1166     } else {
1167     $self->{onerror}->(node => $content_attr,
1168     type => 'meta content-type syntax error',
1169     level => $self->{must_level});
1170     }
1171     }
1172     } else {
1173     $self->{onerror}->(node => $http_equiv_attr,
1174     type => 'enumerated:invalid');
1175     }
1176     }
1177    
1178     if (defined $charset_attr) {
1179     $check_charset_decl->();
1180     $check_charset->($charset_attr, $charset_attr->value);
1181 wakaba 1.1 }
1182     },
1183     };
1184    
1185     $Element->{$HTML_NS}->{style} = {
1186 wakaba 1.40 %HTMLChecker,
1187     check_attrs => $GetHTMLAttrsChecker->({
1188 wakaba 1.1 type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
1189     media => $HTMLMQAttrChecker,
1190     scoped => $GetHTMLBooleanAttrChecker->('scoped'),
1191     ## NOTE: |title| has special semantics for |style|s, but is syntactically
1192     ## not different
1193     }),
1194 wakaba 1.40 check_start => sub {
1195     my ($self, $item, $element_state) = @_;
1196    
1197 wakaba 1.27 ## NOTE: |html:style| itself has no conformance creteria on content model.
1198 wakaba 1.40 my $type = $item->{node}->get_attribute_ns (undef, 'type');
1199 wakaba 1.27 if (not defined $type or
1200     $type =~ m[\A(?>(?>\x0D\x0A)?[\x09\x20])*[Tt][Ee][Xx][Tt](?>(?>\x0D\x0A)?[\x09\x20])*/(?>(?>\x0D\x0A)?[\x09\x20])*[Cc][Ss][Ss](?>(?>\x0D\x0A)?[\x09\x20])*\z]) {
1201 wakaba 1.40 $element_state->{allow_element} = 0;
1202     $element_state->{style_type} = 'text/css';
1203     } else {
1204     $element_state->{allow_element} = 1; # unknown
1205     $element_state->{style_type} = $type; ## TODO: $type normalization
1206     }
1207     },
1208     check_child_element => sub {
1209     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1210     $child_is_transparent, $element_state) = @_;
1211     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
1212     $self->{onerror}->(node => $child_el,
1213     type => 'element not allowed:minus',
1214     level => $self->{must_level});
1215     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1216     #
1217     } elsif ($element_state->{allow_element}) {
1218     #
1219     } else {
1220     $self->{onerror}->(node => $child_el, type => 'element not allowed');
1221     }
1222     },
1223     check_child_text => sub {
1224     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1225     $element_state->{text} .= $child_node->text_content;
1226     },
1227     check_end => sub {
1228     my ($self, $item, $element_state) = @_;
1229     if ($element_state->{style_type} eq 'text/css') {
1230     $self->{onsubdoc}->({s => $element_state->{text},
1231     container_node => $item->{node},
1232 wakaba 1.28 media_type => 'text/css', is_char_string => 1});
1233 wakaba 1.27 } else {
1234 wakaba 1.40 $self->{onerror}->(node => $item->{node}, level => 'unsupported',
1235     type => 'style:'.$element_state->{style_type});
1236 wakaba 1.27 }
1237 wakaba 1.40
1238     $HTMLChecker{check_end}->(@_);
1239 wakaba 1.1 },
1240     };
1241 wakaba 1.25 ## ISSUE: Relationship to significant content check?
1242 wakaba 1.1
1243     $Element->{$HTML_NS}->{body} = {
1244 wakaba 1.40 %HTMLProseContentChecker,
1245 wakaba 1.1 };
1246    
1247     $Element->{$HTML_NS}->{section} = {
1248 wakaba 1.40 %HTMLProseContentChecker,
1249 wakaba 1.1 };
1250    
1251     $Element->{$HTML_NS}->{nav} = {
1252 wakaba 1.40 %HTMLProseContentChecker,
1253 wakaba 1.1 };
1254    
1255     $Element->{$HTML_NS}->{article} = {
1256 wakaba 1.40 %HTMLProseContentChecker,
1257 wakaba 1.1 };
1258    
1259     $Element->{$HTML_NS}->{blockquote} = {
1260 wakaba 1.40 %HTMLProseContentChecker,
1261     check_attrs => $GetHTMLAttrsChecker->({
1262 wakaba 1.1 cite => $HTMLURIAttrChecker,
1263     }),
1264     };
1265    
1266     $Element->{$HTML_NS}->{aside} = {
1267 wakaba 1.40 %HTMLProseContentChecker,
1268 wakaba 1.1 };
1269    
1270     $Element->{$HTML_NS}->{h1} = {
1271 wakaba 1.40 %HTMLPhrasingContentChecker,
1272     check_start => sub {
1273     my ($self, $item, $element_state) = @_;
1274     $self->{flag}->{has_hn} = 1;
1275 wakaba 1.1 },
1276     };
1277    
1278 wakaba 1.40 $Element->{$HTML_NS}->{h2} = {%{$Element->{$HTML_NS}->{h1}}};
1279 wakaba 1.1
1280 wakaba 1.40 $Element->{$HTML_NS}->{h3} = {%{$Element->{$HTML_NS}->{h1}}};
1281 wakaba 1.1
1282 wakaba 1.40 $Element->{$HTML_NS}->{h4} = {%{$Element->{$HTML_NS}->{h1}}};
1283 wakaba 1.1
1284 wakaba 1.40 $Element->{$HTML_NS}->{h5} = {%{$Element->{$HTML_NS}->{h1}}};
1285 wakaba 1.1
1286 wakaba 1.40 $Element->{$HTML_NS}->{h6} = {%{$Element->{$HTML_NS}->{h1}}};
1287 wakaba 1.1
1288 wakaba 1.29 ## TODO: Explicit sectioning is "encouraged".
1289    
1290 wakaba 1.1 $Element->{$HTML_NS}->{header} = {
1291 wakaba 1.40 %HTMLProseContentChecker,
1292     check_start => sub {
1293     my ($self, $item, $element_state) = @_;
1294     $self->_add_minus_elements ($element_state,
1295     {$HTML_NS => {qw/header 1 footer 1/}},
1296     $HTMLSectioningContent);
1297     $element_state->{has_hn_original} = $self->{flag}->{has_hn};
1298     $self->{flag}->{has_hn} = 0;
1299     },
1300     check_end => sub {
1301     my ($self, $item, $element_state) = @_;
1302     $self->_remove_minus_elements ($element_state);
1303     unless ($self->{flag}->{has_hn}) {
1304     $self->{onerror}->(node => $item->{node},
1305     type => 'element missing:hn');
1306     }
1307     $self->{flag}->{has_hn} ||= $element_state->{has_hn_original};
1308 wakaba 1.1
1309 wakaba 1.40 $HTMLProseContentChecker{check_end}->(@_);
1310 wakaba 1.1 },
1311 wakaba 1.40 ## ISSUE: <header><del><h1>...</h1></del></header> is conforming?
1312 wakaba 1.1 };
1313    
1314     $Element->{$HTML_NS}->{footer} = {
1315 wakaba 1.40 %HTMLProseContentChecker,
1316     check_start => sub {
1317     my ($self, $item, $element_state) = @_;
1318     $self->_add_minus_elements ($element_state,
1319     {$HTML_NS => {footer => 1}},
1320     $HTMLSectioningContent, $HTMLHeadingContent);
1321     },
1322     check_end => sub {
1323     my ($self, $item, $element_state) = @_;
1324     $self->_remove_minus_elements ($element_state);
1325 wakaba 1.1
1326 wakaba 1.40 $HTMLProseContentChecker{check_end}->(@_);
1327 wakaba 1.1 },
1328     };
1329    
1330     $Element->{$HTML_NS}->{address} = {
1331 wakaba 1.40 %HTMLProseContentChecker,
1332     check_start => sub {
1333     my ($self, $item, $element_state) = @_;
1334     $self->_add_minus_elements ($element_state,
1335     {$HTML_NS => {footer => 1, address => 1}},
1336     $HTMLSectioningContent, $HTMLHeadingContent);
1337     },
1338     check_end => sub {
1339     my ($self, $item, $element_state) = @_;
1340     $self->_remove_minus_elements ($element_state);
1341 wakaba 1.29
1342 wakaba 1.40 $HTMLProseContentChecker{check_end}->(@_);
1343 wakaba 1.29 },
1344 wakaba 1.1 };
1345    
1346     $Element->{$HTML_NS}->{p} = {
1347 wakaba 1.40 %HTMLPhrasingContentChecker,
1348 wakaba 1.1 };
1349    
1350     $Element->{$HTML_NS}->{hr} = {
1351 wakaba 1.40 %HTMLEmptyChecker,
1352 wakaba 1.1 };
1353    
1354     $Element->{$HTML_NS}->{br} = {
1355 wakaba 1.40 %HTMLEmptyChecker,
1356 wakaba 1.29 ## NOTE: Blank line MUST NOT be used for presentation purpose.
1357     ## (This requirement is semantic so that we cannot check.)
1358 wakaba 1.1 };
1359    
1360     $Element->{$HTML_NS}->{dialog} = {
1361 wakaba 1.40 %HTMLChecker,
1362     check_start => sub {
1363     my ($self, $item, $element_state) = @_;
1364     $element_state->{phase} = 'before dt';
1365     },
1366     check_child_element => sub {
1367     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1368     $child_is_transparent, $element_state) = @_;
1369     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
1370     $self->{onerror}->(node => $child_el,
1371     type => 'element not allowed:minus',
1372     level => $self->{must_level});
1373     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1374     #
1375     } elsif ($element_state->{phase} eq 'before dt') {
1376     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
1377     $element_state->{phase} = 'before dd';
1378     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
1379     $self->{onerror}
1380     ->(node => $child_el, type => 'ps element missing:dt');
1381     $element_state->{phase} = 'before dt';
1382     } else {
1383     $self->{onerror}->(node => $child_el, type => 'element not allowed');
1384     }
1385     } elsif ($element_state->{phase} eq 'before dd') {
1386     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
1387     $element_state->{phase} = 'before dt';
1388     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
1389     $self->{onerror}
1390     ->(node => $child_el, type => 'ps element missing:dd');
1391     $element_state->{phase} = 'before dd';
1392     } else {
1393     $self->{onerror}->(node => $child_el, type => 'element not allowed');
1394 wakaba 1.1 }
1395 wakaba 1.40 } else {
1396     die "check_child_element: Bad |dialog| phase: $element_state->{phase}";
1397     }
1398     },
1399     check_child_text => sub {
1400     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1401     if ($has_significant) {
1402     $self->{onerror}->(node => $child_node, type => 'character not allowed');
1403 wakaba 1.1 }
1404 wakaba 1.40 },
1405     check_end => sub {
1406     my ($self, $item, $element_state) = @_;
1407     if ($element_state->{phase} eq 'before dd') {
1408     $self->{onerror}->(node => $item->{node},
1409     type => 'child element missing:dd');
1410 wakaba 1.1 }
1411 wakaba 1.40
1412     $HTMLChecker{check_end}->(@_);
1413 wakaba 1.1 },
1414     };
1415    
1416     $Element->{$HTML_NS}->{pre} = {
1417 wakaba 1.40 %HTMLPhrasingContentChecker,
1418 wakaba 1.1 };
1419    
1420     $Element->{$HTML_NS}->{ol} = {
1421 wakaba 1.40 %HTMLChecker,
1422     check_attrs => $GetHTMLAttrsChecker->({
1423 wakaba 1.1 start => $HTMLIntegerAttrChecker,
1424     }),
1425 wakaba 1.40 check_child_element => sub {
1426     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1427     $child_is_transparent, $element_state) = @_;
1428     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
1429     $self->{onerror}->(node => $child_el,
1430     type => 'element not allowed:minus',
1431     level => $self->{must_level});
1432     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1433     #
1434     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'li') {
1435     #
1436     } else {
1437     $self->{onerror}->(node => $child_el, type => 'element not allowed');
1438 wakaba 1.1 }
1439 wakaba 1.40 },
1440     check_child_text => sub {
1441     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1442     if ($has_significant) {
1443     $self->{onerror}->(node => $child_node, type => 'character not allowed');
1444 wakaba 1.1 }
1445     },
1446     };
1447    
1448     $Element->{$HTML_NS}->{ul} = {
1449 wakaba 1.40 %{$Element->{$HTML_NS}->{ol}},
1450 wakaba 1.1 };
1451    
1452     $Element->{$HTML_NS}->{li} = {
1453 wakaba 1.40 %HTMLProseContentChecker,
1454     check_attrs => $GetHTMLAttrsChecker->({
1455 wakaba 1.1 start => sub {
1456     my ($self, $attr) = @_;
1457     my $parent = $attr->owner_element->manakai_parent_element;
1458     if (defined $parent) {
1459     my $parent_ns = $parent->namespace_uri;
1460     $parent_ns = '' unless defined $parent_ns;
1461     my $parent_ln = $parent->manakai_local_name;
1462     unless ($parent_ns eq $HTML_NS and $parent_ln eq 'ol') {
1463     $self->{onerror}->(node => $attr, level => 'unsupported',
1464     type => 'attribute');
1465     }
1466     }
1467     $HTMLIntegerAttrChecker->($self, $attr);
1468     },
1469     }),
1470 wakaba 1.40 check_child_element => sub {
1471     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1472     $child_is_transparent, $element_state) = @_;
1473     if ($self->{flag}->{in_menu}) {
1474     $HTMLPhrasingContentChecker{check_child_element}->(@_);
1475     } else {
1476     $HTMLProseContentChecker{check_child_element}->(@_);
1477     }
1478     },
1479     check_child_text => sub {
1480     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1481     if ($self->{flag}->{in_menu}) {
1482     $HTMLPhrasingContentChecker{check_child_text}->(@_);
1483 wakaba 1.1 } else {
1484 wakaba 1.40 $HTMLProseContentChecker{check_child_text}->(@_);
1485 wakaba 1.1 }
1486     },
1487     };
1488    
1489     $Element->{$HTML_NS}->{dl} = {
1490 wakaba 1.40 %HTMLChecker,
1491     check_start => sub {
1492     my ($self, $item, $element_state) = @_;
1493     $element_state->{phase} = 'before dt';
1494     },
1495     check_child_element => sub {
1496     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1497     $child_is_transparent, $element_state) = @_;
1498     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
1499     $self->{onerror}->(node => $child_el,
1500     type => 'element not allowed:minus',
1501     level => $self->{must_level});
1502     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1503     #
1504     } elsif ($element_state->{phase} eq 'in dds') {
1505     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
1506     #$element_state->{phase} = 'in dds';
1507     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
1508     $element_state->{phase} = 'in dts';
1509     } else {
1510     $self->{onerror}->(node => $child_el, type => 'element not allowed');
1511     }
1512     } elsif ($element_state->{phase} eq 'in dts') {
1513     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
1514     #$element_state->{phase} = 'in dts';
1515     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
1516     $element_state->{phase} = 'in dds';
1517     } else {
1518     $self->{onerror}->(node => $child_el, type => 'element not allowed');
1519     }
1520     } elsif ($element_state->{phase} eq 'before dt') {
1521     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
1522     $element_state->{phase} = 'in dts';
1523     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
1524     $self->{onerror}
1525     ->(node => $child_el, type => 'ps element missing:dt');
1526     $element_state->{phase} = 'in dds';
1527     } else {
1528     $self->{onerror}->(node => $child_el, type => 'element not allowed');
1529 wakaba 1.1 }
1530 wakaba 1.40 } else {
1531     die "check_child_element: Bad |dl| phase: $element_state->{phase}";
1532 wakaba 1.1 }
1533 wakaba 1.40 },
1534     check_child_text => sub {
1535     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1536     if ($has_significant) {
1537     $self->{onerror}->(node => $child_node, type => 'character not allowed');
1538     }
1539     },
1540     check_end => sub {
1541     my ($self, $item, $element_state) = @_;
1542     if ($element_state->{phase} eq 'in dts') {
1543     $self->{onerror}->(node => $item->{node},
1544     type => 'child element missing:dd');
1545 wakaba 1.1 }
1546    
1547 wakaba 1.40 $HTMLChecker{check_end}->(@_);
1548 wakaba 1.1 },
1549     };
1550    
1551     $Element->{$HTML_NS}->{dt} = {
1552 wakaba 1.40 %HTMLPhrasingContentChecker,
1553 wakaba 1.1 };
1554    
1555     $Element->{$HTML_NS}->{dd} = {
1556 wakaba 1.40 %HTMLProseContentChecker,
1557 wakaba 1.1 };
1558    
1559     $Element->{$HTML_NS}->{a} = {
1560 wakaba 1.40 %HTMLPhrasingContentChecker,
1561     check_attrs => sub {
1562     my ($self, $item, $element_state) = @_;
1563 wakaba 1.1 my %attr;
1564 wakaba 1.40 for my $attr (@{$item->{node}->attributes}) {
1565 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
1566     $attr_ns = '' unless defined $attr_ns;
1567     my $attr_ln = $attr->manakai_local_name;
1568     my $checker;
1569     if ($attr_ns eq '') {
1570     $checker = {
1571     target => $HTMLTargetAttrChecker,
1572     href => $HTMLURIAttrChecker,
1573     ping => $HTMLSpaceURIsAttrChecker,
1574 wakaba 1.40 rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
1575 wakaba 1.1 media => $HTMLMQAttrChecker,
1576     hreflang => $HTMLLanguageTagAttrChecker,
1577     type => $HTMLIMTAttrChecker,
1578     }->{$attr_ln};
1579     if ($checker) {
1580     $attr{$attr_ln} = $attr;
1581     } else {
1582     $checker = $HTMLAttrChecker->{$attr_ln};
1583     }
1584     }
1585     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1586     || $AttrChecker->{$attr_ns}->{''};
1587     if ($checker) {
1588     $checker->($self, $attr) if ref $checker;
1589     } else {
1590     $self->{onerror}->(node => $attr, level => 'unsupported',
1591     type => 'attribute');
1592     ## ISSUE: No comformance createria for unknown attributes in the spec
1593     }
1594     }
1595    
1596 wakaba 1.40 $element_state->{in_a_href_original} = $self->{flag}->{in_a_href};
1597 wakaba 1.4 if (defined $attr{href}) {
1598     $self->{has_hyperlink_element} = 1;
1599 wakaba 1.40 $self->{flag}->{in_a_href} = 1;
1600 wakaba 1.4 } else {
1601 wakaba 1.1 for (qw/target ping rel media hreflang type/) {
1602     if (defined $attr{$_}) {
1603     $self->{onerror}->(node => $attr{$_},
1604     type => 'attribute not allowed');
1605     }
1606     }
1607     }
1608     },
1609 wakaba 1.40 check_start => sub {
1610     my ($self, $item, $element_state) = @_;
1611     $self->_add_minus_elements ($element_state, $HTMLInteractiveContent);
1612     },
1613     check_end => sub {
1614     my ($self, $item, $element_state) = @_;
1615     $self->_remove_minus_elements ($element_state);
1616 wakaba 1.1
1617 wakaba 1.40 $HTMLPhrasingContentChecker{check_end}->(@_);
1618 wakaba 1.1 },
1619     };
1620    
1621     $Element->{$HTML_NS}->{q} = {
1622 wakaba 1.40 %HTMLPhrasingContentChecker,
1623     check_attrs => $GetHTMLAttrsChecker->({
1624 wakaba 1.1 cite => $HTMLURIAttrChecker,
1625     }),
1626     };
1627    
1628     $Element->{$HTML_NS}->{cite} = {
1629 wakaba 1.40 %HTMLPhrasingContentChecker,
1630 wakaba 1.1 };
1631    
1632     $Element->{$HTML_NS}->{em} = {
1633 wakaba 1.40 %HTMLPhrasingContentChecker,
1634 wakaba 1.1 };
1635    
1636     $Element->{$HTML_NS}->{strong} = {
1637 wakaba 1.40 %HTMLPhrasingContentChecker,
1638 wakaba 1.1 };
1639    
1640     $Element->{$HTML_NS}->{small} = {
1641 wakaba 1.40 %HTMLPhrasingContentChecker,
1642 wakaba 1.1 };
1643    
1644 wakaba 1.38 $Element->{$HTML_NS}->{mark} = {
1645 wakaba 1.40 %HTMLPhrasingContentChecker,
1646 wakaba 1.1 };
1647    
1648     $Element->{$HTML_NS}->{dfn} = {
1649 wakaba 1.40 %HTMLPhrasingContentChecker,
1650     check_start => sub {
1651     my ($self, $item, $element_state) = @_;
1652     $self->_add_minus_elements ($element_state, {$HTML_NS => {dfn => 1}});
1653 wakaba 1.1
1654 wakaba 1.40 my $node = $item->{node};
1655 wakaba 1.1 my $term = $node->get_attribute_ns (undef, 'title');
1656     unless (defined $term) {
1657     for my $child (@{$node->child_nodes}) {
1658     if ($child->node_type == 1) { # ELEMENT_NODE
1659     if (defined $term) {
1660     undef $term;
1661     last;
1662     } elsif ($child->manakai_local_name eq 'abbr') {
1663     my $nsuri = $child->namespace_uri;
1664     if (defined $nsuri and $nsuri eq $HTML_NS) {
1665     my $attr = $child->get_attribute_node_ns (undef, 'title');
1666     if ($attr) {
1667     $term = $attr->value;
1668     }
1669     }
1670     }
1671     } elsif ($child->node_type == 3 or $child->node_type == 4) {
1672     ## TEXT_NODE or CDATA_SECTION_NODE
1673     if ($child->data =~ /\A[\x09-\x0D\x20]+\z/) { # Inter-element whitespace
1674     next;
1675     }
1676     undef $term;
1677     last;
1678     }
1679     }
1680     unless (defined $term) {
1681     $term = $node->text_content;
1682     }
1683     }
1684     if ($self->{term}->{$term}) {
1685     $self->{onerror}->(node => $node, type => 'duplicate term');
1686     push @{$self->{term}->{$term}}, $node;
1687     } else {
1688     $self->{term}->{$term} = [$node];
1689     }
1690     ## ISSUE: The HTML5 algorithm does not work with |ruby| unless |dfn|
1691     ## has |title|.
1692 wakaba 1.40 },
1693     check_end => sub {
1694     my ($self, $item, $element_state) = @_;
1695     $self->_remove_minus_elements ($element_state);
1696 wakaba 1.1
1697 wakaba 1.40 $HTMLPhrasingContentChecker{check_end}->(@_);
1698 wakaba 1.1 },
1699     };
1700    
1701     $Element->{$HTML_NS}->{abbr} = {
1702 wakaba 1.40 %HTMLPhrasingContentChecker,
1703 wakaba 1.1 };
1704    
1705     $Element->{$HTML_NS}->{time} = {
1706 wakaba 1.40 %HTMLPhrasingContentChecker,
1707     check_attrs => $GetHTMLAttrsChecker->({
1708 wakaba 1.1 datetime => sub { 1 }, # checked in |checker|
1709     }),
1710     ## TODO: Write tests
1711 wakaba 1.40 check_end => sub {
1712     my ($self, $item, $element_state) = @_;
1713 wakaba 1.1
1714 wakaba 1.40 my $attr = $item->{node}->get_attribute_node_ns (undef, 'datetime');
1715 wakaba 1.1 my $input;
1716     my $reg_sp;
1717     my $input_node;
1718     if ($attr) {
1719     $input = $attr->value;
1720     $reg_sp = qr/[\x09-\x0D\x20]*/;
1721     $input_node = $attr;
1722     } else {
1723 wakaba 1.40 $input = $item->{node}->text_content;
1724 wakaba 1.1 $reg_sp = qr/\p{Zs}*/;
1725 wakaba 1.40 $input_node = $item->{node};
1726 wakaba 1.1
1727     ## ISSUE: What is the definition for "successfully extracts a date
1728     ## or time"? If the algorithm says the string is invalid but
1729     ## return some date or time, is it "successfully"?
1730     }
1731    
1732     my $hour;
1733     my $minute;
1734     my $second;
1735     if ($input =~ /
1736     \A
1737     [\x09-\x0D\x20]*
1738     ([0-9]+) # 1
1739     (?>
1740     -([0-9]+) # 2
1741     -([0-9]+) # 3
1742     [\x09-\x0D\x20]*
1743     (?>
1744     T
1745     [\x09-\x0D\x20]*
1746     )?
1747     ([0-9]+) # 4
1748     :([0-9]+) # 5
1749     (?>
1750     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6
1751     )?
1752     [\x09-\x0D\x20]*
1753     (?>
1754     Z
1755     [\x09-\x0D\x20]*
1756     |
1757     [+-]([0-9]+):([0-9]+) # 7, 8
1758     [\x09-\x0D\x20]*
1759     )?
1760     \z
1761     |
1762     :([0-9]+) # 9
1763     (?>
1764     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10
1765     )?
1766     [\x09-\x0D\x20]*\z
1767     )
1768     /x) {
1769     if (defined $2) { ## YYYY-MM-DD T? hh:mm
1770     if (length $1 != 4 or length $2 != 2 or length $3 != 2 or
1771     length $4 != 2 or length $5 != 2) {
1772     $self->{onerror}->(node => $input_node,
1773     type => 'dateortime:syntax error');
1774     }
1775    
1776     if (1 <= $2 and $2 <= 12) {
1777     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
1778     if $3 < 1 or
1779     $3 > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$2];
1780     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
1781     if $2 == 2 and $3 == 29 and
1782     not ($1 % 400 == 0 or ($1 % 4 == 0 and $1 % 100 != 0));
1783     } else {
1784     $self->{onerror}->(node => $input_node,
1785     type => 'datetime:bad month');
1786     }
1787    
1788     ($hour, $minute, $second) = ($4, $5, $6);
1789    
1790     if (defined $7) { ## [+-]hh:mm
1791     if (length $7 != 2 or length $8 != 2) {
1792     $self->{onerror}->(node => $input_node,
1793     type => 'dateortime:syntax error');
1794     }
1795    
1796     $self->{onerror}->(node => $input_node,
1797     type => 'datetime:bad timezone hour')
1798     if $7 > 23;
1799     $self->{onerror}->(node => $input_node,
1800     type => 'datetime:bad timezone minute')
1801     if $8 > 59;
1802     }
1803     } else { ## hh:mm
1804     if (length $1 != 2 or length $9 != 2) {
1805     $self->{onerror}->(node => $input_node,
1806     type => qq'dateortime:syntax error');
1807     }
1808    
1809     ($hour, $minute, $second) = ($1, $9, $10);
1810     }
1811    
1812     $self->{onerror}->(node => $input_node, type => 'datetime:bad hour')
1813     if $hour > 23;
1814     $self->{onerror}->(node => $input_node, type => 'datetime:bad minute')
1815     if $minute > 59;
1816    
1817     if (defined $second) { ## s
1818     ## NOTE: Integer part of second don't have to have length of two.
1819    
1820     if (substr ($second, 0, 1) eq '.') {
1821     $self->{onerror}->(node => $input_node,
1822     type => 'dateortime:syntax error');
1823     }
1824    
1825     $self->{onerror}->(node => $input_node, type => 'datetime:bad second')
1826     if $second >= 60;
1827     }
1828     } else {
1829     $self->{onerror}->(node => $input_node,
1830     type => 'dateortime:syntax error');
1831     }
1832    
1833 wakaba 1.40 $HTMLPhrasingContentChecker{check_end}->(@_);
1834 wakaba 1.1 },
1835     };
1836    
1837     $Element->{$HTML_NS}->{meter} = { ## TODO: "The recommended way of giving the value is to include it as contents of the element"
1838 wakaba 1.40 %HTMLPhrasingContentChecker,
1839     check_attrs => $GetHTMLAttrsChecker->({
1840 wakaba 1.1 value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1841     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1842     low => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1843     high => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1844     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1845     optimum => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1846     }),
1847     };
1848    
1849     $Element->{$HTML_NS}->{progress} = { ## TODO: recommended to use content
1850 wakaba 1.40 %HTMLPhrasingContentChecker,
1851     check_attrs => $GetHTMLAttrsChecker->({
1852 wakaba 1.1 value => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift >= 0 }),
1853     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift > 0 }),
1854     }),
1855     };
1856    
1857     $Element->{$HTML_NS}->{code} = {
1858 wakaba 1.40 %HTMLPhrasingContentChecker,
1859 wakaba 1.1 };
1860    
1861     $Element->{$HTML_NS}->{var} = {
1862 wakaba 1.40 %HTMLPhrasingContentChecker,
1863 wakaba 1.1 };
1864    
1865     $Element->{$HTML_NS}->{samp} = {
1866 wakaba 1.40 %HTMLPhrasingContentChecker,
1867 wakaba 1.1 };
1868    
1869     $Element->{$HTML_NS}->{kbd} = {
1870 wakaba 1.40 %HTMLPhrasingContentChecker,
1871 wakaba 1.1 };
1872    
1873     $Element->{$HTML_NS}->{sub} = {
1874 wakaba 1.40 %HTMLPhrasingContentChecker,
1875 wakaba 1.1 };
1876    
1877     $Element->{$HTML_NS}->{sup} = {
1878 wakaba 1.40 %HTMLPhrasingContentChecker,
1879 wakaba 1.1 };
1880    
1881     $Element->{$HTML_NS}->{span} = {
1882 wakaba 1.40 %HTMLPhrasingContentChecker,
1883 wakaba 1.1 };
1884    
1885     $Element->{$HTML_NS}->{i} = {
1886 wakaba 1.40 %HTMLPhrasingContentChecker,
1887 wakaba 1.1 };
1888    
1889     $Element->{$HTML_NS}->{b} = {
1890 wakaba 1.40 %HTMLPhrasingContentChecker,
1891 wakaba 1.1 };
1892    
1893     $Element->{$HTML_NS}->{bdo} = {
1894 wakaba 1.40 %HTMLPhrasingContentChecker,
1895     check_attrs => sub {
1896     my ($self, $item, $element_state) = @_;
1897     $GetHTMLAttrsChecker->({})->($self, $item, $element_state);
1898     unless ($item->{node}->has_attribute_ns (undef, 'dir')) {
1899     $self->{onerror}->(node => $item->{node},
1900     type => 'attribute missing:dir');
1901 wakaba 1.1 }
1902     },
1903     ## ISSUE: The spec does not directly say that |dir| is a enumerated attr.
1904     };
1905    
1906 wakaba 1.29 =pod
1907    
1908     ## TODO:
1909    
1910     +
1911     + <p>Partly because of the confusion described above, authors are
1912     + strongly recommended to always mark up all paragraphs with the
1913     + <code>p</code> element, and to not have any <code>ins</code> or
1914     + <code>del</code> elements that cross across any <span
1915     + title="paragraph">implied paragraphs</span>.</p>
1916     +
1917     (An informative note)
1918    
1919     <p><code>ins</code> elements should not cross <span
1920     + title="paragraph">implied paragraph</span> boundaries.</p>
1921     (normative)
1922    
1923     + <p><code>del</code> elements should not cross <span
1924     + title="paragraph">implied paragraph</span> boundaries.</p>
1925     (normative)
1926    
1927     =cut
1928    
1929 wakaba 1.1 $Element->{$HTML_NS}->{ins} = {
1930 wakaba 1.40 %HTMLTransparentChecker,
1931     check_attrs => $GetHTMLAttrsChecker->({
1932 wakaba 1.1 cite => $HTMLURIAttrChecker,
1933     datetime => $HTMLDatetimeAttrChecker,
1934     }),
1935     };
1936    
1937     $Element->{$HTML_NS}->{del} = {
1938 wakaba 1.40 %HTMLTransparentChecker,
1939     check_attrs => $GetHTMLAttrsChecker->({
1940 wakaba 1.1 cite => $HTMLURIAttrChecker,
1941     datetime => $HTMLDatetimeAttrChecker,
1942     }),
1943 wakaba 1.40 check_end => sub {
1944     my ($self, $item, $element_state) = @_;
1945     if ($element_state->{has_significant}) {
1946     ## NOTE: Significantness flag does not propagate.
1947     } elsif ($item->{transparent}) {
1948     #
1949     } else {
1950     $self->{onerror}->(node => $item->{node},
1951     level => $self->{should_level},
1952     type => 'no significant content');
1953     }
1954 wakaba 1.1 },
1955     };
1956    
1957 wakaba 1.35 $Element->{$HTML_NS}->{figure} = {
1958 wakaba 1.40 %HTMLProseContentChecker,
1959 wakaba 1.41 ## NOTE: legend, Prose | Prose, legend
1960     check_child_element => sub {
1961     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1962     $child_is_transparent, $element_state) = @_;
1963     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
1964     $self->{onerror}->(node => $child_el,
1965     type => 'element not allowed:minus',
1966     level => $self->{must_level});
1967     $element_state->{has_non_legend} = 1;
1968     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1969     #
1970     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'legend') {
1971     if ($element_state->{has_legend_at_first}) {
1972     $self->{onerror}->(node => $child_el,
1973     type => 'element not allowed:figure legend',
1974     level => $self->{must_level});
1975     } elsif ($element_state->{has_legend}) {
1976     $self->{onerror}->(node => $element_state->{has_legend},
1977     type => 'element not allowed:figure legend',
1978     level => $self->{must_level});
1979     $element_state->{has_legend} = $child_el;
1980     } elsif ($element_state->{has_non_legend}) {
1981     $element_state->{has_legend} = $child_el;
1982     } else {
1983     $element_state->{has_legend_at_first} = 1;
1984 wakaba 1.35 }
1985 wakaba 1.41 delete $element_state->{has_non_legend};
1986     } else {
1987     $HTMLProseContentChecker{check_child_element}->(@_);
1988     $element_state->{has_non_legend} = 1;
1989     }
1990     },
1991     check_child_text => sub {
1992     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1993     if ($has_significant) {
1994     $element_state->{has_non_legend} = 1;
1995 wakaba 1.35 }
1996 wakaba 1.41 },
1997     check_end => sub {
1998     my ($self, $item, $element_state) = @_;
1999 wakaba 1.35
2000 wakaba 1.41 if ($element_state->{has_legend_at_first}) {
2001     #
2002     } elsif ($element_state->{has_legend}) {
2003     if ($element_state->{has_non_legend}) {
2004     $self->{onerror}->(node => $element_state->{has_legend},
2005 wakaba 1.35 type => 'element not allowed:figure legend',
2006     level => $self->{must_level});
2007     }
2008     } else {
2009 wakaba 1.41 $self->{onerror}->(node => $item->{node},
2010 wakaba 1.35 type => 'element missing:legend',
2011     level => $self->{must_level});
2012     }
2013 wakaba 1.41
2014     $HTMLProseContentChecker{check_end}->(@_);
2015     ## ISSUE: |<figure><legend>aa</legend></figure>| should be an error?
2016 wakaba 1.35 },
2017     };
2018 wakaba 1.8 ## TODO: Test for <nest/> in <figure/>
2019 wakaba 1.1
2020     $Element->{$HTML_NS}->{img} = {
2021 wakaba 1.40 %HTMLEmptyChecker,
2022     check_attrs => sub {
2023     my ($self, $item, $element_state) = @_;
2024 wakaba 1.1 $GetHTMLAttrsChecker->({
2025     alt => sub { }, ## NOTE: No syntactical requirement
2026     src => $HTMLURIAttrChecker,
2027     usemap => $HTMLUsemapAttrChecker,
2028     ismap => sub {
2029 wakaba 1.40 my ($self, $attr, $parent_item) = @_;
2030     if (not $self->{flag}->{in_a_href}) {
2031 wakaba 1.15 $self->{onerror}->(node => $attr,
2032     type => 'attribute not allowed:ismap');
2033 wakaba 1.1 }
2034 wakaba 1.40 $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_item);
2035 wakaba 1.1 },
2036     ## TODO: height
2037     ## TODO: width
2038 wakaba 1.40 })->($self, $item);
2039     unless ($item->{node}->has_attribute_ns (undef, 'alt')) {
2040     $self->{onerror}->(node => $item->{node},
2041 wakaba 1.37 type => 'attribute missing:alt',
2042     level => $self->{should_level});
2043 wakaba 1.1 }
2044 wakaba 1.40 unless ($item->{node}->has_attribute_ns (undef, 'src')) {
2045     $self->{onerror}->(node => $item->{node},
2046     type => 'attribute missing:src');
2047 wakaba 1.1 }
2048     },
2049     };
2050    
2051     $Element->{$HTML_NS}->{iframe} = {
2052 wakaba 1.40 %HTMLTextChecker,
2053     check_attrs => $GetHTMLAttrsChecker->({
2054 wakaba 1.1 src => $HTMLURIAttrChecker,
2055     }),
2056 wakaba 1.40 };
2057    
2058 wakaba 1.1 $Element->{$HTML_NS}->{embed} = {
2059 wakaba 1.40 %HTMLEmptyChecker,
2060     check_attrs => sub {
2061     my ($self, $item, $element_state) = @_;
2062 wakaba 1.1 my $has_src;
2063 wakaba 1.40 for my $attr (@{$item->{node}->attributes}) {
2064 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
2065     $attr_ns = '' unless defined $attr_ns;
2066     my $attr_ln = $attr->manakai_local_name;
2067     my $checker;
2068     if ($attr_ns eq '') {
2069     if ($attr_ln eq 'src') {
2070     $checker = $HTMLURIAttrChecker;
2071     $has_src = 1;
2072     } elsif ($attr_ln eq 'type') {
2073     $checker = $HTMLIMTAttrChecker;
2074     } else {
2075     ## TODO: height
2076     ## TODO: width
2077     $checker = $HTMLAttrChecker->{$attr_ln}
2078     || sub { }; ## NOTE: Any local attribute is ok.
2079     }
2080     }
2081     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2082     || $AttrChecker->{$attr_ns}->{''};
2083     if ($checker) {
2084     $checker->($self, $attr);
2085     } else {
2086     $self->{onerror}->(node => $attr, level => 'unsupported',
2087     type => 'attribute');
2088     ## ISSUE: No comformance createria for global attributes in the spec
2089     }
2090     }
2091    
2092     unless ($has_src) {
2093 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2094 wakaba 1.1 type => 'attribute missing:src');
2095     }
2096     },
2097     };
2098    
2099     $Element->{$HTML_NS}->{object} = {
2100 wakaba 1.40 %HTMLTransparentChecker,
2101     check_attrs => sub {
2102     my ($self, $item, $element_state) = @_;
2103 wakaba 1.1 $GetHTMLAttrsChecker->({
2104     data => $HTMLURIAttrChecker,
2105     type => $HTMLIMTAttrChecker,
2106     usemap => $HTMLUsemapAttrChecker,
2107     ## TODO: width
2108     ## TODO: height
2109 wakaba 1.40 })->($self, $item);
2110     unless ($item->{node}->has_attribute_ns (undef, 'data')) {
2111     unless ($item->{node}->has_attribute_ns (undef, 'type')) {
2112     $self->{onerror}->(node => $item->{node},
2113 wakaba 1.1 type => 'attribute missing:data|type');
2114     }
2115     }
2116     },
2117 wakaba 1.41 ## NOTE: param*, transparent (Prose)
2118     check_child_element => sub {
2119     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2120     $child_is_transparent, $element_state) = @_;
2121     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
2122     $self->{onerror}->(node => $child_el,
2123     type => 'element not allowed:minus',
2124     level => $self->{must_level});
2125     $element_state->{has_non_legend} = 1;
2126     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2127     #
2128     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'param') {
2129     if ($element_state->{has_non_param}) {
2130     $self->{onerror}->(node => $child_el,
2131     type => 'element not allowed:prose',
2132     level => $self->{must_level});
2133 wakaba 1.39 }
2134 wakaba 1.41 } else {
2135     $HTMLProseContentChecker{check_child_element}->(@_);
2136     $element_state->{has_non_param} = 1;
2137 wakaba 1.39 }
2138 wakaba 1.25 },
2139 wakaba 1.41 check_child_text => sub {
2140     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2141     if ($has_significant) {
2142     $element_state->{has_non_param} = 1;
2143     }
2144 wakaba 1.42 },
2145     check_end => sub {
2146     my ($self, $item, $element_state) = @_;
2147     if ($element_state->{has_significant}) {
2148     $item->{parent_state}->{has_significant} = 1;
2149     } elsif ($item->{node}->manakai_parent_element) {
2150     ## NOTE: Transparent.
2151     } else {
2152     $self->{onerror}->(node => $item->{node},
2153     level => $self->{should_level},
2154     type => 'no significant content');
2155     }
2156     },
2157 wakaba 1.8 ## TODO: Tests for <nest/> in <object/>
2158 wakaba 1.1 };
2159 wakaba 1.41 ## ISSUE: Is |<menu><object data><li>aa</li></object></menu>| conforming?
2160     ## What about |<section><object data><style scoped></style>x</object></section>|?
2161     ## |<section><ins></ins><object data><style scoped></style>x</object></section>|?
2162 wakaba 1.1
2163     $Element->{$HTML_NS}->{param} = {
2164 wakaba 1.40 %HTMLEmptyChecker,
2165     check_attrs => sub {
2166     my ($self, $item, $element_state) = @_;
2167 wakaba 1.1 $GetHTMLAttrsChecker->({
2168     name => sub { },
2169     value => sub { },
2170 wakaba 1.40 })->($self, $item);
2171     unless ($item->{node}->has_attribute_ns (undef, 'name')) {
2172     $self->{onerror}->(node => $item->{node},
2173 wakaba 1.1 type => 'attribute missing:name');
2174     }
2175 wakaba 1.40 unless ($item->{node}->has_attribute_ns (undef, 'value')) {
2176     $self->{onerror}->(node => $item->{node},
2177 wakaba 1.1 type => 'attribute missing:value');
2178     }
2179     },
2180     };
2181    
2182     $Element->{$HTML_NS}->{video} = {
2183 wakaba 1.40 %HTMLTransparentChecker,
2184     check_attrs => $GetHTMLAttrsChecker->({
2185 wakaba 1.1 src => $HTMLURIAttrChecker,
2186     ## TODO: start, loopstart, loopend, end
2187     ## ISSUE: they MUST be "value time offset"s. Value?
2188 wakaba 1.11 ## ISSUE: playcount has no conformance creteria
2189 wakaba 1.1 autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
2190     controls => $GetHTMLBooleanAttrChecker->('controls'),
2191 wakaba 1.11 poster => $HTMLURIAttrChecker, ## TODO: not for audio!
2192 wakaba 1.42 ## TODO: width, height
2193 wakaba 1.1 }),
2194 wakaba 1.42 check_start => sub {
2195     my ($self, $item, $element_state) = @_;
2196     $element_state->{allow_source}
2197     = not $item->{node}->has_attribute_ns (undef, 'src');
2198     $element_state->{has_source} ||= $element_state->{allow_source} * -1;
2199     ## NOTE: It might be set true by |check_element|.
2200     },
2201     check_child_element => sub {
2202     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2203     $child_is_transparent, $element_state) = @_;
2204     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
2205     $self->{onerror}->(node => $child_el,
2206     type => 'element not allowed:minus',
2207     level => $self->{must_level});
2208     delete $element_state->{allow_source};
2209     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2210     #
2211     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'source') {
2212     if ($element_state->{allow_source}) {
2213     $element_state->{has_source} = 1;
2214     } else {
2215     $self->{onerror}->(node => $child_el,
2216     type => 'element not allowed:prose',
2217     level => $self->{must_level});
2218     }
2219 wakaba 1.1 } else {
2220 wakaba 1.42 delete $element_state->{allow_source};
2221     $HTMLProseContentChecker{check_child_element}->(@_);
2222     }
2223     },
2224     check_child_text => sub {
2225     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2226     if ($has_significant) {
2227     delete $element_state->{allow_source};
2228     }
2229     $HTMLProseContentChecker{check_child_text}->(@_);
2230     },
2231     check_end => sub {
2232     my ($self, $item, $element_state) = @_;
2233     if ($element_state->{has_source} == -1) {
2234     $self->{onerror}->(node => $item->{node},
2235     type => 'element missing:source',
2236     level => $self->{must_level});
2237 wakaba 1.1 }
2238 wakaba 1.42
2239     $Element->{$HTML_NS}->{object}->{check_end}->(@_);
2240 wakaba 1.1 },
2241     };
2242    
2243     $Element->{$HTML_NS}->{audio} = {
2244 wakaba 1.40 %{$Element->{$HTML_NS}->{video}},
2245 wakaba 1.42 check_attrs => $GetHTMLAttrsChecker->({
2246     src => $HTMLURIAttrChecker,
2247     ## TODO: start, loopstart, loopend, end
2248     ## ISSUE: they MUST be "value time offset"s. Value?
2249     ## ISSUE: playcount has no conformance creteria
2250     autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
2251     controls => $GetHTMLBooleanAttrChecker->('controls'),
2252     }),
2253 wakaba 1.1 };
2254    
2255     $Element->{$HTML_NS}->{source} = {
2256 wakaba 1.40 %HTMLEmptyChecker,
2257     check_attrs => sub {
2258     my ($self, $item, $element_state) = @_;
2259 wakaba 1.1 $GetHTMLAttrsChecker->({
2260     src => $HTMLURIAttrChecker,
2261     type => $HTMLIMTAttrChecker,
2262     media => $HTMLMQAttrChecker,
2263 wakaba 1.40 })->($self, $item, $element_state);
2264     unless ($item->{node}->has_attribute_ns (undef, 'src')) {
2265     $self->{onerror}->(node => $item->{node},
2266 wakaba 1.1 type => 'attribute missing:src');
2267     }
2268     },
2269     };
2270    
2271     $Element->{$HTML_NS}->{canvas} = {
2272 wakaba 1.40 %HTMLTransparentChecker,
2273     check_attrs => $GetHTMLAttrsChecker->({
2274 wakaba 1.1 height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2275     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2276     }),
2277     };
2278    
2279     $Element->{$HTML_NS}->{map} = {
2280 wakaba 1.40 %HTMLProseContentChecker,
2281     check_attrs => sub {
2282     my ($self, $item, $element_state) = @_;
2283 wakaba 1.4 my $has_id;
2284     $GetHTMLAttrsChecker->({
2285     id => sub {
2286     ## NOTE: same as global |id=""|, with |$self->{map}| registeration
2287     my ($self, $attr) = @_;
2288     my $value = $attr->value;
2289     if (length $value > 0) {
2290     if ($self->{id}->{$value}) {
2291     $self->{onerror}->(node => $attr, type => 'duplicate ID');
2292     push @{$self->{id}->{$value}}, $attr;
2293     } else {
2294     $self->{id}->{$value} = [$attr];
2295     }
2296 wakaba 1.1 } else {
2297 wakaba 1.4 ## NOTE: MUST contain at least one character
2298     $self->{onerror}->(node => $attr, type => 'empty attribute value');
2299 wakaba 1.1 }
2300 wakaba 1.4 if ($value =~ /[\x09-\x0D\x20]/) {
2301     $self->{onerror}->(node => $attr, type => 'space in ID');
2302     }
2303     $self->{map}->{$value} ||= $attr;
2304     $has_id = 1;
2305     },
2306 wakaba 1.40 })->($self, $item, $element_state);
2307     $self->{onerror}->(node => $item->{node}, type => 'attribute missing:id')
2308 wakaba 1.4 unless $has_id;
2309     },
2310 wakaba 1.1 };
2311    
2312     $Element->{$HTML_NS}->{area} = {
2313 wakaba 1.40 %HTMLEmptyChecker,
2314     check_attrs => sub {
2315     my ($self, $item, $element_state) = @_;
2316 wakaba 1.1 my %attr;
2317     my $coords;
2318 wakaba 1.40 for my $attr (@{$item->{node}->attributes}) {
2319 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
2320     $attr_ns = '' unless defined $attr_ns;
2321     my $attr_ln = $attr->manakai_local_name;
2322     my $checker;
2323     if ($attr_ns eq '') {
2324     $checker = {
2325     alt => sub { },
2326     ## NOTE: |alt| value has no conformance creteria.
2327     shape => $GetHTMLEnumeratedAttrChecker->({
2328     circ => -1, circle => 1,
2329     default => 1,
2330     poly => 1, polygon => -1,
2331     rect => 1, rectangle => -1,
2332     }),
2333     coords => sub {
2334     my ($self, $attr) = @_;
2335     my $value = $attr->value;
2336     if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
2337     $coords = [split /,/, $value];
2338     } else {
2339     $self->{onerror}->(node => $attr,
2340     type => 'coords:syntax error');
2341     }
2342     },
2343     target => $HTMLTargetAttrChecker,
2344     href => $HTMLURIAttrChecker,
2345     ping => $HTMLSpaceURIsAttrChecker,
2346 wakaba 1.40 rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
2347 wakaba 1.1 media => $HTMLMQAttrChecker,
2348     hreflang => $HTMLLanguageTagAttrChecker,
2349     type => $HTMLIMTAttrChecker,
2350     }->{$attr_ln};
2351     if ($checker) {
2352     $attr{$attr_ln} = $attr;
2353     } else {
2354     $checker = $HTMLAttrChecker->{$attr_ln};
2355     }
2356     }
2357     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2358     || $AttrChecker->{$attr_ns}->{''};
2359     if ($checker) {
2360     $checker->($self, $attr) if ref $checker;
2361     } else {
2362     $self->{onerror}->(node => $attr, level => 'unsupported',
2363     type => 'attribute');
2364     ## ISSUE: No comformance createria for unknown attributes in the spec
2365     }
2366     }
2367    
2368     if (defined $attr{href}) {
2369 wakaba 1.4 $self->{has_hyperlink_element} = 1;
2370 wakaba 1.1 unless (defined $attr{alt}) {
2371 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2372 wakaba 1.1 type => 'attribute missing:alt');
2373     }
2374     } else {
2375     for (qw/target ping rel media hreflang type alt/) {
2376     if (defined $attr{$_}) {
2377     $self->{onerror}->(node => $attr{$_},
2378     type => 'attribute not allowed');
2379     }
2380     }
2381     }
2382    
2383     my $shape = 'rectangle';
2384     if (defined $attr{shape}) {
2385     $shape = {
2386     circ => 'circle', circle => 'circle',
2387     default => 'default',
2388     poly => 'polygon', polygon => 'polygon',
2389     rect => 'rectangle', rectangle => 'rectangle',
2390     }->{lc $attr{shape}->value} || 'rectangle';
2391     ## TODO: ASCII lowercase?
2392     }
2393    
2394     if ($shape eq 'circle') {
2395     if (defined $attr{coords}) {
2396     if (defined $coords) {
2397     if (@$coords == 3) {
2398     if ($coords->[2] < 0) {
2399     $self->{onerror}->(node => $attr{coords},
2400     type => 'coords:out of range:2');
2401     }
2402     } else {
2403     $self->{onerror}->(node => $attr{coords},
2404     type => 'coords:number:3:'.@$coords);
2405     }
2406     } else {
2407     ## NOTE: A syntax error has been reported.
2408     }
2409     } else {
2410 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2411 wakaba 1.1 type => 'attribute missing:coords');
2412     }
2413     } elsif ($shape eq 'default') {
2414     if (defined $attr{coords}) {
2415     $self->{onerror}->(node => $attr{coords},
2416     type => 'attribute not allowed');
2417     }
2418     } elsif ($shape eq 'polygon') {
2419     if (defined $attr{coords}) {
2420     if (defined $coords) {
2421     if (@$coords >= 6) {
2422     unless (@$coords % 2 == 0) {
2423     $self->{onerror}->(node => $attr{coords},
2424     type => 'coords:number:even:'.@$coords);
2425     }
2426     } else {
2427     $self->{onerror}->(node => $attr{coords},
2428     type => 'coords:number:>=6:'.@$coords);
2429     }
2430     } else {
2431     ## NOTE: A syntax error has been reported.
2432     }
2433     } else {
2434 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2435 wakaba 1.1 type => 'attribute missing:coords');
2436     }
2437     } elsif ($shape eq 'rectangle') {
2438     if (defined $attr{coords}) {
2439     if (defined $coords) {
2440     if (@$coords == 4) {
2441     unless ($coords->[0] < $coords->[2]) {
2442     $self->{onerror}->(node => $attr{coords},
2443     type => 'coords:out of range:0');
2444     }
2445     unless ($coords->[1] < $coords->[3]) {
2446     $self->{onerror}->(node => $attr{coords},
2447     type => 'coords:out of range:1');
2448     }
2449     } else {
2450     $self->{onerror}->(node => $attr{coords},
2451     type => 'coords:number:4:'.@$coords);
2452     }
2453     } else {
2454     ## NOTE: A syntax error has been reported.
2455     }
2456     } else {
2457 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2458 wakaba 1.1 type => 'attribute missing:coords');
2459     }
2460     }
2461     },
2462     };
2463     ## TODO: only in map
2464    
2465     $Element->{$HTML_NS}->{table} = {
2466 wakaba 1.40 %HTMLChecker,
2467     check_start => sub {
2468     my ($self, $item, $element_state) = @_;
2469     $element_state->{phase} = 'before caption';
2470     },
2471     check_child_element => sub {
2472     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2473     $child_is_transparent, $element_state) = @_;
2474     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
2475     $self->{onerror}->(node => $child_el,
2476     type => 'element not allowed:minus',
2477     level => $self->{must_level});
2478     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2479     #
2480     } elsif ($element_state->{phase} eq 'in tbodys') {
2481     if ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
2482     #$element_state->{phase} = 'in tbodys';
2483     } elsif (not $element_state->{has_tfoot} and
2484     $child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
2485     $element_state->{phase} = 'after tfoot';
2486     $element_state->{has_tfoot} = 1;
2487     } else {
2488     $self->{onerror}->(node => $child_el, type => 'element not allowed');
2489     }
2490     } elsif ($element_state->{phase} eq 'in trs') {
2491     if ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
2492     #$element_state->{phase} = 'in trs';
2493     } elsif (not $element_state->{has_tfoot} and
2494     $child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
2495     $element_state->{phase} = 'after tfoot';
2496     $element_state->{has_tfoot} = 1;
2497     } else {
2498     $self->{onerror}->(node => $child_el, type => 'element not allowed');
2499     }
2500     } elsif ($element_state->{phase} eq 'after thead') {
2501     if ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
2502     $element_state->{phase} = 'in tbodys';
2503     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
2504     $element_state->{phase} = 'in trs';
2505     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
2506     $element_state->{phase} = 'in tbodys';
2507     $element_state->{has_tfoot} = 1;
2508     } else {
2509     $self->{onerror}->(node => $child_el, type => 'element not allowed');
2510     }
2511     } elsif ($element_state->{phase} eq 'in colgroup') {
2512     if ($child_nsuri eq $HTML_NS and $child_ln eq 'colgroup') {
2513     $element_state->{phase} = 'in colgroup';
2514     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'thead') {
2515     $element_state->{phase} = 'after thead';
2516     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
2517     $element_state->{phase} = 'in tbodys';
2518     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
2519     $element_state->{phase} = 'in trs';
2520     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
2521     $element_state->{phase} = 'in tbodys';
2522     $element_state->{has_tfoot} = 1;
2523     } else {
2524     $self->{onerror}->(node => $child_el, type => 'element not allowed');
2525     }
2526     } elsif ($element_state->{phase} eq 'before caption') {
2527     if ($child_nsuri eq $HTML_NS and $child_ln eq 'caption') {
2528     $element_state->{phase} = 'in colgroup';
2529     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'colgroup') {
2530     $element_state->{phase} = 'in colgroup';
2531     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'thead') {
2532     $element_state->{phase} = 'after thead';
2533     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
2534     $element_state->{phase} = 'in tbodys';
2535     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
2536     $element_state->{phase} = 'in trs';
2537     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
2538     $element_state->{phase} = 'in tbodys';
2539     $element_state->{has_tfoot} = 1;
2540     } else {
2541     $self->{onerror}->(node => $child_el, type => 'element not allowed');
2542     }
2543     } elsif ($element_state->{phase} eq 'after tfoot') {
2544     $self->{onerror}->(node => $child_el, type => 'element not allowed');
2545     } else {
2546     die "check_child_element: Bad |table| phase: $element_state->{phase}";
2547     }
2548     },
2549     check_child_text => sub {
2550     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2551     if ($has_significant) {
2552     $self->{onerror}->(node => $child_node, type => 'character not allowed');
2553 wakaba 1.1 }
2554 wakaba 1.40 },
2555     check_end => sub {
2556     my ($self, $item, $element_state) = @_;
2557 wakaba 1.1
2558     ## Table model errors
2559     require Whatpm::HTMLTable;
2560 wakaba 1.40 Whatpm::HTMLTable->form_table ($item->{node}, sub {
2561 wakaba 1.1 my %opt = @_;
2562     $self->{onerror}->(type => 'table:'.$opt{type}, node => $opt{node});
2563     });
2564 wakaba 1.40 push @{$self->{return}->{table}}, $item->{node};
2565 wakaba 1.1
2566 wakaba 1.40 $HTMLChecker{check_end}->(@_);
2567 wakaba 1.1 },
2568     };
2569    
2570     $Element->{$HTML_NS}->{caption} = {
2571 wakaba 1.40 %HTMLPhrasingContentChecker,
2572 wakaba 1.1 };
2573    
2574     $Element->{$HTML_NS}->{colgroup} = {
2575 wakaba 1.40 %HTMLEmptyChecker,
2576     check_attrs => $GetHTMLAttrsChecker->({
2577 wakaba 1.1 span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2578     ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
2579     ## TODO: "attribute not supported" if |col|.
2580     ## ISSUE: MUST NOT if any |col|?
2581     ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
2582     }),
2583 wakaba 1.40 check_child_element => sub {
2584     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2585     $child_is_transparent, $element_state) = @_;
2586     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
2587     $self->{onerror}->(node => $child_el,
2588     type => 'element not allowed:minus',
2589     level => $self->{must_level});
2590     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2591     #
2592     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'col') {
2593     #
2594     } else {
2595     $self->{onerror}->(node => $child_el, type => 'element not allowed');
2596     }
2597     },
2598     check_child_text => sub {
2599     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2600     if ($has_significant) {
2601     $self->{onerror}->(node => $child_node, type => 'character not allowed');
2602 wakaba 1.1 }
2603     },
2604     };
2605    
2606     $Element->{$HTML_NS}->{col} = {
2607 wakaba 1.40 %HTMLEmptyChecker,
2608     check_attrs => $GetHTMLAttrsChecker->({
2609 wakaba 1.1 span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2610     }),
2611     };
2612    
2613     $Element->{$HTML_NS}->{tbody} = {
2614 wakaba 1.40 %HTMLChecker,
2615     check_child_element => sub {
2616     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2617     $child_is_transparent, $element_state) = @_;
2618     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
2619     $self->{onerror}->(node => $child_el,
2620     type => 'element not allowed:minus',
2621     level => $self->{must_level});
2622     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2623     #
2624     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
2625     $element_state->{has_tr} = 1;
2626     } else {
2627     $self->{onerror}->(node => $child_el, type => 'element not allowed');
2628     }
2629     },
2630     check_child_text => sub {
2631     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2632     if ($has_significant) {
2633     $self->{onerror}->(node => $child_node, type => 'character not allowed');
2634 wakaba 1.1 }
2635 wakaba 1.40 },
2636     check_end => sub {
2637     my ($self, $item, $element_state) = @_;
2638     unless ($element_state->{has_tr}) {
2639     $self->{onerror}->(node => $item->{node},
2640     type => 'child element missing:tr');
2641 wakaba 1.1 }
2642 wakaba 1.40
2643     $HTMLChecker{check_end}->(@_);
2644 wakaba 1.1 },
2645     };
2646    
2647     $Element->{$HTML_NS}->{thead} = {
2648 wakaba 1.40 %{$Element->{$HTML_NS}->{tbody}},
2649 wakaba 1.1 };
2650    
2651     $Element->{$HTML_NS}->{tfoot} = {
2652 wakaba 1.40 %{$Element->{$HTML_NS}->{tbody}},
2653 wakaba 1.1 };
2654    
2655     $Element->{$HTML_NS}->{tr} = {
2656 wakaba 1.40 %HTMLChecker,
2657     check_child_element => sub {
2658     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2659     $child_is_transparent, $element_state) = @_;
2660     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
2661     $self->{onerror}->(node => $child_el,
2662     type => 'element not allowed:minus',
2663     level => $self->{must_level});
2664     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2665     #
2666     } elsif ($child_nsuri eq $HTML_NS and
2667     ($child_ln eq 'td' or $child_ln eq 'th')) {
2668     $element_state->{has_cell} = 1;
2669     } else {
2670     $self->{onerror}->(node => $child_el, type => 'element not allowed');
2671     }
2672     },
2673     check_child_text => sub {
2674     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2675     if ($has_significant) {
2676     $self->{onerror}->(node => $child_node, type => 'character not allowed');
2677 wakaba 1.1 }
2678 wakaba 1.40 },
2679     check_end => sub {
2680     my ($self, $item, $element_state) = @_;
2681     unless ($element_state->{has_cell}) {
2682     $self->{onerror}->(node => $item->{node},
2683     type => 'child element missing:td|th');
2684 wakaba 1.1 }
2685 wakaba 1.40
2686     $HTMLChecker{check_end}->(@_);
2687 wakaba 1.1 },
2688     };
2689    
2690     $Element->{$HTML_NS}->{td} = {
2691 wakaba 1.40 %HTMLProseContentChecker,
2692     check_attrs => $GetHTMLAttrsChecker->({
2693 wakaba 1.1 colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2694     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2695     }),
2696     };
2697    
2698     $Element->{$HTML_NS}->{th} = {
2699 wakaba 1.40 %HTMLPhrasingContentChecker,
2700     check_attrs => $GetHTMLAttrsChecker->({
2701 wakaba 1.1 colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2702     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2703     scope => $GetHTMLEnumeratedAttrChecker
2704     ->({row => 1, col => 1, rowgroup => 1, colgroup => 1}),
2705     }),
2706     };
2707    
2708     ## TODO: forms
2709 wakaba 1.8 ## TODO: Tests for <nest/> in form elements
2710 wakaba 1.1
2711     $Element->{$HTML_NS}->{script} = {
2712 wakaba 1.40 %HTMLChecker,
2713     check_attrs => $GetHTMLAttrsChecker->({
2714 wakaba 1.1 src => $HTMLURIAttrChecker,
2715     defer => $GetHTMLBooleanAttrChecker->('defer'),
2716     async => $GetHTMLBooleanAttrChecker->('async'),
2717     type => $HTMLIMTAttrChecker,
2718 wakaba 1.9 }),
2719 wakaba 1.40 check_start => sub {
2720     my ($self, $item, $element_state) = @_;
2721 wakaba 1.1
2722 wakaba 1.40 if ($item->{node}->has_attribute_ns (undef, 'src')) {
2723     $element_state->{must_be_empty} = 1;
2724 wakaba 1.1 } else {
2725     ## NOTE: No content model conformance in HTML5 spec.
2726 wakaba 1.40 my $type = $item->{node}->get_attribute_ns (undef, 'type');
2727     my $language = $item->{node}->get_attribute_ns (undef, 'language');
2728 wakaba 1.1 if ((defined $type and $type eq '') or
2729     (defined $language and $language eq '')) {
2730     $type = 'text/javascript';
2731     } elsif (defined $type) {
2732     #
2733     } elsif (defined $language) {
2734     $type = 'text/' . $language;
2735     } else {
2736     $type = 'text/javascript';
2737     }
2738 wakaba 1.40 $element_state->{script_type} = $type; ## TODO: $type normalization
2739     }
2740     },
2741     check_child_element => sub {
2742     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2743     $child_is_transparent, $element_state) = @_;
2744     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
2745     $self->{onerror}->(node => $child_el,
2746     type => 'element not allowed:minus',
2747     level => $self->{must_level});
2748     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2749     #
2750     } else {
2751     if ($element_state->{must_be_empty}) {
2752     $self->{onerror}->(node => $child_el,
2753     type => 'element not allowed');
2754     }
2755     }
2756     },
2757     check_child_text => sub {
2758     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2759     if ($has_significant and
2760     $element_state->{must_be_empty}) {
2761     $self->{onerror}->(node => $child_node,
2762     type => 'character not allowed');
2763     }
2764     },
2765     check_end => sub {
2766     my ($self, $item, $element_state) = @_;
2767     unless ($element_state->{must_be_empty}) {
2768     $self->{onerror}->(node => $item->{node}, level => 'unsupported',
2769     type => 'script:'.$element_state->{script_type});
2770     ## TODO: text/javascript support
2771    
2772     $HTMLChecker{check_end}->(@_);
2773 wakaba 1.1 }
2774     },
2775     };
2776 wakaba 1.25 ## ISSUE: Significant check and text child node
2777 wakaba 1.1
2778     ## NOTE: When script is disabled.
2779     $Element->{$HTML_NS}->{noscript} = {
2780 wakaba 1.40 %HTMLTransparentChecker,
2781     check_start => sub {
2782     my ($self, $item, $element_state) = @_;
2783 wakaba 1.3
2784 wakaba 1.40 unless ($item->{node}->owner_document->manakai_is_html) {
2785     $self->{onerror}->(node => $item->{node}, type => 'in XML:noscript');
2786 wakaba 1.3 }
2787    
2788 wakaba 1.40 unless ($self->{flag}->{in_head}) {
2789     $self->_add_minus_elements ($element_state,
2790     {$HTML_NS => {noscript => 1}});
2791     }
2792 wakaba 1.3 },
2793 wakaba 1.40 check_child_element => sub {
2794     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2795     $child_is_transparent, $element_state) = @_;
2796     if ($self->{flag}->{in_head}) {
2797     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
2798     $self->{onerror}->(node => $child_el,
2799     type => 'element not allowed:minus',
2800     level => $self->{must_level});
2801     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2802     #
2803     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'link') {
2804     #
2805     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
2806     if ($child_el->has_attribute_ns (undef, 'scoped')) {
2807     $self->{onerror}->(node => $child_el,
2808     type => 'element not allowed:head noscript',
2809     level => $self->{must_level});
2810     }
2811     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'meta') {
2812     if ($child_el->has_attribute_ns (undef, 'charset')) {
2813     ## NOTE: Non-conforming. An error is raised by
2814     ## |meta|'s checker.
2815     } else {
2816     my $http_equiv_attr
2817     = $child_el->get_attribute_node_ns (undef, 'http-equiv');
2818     if ($http_equiv_attr) {
2819     ## TODO: case
2820     if (lc $http_equiv_attr->value eq 'content-type') {
2821     ## NOTE: Non-conforming. An error is raised by
2822     ## |meta|'s checker.
2823     } else {
2824 wakaba 1.3 #
2825     }
2826     } else {
2827 wakaba 1.40 $self->{onerror}->(node => $child_el,
2828 wakaba 1.34 type => 'element not allowed:head noscript',
2829     level => $self->{must_level});
2830 wakaba 1.3 }
2831     }
2832 wakaba 1.40 } else {
2833     $self->{onerror}->(node => $child_el,
2834     type => 'element not allowed:head noscript',
2835     level => $self->{must_level});
2836     }
2837     } else {
2838     $HTMLTransparentChecker{check_child_element}->(@_);
2839     }
2840     },
2841     check_child_text => sub {
2842     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2843     if ($self->{flag}->{in_head}) {
2844     if ($has_significant) {
2845     $self->{onerror}->(node => $child_node,
2846     type => 'character not allowed');
2847 wakaba 1.3 }
2848     } else {
2849 wakaba 1.40 $HTMLTransparentChecker{check_child_text}->(@_);
2850     }
2851     },
2852     check_end => sub {
2853     my ($self, $item, $element_state) = @_;
2854     $self->_remove_minus_elements ($element_state);
2855     if ($self->{flag}->{in_head}) {
2856     $HTMLChecker{check_end}->(@_);
2857     } else {
2858     $HTMLPhrasingContentChecker{check_end}->(@_);
2859 wakaba 1.3 }
2860 wakaba 1.1 },
2861     };
2862 wakaba 1.3 ## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
2863 wakaba 1.1
2864     $Element->{$HTML_NS}->{'event-source'} = {
2865 wakaba 1.40 %HTMLEmptyChecker,
2866     check_attrs => $GetHTMLAttrsChecker->({
2867 wakaba 1.1 src => $HTMLURIAttrChecker,
2868     }),
2869     };
2870    
2871     $Element->{$HTML_NS}->{details} = {
2872 wakaba 1.40 %HTMLProseContentChecker,
2873     check_attrs => $GetHTMLAttrsChecker->({
2874 wakaba 1.1 open => $GetHTMLBooleanAttrChecker->('open'),
2875     }),
2876     checker => sub {
2877     my ($self, $todo) = @_;
2878    
2879 wakaba 1.29 ## TODO:
2880 wakaba 1.40 # my ($sib, $ch)
2881     # = $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'legend')
2882     # ->($self, $todo);
2883     # return ($sib, $ch);
2884 wakaba 1.1 },
2885     };
2886    
2887     $Element->{$HTML_NS}->{datagrid} = {
2888 wakaba 1.40 %HTMLProseContentChecker,
2889     check_attrs => $GetHTMLAttrsChecker->({
2890 wakaba 1.1 disabled => $GetHTMLBooleanAttrChecker->('disabled'),
2891     multiple => $GetHTMLBooleanAttrChecker->('multiple'),
2892     }),
2893 wakaba 1.40 check_start => sub {
2894     my ($self, $item, $element_state) = @_;
2895 wakaba 1.1
2896 wakaba 1.40 $self->_add_minus_elements ($element_state,
2897     {$HTML_NS => {a => 1, datagrid => 1}});
2898     $element_state->{phase} = 'any';
2899     },
2900     ## Prose -(text* table Prose*) | table | select | datalist | Empty
2901     check_child_element => sub {
2902     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2903     $child_is_transparent, $element_state) = @_;
2904     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
2905     $self->{onerror}->(node => $child_el,
2906     type => 'element not allowed:minus',
2907     level => $self->{must_level});
2908     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2909     #
2910     } elsif ($element_state->{phase} eq 'prose') {
2911     if ($HTMLProseContent->{$child_nsuri}->{$child_ln}) {
2912     if ($element_state->{has_element} and
2913     $child_nsuri eq $HTML_NS and
2914     $child_ln eq 'table') {
2915     $self->{onerror}->(node => $child_el,
2916     type => 'element not allowed');
2917     } else {
2918 wakaba 1.8 #
2919 wakaba 1.1 }
2920 wakaba 1.40 } else {
2921     $self->{onerror}->(node => $child_el,
2922     type => 'element not allowed');
2923     }
2924     } elsif ($element_state->{phase} eq 'any') {
2925     if ($child_nsuri eq $HTML_NS and
2926     {table => 1, select => 1, datalist => 1}->{$child_ln}) {
2927     $element_state->{phase} = 'none';
2928     } elsif ($HTMLProseContent->{$child_nsuri}->{$child_ln}) {
2929     $element_state->{has_element} = 1;
2930     $element_state->{phase} = 'prose';
2931     } else {
2932     $self->{onerror}->(node => $child_el,
2933     type => 'element not allowed');
2934     }
2935     } elsif ($element_state->{phase} eq 'none') {
2936     $self->{onerror}->(node => $child_el,
2937     type => 'element not allowed');
2938     } else {
2939     die "check_child_element: Bad |datagrid| phase: $element_state->{phase}";
2940     }
2941     },
2942     check_child_text => sub {
2943     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2944     if ($has_significant) {
2945     if ($element_state->{phase} eq 'prose') {
2946     #
2947     } elsif ($element_state->{phase} eq 'any') {
2948     $element_state->{phase} = 'prose';
2949     } else {
2950     $self->{onerror}->(node => $child_node,
2951     type => 'character not allowed');
2952 wakaba 1.1 }
2953     }
2954 wakaba 1.40 },
2955     check_end => sub {
2956     my ($self, $item, $element_state) = @_;
2957     $self->_remove_minus_elements ($element_state);
2958 wakaba 1.1
2959 wakaba 1.40 if ($element_state->{phase} eq 'none') {
2960     $HTMLChecker{check_end}->(@_);
2961     } else {
2962     $HTMLPhrasingContentChecker{check_end}->(@_);
2963     }
2964     },
2965 wakaba 1.29 ## ISSUE: "xxx<table/>" is disallowed; "<select/>aaa" and "<datalist/>aa"
2966     ## are not disallowed (assuming that form control contents are also
2967     ## prose content).
2968 wakaba 1.1 };
2969    
2970     $Element->{$HTML_NS}->{command} = {
2971 wakaba 1.40 %HTMLEmptyChecker,
2972     check_attrs => $GetHTMLAttrsChecker->({
2973 wakaba 1.1 checked => $GetHTMLBooleanAttrChecker->('checked'),
2974     default => $GetHTMLBooleanAttrChecker->('default'),
2975     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
2976     hidden => $GetHTMLBooleanAttrChecker->('hidden'),
2977     icon => $HTMLURIAttrChecker,
2978     label => sub { }, ## NOTE: No conformance creteria
2979     radiogroup => sub { }, ## NOTE: No conformance creteria
2980     ## NOTE: |title| has special semantics, but no syntactical difference
2981     type => sub {
2982     my ($self, $attr) = @_;
2983     my $value = $attr->value;
2984     unless ({command => 1, checkbox => 1, radio => 1}->{$value}) {
2985     $self->{onerror}->(node => $attr, type => 'attribute value not allowed');
2986     }
2987     },
2988     }),
2989     };
2990    
2991     $Element->{$HTML_NS}->{menu} = {
2992 wakaba 1.40 %HTMLPhrasingContentChecker,
2993     check_attrs => $GetHTMLAttrsChecker->({
2994 wakaba 1.1 autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
2995     id => sub {
2996     ## NOTE: same as global |id=""|, with |$self->{menu}| registeration
2997     my ($self, $attr) = @_;
2998     my $value = $attr->value;
2999     if (length $value > 0) {
3000     if ($self->{id}->{$value}) {
3001     $self->{onerror}->(node => $attr, type => 'duplicate ID');
3002     push @{$self->{id}->{$value}}, $attr;
3003     } else {
3004     $self->{id}->{$value} = [$attr];
3005     }
3006     } else {
3007     ## NOTE: MUST contain at least one character
3008     $self->{onerror}->(node => $attr, type => 'empty attribute value');
3009     }
3010     if ($value =~ /[\x09-\x0D\x20]/) {
3011     $self->{onerror}->(node => $attr, type => 'space in ID');
3012     }
3013     $self->{menu}->{$value} ||= $attr;
3014     ## ISSUE: <menu id=""><p contextmenu=""> match?
3015     },
3016     label => sub { }, ## NOTE: No conformance creteria
3017     type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
3018     }),
3019 wakaba 1.40 check_start => sub {
3020     my ($self, $item, $element_state) = @_;
3021     $element_state->{phase} = 'li or phrasing';
3022     $element_state->{in_menu_original} = $self->{flag}->{in_menu};
3023     $self->{flag}->{in_menu} = 1;
3024     },
3025     check_child_element => sub {
3026     my ($self, $item, $child_el, $child_nsuri, $child_ln,
3027     $child_is_transparent, $element_state) = @_;
3028     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
3029     $self->{onerror}->(node => $child_el,
3030     type => 'element not allowed:minus',
3031     level => $self->{must_level});
3032     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
3033     #
3034     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'li') {
3035     if ($element_state->{phase} eq 'li') {
3036     #
3037     } elsif ($element_state->{phase} eq 'li or phrasing') {
3038     $element_state->{phase} = 'li';
3039     } else {
3040     $self->{onerror}->(node => $child_el, type => 'element not allowed');
3041     }
3042     } elsif ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3043     if ($element_state->{phase} eq 'phrasing') {
3044     #
3045     } elsif ($element_state->{phase} eq 'li or phrasing') {
3046     $element_state->{phase} = 'phrasing';
3047     } else {
3048     $self->{onerror}->(node => $child_el, type => 'element not allowed');
3049     }
3050     } else {
3051     $self->{onerror}->(node => $child_el, type => 'element not allowed');
3052     }
3053     },
3054     check_child_text => sub {
3055     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
3056     if ($has_significant) {
3057     if ($element_state->{phase} eq 'phrasing') {
3058     #
3059     } elsif ($element_state->{phase} eq 'li or phrasing') {
3060     $element_state->{phase} = 'phrasing';
3061     } else {
3062     $self->{onerror}->(node => $child_node,
3063     type => 'character not allowed');
3064 wakaba 1.1 }
3065     }
3066 wakaba 1.40 },
3067     check_end => sub {
3068     my ($self, $item, $element_state) = @_;
3069     delete $self->{flag}->{in_menu} unless $element_state->{in_menu_original};
3070    
3071     if ($element_state->{phase} eq 'li') {
3072     $HTMLChecker{check_end}->(@_);
3073     } else { # 'phrasing' or 'li or phrasing'
3074     $HTMLPhrasingContentChecker{check_end}->(@_);
3075 wakaba 1.1 }
3076     },
3077 wakaba 1.8 };
3078    
3079     $Element->{$HTML_NS}->{datatemplate} = {
3080 wakaba 1.40 %HTMLChecker,
3081     check_child_element => sub {
3082     my ($self, $item, $child_el, $child_nsuri, $child_ln,
3083     $child_is_transparent, $element_state) = @_;
3084     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
3085     $self->{onerror}->(node => $child_el,
3086     type => 'element not allowed:minus',
3087     level => $self->{must_level});
3088     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
3089     #
3090     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'rule') {
3091     #
3092     } else {
3093     $self->{onerror}->(node => $child_el,
3094     type => 'element not allowed:datatemplate');
3095     }
3096     },
3097     check_child_text => sub {
3098     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
3099     if ($has_significant) {
3100     $self->{onerror}->(node => $child_node, type => 'character not allowed');
3101 wakaba 1.8 }
3102     },
3103     is_xml_root => 1,
3104     };
3105    
3106     $Element->{$HTML_NS}->{rule} = {
3107 wakaba 1.40 %HTMLChecker,
3108     check_attrs => $GetHTMLAttrsChecker->({
3109 wakaba 1.23 condition => $HTMLSelectorsAttrChecker,
3110 wakaba 1.18 mode => $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker,
3111 wakaba 1.8 }),
3112 wakaba 1.40 check_start => sub {
3113     my ($self, $item, $element_state) = @_;
3114     $self->_add_plus_elements ($element_state, {$HTML_NS => {nest => 1}});
3115     },
3116     check_child_element => sub { },
3117     check_child_text => sub { },
3118     check_end => sub {
3119     my ($self, $item, $element_state) = @_;
3120     $self->_remove_plus_elements ($element_state);
3121     $HTMLChecker{check_end}->(@_);
3122 wakaba 1.8 },
3123     ## NOTE: "MAY be anything that, when the parent |datatemplate|
3124     ## is applied to some conforming data, results in a conforming DOM tree.":
3125     ## We don't check against this.
3126     };
3127    
3128     $Element->{$HTML_NS}->{nest} = {
3129 wakaba 1.40 %HTMLEmptyChecker,
3130     check_attrs => $GetHTMLAttrsChecker->({
3131 wakaba 1.23 filter => $HTMLSelectorsAttrChecker,
3132     mode => sub {
3133     my ($self, $attr) = @_;
3134     my $value = $attr->value;
3135     if ($value !~ /\A[^\x09-\x0D\x20]+\z/) {
3136     $self->{onerror}->(node => $attr, type => 'mode:syntax error');
3137     }
3138     },
3139 wakaba 1.8 }),
3140 wakaba 1.1 };
3141    
3142     $Element->{$HTML_NS}->{legend} = {
3143 wakaba 1.40 %HTMLPhrasingContentChecker,
3144 wakaba 1.1 };
3145    
3146     $Element->{$HTML_NS}->{div} = {
3147 wakaba 1.40 %HTMLProseContentChecker,
3148 wakaba 1.1 };
3149    
3150     $Element->{$HTML_NS}->{font} = {
3151 wakaba 1.40 %HTMLTransparentChecker,
3152     check_attrs => $GetHTMLAttrsChecker->({}), ## TODO
3153 wakaba 1.1 };
3154    
3155     $Whatpm::ContentChecker::Namespace->{$HTML_NS}->{loaded} = 1;
3156    
3157     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24