/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.41 - (hide annotations) (download)
Sat Feb 23 12:48:19 2008 UTC (16 years, 8 months ago) by wakaba
Branch: MAIN
Changes since 1.40: +75 -168 lines
++ whatpm/Whatpm/ContentChecker/ChangeLog	23 Feb 2008 12:48:09 -0000
	* HTML.pm: |figure| content checker reimplemented.  |object|
	content checker for root element case reimplemented.

2008-02-23  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
6    
7 wakaba 1.29 ## December 2007 HTML5 Classification
8    
9     my $HTMLMetadataContent = {
10     $HTML_NS => {
11     title => 1, base => 1, link => 1, style => 1, script => 1, noscript => 1,
12     'event-source' => 1, command => 1, datatemplate => 1,
13     ## NOTE: A |meta| with no |name| element is not allowed as
14     ## a metadata content other than |head| element.
15     meta => 1,
16     },
17     ## NOTE: RDF is mentioned in the HTML5 spec.
18     ## TODO: Other RDF elements?
19     q<http://www.w3.org/1999/02/22-rdf-syntax-ns#> => {RDF => 1},
20     };
21    
22     my $HTMLProseContent = {
23     $HTML_NS => {
24     section => 1, nav => 1, article => 1, blockquote => 1, aside => 1,
25     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1, header => 1,
26     footer => 1, address => 1, p => 1, hr => 1, dialog => 1, pre => 1,
27     ol => 1, ul => 1, dl => 1, figure => 1, map => 1, table => 1,
28     details => 1, ## ISSUE: "Prose element" in spec.
29     datagrid => 1, ## ISSUE: "Prose element" in spec.
30     datatemplate => 1,
31     div => 1, ## ISSUE: No category in spec.
32     ## NOTE: |style| is only allowed if |scoped| attribute is specified.
33     ## Additionally, it must be before any other element or
34     ## non-inter-element-whitespace text node.
35     style => 1,
36    
37 wakaba 1.38 br => 1, q => 1, cite => 1, em => 1, strong => 1, small => 1, mark => 1,
38 wakaba 1.29 dfn => 1, abbr => 1, time => 1, progress => 1, meter => 1, code => 1,
39     var => 1, samp => 1, kbd => 1, sub => 1, sup => 1, span => 1, i => 1,
40     b => 1, bdo => 1, script => 1, noscript => 1, 'event-source' => 1,
41     command => 1, font => 1,
42     a => 1,
43     datagrid => 1, ## ISSUE: "Interactive element" in the spec.
44     ## NOTE: |area| is allowed only as a descendant of |map|.
45     area => 1,
46    
47     ins => 1, del => 1,
48    
49     ## NOTE: If there is a |menu| ancestor, phrasing. Otherwise, prose.
50     menu => 1,
51    
52     img => 1, iframe => 1, embed => 1, object => 1, video => 1, audio => 1,
53     canvas => 1,
54     },
55    
56     ## NOTE: Embedded
57     q<http://www.w3.org/1998/Math/MathML> => {math => 1},
58     q<http://www.w3.org/2000/svg> => {svg => 1},
59     };
60    
61     my $HTMLSectioningContent = {
62     $HTML_NS => {
63     section => 1, nav => 1, article => 1, blockquote => 1, aside => 1,
64     ## NOTE: |body| is only allowed in |html| element.
65     body => 1,
66     },
67     };
68    
69     my $HTMLHeadingContent = {
70     $HTML_NS => {
71     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1, header => 1,
72     },
73     };
74    
75     my $HTMLPhrasingContent = {
76     ## NOTE: All phrasing content is also prose content.
77     $HTML_NS => {
78 wakaba 1.38 br => 1, q => 1, cite => 1, em => 1, strong => 1, small => 1, mark => 1,
79 wakaba 1.29 dfn => 1, abbr => 1, time => 1, progress => 1, meter => 1, code => 1,
80     var => 1, samp => 1, kbd => 1, sub => 1, sup => 1, span => 1, i => 1,
81     b => 1, bdo => 1, script => 1, noscript => 1, 'event-source' => 1,
82     command => 1, font => 1,
83     a => 1,
84     datagrid => 1, ## ISSUE: "Interactive element" in the spec.
85     ## NOTE: |area| is allowed only as a descendant of |map|.
86     area => 1,
87    
88     ## NOTE: Transparent.
89     ins => 1, del => 1,
90    
91     ## NOTE: If there is a |menu| ancestor, phrasing. Otherwise, prose.
92     menu => 1,
93    
94     img => 1, iframe => 1, embed => 1, object => 1, video => 1, audio => 1,
95     canvas => 1,
96     },
97    
98     ## NOTE: Embedded
99     q<http://www.w3.org/1998/Math/MathML> => {math => 1},
100     q<http://www.w3.org/2000/svg> => {svg => 1},
101    
102     ## NOTE: And non-inter-element-whitespace text nodes.
103     };
104    
105 wakaba 1.40 ## $HTMLEmbeddedContent: See Whatpm::ContentChecker.
106 wakaba 1.29
107     my $HTMLInteractiveContent = {
108     $HTML_NS => {
109     a => 1,
110 wakaba 1.36 datagrid => 1, ## ISSUE: Categorized as "Inetractive element"
111 wakaba 1.29 },
112     };
113    
114 wakaba 1.36 ## NOTE: $HTMLTransparentElements: See Whatpm::ContentChecker.
115     ## NOTE: Semi-transparent elements: See Whatpm::ContentChecker.
116    
117     ## -- Common attribute syntacx checkers
118    
119 wakaba 1.1 our $AttrChecker;
120    
121     my $GetHTMLEnumeratedAttrChecker = sub {
122     my $states = shift; # {value => conforming ? 1 : -1}
123     return sub {
124     my ($self, $attr) = @_;
125     my $value = lc $attr->value; ## TODO: ASCII case insensitibility?
126     if ($states->{$value} > 0) {
127     #
128     } elsif ($states->{$value}) {
129     $self->{onerror}->(node => $attr, type => 'enumerated:non-conforming');
130     } else {
131     $self->{onerror}->(node => $attr, type => 'enumerated:invalid');
132     }
133     };
134     }; # $GetHTMLEnumeratedAttrChecker
135    
136     my $GetHTMLBooleanAttrChecker = sub {
137     my $local_name = shift;
138     return sub {
139     my ($self, $attr) = @_;
140     my $value = $attr->value;
141     unless ($value eq $local_name or $value eq '') {
142     $self->{onerror}->(node => $attr, type => 'boolean:invalid');
143     }
144     };
145     }; # $GetHTMLBooleanAttrChecker
146    
147 wakaba 1.8 ## Unordered set of space-separated tokens
148 wakaba 1.18 my $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker = sub {
149 wakaba 1.8 my ($self, $attr) = @_;
150     my %word;
151     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
152     unless ($word{$word}) {
153     $word{$word} = 1;
154     } else {
155     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
156     }
157     }
158 wakaba 1.18 }; # $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker
159 wakaba 1.8
160 wakaba 1.1 ## |rel| attribute (unordered set of space separated tokens,
161     ## whose allowed values are defined by the section on link types)
162     my $HTMLLinkTypesAttrChecker = sub {
163 wakaba 1.4 my ($a_or_area, $todo, $self, $attr) = @_;
164 wakaba 1.1 my %word;
165     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
166     unless ($word{$word}) {
167     $word{$word} = 1;
168 wakaba 1.18 } elsif ($word eq 'up') {
169     #
170 wakaba 1.1 } else {
171     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
172     }
173     }
174     ## NOTE: Case sensitive match (since HTML5 spec does not say link
175     ## types are case-insensitive and it says "The value should not
176     ## be confusingly similar to any other defined value (e.g.
177     ## differing only in case).").
178     ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
179     ## "MAY"s and "only ... MAY" restrict non-standard non-registered
180     ## values to be used conformingly.
181     require Whatpm::_LinkTypeList;
182     our $LinkType;
183     for my $word (keys %word) {
184     my $def = $LinkType->{$word};
185     if (defined $def) {
186     if ($def->{status} eq 'accepted') {
187     if (defined $def->{effect}->[$a_or_area]) {
188     #
189     } else {
190     $self->{onerror}->(node => $attr,
191     type => 'link type:bad context:'.$word);
192     }
193     } elsif ($def->{status} eq 'proposal') {
194     $self->{onerror}->(node => $attr, level => 's',
195     type => 'link type:proposed:'.$word);
196 wakaba 1.20 if (defined $def->{effect}->[$a_or_area]) {
197     #
198     } else {
199     $self->{onerror}->(node => $attr,
200     type => 'link type:bad context:'.$word);
201     }
202 wakaba 1.1 } else { # rejected or synonym
203     $self->{onerror}->(node => $attr,
204     type => 'link type:non-conforming:'.$word);
205     }
206 wakaba 1.4 if (defined $def->{effect}->[$a_or_area]) {
207     if ($word eq 'alternate') {
208     #
209     } elsif ($def->{effect}->[$a_or_area] eq 'hyperlink') {
210     $todo->{has_hyperlink_link_type} = 1;
211     }
212     }
213 wakaba 1.1 if ($def->{unique}) {
214     unless ($self->{has_link_type}->{$word}) {
215     $self->{has_link_type}->{$word} = 1;
216     } else {
217     $self->{onerror}->(node => $attr,
218     type => 'link type:duplicate:'.$word);
219     }
220     }
221     } else {
222     $self->{onerror}->(node => $attr, level => 'unsupported',
223     type => 'link type:'.$word);
224     }
225     }
226 wakaba 1.4 $todo->{has_hyperlink_link_type} = 1
227     if $word{alternate} and not $word{stylesheet};
228 wakaba 1.1 ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
229     ## says that using both X-Pingback: header field and HTML
230     ## <link rel=pingback> is deprecated and if both appears they
231     ## SHOULD contain exactly the same value.
232     ## ISSUE: Pingback 1.0 specification defines the exact representation
233     ## of its link element, which cannot be tested by the current arch.
234     ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
235     ## include any string that matches to the pattern for the rel=pingback link,
236     ## which again inpossible to test.
237     ## ISSUE: rel=pingback href MUST NOT include entities other than predefined 4.
238 wakaba 1.12
239     ## NOTE: <link rel="up index"><link rel="up up index"> is not an error.
240 wakaba 1.17 ## NOTE: We can't check "If the page is part of multiple hierarchies,
241     ## then they SHOULD be described in different paragraphs.".
242 wakaba 1.1 }; # $HTMLLinkTypesAttrChecker
243 wakaba 1.20
244     ## TODO: "When an author uses a new type not defined by either this specification or the Wiki page, conformance checkers should offer to add the value to the Wiki, with the details described above, with the "proposal" status."
245 wakaba 1.1
246     ## URI (or IRI)
247     my $HTMLURIAttrChecker = sub {
248     my ($self, $attr) = @_;
249     ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
250     my $value = $attr->value;
251     Whatpm::URIChecker->check_iri_reference ($value, sub {
252     my %opt = @_;
253     $self->{onerror}->(node => $attr, level => $opt{level},
254     type => 'URI::'.$opt{type}.
255     (defined $opt{position} ? ':'.$opt{position} : ''));
256     });
257 wakaba 1.17 $self->{has_uri_attr} = 1; ## TODO: <html manifest>
258 wakaba 1.1 }; # $HTMLURIAttrChecker
259    
260     ## A space separated list of one or more URIs (or IRIs)
261     my $HTMLSpaceURIsAttrChecker = sub {
262     my ($self, $attr) = @_;
263     my $i = 0;
264     for my $value (split /[\x09-\x0D\x20]+/, $attr->value) {
265     Whatpm::URIChecker->check_iri_reference ($value, sub {
266     my %opt = @_;
267     $self->{onerror}->(node => $attr, level => $opt{level},
268 wakaba 1.2 type => 'URIs:'.':'.
269     $opt{type}.':'.$i.
270 wakaba 1.1 (defined $opt{position} ? ':'.$opt{position} : ''));
271     });
272     $i++;
273     }
274     ## ISSUE: Relative references?
275     ## ISSUE: Leading or trailing white spaces are conformant?
276     ## ISSUE: A sequence of white space characters are conformant?
277     ## ISSUE: A zero-length string is conformant? (It does contain a relative reference, i.e. same as base URI.)
278     ## NOTE: Duplication seems not an error.
279 wakaba 1.4 $self->{has_uri_attr} = 1;
280 wakaba 1.1 }; # $HTMLSpaceURIsAttrChecker
281    
282     my $HTMLDatetimeAttrChecker = sub {
283     my ($self, $attr) = @_;
284     my $value = $attr->value;
285     ## ISSUE: "space", not "space character" (in parsing algorihtm, "space character")
286     if ($value =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})(?>[\x09-\x0D\x20]+(?>T[\x09-\x0D\x20]*)?|T[\x09-\x0D\x20]*)([0-9]{2}):([0-9]{2})(?>:([0-9]{2}))?(?>\.([0-9]+))?[\x09-\x0D\x20]*(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
287     my ($y, $M, $d, $h, $m, $s, $f, $zh, $zm)
288     = ($1, $2, $3, $4, $5, $6, $7, $8, $9);
289     if (0 < $M and $M < 13) { ## ISSUE: This is not explicitly specified (though in parsing algorithm)
290     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
291     if $d < 1 or
292     $d > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$M];
293     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
294     if $M == 2 and $d == 29 and
295     not ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
296     } else {
297     $self->{onerror}->(node => $attr, type => 'datetime:bad month');
298     }
299     $self->{onerror}->(node => $attr, type => 'datetime:bad hour') if $h > 23;
300     $self->{onerror}->(node => $attr, type => 'datetime:bad minute') if $m > 59;
301     $self->{onerror}->(node => $attr, type => 'datetime:bad second')
302     if defined $s and $s > 59;
303     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone hour')
304     if $zh > 23;
305     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone minute')
306     if $zm > 59;
307     ## ISSUE: Maybe timezone -00:00 should have same semantics as in RFC 3339.
308     } else {
309     $self->{onerror}->(node => $attr, type => 'datetime:syntax error');
310     }
311     }; # $HTMLDatetimeAttrChecker
312    
313     my $HTMLIntegerAttrChecker = sub {
314     my ($self, $attr) = @_;
315     my $value = $attr->value;
316     unless ($value =~ /\A-?[0-9]+\z/) {
317     $self->{onerror}->(node => $attr, type => 'integer:syntax error');
318     }
319     }; # $HTMLIntegerAttrChecker
320    
321     my $GetHTMLNonNegativeIntegerAttrChecker = sub {
322     my $range_check = shift;
323     return sub {
324     my ($self, $attr) = @_;
325     my $value = $attr->value;
326     if ($value =~ /\A[0-9]+\z/) {
327     unless ($range_check->($value + 0)) {
328     $self->{onerror}->(node => $attr, type => 'nninteger:out of range');
329     }
330     } else {
331     $self->{onerror}->(node => $attr,
332     type => 'nninteger:syntax error');
333     }
334     };
335     }; # $GetHTMLNonNegativeIntegerAttrChecker
336    
337     my $GetHTMLFloatingPointNumberAttrChecker = sub {
338     my $range_check = shift;
339     return sub {
340     my ($self, $attr) = @_;
341     my $value = $attr->value;
342     if ($value =~ /\A-?[0-9.]+\z/ and $value =~ /[0-9]/) {
343     unless ($range_check->($value + 0)) {
344     $self->{onerror}->(node => $attr, type => 'float:out of range');
345     }
346     } else {
347     $self->{onerror}->(node => $attr,
348     type => 'float:syntax error');
349     }
350     };
351     }; # $GetHTMLFloatingPointNumberAttrChecker
352    
353     ## "A valid MIME type, optionally with parameters. [RFC 2046]"
354     ## ISSUE: RFC 2046 does not define syntax of media types.
355     ## ISSUE: The definition of "a valid MIME type" is unknown.
356     ## Syntactical correctness?
357     my $HTMLIMTAttrChecker = sub {
358     my ($self, $attr) = @_;
359     my $value = $attr->value;
360     ## ISSUE: RFC 2045 Content-Type header field allows insertion
361     ## of LWS/comments between tokens. Is it allowed in HTML? Maybe no.
362     ## ISSUE: RFC 2231 extension? Maybe no.
363     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
364     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
365     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
366     if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
367     my @type = ($1, $2);
368     my $param = $3;
369     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
370     if (defined $2) {
371     push @type, $1 => $2;
372     } else {
373     my $n = $1;
374     my $v = $2;
375     $v =~ s/\\(.)/$1/gs;
376     push @type, $n => $v;
377     }
378     }
379     require Whatpm::IMTChecker;
380     Whatpm::IMTChecker->check_imt (sub {
381     my %opt = @_;
382     $self->{onerror}->(node => $attr, level => $opt{level},
383     type => 'IMT:'.$opt{type});
384     }, @type);
385     } else {
386     $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
387     }
388     }; # $HTMLIMTAttrChecker
389    
390     my $HTMLLanguageTagAttrChecker = sub {
391 wakaba 1.7 ## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm.
392    
393 wakaba 1.1 my ($self, $attr) = @_;
394 wakaba 1.6 my $value = $attr->value;
395     require Whatpm::LangTag;
396     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
397     my %opt = @_;
398     my $type = 'LangTag:'.$opt{type};
399     $type .= ':' . $opt{subtag} if defined $opt{subtag};
400     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
401     level => $opt{level});
402     });
403 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
404 wakaba 1.6
405     ## TODO: testdata
406 wakaba 1.1 }; # $HTMLLanguageTagAttrChecker
407    
408     ## "A valid media query [MQ]"
409     my $HTMLMQAttrChecker = sub {
410     my ($self, $attr) = @_;
411     $self->{onerror}->(node => $attr, level => 'unsupported',
412     type => 'media query');
413     ## ISSUE: What is "a valid media query"?
414     }; # $HTMLMQAttrChecker
415    
416     my $HTMLEventHandlerAttrChecker = sub {
417     my ($self, $attr) = @_;
418     $self->{onerror}->(node => $attr, level => 'unsupported',
419     type => 'event handler');
420     ## TODO: MUST contain valid ECMAScript code matching the
421     ## ECMAScript |FunctionBody| production. [ECMA262]
422     ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
423     ## ISSUE: Automatic semicolon insertion does not apply?
424     ## ISSUE: Other script languages?
425     }; # $HTMLEventHandlerAttrChecker
426    
427     my $HTMLUsemapAttrChecker = sub {
428     my ($self, $attr) = @_;
429     ## MUST be a valid hashed ID reference to a |map| element
430     my $value = $attr->value;
431     if ($value =~ s/^#//) {
432     ## ISSUE: Is |usemap="#"| conformant? (c.f. |id=""| is non-conformant.)
433     push @{$self->{usemap}}, [$value => $attr];
434     } else {
435     $self->{onerror}->(node => $attr, type => '#idref:syntax error');
436     }
437     ## NOTE: Space characters in hashed ID references are conforming.
438     ## ISSUE: UA algorithm for matching is case-insensitive; IDs only different in cases should be reported
439     }; # $HTMLUsemapAttrChecker
440    
441     my $HTMLTargetAttrChecker = sub {
442     my ($self, $attr) = @_;
443     my $value = $attr->value;
444     if ($value =~ /^_/) {
445     $value = lc $value; ## ISSUE: ASCII case-insentitive?
446     unless ({
447     _self => 1, _parent => 1, _top => 1,
448     }->{$value}) {
449     $self->{onerror}->(node => $attr,
450     type => 'reserved browsing context name');
451     }
452     } else {
453 wakaba 1.29 ## NOTE: An empty string is a valid browsing context name (same as _self).
454 wakaba 1.1 }
455     }; # $HTMLTargetAttrChecker
456    
457 wakaba 1.23 my $HTMLSelectorsAttrChecker = sub {
458     my ($self, $attr) = @_;
459    
460     ## ISSUE: Namespace resolution?
461    
462     my $value = $attr->value;
463    
464     require Whatpm::CSS::SelectorsParser;
465     my $p = Whatpm::CSS::SelectorsParser->new;
466     $p->{pseudo_class}->{$_} = 1 for qw/
467     active checked disabled empty enabled first-child first-of-type
468     focus hover indeterminate last-child last-of-type link only-child
469     only-of-type root target visited
470     lang nth-child nth-last-child nth-of-type nth-last-of-type not
471     -manakai-contains -manakai-current
472     /;
473    
474     $p->{pseudo_element}->{$_} = 1 for qw/
475     after before first-letter first-line
476     /;
477    
478     $p->{must_level} = $self->{must_level};
479     $p->{onerror} = sub {
480     my %opt = @_;
481     $opt{type} = 'selectors:'.$opt{type};
482     $self->{onerror}->(%opt, node => $attr);
483     };
484     $p->parse_string ($value);
485     }; # $HTMLSelectorsAttrChecker
486    
487 wakaba 1.1 my $HTMLAttrChecker = {
488     id => sub {
489     ## NOTE: |map| has its own variant of |id=""| checker
490     my ($self, $attr) = @_;
491     my $value = $attr->value;
492     if (length $value > 0) {
493     if ($self->{id}->{$value}) {
494     $self->{onerror}->(node => $attr, type => 'duplicate ID');
495     push @{$self->{id}->{$value}}, $attr;
496     } else {
497     $self->{id}->{$value} = [$attr];
498     }
499     if ($value =~ /[\x09-\x0D\x20]/) {
500     $self->{onerror}->(node => $attr, type => 'space in ID');
501     }
502     } else {
503     ## NOTE: MUST contain at least one character
504     $self->{onerror}->(node => $attr, type => 'empty attribute value');
505     }
506     },
507     title => sub {}, ## NOTE: No conformance creteria
508     lang => sub {
509     my ($self, $attr) = @_;
510 wakaba 1.6 my $value = $attr->value;
511     if ($value eq '') {
512     #
513     } else {
514     require Whatpm::LangTag;
515     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
516     my %opt = @_;
517     my $type = 'LangTag:'.$opt{type};
518     $type .= ':' . $opt{subtag} if defined $opt{subtag};
519     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
520     level => $opt{level});
521     });
522     }
523 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
524     unless ($attr->owner_document->manakai_is_html) {
525     $self->{onerror}->(node => $attr, type => 'in XML:lang');
526     }
527 wakaba 1.6
528     ## TODO: test data
529 wakaba 1.1 },
530     dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
531     class => sub {
532     my ($self, $attr) = @_;
533     my %word;
534     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
535     unless ($word{$word}) {
536     $word{$word} = 1;
537     push @{$self->{return}->{class}->{$word}||=[]}, $attr;
538     } else {
539     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
540     }
541     }
542     },
543     contextmenu => sub {
544     my ($self, $attr) = @_;
545     my $value = $attr->value;
546     push @{$self->{contextmenu}}, [$value => $attr];
547     ## ISSUE: "The value must be the ID of a menu element in the DOM."
548     ## What is "in the DOM"? A menu Element node that is not part
549     ## of the Document tree is in the DOM? A menu Element node that
550     ## belong to another Document tree is in the DOM?
551     },
552     irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
553 wakaba 1.8 tabindex => $HTMLIntegerAttrChecker
554     ## TODO: ref, template, registrationmark
555 wakaba 1.1 };
556    
557     for (qw/
558     onabort onbeforeunload onblur onchange onclick oncontextmenu
559     ondblclick ondrag ondragend ondragenter ondragleave ondragover
560     ondragstart ondrop onerror onfocus onkeydown onkeypress
561     onkeyup onload onmessage onmousedown onmousemove onmouseout
562     onmouseover onmouseup onmousewheel onresize onscroll onselect
563     onsubmit onunload
564     /) {
565     $HTMLAttrChecker->{$_} = $HTMLEventHandlerAttrChecker;
566     }
567    
568     my $GetHTMLAttrsChecker = sub {
569     my $element_specific_checker = shift;
570     return sub {
571 wakaba 1.40 my ($self, $item, $element_state) = @_;
572     for my $attr (@{$item->{node}->attributes}) {
573 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
574     $attr_ns = '' unless defined $attr_ns;
575     my $attr_ln = $attr->manakai_local_name;
576     my $checker;
577     if ($attr_ns eq '') {
578     $checker = $element_specific_checker->{$attr_ln}
579 wakaba 1.40 || $HTMLAttrChecker->{$attr_ln};
580 wakaba 1.1 }
581     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
582 wakaba 1.40 || $AttrChecker->{$attr_ns}->{''};
583 wakaba 1.1 if ($checker) {
584 wakaba 1.40 $checker->($self, $attr, $item);
585 wakaba 1.1 } else {
586     $self->{onerror}->(node => $attr, level => 'unsupported',
587     type => 'attribute');
588     ## ISSUE: No comformance createria for unknown attributes in the spec
589     }
590     }
591     };
592     }; # $GetHTMLAttrsChecker
593    
594 wakaba 1.40 my %HTMLChecker = (
595     %Whatpm::ContentChecker::AnyChecker,
596     check_attrs => $GetHTMLAttrsChecker->({}),
597     );
598    
599     my %HTMLEmptyChecker = (
600     %HTMLChecker,
601     check_child_element => sub {
602     my ($self, $item, $child_el, $child_nsuri, $child_ln,
603     $child_is_transparent, $element_state) = @_;
604     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
605     $self->{onerror}->(node => $child_el,
606     type => 'element not allowed:minus',
607     level => $self->{must_level});
608     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
609     #
610     } else {
611     $self->{onerror}->(node => $child_el,
612     type => 'element not allowed:empty',
613     level => $self->{must_level});
614     }
615     },
616     check_child_text => sub {
617     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
618     if ($has_significant) {
619     $self->{onerror}->(node => $child_node,
620     type => 'character not allowed:empty',
621     level => $self->{must_level});
622     }
623     },
624     );
625    
626     my %HTMLTextChecker = (
627     %HTMLChecker,
628     check_child_element => sub {
629     my ($self, $item, $child_el, $child_nsuri, $child_ln,
630     $child_is_transparent, $element_state) = @_;
631     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
632     $self->{onerror}->(node => $child_el,
633     type => 'element not allowed:minus',
634     level => $self->{must_level});
635     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
636     #
637     } else {
638     $self->{onerror}->(node => $child_el, type => 'element not allowed');
639     }
640     },
641     );
642    
643     my %HTMLProseContentChecker = (
644     %HTMLChecker,
645     check_child_element => sub {
646     my ($self, $item, $child_el, $child_nsuri, $child_ln,
647     $child_is_transparent, $element_state) = @_;
648     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
649     $self->{onerror}->(node => $child_el,
650     type => 'element not allowed:minus',
651     level => $self->{must_level});
652     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
653     #
654     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
655     if ($element_state->{has_non_style} or
656     not $child_el->has_attribute_ns (undef, 'scoped')) {
657     $self->{onerror}->(node => $child_el,
658     type => 'element not allowed:prose style',
659     level => $self->{must_level});
660     }
661     } elsif ($HTMLProseContent->{$child_nsuri}->{$child_ln}) {
662     $element_state->{has_non_style} = 1;
663     } else {
664     $element_state->{has_non_style} = 1;
665     $self->{onerror}->(node => $child_el,
666     type => 'element not allowed:prose',
667     level => $self->{must_level})
668     }
669     },
670     check_child_text => sub {
671     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
672     if ($has_significant) {
673     $element_state->{has_non_style} = 1;
674     }
675     },
676     check_end => sub {
677     my ($self, $item, $element_state) = @_;
678     if ($element_state->{has_significant}) {
679     $item->{parent_state}->{has_significant} = 1;
680     } elsif ($item->{transparent}) {
681     #
682     } else {
683     $self->{onerror}->(node => $item->{node},
684     level => $self->{should_level},
685     type => 'no significant content');
686     }
687     },
688     );
689    
690     my %HTMLPhrasingContentChecker = (
691     %HTMLChecker,
692     check_child_element => sub {
693     my ($self, $item, $child_el, $child_nsuri, $child_ln,
694     $child_is_transparent, $element_state) = @_;
695     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
696     $self->{onerror}->(node => $child_el,
697     type => 'element not allowed:minus',
698     level => $self->{must_level});
699     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
700     #
701     } elsif ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
702     #
703     } else {
704     $self->{onerror}->(node => $child_el,
705     type => 'element not allowed:phrasing',
706     level => $self->{must_level});
707     }
708     },
709     check_end => $HTMLProseContentChecker{check_end},
710     ## NOTE: The definition for |li| assumes that the only differences
711     ## between prose and phrasing content checkers are |check_child_element|
712     ## and |check_child_text|.
713     );
714    
715     my %HTMLTransparentChecker = %HTMLProseContentChecker;
716     ## ISSUE: Significant content rule should be applied to transparent element
717     ## with parent? Currently, applied to |video| but not to others.
718    
719 wakaba 1.1 our $Element;
720     our $ElementDefault;
721    
722     $Element->{$HTML_NS}->{''} = {
723 wakaba 1.40 %HTMLChecker,
724     check_start => $ElementDefault->{check_start},
725 wakaba 1.1 };
726    
727     $Element->{$HTML_NS}->{html} = {
728     is_root => 1,
729 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
730 wakaba 1.16 manifest => $HTMLURIAttrChecker,
731 wakaba 1.1 xmlns => sub {
732     my ($self, $attr) = @_;
733     my $value = $attr->value;
734     unless ($value eq $HTML_NS) {
735     $self->{onerror}->(node => $attr, type => 'invalid attribute value');
736     }
737     unless ($attr->owner_document->manakai_is_html) {
738     $self->{onerror}->(node => $attr, type => 'in XML:xmlns');
739     ## TODO: Test
740     }
741     },
742     }),
743 wakaba 1.40 check_start => sub {
744     my ($self, $item, $element_state) = @_;
745     $element_state->{phase} = 'before head';
746     },
747     check_child_element => sub {
748     my ($self, $item, $child_el, $child_nsuri, $child_ln,
749     $child_is_transparent, $element_state) = @_;
750     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
751     $self->{onerror}->(node => $child_el,
752     type => 'element not allowed:minus',
753     level => $self->{must_level});
754     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
755     #
756     } elsif ($element_state->{phase} eq 'before head') {
757     if ($child_nsuri eq $HTML_NS and $child_ln eq 'head') {
758     $element_state->{phase} = 'after head';
759     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'body') {
760     $self->{onerror}->(node => $child_el,
761     type => 'ps element missing:head');
762     $element_state->{phase} = 'after body';
763     } else {
764     $self->{onerror}->(node => $child_el,
765     type => 'element not allowed');
766     }
767     } elsif ($element_state->{phase} eq 'after head') {
768     if ($child_nsuri eq $HTML_NS and $child_ln eq 'body') {
769     $element_state->{phase} = 'after body';
770     } else {
771     $self->{onerror}->(node => $child_el,
772     type => 'element not allowed');
773     }
774     } elsif ($element_state->{phase} eq 'after body') {
775     $self->{onerror}->(node => $child_el,
776     type => 'element not allowed');
777     } else {
778     die "check_child_element: Bad |html| phase: $element_state->{phase}";
779     }
780     },
781     check_child_text => sub {
782     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
783     if ($has_significant) {
784     $self->{onerror}->(node => $child_node,
785     type => 'character not allowed');
786     }
787     },
788     check_end => sub {
789     my ($self, $item, $element_state) = @_;
790     if ($element_state->{phase} eq 'after body') {
791     #
792     } elsif ($element_state->{phase} eq 'before head') {
793     $self->{onerror}->(node => $item->{node},
794     type => 'child element missing:head');
795     $self->{onerror}->(node => $item->{node},
796     type => 'child element missing:body');
797     } elsif ($element_state->{phase} eq 'after head') {
798     $self->{onerror}->(node => $item->{node},
799     type => 'child element missing:body');
800     } else {
801     die "check_end: Bad |html| phase: $element_state->{phase}";
802     }
803 wakaba 1.1
804 wakaba 1.40 $HTMLChecker{check_end}->(@_);
805     },
806     };
807 wakaba 1.25
808 wakaba 1.40 $Element->{$HTML_NS}->{head} = {
809     check_attrs => $GetHTMLAttrsChecker->({}),
810     check_child_element => sub {
811     my ($self, $item, $child_el, $child_nsuri, $child_ln,
812     $child_is_transparent, $element_state) = @_;
813     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
814     $self->{onerror}->(node => $child_el,
815     type => 'element not allowed:minus',
816     level => $self->{must_level});
817     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
818     #
819     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'title') {
820     unless ($element_state->{has_title}) {
821     $element_state->{has_title} = 1;
822     } else {
823     $self->{onerror}->(node => $child_el,
824     type => 'element not allowed:head title',
825     level => $self->{must_level});
826     }
827     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
828     if ($child_el->has_attribute_ns (undef, 'scoped')) {
829     $self->{onerror}->(node => $child_el,
830     type => 'element not allowed:head style',
831     level => $self->{must_level});
832 wakaba 1.1 }
833 wakaba 1.40 } elsif ($HTMLMetadataContent->{$child_nsuri}->{$child_ln}) {
834     #
835    
836     ## NOTE: |meta| is a metadata content. However, strictly speaking,
837     ## a |meta| element with none of |charset|, |name|,
838     ## or |http-equiv| attribute is not allowed. It is non-conforming
839     ## anyway.
840     } else {
841     $self->{onerror}->(node => $child_el,
842     type => 'element not allowed:metadata',
843     level => $self->{must_level});
844     }
845     $element_state->{in_head_original} = $self->{flag}->{in_head};
846     $self->{flag}->{in_head} = 1;
847     },
848     check_child_text => sub {
849     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
850     if ($has_significant) {
851     $self->{onerror}->(node => $child_node, type => 'character not allowed');
852 wakaba 1.1 }
853 wakaba 1.40 },
854     check_end => sub {
855     my ($self, $item, $element_state) = @_;
856     unless ($element_state->{has_title}) {
857     $self->{onerror}->(node => $item->{node},
858     type => 'child element missing:title');
859 wakaba 1.1 }
860 wakaba 1.40 $self->{flag}->{in_head} = $element_state->{in_head_original};
861 wakaba 1.1
862 wakaba 1.40 $HTMLChecker{check_end}->(@_);
863 wakaba 1.1 },
864     };
865    
866 wakaba 1.40 $Element->{$HTML_NS}->{title} = {
867     %HTMLTextChecker,
868     };
869 wakaba 1.1
870 wakaba 1.40 $Element->{$HTML_NS}->{base} = {
871     %HTMLEmptyChecker,
872     check_attrs => sub {
873     my ($self, $item, $element_state) = @_;
874 wakaba 1.1
875 wakaba 1.40 if ($self->{has_base}) {
876     $self->{onerror}->(node => $item->{node},
877     type => 'element not allowed:base');
878     } else {
879     $self->{has_base} = 1;
880 wakaba 1.29 }
881    
882 wakaba 1.40 my $has_href = $item->{node}->has_attribute_ns (undef, 'href');
883     my $has_target = $item->{node}->has_attribute_ns (undef, 'target');
884 wakaba 1.14
885     if ($self->{has_uri_attr} and $has_href) {
886 wakaba 1.4 ## ISSUE: Are these examples conforming?
887     ## <head profile="a b c"><base href> (except for |profile|'s
888     ## non-conformance)
889     ## <title xml:base="relative"/><base href/> (maybe it should be)
890     ## <unknown xmlns="relative"/><base href/> (assuming that
891     ## |{relative}:unknown| is allowed before XHTML |base| (unlikely, though))
892     ## <style>@import 'relative';</style><base href>
893     ## <script>location.href = 'relative';</script><base href>
894 wakaba 1.14 ## NOTE: <html manifest=".."><head><base href=""/> is conforming as
895     ## an exception.
896 wakaba 1.40 $self->{onerror}->(node => $item->{node},
897 wakaba 1.4 type => 'basehref after URI attribute');
898     }
899 wakaba 1.14 if ($self->{has_hyperlink_element} and $has_target) {
900 wakaba 1.4 ## ISSUE: Are these examples conforming?
901     ## <head><title xlink:href=""/><base target="name"/></head>
902     ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
903     ## (assuming that |xbl:xbl| is allowed before |base|)
904     ## NOTE: These are non-conformant anyway because of |head|'s content model:
905     ## <link href=""/><base target="name"/>
906     ## <link rel=unknown href=""><base target=name>
907 wakaba 1.40 $self->{onerror}->(node => $item->{node},
908 wakaba 1.4 type => 'basetarget after hyperlink');
909     }
910    
911 wakaba 1.14 if (not $has_href and not $has_target) {
912 wakaba 1.40 $self->{onerror}->(node => $item->{node},
913 wakaba 1.14 type => 'attribute missing:href|target');
914     }
915    
916 wakaba 1.4 return $GetHTMLAttrsChecker->({
917     href => $HTMLURIAttrChecker,
918     target => $HTMLTargetAttrChecker,
919 wakaba 1.40 })->($self, $item, $element_state);
920 wakaba 1.4 },
921 wakaba 1.1 };
922    
923     $Element->{$HTML_NS}->{link} = {
924 wakaba 1.40 %HTMLEmptyChecker,
925     check_attrs => sub {
926     my ($self, $item, $element_state) = @_;
927 wakaba 1.1 $GetHTMLAttrsChecker->({
928     href => $HTMLURIAttrChecker,
929 wakaba 1.40 rel => sub { $HTMLLinkTypesAttrChecker->(0, $item, @_) },
930 wakaba 1.1 media => $HTMLMQAttrChecker,
931     hreflang => $HTMLLanguageTagAttrChecker,
932     type => $HTMLIMTAttrChecker,
933     ## NOTE: Though |title| has special semantics,
934     ## syntactically same as the |title| as global attribute.
935 wakaba 1.40 })->($self, $item, $element_state);
936     if ($item->{node}->has_attribute_ns (undef, 'href')) {
937     $self->{has_hyperlink_element} = 1 if $item->{has_hyperlink_link_type};
938 wakaba 1.4 } else {
939 wakaba 1.40 $self->{onerror}->(node => $item->{node},
940 wakaba 1.1 type => 'attribute missing:href');
941     }
942 wakaba 1.40 unless ($item->{node}->has_attribute_ns (undef, 'rel')) {
943     $self->{onerror}->(node => $item->{node},
944 wakaba 1.1 type => 'attribute missing:rel');
945     }
946     },
947     };
948    
949     $Element->{$HTML_NS}->{meta} = {
950 wakaba 1.40 %HTMLEmptyChecker,
951     check_attrs => sub {
952     my ($self, $item, $element_state) = @_;
953 wakaba 1.1 my $name_attr;
954     my $http_equiv_attr;
955     my $charset_attr;
956     my $content_attr;
957 wakaba 1.40 for my $attr (@{$item->{node}->attributes}) {
958 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
959     $attr_ns = '' unless defined $attr_ns;
960     my $attr_ln = $attr->manakai_local_name;
961     my $checker;
962     if ($attr_ns eq '') {
963     if ($attr_ln eq 'content') {
964     $content_attr = $attr;
965     $checker = 1;
966     } elsif ($attr_ln eq 'name') {
967     $name_attr = $attr;
968     $checker = 1;
969     } elsif ($attr_ln eq 'http-equiv') {
970     $http_equiv_attr = $attr;
971     $checker = 1;
972     } elsif ($attr_ln eq 'charset') {
973     $charset_attr = $attr;
974     $checker = 1;
975     } else {
976     $checker = $HTMLAttrChecker->{$attr_ln}
977     || $AttrChecker->{$attr_ns}->{$attr_ln}
978     || $AttrChecker->{$attr_ns}->{''};
979     }
980     } else {
981     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
982     || $AttrChecker->{$attr_ns}->{''};
983     }
984     if ($checker) {
985     $checker->($self, $attr) if ref $checker;
986     } else {
987     $self->{onerror}->(node => $attr, level => 'unsupported',
988     type => 'attribute');
989     ## ISSUE: No comformance createria for unknown attributes in the spec
990     }
991     }
992    
993     if (defined $name_attr) {
994     if (defined $http_equiv_attr) {
995     $self->{onerror}->(node => $http_equiv_attr,
996     type => 'attribute not allowed');
997     } elsif (defined $charset_attr) {
998     $self->{onerror}->(node => $charset_attr,
999     type => 'attribute not allowed');
1000     }
1001     my $metadata_name = $name_attr->value;
1002     my $metadata_value;
1003     if (defined $content_attr) {
1004     $metadata_value = $content_attr->value;
1005     } else {
1006 wakaba 1.40 $self->{onerror}->(node => $item->{node},
1007 wakaba 1.1 type => 'attribute missing:content');
1008     $metadata_value = '';
1009     }
1010     } elsif (defined $http_equiv_attr) {
1011     if (defined $charset_attr) {
1012     $self->{onerror}->(node => $charset_attr,
1013     type => 'attribute not allowed');
1014     }
1015     unless (defined $content_attr) {
1016 wakaba 1.40 $self->{onerror}->(node => $item->{node},
1017 wakaba 1.1 type => 'attribute missing:content');
1018     }
1019     } elsif (defined $charset_attr) {
1020     if (defined $content_attr) {
1021     $self->{onerror}->(node => $content_attr,
1022     type => 'attribute not allowed');
1023     }
1024     } else {
1025     if (defined $content_attr) {
1026     $self->{onerror}->(node => $content_attr,
1027     type => 'attribute not allowed');
1028 wakaba 1.40 $self->{onerror}->(node => $item->{node},
1029 wakaba 1.1 type => 'attribute missing:name|http-equiv');
1030     } else {
1031 wakaba 1.40 $self->{onerror}->(node => $item->{node},
1032 wakaba 1.1 type => 'attribute missing:name|http-equiv|charset');
1033     }
1034     }
1035    
1036 wakaba 1.32 my $check_charset_decl = sub () {
1037 wakaba 1.40 my $parent = $item->{node}->manakai_parent_element;
1038 wakaba 1.29 if ($parent and $parent eq $parent->owner_document->manakai_head) {
1039     for my $el (@{$parent->child_nodes}) {
1040     next unless $el->node_type == 1; # ELEMENT_NODE
1041 wakaba 1.40 unless ($el eq $item->{node}) {
1042 wakaba 1.29 ## NOTE: Not the first child element.
1043 wakaba 1.40 $self->{onerror}->(node => $item->{node},
1044 wakaba 1.32 type => 'element not allowed:meta charset',
1045     level => $self->{must_level});
1046 wakaba 1.29 }
1047     last;
1048     ## NOTE: Entity references are not supported.
1049     }
1050     } else {
1051 wakaba 1.40 $self->{onerror}->(node => $item->{node},
1052 wakaba 1.32 type => 'element not allowed:meta charset',
1053     level => $self->{must_level});
1054 wakaba 1.29 }
1055    
1056 wakaba 1.40 unless ($item->{node}->owner_document->manakai_is_html) {
1057     $self->{onerror}->(node => $item->{node},
1058 wakaba 1.32 type => 'in XML:charset',
1059     level => $self->{must_level});
1060 wakaba 1.1 }
1061 wakaba 1.32 }; # $check_charset_decl
1062 wakaba 1.21
1063 wakaba 1.32 my $check_charset = sub ($$) {
1064     my ($attr, $charset_value) = @_;
1065 wakaba 1.21 ## NOTE: Though the case-sensitivility of |charset| attribute value
1066     ## is not explicitly spelled in the HTML5 spec, the Character Set
1067     ## registry of IANA, which is referenced from HTML5 spec, says that
1068     ## charset name is case-insensitive.
1069     $charset_value =~ tr/A-Z/a-z/; ## NOTE: ASCII Case-insensitive.
1070    
1071     require Message::Charset::Info;
1072     my $charset = $Message::Charset::Info::IANACharset->{$charset_value};
1073 wakaba 1.40 my $ic = $item->{node}->owner_document->input_encoding;
1074 wakaba 1.21 if (defined $ic) {
1075     ## TODO: Test for this case
1076     my $ic_charset = $Message::Charset::Info::IANACharset->{$ic};
1077     if ($charset ne $ic_charset) {
1078 wakaba 1.32 $self->{onerror}->(node => $attr,
1079 wakaba 1.21 type => 'mismatched charset name:'.$ic.
1080 wakaba 1.32 ':'.$charset_value, ## TODO: This should be a |value| value.
1081     level => $self->{must_level});
1082 wakaba 1.21 }
1083     } else {
1084     ## NOTE: MUST, but not checkable, since the document is not originally
1085     ## in serialized form (or the parser does not preserve the input
1086     ## encoding information).
1087 wakaba 1.32 $self->{onerror}->(node => $attr,
1088     type => 'mismatched charset name::'.$charset_value, ## TODO: |value|
1089 wakaba 1.21 level => 'unsupported');
1090     }
1091    
1092     ## ISSUE: What is "valid character encoding name"? Syntactically valid?
1093     ## Syntactically valid and registered? What about x-charset names?
1094     unless (Message::Charset::Info::is_syntactically_valid_iana_charset_name
1095     ($charset_value)) {
1096 wakaba 1.32 $self->{onerror}->(node => $attr,
1097     type => 'charset:syntax error:'.$charset_value, ## TODO
1098     level => $self->{must_level});
1099 wakaba 1.21 }
1100    
1101     if ($charset) {
1102     ## ISSUE: What is "the preferred name for that encoding" (for a charset
1103     ## with no "preferred MIME name" label)?
1104     my $charset_status = $charset->{iana_names}->{$charset_value} || 0;
1105     if (($charset_status &
1106     Message::Charset::Info::PREFERRED_CHARSET_NAME ())
1107     != Message::Charset::Info::PREFERRED_CHARSET_NAME ()) {
1108 wakaba 1.32 $self->{onerror}->(node => $attr,
1109 wakaba 1.21 type => 'charset:not preferred:'.
1110 wakaba 1.32 $charset_value, ## TODO
1111     level => $self->{must_level});
1112 wakaba 1.21 }
1113     if (($charset_status &
1114     Message::Charset::Info::REGISTERED_CHARSET_NAME ())
1115     != Message::Charset::Info::REGISTERED_CHARSET_NAME ()) {
1116     if ($charset_value =~ /^x-/) {
1117 wakaba 1.32 $self->{onerror}->(node => $attr,
1118     type => 'charset:private:'.$charset_value, ## TODO
1119 wakaba 1.21 level => $self->{good_level});
1120     } else {
1121 wakaba 1.32 $self->{onerror}->(node => $attr,
1122 wakaba 1.21 type => 'charset:not registered:'.
1123 wakaba 1.32 $charset_value, ## TODO
1124 wakaba 1.21 level => $self->{good_level});
1125     }
1126     }
1127     } elsif ($charset_value =~ /^x-/) {
1128 wakaba 1.32 $self->{onerror}->(node => $attr,
1129     type => 'charset:private:'.$charset_value, ## TODO
1130 wakaba 1.21 level => $self->{good_level});
1131     } else {
1132 wakaba 1.32 $self->{onerror}->(node => $attr,
1133     type => 'charset:not registered:'.$charset_value, ## TODO
1134 wakaba 1.21 level => $self->{good_level});
1135     }
1136    
1137 wakaba 1.32 if ($attr->get_user_data ('manakai_has_reference')) {
1138     $self->{onerror}->(node => $attr,
1139 wakaba 1.22 type => 'character reference in charset',
1140     level => $self->{must_level});
1141     }
1142 wakaba 1.32 }; # $check_charset
1143    
1144     ## TODO: metadata conformance
1145    
1146     ## TODO: pragma conformance
1147     if (defined $http_equiv_attr) { ## An enumerated attribute
1148     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
1149     if ({
1150     'refresh' => 1,
1151     'default-style' => 1,
1152     }->{$keyword}) {
1153     #
1154 wakaba 1.33
1155     ## TODO: More than one occurence is a MUST-level error (revision 1180).
1156 wakaba 1.32 } elsif ($keyword eq 'content-type') {
1157 wakaba 1.33 ## ISSUE: Though it is renamed as "Encoding declaration" state in rev
1158     ## 1221, there are still many occurence of "Content-Type" state in
1159     ## the spec.
1160    
1161 wakaba 1.32 $check_charset_decl->();
1162     if ($content_attr) {
1163     my $content = $content_attr->value;
1164     if ($content =~ m!^text/html;\x20?charset=(.+)\z!s) {
1165     $check_charset->($content_attr, $1);
1166     } else {
1167     $self->{onerror}->(node => $content_attr,
1168     type => 'meta content-type syntax error',
1169     level => $self->{must_level});
1170     }
1171     }
1172     } else {
1173     $self->{onerror}->(node => $http_equiv_attr,
1174     type => 'enumerated:invalid');
1175     }
1176     }
1177    
1178     if (defined $charset_attr) {
1179     $check_charset_decl->();
1180     $check_charset->($charset_attr, $charset_attr->value);
1181 wakaba 1.1 }
1182     },
1183     };
1184    
1185     $Element->{$HTML_NS}->{style} = {
1186 wakaba 1.40 %HTMLChecker,
1187     check_attrs => $GetHTMLAttrsChecker->({
1188 wakaba 1.1 type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
1189     media => $HTMLMQAttrChecker,
1190     scoped => $GetHTMLBooleanAttrChecker->('scoped'),
1191     ## NOTE: |title| has special semantics for |style|s, but is syntactically
1192     ## not different
1193     }),
1194 wakaba 1.40 check_start => sub {
1195     my ($self, $item, $element_state) = @_;
1196    
1197 wakaba 1.27 ## NOTE: |html:style| itself has no conformance creteria on content model.
1198 wakaba 1.40 my $type = $item->{node}->get_attribute_ns (undef, 'type');
1199 wakaba 1.27 if (not defined $type or
1200     $type =~ m[\A(?>(?>\x0D\x0A)?[\x09\x20])*[Tt][Ee][Xx][Tt](?>(?>\x0D\x0A)?[\x09\x20])*/(?>(?>\x0D\x0A)?[\x09\x20])*[Cc][Ss][Ss](?>(?>\x0D\x0A)?[\x09\x20])*\z]) {
1201 wakaba 1.40 $element_state->{allow_element} = 0;
1202     $element_state->{style_type} = 'text/css';
1203     } else {
1204     $element_state->{allow_element} = 1; # unknown
1205     $element_state->{style_type} = $type; ## TODO: $type normalization
1206     }
1207     },
1208     check_child_element => sub {
1209     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1210     $child_is_transparent, $element_state) = @_;
1211     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
1212     $self->{onerror}->(node => $child_el,
1213     type => 'element not allowed:minus',
1214     level => $self->{must_level});
1215     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1216     #
1217     } elsif ($element_state->{allow_element}) {
1218     #
1219     } else {
1220     $self->{onerror}->(node => $child_el, type => 'element not allowed');
1221     }
1222     },
1223     check_child_text => sub {
1224     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1225     $element_state->{text} .= $child_node->text_content;
1226     },
1227     check_end => sub {
1228     my ($self, $item, $element_state) = @_;
1229     if ($element_state->{style_type} eq 'text/css') {
1230     $self->{onsubdoc}->({s => $element_state->{text},
1231     container_node => $item->{node},
1232 wakaba 1.28 media_type => 'text/css', is_char_string => 1});
1233 wakaba 1.27 } else {
1234 wakaba 1.40 $self->{onerror}->(node => $item->{node}, level => 'unsupported',
1235     type => 'style:'.$element_state->{style_type});
1236 wakaba 1.27 }
1237 wakaba 1.40
1238     $HTMLChecker{check_end}->(@_);
1239 wakaba 1.1 },
1240     };
1241 wakaba 1.25 ## ISSUE: Relationship to significant content check?
1242 wakaba 1.1
1243     $Element->{$HTML_NS}->{body} = {
1244 wakaba 1.40 %HTMLProseContentChecker,
1245 wakaba 1.1 };
1246    
1247     $Element->{$HTML_NS}->{section} = {
1248 wakaba 1.40 %HTMLProseContentChecker,
1249 wakaba 1.1 };
1250    
1251     $Element->{$HTML_NS}->{nav} = {
1252 wakaba 1.40 %HTMLProseContentChecker,
1253 wakaba 1.1 };
1254    
1255     $Element->{$HTML_NS}->{article} = {
1256 wakaba 1.40 %HTMLProseContentChecker,
1257 wakaba 1.1 };
1258    
1259     $Element->{$HTML_NS}->{blockquote} = {
1260 wakaba 1.40 %HTMLProseContentChecker,
1261     check_attrs => $GetHTMLAttrsChecker->({
1262 wakaba 1.1 cite => $HTMLURIAttrChecker,
1263     }),
1264     };
1265    
1266     $Element->{$HTML_NS}->{aside} = {
1267 wakaba 1.40 %HTMLProseContentChecker,
1268 wakaba 1.1 };
1269    
1270     $Element->{$HTML_NS}->{h1} = {
1271 wakaba 1.40 %HTMLPhrasingContentChecker,
1272     check_start => sub {
1273     my ($self, $item, $element_state) = @_;
1274     $self->{flag}->{has_hn} = 1;
1275 wakaba 1.1 },
1276     };
1277    
1278 wakaba 1.40 $Element->{$HTML_NS}->{h2} = {%{$Element->{$HTML_NS}->{h1}}};
1279 wakaba 1.1
1280 wakaba 1.40 $Element->{$HTML_NS}->{h3} = {%{$Element->{$HTML_NS}->{h1}}};
1281 wakaba 1.1
1282 wakaba 1.40 $Element->{$HTML_NS}->{h4} = {%{$Element->{$HTML_NS}->{h1}}};
1283 wakaba 1.1
1284 wakaba 1.40 $Element->{$HTML_NS}->{h5} = {%{$Element->{$HTML_NS}->{h1}}};
1285 wakaba 1.1
1286 wakaba 1.40 $Element->{$HTML_NS}->{h6} = {%{$Element->{$HTML_NS}->{h1}}};
1287 wakaba 1.1
1288 wakaba 1.29 ## TODO: Explicit sectioning is "encouraged".
1289    
1290 wakaba 1.1 $Element->{$HTML_NS}->{header} = {
1291 wakaba 1.40 %HTMLProseContentChecker,
1292     check_start => sub {
1293     my ($self, $item, $element_state) = @_;
1294     $self->_add_minus_elements ($element_state,
1295     {$HTML_NS => {qw/header 1 footer 1/}},
1296     $HTMLSectioningContent);
1297     $element_state->{has_hn_original} = $self->{flag}->{has_hn};
1298     $self->{flag}->{has_hn} = 0;
1299     },
1300     check_end => sub {
1301     my ($self, $item, $element_state) = @_;
1302     $self->_remove_minus_elements ($element_state);
1303     unless ($self->{flag}->{has_hn}) {
1304     $self->{onerror}->(node => $item->{node},
1305     type => 'element missing:hn');
1306     }
1307     $self->{flag}->{has_hn} ||= $element_state->{has_hn_original};
1308 wakaba 1.1
1309 wakaba 1.40 $HTMLProseContentChecker{check_end}->(@_);
1310 wakaba 1.1 },
1311 wakaba 1.40 ## ISSUE: <header><del><h1>...</h1></del></header> is conforming?
1312 wakaba 1.1 };
1313    
1314     $Element->{$HTML_NS}->{footer} = {
1315 wakaba 1.40 %HTMLProseContentChecker,
1316     check_start => sub {
1317     my ($self, $item, $element_state) = @_;
1318     $self->_add_minus_elements ($element_state,
1319     {$HTML_NS => {footer => 1}},
1320     $HTMLSectioningContent, $HTMLHeadingContent);
1321     },
1322     check_end => sub {
1323     my ($self, $item, $element_state) = @_;
1324     $self->_remove_minus_elements ($element_state);
1325 wakaba 1.1
1326 wakaba 1.40 $HTMLProseContentChecker{check_end}->(@_);
1327 wakaba 1.1 },
1328     };
1329    
1330     $Element->{$HTML_NS}->{address} = {
1331 wakaba 1.40 %HTMLProseContentChecker,
1332     check_start => sub {
1333     my ($self, $item, $element_state) = @_;
1334     $self->_add_minus_elements ($element_state,
1335     {$HTML_NS => {footer => 1, address => 1}},
1336     $HTMLSectioningContent, $HTMLHeadingContent);
1337     },
1338     check_end => sub {
1339     my ($self, $item, $element_state) = @_;
1340     $self->_remove_minus_elements ($element_state);
1341 wakaba 1.29
1342 wakaba 1.40 $HTMLProseContentChecker{check_end}->(@_);
1343 wakaba 1.29 },
1344 wakaba 1.1 };
1345    
1346     $Element->{$HTML_NS}->{p} = {
1347 wakaba 1.40 %HTMLPhrasingContentChecker,
1348 wakaba 1.1 };
1349    
1350     $Element->{$HTML_NS}->{hr} = {
1351 wakaba 1.40 %HTMLEmptyChecker,
1352 wakaba 1.1 };
1353    
1354     $Element->{$HTML_NS}->{br} = {
1355 wakaba 1.40 %HTMLEmptyChecker,
1356 wakaba 1.29 ## NOTE: Blank line MUST NOT be used for presentation purpose.
1357     ## (This requirement is semantic so that we cannot check.)
1358 wakaba 1.1 };
1359    
1360     $Element->{$HTML_NS}->{dialog} = {
1361 wakaba 1.40 %HTMLChecker,
1362     check_start => sub {
1363     my ($self, $item, $element_state) = @_;
1364     $element_state->{phase} = 'before dt';
1365     },
1366     check_child_element => sub {
1367     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1368     $child_is_transparent, $element_state) = @_;
1369     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
1370     $self->{onerror}->(node => $child_el,
1371     type => 'element not allowed:minus',
1372     level => $self->{must_level});
1373     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1374     #
1375     } elsif ($element_state->{phase} eq 'before dt') {
1376     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
1377     $element_state->{phase} = 'before dd';
1378     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
1379     $self->{onerror}
1380     ->(node => $child_el, type => 'ps element missing:dt');
1381     $element_state->{phase} = 'before dt';
1382     } else {
1383     $self->{onerror}->(node => $child_el, type => 'element not allowed');
1384     }
1385     } elsif ($element_state->{phase} eq 'before dd') {
1386     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
1387     $element_state->{phase} = 'before dt';
1388     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
1389     $self->{onerror}
1390     ->(node => $child_el, type => 'ps element missing:dd');
1391     $element_state->{phase} = 'before dd';
1392     } else {
1393     $self->{onerror}->(node => $child_el, type => 'element not allowed');
1394 wakaba 1.1 }
1395 wakaba 1.40 } else {
1396     die "check_child_element: Bad |dialog| phase: $element_state->{phase}";
1397     }
1398     },
1399     check_child_text => sub {
1400     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1401     if ($has_significant) {
1402     $self->{onerror}->(node => $child_node, type => 'character not allowed');
1403 wakaba 1.1 }
1404 wakaba 1.40 },
1405     check_end => sub {
1406     my ($self, $item, $element_state) = @_;
1407     if ($element_state->{phase} eq 'before dd') {
1408     $self->{onerror}->(node => $item->{node},
1409     type => 'child element missing:dd');
1410 wakaba 1.1 }
1411 wakaba 1.40
1412     $HTMLChecker{check_end}->(@_);
1413 wakaba 1.1 },
1414     };
1415    
1416     $Element->{$HTML_NS}->{pre} = {
1417 wakaba 1.40 %HTMLPhrasingContentChecker,
1418 wakaba 1.1 };
1419    
1420     $Element->{$HTML_NS}->{ol} = {
1421 wakaba 1.40 %HTMLChecker,
1422     check_attrs => $GetHTMLAttrsChecker->({
1423 wakaba 1.1 start => $HTMLIntegerAttrChecker,
1424     }),
1425 wakaba 1.40 check_child_element => sub {
1426     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1427     $child_is_transparent, $element_state) = @_;
1428     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
1429     $self->{onerror}->(node => $child_el,
1430     type => 'element not allowed:minus',
1431     level => $self->{must_level});
1432     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1433     #
1434     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'li') {
1435     #
1436     } else {
1437     $self->{onerror}->(node => $child_el, type => 'element not allowed');
1438 wakaba 1.1 }
1439 wakaba 1.40 },
1440     check_child_text => sub {
1441     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1442     if ($has_significant) {
1443     $self->{onerror}->(node => $child_node, type => 'character not allowed');
1444 wakaba 1.1 }
1445     },
1446     };
1447    
1448     $Element->{$HTML_NS}->{ul} = {
1449 wakaba 1.40 %{$Element->{$HTML_NS}->{ol}},
1450 wakaba 1.1 };
1451    
1452     $Element->{$HTML_NS}->{li} = {
1453 wakaba 1.40 %HTMLProseContentChecker,
1454     check_attrs => $GetHTMLAttrsChecker->({
1455 wakaba 1.1 start => sub {
1456     my ($self, $attr) = @_;
1457     my $parent = $attr->owner_element->manakai_parent_element;
1458     if (defined $parent) {
1459     my $parent_ns = $parent->namespace_uri;
1460     $parent_ns = '' unless defined $parent_ns;
1461     my $parent_ln = $parent->manakai_local_name;
1462     unless ($parent_ns eq $HTML_NS and $parent_ln eq 'ol') {
1463     $self->{onerror}->(node => $attr, level => 'unsupported',
1464     type => 'attribute');
1465     }
1466     }
1467     $HTMLIntegerAttrChecker->($self, $attr);
1468     },
1469     }),
1470 wakaba 1.40 check_child_element => sub {
1471     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1472     $child_is_transparent, $element_state) = @_;
1473     if ($self->{flag}->{in_menu}) {
1474     $HTMLPhrasingContentChecker{check_child_element}->(@_);
1475     } else {
1476     $HTMLProseContentChecker{check_child_element}->(@_);
1477     }
1478     },
1479     check_child_text => sub {
1480     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1481     if ($self->{flag}->{in_menu}) {
1482     $HTMLPhrasingContentChecker{check_child_text}->(@_);
1483 wakaba 1.1 } else {
1484 wakaba 1.40 $HTMLProseContentChecker{check_child_text}->(@_);
1485 wakaba 1.1 }
1486     },
1487     };
1488    
1489     $Element->{$HTML_NS}->{dl} = {
1490 wakaba 1.40 %HTMLChecker,
1491     check_start => sub {
1492     my ($self, $item, $element_state) = @_;
1493     $element_state->{phase} = 'before dt';
1494     },
1495     check_child_element => sub {
1496     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1497     $child_is_transparent, $element_state) = @_;
1498     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
1499     $self->{onerror}->(node => $child_el,
1500     type => 'element not allowed:minus',
1501     level => $self->{must_level});
1502     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1503     #
1504     } elsif ($element_state->{phase} eq 'in dds') {
1505     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
1506     #$element_state->{phase} = 'in dds';
1507     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
1508     $element_state->{phase} = 'in dts';
1509     } else {
1510     $self->{onerror}->(node => $child_el, type => 'element not allowed');
1511     }
1512     } elsif ($element_state->{phase} eq 'in dts') {
1513     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
1514     #$element_state->{phase} = 'in dts';
1515     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
1516     $element_state->{phase} = 'in dds';
1517     } else {
1518     $self->{onerror}->(node => $child_el, type => 'element not allowed');
1519     }
1520     } elsif ($element_state->{phase} eq 'before dt') {
1521     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
1522     $element_state->{phase} = 'in dts';
1523     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
1524     $self->{onerror}
1525     ->(node => $child_el, type => 'ps element missing:dt');
1526     $element_state->{phase} = 'in dds';
1527     } else {
1528     $self->{onerror}->(node => $child_el, type => 'element not allowed');
1529 wakaba 1.1 }
1530 wakaba 1.40 } else {
1531     die "check_child_element: Bad |dl| phase: $element_state->{phase}";
1532 wakaba 1.1 }
1533 wakaba 1.40 },
1534     check_child_text => sub {
1535     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1536     if ($has_significant) {
1537     $self->{onerror}->(node => $child_node, type => 'character not allowed');
1538     }
1539     },
1540     check_end => sub {
1541     my ($self, $item, $element_state) = @_;
1542     if ($element_state->{phase} eq 'in dts') {
1543     $self->{onerror}->(node => $item->{node},
1544     type => 'child element missing:dd');
1545 wakaba 1.1 }
1546    
1547 wakaba 1.40 $HTMLChecker{check_end}->(@_);
1548 wakaba 1.1 },
1549     };
1550    
1551     $Element->{$HTML_NS}->{dt} = {
1552 wakaba 1.40 %HTMLPhrasingContentChecker,
1553 wakaba 1.1 };
1554    
1555     $Element->{$HTML_NS}->{dd} = {
1556 wakaba 1.40 %HTMLProseContentChecker,
1557 wakaba 1.1 };
1558    
1559     $Element->{$HTML_NS}->{a} = {
1560 wakaba 1.40 %HTMLPhrasingContentChecker,
1561     check_attrs => sub {
1562     my ($self, $item, $element_state) = @_;
1563 wakaba 1.1 my %attr;
1564 wakaba 1.40 for my $attr (@{$item->{node}->attributes}) {
1565 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
1566     $attr_ns = '' unless defined $attr_ns;
1567     my $attr_ln = $attr->manakai_local_name;
1568     my $checker;
1569     if ($attr_ns eq '') {
1570     $checker = {
1571     target => $HTMLTargetAttrChecker,
1572     href => $HTMLURIAttrChecker,
1573     ping => $HTMLSpaceURIsAttrChecker,
1574 wakaba 1.40 rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
1575 wakaba 1.1 media => $HTMLMQAttrChecker,
1576     hreflang => $HTMLLanguageTagAttrChecker,
1577     type => $HTMLIMTAttrChecker,
1578     }->{$attr_ln};
1579     if ($checker) {
1580     $attr{$attr_ln} = $attr;
1581     } else {
1582     $checker = $HTMLAttrChecker->{$attr_ln};
1583     }
1584     }
1585     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1586     || $AttrChecker->{$attr_ns}->{''};
1587     if ($checker) {
1588     $checker->($self, $attr) if ref $checker;
1589     } else {
1590     $self->{onerror}->(node => $attr, level => 'unsupported',
1591     type => 'attribute');
1592     ## ISSUE: No comformance createria for unknown attributes in the spec
1593     }
1594     }
1595    
1596 wakaba 1.40 $element_state->{in_a_href_original} = $self->{flag}->{in_a_href};
1597 wakaba 1.4 if (defined $attr{href}) {
1598     $self->{has_hyperlink_element} = 1;
1599 wakaba 1.40 $self->{flag}->{in_a_href} = 1;
1600 wakaba 1.4 } else {
1601 wakaba 1.1 for (qw/target ping rel media hreflang type/) {
1602     if (defined $attr{$_}) {
1603     $self->{onerror}->(node => $attr{$_},
1604     type => 'attribute not allowed');
1605     }
1606     }
1607     }
1608     },
1609 wakaba 1.40 check_start => sub {
1610     my ($self, $item, $element_state) = @_;
1611     $self->_add_minus_elements ($element_state, $HTMLInteractiveContent);
1612     },
1613     check_end => sub {
1614     my ($self, $item, $element_state) = @_;
1615     $self->_remove_minus_elements ($element_state);
1616 wakaba 1.1
1617 wakaba 1.40 $HTMLPhrasingContentChecker{check_end}->(@_);
1618 wakaba 1.1 },
1619     };
1620    
1621     $Element->{$HTML_NS}->{q} = {
1622 wakaba 1.40 %HTMLPhrasingContentChecker,
1623     check_attrs => $GetHTMLAttrsChecker->({
1624 wakaba 1.1 cite => $HTMLURIAttrChecker,
1625     }),
1626     };
1627    
1628     $Element->{$HTML_NS}->{cite} = {
1629 wakaba 1.40 %HTMLPhrasingContentChecker,
1630 wakaba 1.1 };
1631    
1632     $Element->{$HTML_NS}->{em} = {
1633 wakaba 1.40 %HTMLPhrasingContentChecker,
1634 wakaba 1.1 };
1635    
1636     $Element->{$HTML_NS}->{strong} = {
1637 wakaba 1.40 %HTMLPhrasingContentChecker,
1638 wakaba 1.1 };
1639    
1640     $Element->{$HTML_NS}->{small} = {
1641 wakaba 1.40 %HTMLPhrasingContentChecker,
1642 wakaba 1.1 };
1643    
1644 wakaba 1.38 $Element->{$HTML_NS}->{mark} = {
1645 wakaba 1.40 %HTMLPhrasingContentChecker,
1646 wakaba 1.1 };
1647    
1648     $Element->{$HTML_NS}->{dfn} = {
1649 wakaba 1.40 %HTMLPhrasingContentChecker,
1650     check_start => sub {
1651     my ($self, $item, $element_state) = @_;
1652     $self->_add_minus_elements ($element_state, {$HTML_NS => {dfn => 1}});
1653 wakaba 1.1
1654 wakaba 1.40 my $node = $item->{node};
1655 wakaba 1.1 my $term = $node->get_attribute_ns (undef, 'title');
1656     unless (defined $term) {
1657     for my $child (@{$node->child_nodes}) {
1658     if ($child->node_type == 1) { # ELEMENT_NODE
1659     if (defined $term) {
1660     undef $term;
1661     last;
1662     } elsif ($child->manakai_local_name eq 'abbr') {
1663     my $nsuri = $child->namespace_uri;
1664     if (defined $nsuri and $nsuri eq $HTML_NS) {
1665     my $attr = $child->get_attribute_node_ns (undef, 'title');
1666     if ($attr) {
1667     $term = $attr->value;
1668     }
1669     }
1670     }
1671     } elsif ($child->node_type == 3 or $child->node_type == 4) {
1672     ## TEXT_NODE or CDATA_SECTION_NODE
1673     if ($child->data =~ /\A[\x09-\x0D\x20]+\z/) { # Inter-element whitespace
1674     next;
1675     }
1676     undef $term;
1677     last;
1678     }
1679     }
1680     unless (defined $term) {
1681     $term = $node->text_content;
1682     }
1683     }
1684     if ($self->{term}->{$term}) {
1685     $self->{onerror}->(node => $node, type => 'duplicate term');
1686     push @{$self->{term}->{$term}}, $node;
1687     } else {
1688     $self->{term}->{$term} = [$node];
1689     }
1690     ## ISSUE: The HTML5 algorithm does not work with |ruby| unless |dfn|
1691     ## has |title|.
1692 wakaba 1.40 },
1693     check_end => sub {
1694     my ($self, $item, $element_state) = @_;
1695     $self->_remove_minus_elements ($element_state);
1696 wakaba 1.1
1697 wakaba 1.40 $HTMLPhrasingContentChecker{check_end}->(@_);
1698 wakaba 1.1 },
1699     };
1700    
1701     $Element->{$HTML_NS}->{abbr} = {
1702 wakaba 1.40 %HTMLPhrasingContentChecker,
1703 wakaba 1.1 };
1704    
1705     $Element->{$HTML_NS}->{time} = {
1706 wakaba 1.40 %HTMLPhrasingContentChecker,
1707     check_attrs => $GetHTMLAttrsChecker->({
1708 wakaba 1.1 datetime => sub { 1 }, # checked in |checker|
1709     }),
1710     ## TODO: Write tests
1711 wakaba 1.40 check_end => sub {
1712     my ($self, $item, $element_state) = @_;
1713 wakaba 1.1
1714 wakaba 1.40 my $attr = $item->{node}->get_attribute_node_ns (undef, 'datetime');
1715 wakaba 1.1 my $input;
1716     my $reg_sp;
1717     my $input_node;
1718     if ($attr) {
1719     $input = $attr->value;
1720     $reg_sp = qr/[\x09-\x0D\x20]*/;
1721     $input_node = $attr;
1722     } else {
1723 wakaba 1.40 $input = $item->{node}->text_content;
1724 wakaba 1.1 $reg_sp = qr/\p{Zs}*/;
1725 wakaba 1.40 $input_node = $item->{node};
1726 wakaba 1.1
1727     ## ISSUE: What is the definition for "successfully extracts a date
1728     ## or time"? If the algorithm says the string is invalid but
1729     ## return some date or time, is it "successfully"?
1730     }
1731    
1732     my $hour;
1733     my $minute;
1734     my $second;
1735     if ($input =~ /
1736     \A
1737     [\x09-\x0D\x20]*
1738     ([0-9]+) # 1
1739     (?>
1740     -([0-9]+) # 2
1741     -([0-9]+) # 3
1742     [\x09-\x0D\x20]*
1743     (?>
1744     T
1745     [\x09-\x0D\x20]*
1746     )?
1747     ([0-9]+) # 4
1748     :([0-9]+) # 5
1749     (?>
1750     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6
1751     )?
1752     [\x09-\x0D\x20]*
1753     (?>
1754     Z
1755     [\x09-\x0D\x20]*
1756     |
1757     [+-]([0-9]+):([0-9]+) # 7, 8
1758     [\x09-\x0D\x20]*
1759     )?
1760     \z
1761     |
1762     :([0-9]+) # 9
1763     (?>
1764     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10
1765     )?
1766     [\x09-\x0D\x20]*\z
1767     )
1768     /x) {
1769     if (defined $2) { ## YYYY-MM-DD T? hh:mm
1770     if (length $1 != 4 or length $2 != 2 or length $3 != 2 or
1771     length $4 != 2 or length $5 != 2) {
1772     $self->{onerror}->(node => $input_node,
1773     type => 'dateortime:syntax error');
1774     }
1775    
1776     if (1 <= $2 and $2 <= 12) {
1777     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
1778     if $3 < 1 or
1779     $3 > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$2];
1780     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
1781     if $2 == 2 and $3 == 29 and
1782     not ($1 % 400 == 0 or ($1 % 4 == 0 and $1 % 100 != 0));
1783     } else {
1784     $self->{onerror}->(node => $input_node,
1785     type => 'datetime:bad month');
1786     }
1787    
1788     ($hour, $minute, $second) = ($4, $5, $6);
1789    
1790     if (defined $7) { ## [+-]hh:mm
1791     if (length $7 != 2 or length $8 != 2) {
1792     $self->{onerror}->(node => $input_node,
1793     type => 'dateortime:syntax error');
1794     }
1795    
1796     $self->{onerror}->(node => $input_node,
1797     type => 'datetime:bad timezone hour')
1798     if $7 > 23;
1799     $self->{onerror}->(node => $input_node,
1800     type => 'datetime:bad timezone minute')
1801     if $8 > 59;
1802     }
1803     } else { ## hh:mm
1804     if (length $1 != 2 or length $9 != 2) {
1805     $self->{onerror}->(node => $input_node,
1806     type => qq'dateortime:syntax error');
1807     }
1808    
1809     ($hour, $minute, $second) = ($1, $9, $10);
1810     }
1811    
1812     $self->{onerror}->(node => $input_node, type => 'datetime:bad hour')
1813     if $hour > 23;
1814     $self->{onerror}->(node => $input_node, type => 'datetime:bad minute')
1815     if $minute > 59;
1816    
1817     if (defined $second) { ## s
1818     ## NOTE: Integer part of second don't have to have length of two.
1819    
1820     if (substr ($second, 0, 1) eq '.') {
1821     $self->{onerror}->(node => $input_node,
1822     type => 'dateortime:syntax error');
1823     }
1824    
1825     $self->{onerror}->(node => $input_node, type => 'datetime:bad second')
1826     if $second >= 60;
1827     }
1828     } else {
1829     $self->{onerror}->(node => $input_node,
1830     type => 'dateortime:syntax error');
1831     }
1832    
1833 wakaba 1.40 $HTMLPhrasingContentChecker{check_end}->(@_);
1834 wakaba 1.1 },
1835     };
1836    
1837     $Element->{$HTML_NS}->{meter} = { ## TODO: "The recommended way of giving the value is to include it as contents of the element"
1838 wakaba 1.40 %HTMLPhrasingContentChecker,
1839     check_attrs => $GetHTMLAttrsChecker->({
1840 wakaba 1.1 value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1841     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1842     low => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1843     high => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1844     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1845     optimum => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1846     }),
1847     };
1848    
1849     $Element->{$HTML_NS}->{progress} = { ## TODO: recommended to use content
1850 wakaba 1.40 %HTMLPhrasingContentChecker,
1851     check_attrs => $GetHTMLAttrsChecker->({
1852 wakaba 1.1 value => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift >= 0 }),
1853     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift > 0 }),
1854     }),
1855     };
1856    
1857     $Element->{$HTML_NS}->{code} = {
1858 wakaba 1.40 %HTMLPhrasingContentChecker,
1859 wakaba 1.1 };
1860    
1861     $Element->{$HTML_NS}->{var} = {
1862 wakaba 1.40 %HTMLPhrasingContentChecker,
1863 wakaba 1.1 };
1864    
1865     $Element->{$HTML_NS}->{samp} = {
1866 wakaba 1.40 %HTMLPhrasingContentChecker,
1867 wakaba 1.1 };
1868    
1869     $Element->{$HTML_NS}->{kbd} = {
1870 wakaba 1.40 %HTMLPhrasingContentChecker,
1871 wakaba 1.1 };
1872    
1873     $Element->{$HTML_NS}->{sub} = {
1874 wakaba 1.40 %HTMLPhrasingContentChecker,
1875 wakaba 1.1 };
1876    
1877     $Element->{$HTML_NS}->{sup} = {
1878 wakaba 1.40 %HTMLPhrasingContentChecker,
1879 wakaba 1.1 };
1880    
1881     $Element->{$HTML_NS}->{span} = {
1882 wakaba 1.40 %HTMLPhrasingContentChecker,
1883 wakaba 1.1 };
1884    
1885     $Element->{$HTML_NS}->{i} = {
1886 wakaba 1.40 %HTMLPhrasingContentChecker,
1887 wakaba 1.1 };
1888    
1889     $Element->{$HTML_NS}->{b} = {
1890 wakaba 1.40 %HTMLPhrasingContentChecker,
1891 wakaba 1.1 };
1892    
1893     $Element->{$HTML_NS}->{bdo} = {
1894 wakaba 1.40 %HTMLPhrasingContentChecker,
1895     check_attrs => sub {
1896     my ($self, $item, $element_state) = @_;
1897     $GetHTMLAttrsChecker->({})->($self, $item, $element_state);
1898     unless ($item->{node}->has_attribute_ns (undef, 'dir')) {
1899     $self->{onerror}->(node => $item->{node},
1900     type => 'attribute missing:dir');
1901 wakaba 1.1 }
1902     },
1903     ## ISSUE: The spec does not directly say that |dir| is a enumerated attr.
1904     };
1905    
1906 wakaba 1.29 =pod
1907    
1908     ## TODO:
1909    
1910     +
1911     + <p>Partly because of the confusion described above, authors are
1912     + strongly recommended to always mark up all paragraphs with the
1913     + <code>p</code> element, and to not have any <code>ins</code> or
1914     + <code>del</code> elements that cross across any <span
1915     + title="paragraph">implied paragraphs</span>.</p>
1916     +
1917     (An informative note)
1918    
1919     <p><code>ins</code> elements should not cross <span
1920     + title="paragraph">implied paragraph</span> boundaries.</p>
1921     (normative)
1922    
1923     + <p><code>del</code> elements should not cross <span
1924     + title="paragraph">implied paragraph</span> boundaries.</p>
1925     (normative)
1926    
1927     =cut
1928    
1929 wakaba 1.1 $Element->{$HTML_NS}->{ins} = {
1930 wakaba 1.40 %HTMLTransparentChecker,
1931     check_attrs => $GetHTMLAttrsChecker->({
1932 wakaba 1.1 cite => $HTMLURIAttrChecker,
1933     datetime => $HTMLDatetimeAttrChecker,
1934     }),
1935     };
1936    
1937     $Element->{$HTML_NS}->{del} = {
1938 wakaba 1.40 %HTMLTransparentChecker,
1939     check_attrs => $GetHTMLAttrsChecker->({
1940 wakaba 1.1 cite => $HTMLURIAttrChecker,
1941     datetime => $HTMLDatetimeAttrChecker,
1942     }),
1943 wakaba 1.40 check_end => sub {
1944     my ($self, $item, $element_state) = @_;
1945     if ($element_state->{has_significant}) {
1946     ## NOTE: Significantness flag does not propagate.
1947     } elsif ($item->{transparent}) {
1948     #
1949     } else {
1950     $self->{onerror}->(node => $item->{node},
1951     level => $self->{should_level},
1952     type => 'no significant content');
1953     }
1954 wakaba 1.1 },
1955     };
1956    
1957 wakaba 1.35 $Element->{$HTML_NS}->{figure} = {
1958 wakaba 1.40 %HTMLProseContentChecker,
1959 wakaba 1.41 ## NOTE: legend, Prose | Prose, legend
1960     check_child_element => sub {
1961     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1962     $child_is_transparent, $element_state) = @_;
1963     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
1964     $self->{onerror}->(node => $child_el,
1965     type => 'element not allowed:minus',
1966     level => $self->{must_level});
1967     $element_state->{has_non_legend} = 1;
1968     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1969     #
1970     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'legend') {
1971     if ($element_state->{has_legend_at_first}) {
1972     $self->{onerror}->(node => $child_el,
1973     type => 'element not allowed:figure legend',
1974     level => $self->{must_level});
1975     } elsif ($element_state->{has_legend}) {
1976     $self->{onerror}->(node => $element_state->{has_legend},
1977     type => 'element not allowed:figure legend',
1978     level => $self->{must_level});
1979     $element_state->{has_legend} = $child_el;
1980     } elsif ($element_state->{has_non_legend}) {
1981     $element_state->{has_legend} = $child_el;
1982     } else {
1983     $element_state->{has_legend_at_first} = 1;
1984 wakaba 1.35 }
1985 wakaba 1.41 delete $element_state->{has_non_legend};
1986     } else {
1987     $HTMLProseContentChecker{check_child_element}->(@_);
1988     $element_state->{has_non_legend} = 1;
1989     }
1990     },
1991     check_child_text => sub {
1992     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1993     if ($has_significant) {
1994     $element_state->{has_non_legend} = 1;
1995 wakaba 1.35 }
1996 wakaba 1.41 },
1997     check_end => sub {
1998     my ($self, $item, $element_state) = @_;
1999 wakaba 1.35
2000 wakaba 1.41 if ($element_state->{has_legend_at_first}) {
2001     #
2002     } elsif ($element_state->{has_legend}) {
2003     if ($element_state->{has_non_legend}) {
2004     $self->{onerror}->(node => $element_state->{has_legend},
2005 wakaba 1.35 type => 'element not allowed:figure legend',
2006     level => $self->{must_level});
2007     }
2008     } else {
2009 wakaba 1.41 $self->{onerror}->(node => $item->{node},
2010 wakaba 1.35 type => 'element missing:legend',
2011     level => $self->{must_level});
2012     }
2013 wakaba 1.41
2014     $HTMLProseContentChecker{check_end}->(@_);
2015     ## ISSUE: |<figure><legend>aa</legend></figure>| should be an error?
2016 wakaba 1.35 },
2017     };
2018 wakaba 1.8 ## TODO: Test for <nest/> in <figure/>
2019 wakaba 1.1
2020     $Element->{$HTML_NS}->{img} = {
2021 wakaba 1.40 %HTMLEmptyChecker,
2022     check_attrs => sub {
2023     my ($self, $item, $element_state) = @_;
2024 wakaba 1.1 $GetHTMLAttrsChecker->({
2025     alt => sub { }, ## NOTE: No syntactical requirement
2026     src => $HTMLURIAttrChecker,
2027     usemap => $HTMLUsemapAttrChecker,
2028     ismap => sub {
2029 wakaba 1.40 my ($self, $attr, $parent_item) = @_;
2030     if (not $self->{flag}->{in_a_href}) {
2031 wakaba 1.15 $self->{onerror}->(node => $attr,
2032     type => 'attribute not allowed:ismap');
2033 wakaba 1.1 }
2034 wakaba 1.40 $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_item);
2035 wakaba 1.1 },
2036     ## TODO: height
2037     ## TODO: width
2038 wakaba 1.40 })->($self, $item);
2039     unless ($item->{node}->has_attribute_ns (undef, 'alt')) {
2040     $self->{onerror}->(node => $item->{node},
2041 wakaba 1.37 type => 'attribute missing:alt',
2042     level => $self->{should_level});
2043 wakaba 1.1 }
2044 wakaba 1.40 unless ($item->{node}->has_attribute_ns (undef, 'src')) {
2045     $self->{onerror}->(node => $item->{node},
2046     type => 'attribute missing:src');
2047 wakaba 1.1 }
2048     },
2049     };
2050    
2051     $Element->{$HTML_NS}->{iframe} = {
2052 wakaba 1.40 %HTMLTextChecker,
2053     check_attrs => $GetHTMLAttrsChecker->({
2054 wakaba 1.1 src => $HTMLURIAttrChecker,
2055     }),
2056 wakaba 1.40 };
2057    
2058 wakaba 1.1 $Element->{$HTML_NS}->{embed} = {
2059 wakaba 1.40 %HTMLEmptyChecker,
2060     check_attrs => sub {
2061     my ($self, $item, $element_state) = @_;
2062 wakaba 1.1 my $has_src;
2063 wakaba 1.40 for my $attr (@{$item->{node}->attributes}) {
2064 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
2065     $attr_ns = '' unless defined $attr_ns;
2066     my $attr_ln = $attr->manakai_local_name;
2067     my $checker;
2068     if ($attr_ns eq '') {
2069     if ($attr_ln eq 'src') {
2070     $checker = $HTMLURIAttrChecker;
2071     $has_src = 1;
2072     } elsif ($attr_ln eq 'type') {
2073     $checker = $HTMLIMTAttrChecker;
2074     } else {
2075     ## TODO: height
2076     ## TODO: width
2077     $checker = $HTMLAttrChecker->{$attr_ln}
2078     || sub { }; ## NOTE: Any local attribute is ok.
2079     }
2080     }
2081     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2082     || $AttrChecker->{$attr_ns}->{''};
2083     if ($checker) {
2084     $checker->($self, $attr);
2085     } else {
2086     $self->{onerror}->(node => $attr, level => 'unsupported',
2087     type => 'attribute');
2088     ## ISSUE: No comformance createria for global attributes in the spec
2089     }
2090     }
2091    
2092     unless ($has_src) {
2093 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2094 wakaba 1.1 type => 'attribute missing:src');
2095     }
2096     },
2097     };
2098    
2099     $Element->{$HTML_NS}->{object} = {
2100 wakaba 1.40 %HTMLTransparentChecker,
2101     check_attrs => sub {
2102     my ($self, $item, $element_state) = @_;
2103 wakaba 1.1 $GetHTMLAttrsChecker->({
2104     data => $HTMLURIAttrChecker,
2105     type => $HTMLIMTAttrChecker,
2106     usemap => $HTMLUsemapAttrChecker,
2107     ## TODO: width
2108     ## TODO: height
2109 wakaba 1.40 })->($self, $item);
2110     unless ($item->{node}->has_attribute_ns (undef, 'data')) {
2111     unless ($item->{node}->has_attribute_ns (undef, 'type')) {
2112     $self->{onerror}->(node => $item->{node},
2113 wakaba 1.1 type => 'attribute missing:data|type');
2114     }
2115     }
2116     },
2117 wakaba 1.41 ## NOTE: param*, transparent (Prose)
2118     check_child_element => sub {
2119     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2120     $child_is_transparent, $element_state) = @_;
2121     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
2122     $self->{onerror}->(node => $child_el,
2123     type => 'element not allowed:minus',
2124     level => $self->{must_level});
2125     $element_state->{has_non_legend} = 1;
2126     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2127     #
2128     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'param') {
2129     if ($element_state->{has_non_param}) {
2130     $self->{onerror}->(node => $child_el,
2131     type => 'element not allowed:prose',
2132     level => $self->{must_level});
2133 wakaba 1.39 }
2134 wakaba 1.41 } else {
2135     $HTMLProseContentChecker{check_child_element}->(@_);
2136     $element_state->{has_non_param} = 1;
2137 wakaba 1.39 }
2138 wakaba 1.25 },
2139 wakaba 1.41 check_child_text => sub {
2140     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2141     if ($has_significant) {
2142     $element_state->{has_non_param} = 1;
2143     }
2144     },
2145 wakaba 1.8 ## TODO: Tests for <nest/> in <object/>
2146 wakaba 1.1 };
2147 wakaba 1.41 ## ISSUE: Is |<menu><object data><li>aa</li></object></menu>| conforming?
2148     ## What about |<section><object data><style scoped></style>x</object></section>|?
2149     ## |<section><ins></ins><object data><style scoped></style>x</object></section>|?
2150 wakaba 1.1
2151     $Element->{$HTML_NS}->{param} = {
2152 wakaba 1.40 %HTMLEmptyChecker,
2153     check_attrs => sub {
2154     my ($self, $item, $element_state) = @_;
2155 wakaba 1.1 $GetHTMLAttrsChecker->({
2156     name => sub { },
2157     value => sub { },
2158 wakaba 1.40 })->($self, $item);
2159     unless ($item->{node}->has_attribute_ns (undef, 'name')) {
2160     $self->{onerror}->(node => $item->{node},
2161 wakaba 1.1 type => 'attribute missing:name');
2162     }
2163 wakaba 1.40 unless ($item->{node}->has_attribute_ns (undef, 'value')) {
2164     $self->{onerror}->(node => $item->{node},
2165 wakaba 1.1 type => 'attribute missing:value');
2166     }
2167     },
2168     };
2169    
2170     $Element->{$HTML_NS}->{video} = {
2171 wakaba 1.40 %HTMLTransparentChecker,
2172     check_attrs => $GetHTMLAttrsChecker->({
2173 wakaba 1.1 src => $HTMLURIAttrChecker,
2174     ## TODO: start, loopstart, loopend, end
2175     ## ISSUE: they MUST be "value time offset"s. Value?
2176 wakaba 1.11 ## ISSUE: playcount has no conformance creteria
2177 wakaba 1.1 autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
2178     controls => $GetHTMLBooleanAttrChecker->('controls'),
2179 wakaba 1.11 poster => $HTMLURIAttrChecker, ## TODO: not for audio!
2180     ## TODO: width, height (not for audio!)
2181 wakaba 1.1 }),
2182 wakaba 1.40
2183     ## TODO: reimplement
2184 wakaba 1.1 checker => sub {
2185     my ($self, $todo) = @_;
2186 wakaba 1.25 $todo->{flag}->{has_descendant}->{significant} = 1;
2187 wakaba 1.1
2188 wakaba 1.29 ## TODO:
2189 wakaba 1.1 if ($todo->{node}->has_attribute_ns (undef, 'src')) {
2190 wakaba 1.40 # return $HTMLProseContentChecker->($self, $todo);
2191 wakaba 1.1 } else {
2192 wakaba 1.40 # return $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'source')
2193     # ->($self, $todo);
2194 wakaba 1.1 }
2195     },
2196     };
2197    
2198     $Element->{$HTML_NS}->{audio} = {
2199 wakaba 1.40 %{$Element->{$HTML_NS}->{video}},
2200     ## TODO: Is there audio-only attribute?
2201 wakaba 1.1 };
2202    
2203     $Element->{$HTML_NS}->{source} = {
2204 wakaba 1.40 %HTMLEmptyChecker,
2205     check_attrs => sub {
2206     my ($self, $item, $element_state) = @_;
2207 wakaba 1.1 $GetHTMLAttrsChecker->({
2208     src => $HTMLURIAttrChecker,
2209     type => $HTMLIMTAttrChecker,
2210     media => $HTMLMQAttrChecker,
2211 wakaba 1.40 })->($self, $item, $element_state);
2212     unless ($item->{node}->has_attribute_ns (undef, 'src')) {
2213     $self->{onerror}->(node => $item->{node},
2214 wakaba 1.1 type => 'attribute missing:src');
2215     }
2216     },
2217     };
2218    
2219     $Element->{$HTML_NS}->{canvas} = {
2220 wakaba 1.40 %HTMLTransparentChecker,
2221     check_attrs => $GetHTMLAttrsChecker->({
2222 wakaba 1.1 height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2223     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2224     }),
2225     };
2226    
2227     $Element->{$HTML_NS}->{map} = {
2228 wakaba 1.40 %HTMLProseContentChecker,
2229     check_attrs => sub {
2230     my ($self, $item, $element_state) = @_;
2231 wakaba 1.4 my $has_id;
2232     $GetHTMLAttrsChecker->({
2233     id => sub {
2234     ## NOTE: same as global |id=""|, with |$self->{map}| registeration
2235     my ($self, $attr) = @_;
2236     my $value = $attr->value;
2237     if (length $value > 0) {
2238     if ($self->{id}->{$value}) {
2239     $self->{onerror}->(node => $attr, type => 'duplicate ID');
2240     push @{$self->{id}->{$value}}, $attr;
2241     } else {
2242     $self->{id}->{$value} = [$attr];
2243     }
2244 wakaba 1.1 } else {
2245 wakaba 1.4 ## NOTE: MUST contain at least one character
2246     $self->{onerror}->(node => $attr, type => 'empty attribute value');
2247 wakaba 1.1 }
2248 wakaba 1.4 if ($value =~ /[\x09-\x0D\x20]/) {
2249     $self->{onerror}->(node => $attr, type => 'space in ID');
2250     }
2251     $self->{map}->{$value} ||= $attr;
2252     $has_id = 1;
2253     },
2254 wakaba 1.40 })->($self, $item, $element_state);
2255     $self->{onerror}->(node => $item->{node}, type => 'attribute missing:id')
2256 wakaba 1.4 unless $has_id;
2257     },
2258 wakaba 1.1 };
2259    
2260     $Element->{$HTML_NS}->{area} = {
2261 wakaba 1.40 %HTMLEmptyChecker,
2262     check_attrs => sub {
2263     my ($self, $item, $element_state) = @_;
2264 wakaba 1.1 my %attr;
2265     my $coords;
2266 wakaba 1.40 for my $attr (@{$item->{node}->attributes}) {
2267 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
2268     $attr_ns = '' unless defined $attr_ns;
2269     my $attr_ln = $attr->manakai_local_name;
2270     my $checker;
2271     if ($attr_ns eq '') {
2272     $checker = {
2273     alt => sub { },
2274     ## NOTE: |alt| value has no conformance creteria.
2275     shape => $GetHTMLEnumeratedAttrChecker->({
2276     circ => -1, circle => 1,
2277     default => 1,
2278     poly => 1, polygon => -1,
2279     rect => 1, rectangle => -1,
2280     }),
2281     coords => sub {
2282     my ($self, $attr) = @_;
2283     my $value = $attr->value;
2284     if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
2285     $coords = [split /,/, $value];
2286     } else {
2287     $self->{onerror}->(node => $attr,
2288     type => 'coords:syntax error');
2289     }
2290     },
2291     target => $HTMLTargetAttrChecker,
2292     href => $HTMLURIAttrChecker,
2293     ping => $HTMLSpaceURIsAttrChecker,
2294 wakaba 1.40 rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
2295 wakaba 1.1 media => $HTMLMQAttrChecker,
2296     hreflang => $HTMLLanguageTagAttrChecker,
2297     type => $HTMLIMTAttrChecker,
2298     }->{$attr_ln};
2299     if ($checker) {
2300     $attr{$attr_ln} = $attr;
2301     } else {
2302     $checker = $HTMLAttrChecker->{$attr_ln};
2303     }
2304     }
2305     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2306     || $AttrChecker->{$attr_ns}->{''};
2307     if ($checker) {
2308     $checker->($self, $attr) if ref $checker;
2309     } else {
2310     $self->{onerror}->(node => $attr, level => 'unsupported',
2311     type => 'attribute');
2312     ## ISSUE: No comformance createria for unknown attributes in the spec
2313     }
2314     }
2315    
2316     if (defined $attr{href}) {
2317 wakaba 1.4 $self->{has_hyperlink_element} = 1;
2318 wakaba 1.1 unless (defined $attr{alt}) {
2319 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2320 wakaba 1.1 type => 'attribute missing:alt');
2321     }
2322     } else {
2323     for (qw/target ping rel media hreflang type alt/) {
2324     if (defined $attr{$_}) {
2325     $self->{onerror}->(node => $attr{$_},
2326     type => 'attribute not allowed');
2327     }
2328     }
2329     }
2330    
2331     my $shape = 'rectangle';
2332     if (defined $attr{shape}) {
2333     $shape = {
2334     circ => 'circle', circle => 'circle',
2335     default => 'default',
2336     poly => 'polygon', polygon => 'polygon',
2337     rect => 'rectangle', rectangle => 'rectangle',
2338     }->{lc $attr{shape}->value} || 'rectangle';
2339     ## TODO: ASCII lowercase?
2340     }
2341    
2342     if ($shape eq 'circle') {
2343     if (defined $attr{coords}) {
2344     if (defined $coords) {
2345     if (@$coords == 3) {
2346     if ($coords->[2] < 0) {
2347     $self->{onerror}->(node => $attr{coords},
2348     type => 'coords:out of range:2');
2349     }
2350     } else {
2351     $self->{onerror}->(node => $attr{coords},
2352     type => 'coords:number:3:'.@$coords);
2353     }
2354     } else {
2355     ## NOTE: A syntax error has been reported.
2356     }
2357     } else {
2358 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2359 wakaba 1.1 type => 'attribute missing:coords');
2360     }
2361     } elsif ($shape eq 'default') {
2362     if (defined $attr{coords}) {
2363     $self->{onerror}->(node => $attr{coords},
2364     type => 'attribute not allowed');
2365     }
2366     } elsif ($shape eq 'polygon') {
2367     if (defined $attr{coords}) {
2368     if (defined $coords) {
2369     if (@$coords >= 6) {
2370     unless (@$coords % 2 == 0) {
2371     $self->{onerror}->(node => $attr{coords},
2372     type => 'coords:number:even:'.@$coords);
2373     }
2374     } else {
2375     $self->{onerror}->(node => $attr{coords},
2376     type => 'coords:number:>=6:'.@$coords);
2377     }
2378     } else {
2379     ## NOTE: A syntax error has been reported.
2380     }
2381     } else {
2382 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2383 wakaba 1.1 type => 'attribute missing:coords');
2384     }
2385     } elsif ($shape eq 'rectangle') {
2386     if (defined $attr{coords}) {
2387     if (defined $coords) {
2388     if (@$coords == 4) {
2389     unless ($coords->[0] < $coords->[2]) {
2390     $self->{onerror}->(node => $attr{coords},
2391     type => 'coords:out of range:0');
2392     }
2393     unless ($coords->[1] < $coords->[3]) {
2394     $self->{onerror}->(node => $attr{coords},
2395     type => 'coords:out of range:1');
2396     }
2397     } else {
2398     $self->{onerror}->(node => $attr{coords},
2399     type => 'coords:number:4:'.@$coords);
2400     }
2401     } else {
2402     ## NOTE: A syntax error has been reported.
2403     }
2404     } else {
2405 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2406 wakaba 1.1 type => 'attribute missing:coords');
2407     }
2408     }
2409     },
2410     };
2411     ## TODO: only in map
2412    
2413     $Element->{$HTML_NS}->{table} = {
2414 wakaba 1.40 %HTMLChecker,
2415     check_start => sub {
2416     my ($self, $item, $element_state) = @_;
2417     $element_state->{phase} = 'before caption';
2418     },
2419     check_child_element => sub {
2420     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2421     $child_is_transparent, $element_state) = @_;
2422     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
2423     $self->{onerror}->(node => $child_el,
2424     type => 'element not allowed:minus',
2425     level => $self->{must_level});
2426     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2427     #
2428     } elsif ($element_state->{phase} eq 'in tbodys') {
2429     if ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
2430     #$element_state->{phase} = 'in tbodys';
2431     } elsif (not $element_state->{has_tfoot} and
2432     $child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
2433     $element_state->{phase} = 'after tfoot';
2434     $element_state->{has_tfoot} = 1;
2435     } else {
2436     $self->{onerror}->(node => $child_el, type => 'element not allowed');
2437     }
2438     } elsif ($element_state->{phase} eq 'in trs') {
2439     if ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
2440     #$element_state->{phase} = 'in trs';
2441     } elsif (not $element_state->{has_tfoot} and
2442     $child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
2443     $element_state->{phase} = 'after tfoot';
2444     $element_state->{has_tfoot} = 1;
2445     } else {
2446     $self->{onerror}->(node => $child_el, type => 'element not allowed');
2447     }
2448     } elsif ($element_state->{phase} eq 'after thead') {
2449     if ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
2450     $element_state->{phase} = 'in tbodys';
2451     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
2452     $element_state->{phase} = 'in trs';
2453     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
2454     $element_state->{phase} = 'in tbodys';
2455     $element_state->{has_tfoot} = 1;
2456     } else {
2457     $self->{onerror}->(node => $child_el, type => 'element not allowed');
2458     }
2459     } elsif ($element_state->{phase} eq 'in colgroup') {
2460     if ($child_nsuri eq $HTML_NS and $child_ln eq 'colgroup') {
2461     $element_state->{phase} = 'in colgroup';
2462     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'thead') {
2463     $element_state->{phase} = 'after thead';
2464     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
2465     $element_state->{phase} = 'in tbodys';
2466     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
2467     $element_state->{phase} = 'in trs';
2468     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
2469     $element_state->{phase} = 'in tbodys';
2470     $element_state->{has_tfoot} = 1;
2471     } else {
2472     $self->{onerror}->(node => $child_el, type => 'element not allowed');
2473     }
2474     } elsif ($element_state->{phase} eq 'before caption') {
2475     if ($child_nsuri eq $HTML_NS and $child_ln eq 'caption') {
2476     $element_state->{phase} = 'in colgroup';
2477     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'colgroup') {
2478     $element_state->{phase} = 'in colgroup';
2479     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'thead') {
2480     $element_state->{phase} = 'after thead';
2481     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
2482     $element_state->{phase} = 'in tbodys';
2483     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
2484     $element_state->{phase} = 'in trs';
2485     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
2486     $element_state->{phase} = 'in tbodys';
2487     $element_state->{has_tfoot} = 1;
2488     } else {
2489     $self->{onerror}->(node => $child_el, type => 'element not allowed');
2490     }
2491     } elsif ($element_state->{phase} eq 'after tfoot') {
2492     $self->{onerror}->(node => $child_el, type => 'element not allowed');
2493     } else {
2494     die "check_child_element: Bad |table| phase: $element_state->{phase}";
2495     }
2496     },
2497     check_child_text => sub {
2498     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2499     if ($has_significant) {
2500     $self->{onerror}->(node => $child_node, type => 'character not allowed');
2501 wakaba 1.1 }
2502 wakaba 1.40 },
2503     check_end => sub {
2504     my ($self, $item, $element_state) = @_;
2505 wakaba 1.1
2506     ## Table model errors
2507     require Whatpm::HTMLTable;
2508 wakaba 1.40 Whatpm::HTMLTable->form_table ($item->{node}, sub {
2509 wakaba 1.1 my %opt = @_;
2510     $self->{onerror}->(type => 'table:'.$opt{type}, node => $opt{node});
2511     });
2512 wakaba 1.40 push @{$self->{return}->{table}}, $item->{node};
2513 wakaba 1.1
2514 wakaba 1.40 $HTMLChecker{check_end}->(@_);
2515 wakaba 1.1 },
2516     };
2517    
2518     $Element->{$HTML_NS}->{caption} = {
2519 wakaba 1.40 %HTMLPhrasingContentChecker,
2520 wakaba 1.1 };
2521    
2522     $Element->{$HTML_NS}->{colgroup} = {
2523 wakaba 1.40 %HTMLEmptyChecker,
2524     check_attrs => $GetHTMLAttrsChecker->({
2525 wakaba 1.1 span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2526     ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
2527     ## TODO: "attribute not supported" if |col|.
2528     ## ISSUE: MUST NOT if any |col|?
2529     ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
2530     }),
2531 wakaba 1.40 check_child_element => sub {
2532     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2533     $child_is_transparent, $element_state) = @_;
2534     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
2535     $self->{onerror}->(node => $child_el,
2536     type => 'element not allowed:minus',
2537     level => $self->{must_level});
2538     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2539     #
2540     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'col') {
2541     #
2542     } else {
2543     $self->{onerror}->(node => $child_el, type => 'element not allowed');
2544     }
2545     },
2546     check_child_text => sub {
2547     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2548     if ($has_significant) {
2549     $self->{onerror}->(node => $child_node, type => 'character not allowed');
2550 wakaba 1.1 }
2551     },
2552     };
2553    
2554     $Element->{$HTML_NS}->{col} = {
2555 wakaba 1.40 %HTMLEmptyChecker,
2556     check_attrs => $GetHTMLAttrsChecker->({
2557 wakaba 1.1 span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2558     }),
2559     };
2560    
2561     $Element->{$HTML_NS}->{tbody} = {
2562 wakaba 1.40 %HTMLChecker,
2563     check_child_element => sub {
2564     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2565     $child_is_transparent, $element_state) = @_;
2566     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
2567     $self->{onerror}->(node => $child_el,
2568     type => 'element not allowed:minus',
2569     level => $self->{must_level});
2570     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2571     #
2572     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
2573     $element_state->{has_tr} = 1;
2574     } else {
2575     $self->{onerror}->(node => $child_el, type => 'element not allowed');
2576     }
2577     },
2578     check_child_text => sub {
2579     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2580     if ($has_significant) {
2581     $self->{onerror}->(node => $child_node, type => 'character not allowed');
2582 wakaba 1.1 }
2583 wakaba 1.40 },
2584     check_end => sub {
2585     my ($self, $item, $element_state) = @_;
2586     unless ($element_state->{has_tr}) {
2587     $self->{onerror}->(node => $item->{node},
2588     type => 'child element missing:tr');
2589 wakaba 1.1 }
2590 wakaba 1.40
2591     $HTMLChecker{check_end}->(@_);
2592 wakaba 1.1 },
2593     };
2594    
2595     $Element->{$HTML_NS}->{thead} = {
2596 wakaba 1.40 %{$Element->{$HTML_NS}->{tbody}},
2597 wakaba 1.1 };
2598    
2599     $Element->{$HTML_NS}->{tfoot} = {
2600 wakaba 1.40 %{$Element->{$HTML_NS}->{tbody}},
2601 wakaba 1.1 };
2602    
2603     $Element->{$HTML_NS}->{tr} = {
2604 wakaba 1.40 %HTMLChecker,
2605     check_child_element => sub {
2606     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2607     $child_is_transparent, $element_state) = @_;
2608     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
2609     $self->{onerror}->(node => $child_el,
2610     type => 'element not allowed:minus',
2611     level => $self->{must_level});
2612     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2613     #
2614     } elsif ($child_nsuri eq $HTML_NS and
2615     ($child_ln eq 'td' or $child_ln eq 'th')) {
2616     $element_state->{has_cell} = 1;
2617     } else {
2618     $self->{onerror}->(node => $child_el, type => 'element not allowed');
2619     }
2620     },
2621     check_child_text => sub {
2622     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2623     if ($has_significant) {
2624     $self->{onerror}->(node => $child_node, type => 'character not allowed');
2625 wakaba 1.1 }
2626 wakaba 1.40 },
2627     check_end => sub {
2628     my ($self, $item, $element_state) = @_;
2629     unless ($element_state->{has_cell}) {
2630     $self->{onerror}->(node => $item->{node},
2631     type => 'child element missing:td|th');
2632 wakaba 1.1 }
2633 wakaba 1.40
2634     $HTMLChecker{check_end}->(@_);
2635 wakaba 1.1 },
2636     };
2637    
2638     $Element->{$HTML_NS}->{td} = {
2639 wakaba 1.40 %HTMLProseContentChecker,
2640     check_attrs => $GetHTMLAttrsChecker->({
2641 wakaba 1.1 colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2642     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2643     }),
2644     };
2645    
2646     $Element->{$HTML_NS}->{th} = {
2647 wakaba 1.40 %HTMLPhrasingContentChecker,
2648     check_attrs => $GetHTMLAttrsChecker->({
2649 wakaba 1.1 colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2650     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2651     scope => $GetHTMLEnumeratedAttrChecker
2652     ->({row => 1, col => 1, rowgroup => 1, colgroup => 1}),
2653     }),
2654     };
2655    
2656     ## TODO: forms
2657 wakaba 1.8 ## TODO: Tests for <nest/> in form elements
2658 wakaba 1.1
2659     $Element->{$HTML_NS}->{script} = {
2660 wakaba 1.40 %HTMLChecker,
2661     check_attrs => $GetHTMLAttrsChecker->({
2662 wakaba 1.1 src => $HTMLURIAttrChecker,
2663     defer => $GetHTMLBooleanAttrChecker->('defer'),
2664     async => $GetHTMLBooleanAttrChecker->('async'),
2665     type => $HTMLIMTAttrChecker,
2666 wakaba 1.9 }),
2667 wakaba 1.40 check_start => sub {
2668     my ($self, $item, $element_state) = @_;
2669 wakaba 1.1
2670 wakaba 1.40 if ($item->{node}->has_attribute_ns (undef, 'src')) {
2671     $element_state->{must_be_empty} = 1;
2672 wakaba 1.1 } else {
2673     ## NOTE: No content model conformance in HTML5 spec.
2674 wakaba 1.40 my $type = $item->{node}->get_attribute_ns (undef, 'type');
2675     my $language = $item->{node}->get_attribute_ns (undef, 'language');
2676 wakaba 1.1 if ((defined $type and $type eq '') or
2677     (defined $language and $language eq '')) {
2678     $type = 'text/javascript';
2679     } elsif (defined $type) {
2680     #
2681     } elsif (defined $language) {
2682     $type = 'text/' . $language;
2683     } else {
2684     $type = 'text/javascript';
2685     }
2686 wakaba 1.40 $element_state->{script_type} = $type; ## TODO: $type normalization
2687     }
2688     },
2689     check_child_element => sub {
2690     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2691     $child_is_transparent, $element_state) = @_;
2692     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
2693     $self->{onerror}->(node => $child_el,
2694     type => 'element not allowed:minus',
2695     level => $self->{must_level});
2696     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2697     #
2698     } else {
2699     if ($element_state->{must_be_empty}) {
2700     $self->{onerror}->(node => $child_el,
2701     type => 'element not allowed');
2702     }
2703     }
2704     },
2705     check_child_text => sub {
2706     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2707     if ($has_significant and
2708     $element_state->{must_be_empty}) {
2709     $self->{onerror}->(node => $child_node,
2710     type => 'character not allowed');
2711     }
2712     },
2713     check_end => sub {
2714     my ($self, $item, $element_state) = @_;
2715     unless ($element_state->{must_be_empty}) {
2716     $self->{onerror}->(node => $item->{node}, level => 'unsupported',
2717     type => 'script:'.$element_state->{script_type});
2718     ## TODO: text/javascript support
2719    
2720     $HTMLChecker{check_end}->(@_);
2721 wakaba 1.1 }
2722     },
2723     };
2724 wakaba 1.25 ## ISSUE: Significant check and text child node
2725 wakaba 1.1
2726     ## NOTE: When script is disabled.
2727     $Element->{$HTML_NS}->{noscript} = {
2728 wakaba 1.40 %HTMLTransparentChecker,
2729     check_start => sub {
2730     my ($self, $item, $element_state) = @_;
2731 wakaba 1.3
2732 wakaba 1.40 unless ($item->{node}->owner_document->manakai_is_html) {
2733     $self->{onerror}->(node => $item->{node}, type => 'in XML:noscript');
2734 wakaba 1.3 }
2735    
2736 wakaba 1.40 unless ($self->{flag}->{in_head}) {
2737     $self->_add_minus_elements ($element_state,
2738     {$HTML_NS => {noscript => 1}});
2739     }
2740 wakaba 1.3 },
2741 wakaba 1.40 check_child_element => sub {
2742     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2743     $child_is_transparent, $element_state) = @_;
2744     if ($self->{flag}->{in_head}) {
2745     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
2746     $self->{onerror}->(node => $child_el,
2747     type => 'element not allowed:minus',
2748     level => $self->{must_level});
2749     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2750     #
2751     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'link') {
2752     #
2753     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
2754     if ($child_el->has_attribute_ns (undef, 'scoped')) {
2755     $self->{onerror}->(node => $child_el,
2756     type => 'element not allowed:head noscript',
2757     level => $self->{must_level});
2758     }
2759     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'meta') {
2760     if ($child_el->has_attribute_ns (undef, 'charset')) {
2761     ## NOTE: Non-conforming. An error is raised by
2762     ## |meta|'s checker.
2763     } else {
2764     my $http_equiv_attr
2765     = $child_el->get_attribute_node_ns (undef, 'http-equiv');
2766     if ($http_equiv_attr) {
2767     ## TODO: case
2768     if (lc $http_equiv_attr->value eq 'content-type') {
2769     ## NOTE: Non-conforming. An error is raised by
2770     ## |meta|'s checker.
2771     } else {
2772 wakaba 1.3 #
2773     }
2774     } else {
2775 wakaba 1.40 $self->{onerror}->(node => $child_el,
2776 wakaba 1.34 type => 'element not allowed:head noscript',
2777     level => $self->{must_level});
2778 wakaba 1.3 }
2779     }
2780 wakaba 1.40 } else {
2781     $self->{onerror}->(node => $child_el,
2782     type => 'element not allowed:head noscript',
2783     level => $self->{must_level});
2784     }
2785     } else {
2786     $HTMLTransparentChecker{check_child_element}->(@_);
2787     }
2788     },
2789     check_child_text => sub {
2790     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2791     if ($self->{flag}->{in_head}) {
2792     if ($has_significant) {
2793     $self->{onerror}->(node => $child_node,
2794     type => 'character not allowed');
2795 wakaba 1.3 }
2796     } else {
2797 wakaba 1.40 $HTMLTransparentChecker{check_child_text}->(@_);
2798     }
2799     },
2800     check_end => sub {
2801     my ($self, $item, $element_state) = @_;
2802     $self->_remove_minus_elements ($element_state);
2803     if ($self->{flag}->{in_head}) {
2804     $HTMLChecker{check_end}->(@_);
2805     } else {
2806     $HTMLPhrasingContentChecker{check_end}->(@_);
2807 wakaba 1.3 }
2808 wakaba 1.1 },
2809     };
2810 wakaba 1.3 ## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
2811 wakaba 1.1
2812     $Element->{$HTML_NS}->{'event-source'} = {
2813 wakaba 1.40 %HTMLEmptyChecker,
2814     check_attrs => $GetHTMLAttrsChecker->({
2815 wakaba 1.1 src => $HTMLURIAttrChecker,
2816     }),
2817     };
2818    
2819     $Element->{$HTML_NS}->{details} = {
2820 wakaba 1.40 %HTMLProseContentChecker,
2821     check_attrs => $GetHTMLAttrsChecker->({
2822 wakaba 1.1 open => $GetHTMLBooleanAttrChecker->('open'),
2823     }),
2824     checker => sub {
2825     my ($self, $todo) = @_;
2826    
2827 wakaba 1.29 ## TODO:
2828 wakaba 1.40 # my ($sib, $ch)
2829     # = $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'legend')
2830     # ->($self, $todo);
2831     # return ($sib, $ch);
2832 wakaba 1.1 },
2833     };
2834    
2835     $Element->{$HTML_NS}->{datagrid} = {
2836 wakaba 1.40 %HTMLProseContentChecker,
2837     check_attrs => $GetHTMLAttrsChecker->({
2838 wakaba 1.1 disabled => $GetHTMLBooleanAttrChecker->('disabled'),
2839     multiple => $GetHTMLBooleanAttrChecker->('multiple'),
2840     }),
2841 wakaba 1.40 check_start => sub {
2842     my ($self, $item, $element_state) = @_;
2843 wakaba 1.1
2844 wakaba 1.40 $self->_add_minus_elements ($element_state,
2845     {$HTML_NS => {a => 1, datagrid => 1}});
2846     $element_state->{phase} = 'any';
2847     },
2848     ## Prose -(text* table Prose*) | table | select | datalist | Empty
2849     check_child_element => sub {
2850     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2851     $child_is_transparent, $element_state) = @_;
2852     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
2853     $self->{onerror}->(node => $child_el,
2854     type => 'element not allowed:minus',
2855     level => $self->{must_level});
2856     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2857     #
2858     } elsif ($element_state->{phase} eq 'prose') {
2859     if ($HTMLProseContent->{$child_nsuri}->{$child_ln}) {
2860     if ($element_state->{has_element} and
2861     $child_nsuri eq $HTML_NS and
2862     $child_ln eq 'table') {
2863     $self->{onerror}->(node => $child_el,
2864     type => 'element not allowed');
2865     } else {
2866 wakaba 1.8 #
2867 wakaba 1.1 }
2868 wakaba 1.40 } else {
2869     $self->{onerror}->(node => $child_el,
2870     type => 'element not allowed');
2871     }
2872     } elsif ($element_state->{phase} eq 'any') {
2873     if ($child_nsuri eq $HTML_NS and
2874     {table => 1, select => 1, datalist => 1}->{$child_ln}) {
2875     $element_state->{phase} = 'none';
2876     } elsif ($HTMLProseContent->{$child_nsuri}->{$child_ln}) {
2877     $element_state->{has_element} = 1;
2878     $element_state->{phase} = 'prose';
2879     } else {
2880     $self->{onerror}->(node => $child_el,
2881     type => 'element not allowed');
2882     }
2883     } elsif ($element_state->{phase} eq 'none') {
2884     $self->{onerror}->(node => $child_el,
2885     type => 'element not allowed');
2886     } else {
2887     die "check_child_element: Bad |datagrid| phase: $element_state->{phase}";
2888     }
2889     },
2890     check_child_text => sub {
2891     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2892     if ($has_significant) {
2893     if ($element_state->{phase} eq 'prose') {
2894     #
2895     } elsif ($element_state->{phase} eq 'any') {
2896     $element_state->{phase} = 'prose';
2897     } else {
2898     $self->{onerror}->(node => $child_node,
2899     type => 'character not allowed');
2900 wakaba 1.1 }
2901     }
2902 wakaba 1.40 },
2903     check_end => sub {
2904     my ($self, $item, $element_state) = @_;
2905     $self->_remove_minus_elements ($element_state);
2906 wakaba 1.1
2907 wakaba 1.40 if ($element_state->{phase} eq 'none') {
2908     $HTMLChecker{check_end}->(@_);
2909     } else {
2910     $HTMLPhrasingContentChecker{check_end}->(@_);
2911     }
2912     },
2913 wakaba 1.29 ## ISSUE: "xxx<table/>" is disallowed; "<select/>aaa" and "<datalist/>aa"
2914     ## are not disallowed (assuming that form control contents are also
2915     ## prose content).
2916 wakaba 1.1 };
2917    
2918     $Element->{$HTML_NS}->{command} = {
2919 wakaba 1.40 %HTMLEmptyChecker,
2920     check_attrs => $GetHTMLAttrsChecker->({
2921 wakaba 1.1 checked => $GetHTMLBooleanAttrChecker->('checked'),
2922     default => $GetHTMLBooleanAttrChecker->('default'),
2923     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
2924     hidden => $GetHTMLBooleanAttrChecker->('hidden'),
2925     icon => $HTMLURIAttrChecker,
2926     label => sub { }, ## NOTE: No conformance creteria
2927     radiogroup => sub { }, ## NOTE: No conformance creteria
2928     ## NOTE: |title| has special semantics, but no syntactical difference
2929     type => sub {
2930     my ($self, $attr) = @_;
2931     my $value = $attr->value;
2932     unless ({command => 1, checkbox => 1, radio => 1}->{$value}) {
2933     $self->{onerror}->(node => $attr, type => 'attribute value not allowed');
2934     }
2935     },
2936     }),
2937     };
2938    
2939     $Element->{$HTML_NS}->{menu} = {
2940 wakaba 1.40 %HTMLPhrasingContentChecker,
2941     check_attrs => $GetHTMLAttrsChecker->({
2942 wakaba 1.1 autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
2943     id => sub {
2944     ## NOTE: same as global |id=""|, with |$self->{menu}| registeration
2945     my ($self, $attr) = @_;
2946     my $value = $attr->value;
2947     if (length $value > 0) {
2948     if ($self->{id}->{$value}) {
2949     $self->{onerror}->(node => $attr, type => 'duplicate ID');
2950     push @{$self->{id}->{$value}}, $attr;
2951     } else {
2952     $self->{id}->{$value} = [$attr];
2953     }
2954     } else {
2955     ## NOTE: MUST contain at least one character
2956     $self->{onerror}->(node => $attr, type => 'empty attribute value');
2957     }
2958     if ($value =~ /[\x09-\x0D\x20]/) {
2959     $self->{onerror}->(node => $attr, type => 'space in ID');
2960     }
2961     $self->{menu}->{$value} ||= $attr;
2962     ## ISSUE: <menu id=""><p contextmenu=""> match?
2963     },
2964     label => sub { }, ## NOTE: No conformance creteria
2965     type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
2966     }),
2967 wakaba 1.40 check_start => sub {
2968     my ($self, $item, $element_state) = @_;
2969     $element_state->{phase} = 'li or phrasing';
2970     $element_state->{in_menu_original} = $self->{flag}->{in_menu};
2971     $self->{flag}->{in_menu} = 1;
2972     },
2973     check_child_element => sub {
2974     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2975     $child_is_transparent, $element_state) = @_;
2976     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
2977     $self->{onerror}->(node => $child_el,
2978     type => 'element not allowed:minus',
2979     level => $self->{must_level});
2980     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2981     #
2982     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'li') {
2983     if ($element_state->{phase} eq 'li') {
2984     #
2985     } elsif ($element_state->{phase} eq 'li or phrasing') {
2986     $element_state->{phase} = 'li';
2987     } else {
2988     $self->{onerror}->(node => $child_el, type => 'element not allowed');
2989     }
2990     } elsif ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
2991     if ($element_state->{phase} eq 'phrasing') {
2992     #
2993     } elsif ($element_state->{phase} eq 'li or phrasing') {
2994     $element_state->{phase} = 'phrasing';
2995     } else {
2996     $self->{onerror}->(node => $child_el, type => 'element not allowed');
2997     }
2998     } else {
2999     $self->{onerror}->(node => $child_el, type => 'element not allowed');
3000     }
3001     },
3002     check_child_text => sub {
3003     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
3004     if ($has_significant) {
3005     if ($element_state->{phase} eq 'phrasing') {
3006     #
3007     } elsif ($element_state->{phase} eq 'li or phrasing') {
3008     $element_state->{phase} = 'phrasing';
3009     } else {
3010     $self->{onerror}->(node => $child_node,
3011     type => 'character not allowed');
3012 wakaba 1.1 }
3013     }
3014 wakaba 1.40 },
3015     check_end => sub {
3016     my ($self, $item, $element_state) = @_;
3017     delete $self->{flag}->{in_menu} unless $element_state->{in_menu_original};
3018    
3019     if ($element_state->{phase} eq 'li') {
3020     $HTMLChecker{check_end}->(@_);
3021     } else { # 'phrasing' or 'li or phrasing'
3022     $HTMLPhrasingContentChecker{check_end}->(@_);
3023 wakaba 1.1 }
3024     },
3025 wakaba 1.8 };
3026    
3027     $Element->{$HTML_NS}->{datatemplate} = {
3028 wakaba 1.40 %HTMLChecker,
3029     check_child_element => sub {
3030     my ($self, $item, $child_el, $child_nsuri, $child_ln,
3031     $child_is_transparent, $element_state) = @_;
3032     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
3033     $self->{onerror}->(node => $child_el,
3034     type => 'element not allowed:minus',
3035     level => $self->{must_level});
3036     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
3037     #
3038     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'rule') {
3039     #
3040     } else {
3041     $self->{onerror}->(node => $child_el,
3042     type => 'element not allowed:datatemplate');
3043     }
3044     },
3045     check_child_text => sub {
3046     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
3047     if ($has_significant) {
3048     $self->{onerror}->(node => $child_node, type => 'character not allowed');
3049 wakaba 1.8 }
3050     },
3051     is_xml_root => 1,
3052     };
3053    
3054     $Element->{$HTML_NS}->{rule} = {
3055 wakaba 1.40 %HTMLChecker,
3056     check_attrs => $GetHTMLAttrsChecker->({
3057 wakaba 1.23 condition => $HTMLSelectorsAttrChecker,
3058 wakaba 1.18 mode => $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker,
3059 wakaba 1.8 }),
3060 wakaba 1.40 check_start => sub {
3061     my ($self, $item, $element_state) = @_;
3062     $self->_add_plus_elements ($element_state, {$HTML_NS => {nest => 1}});
3063     },
3064     check_child_element => sub { },
3065     check_child_text => sub { },
3066     check_end => sub {
3067     my ($self, $item, $element_state) = @_;
3068     $self->_remove_plus_elements ($element_state);
3069     $HTMLChecker{check_end}->(@_);
3070 wakaba 1.8 },
3071     ## NOTE: "MAY be anything that, when the parent |datatemplate|
3072     ## is applied to some conforming data, results in a conforming DOM tree.":
3073     ## We don't check against this.
3074     };
3075    
3076     $Element->{$HTML_NS}->{nest} = {
3077 wakaba 1.40 %HTMLEmptyChecker,
3078     check_attrs => $GetHTMLAttrsChecker->({
3079 wakaba 1.23 filter => $HTMLSelectorsAttrChecker,
3080     mode => sub {
3081     my ($self, $attr) = @_;
3082     my $value = $attr->value;
3083     if ($value !~ /\A[^\x09-\x0D\x20]+\z/) {
3084     $self->{onerror}->(node => $attr, type => 'mode:syntax error');
3085     }
3086     },
3087 wakaba 1.8 }),
3088 wakaba 1.1 };
3089    
3090     $Element->{$HTML_NS}->{legend} = {
3091 wakaba 1.40 %HTMLPhrasingContentChecker,
3092 wakaba 1.1 };
3093    
3094     $Element->{$HTML_NS}->{div} = {
3095 wakaba 1.40 %HTMLProseContentChecker,
3096 wakaba 1.1 };
3097    
3098     $Element->{$HTML_NS}->{font} = {
3099 wakaba 1.40 %HTMLTransparentChecker,
3100     check_attrs => $GetHTMLAttrsChecker->({}), ## TODO
3101 wakaba 1.1 };
3102    
3103     $Whatpm::ContentChecker::Namespace->{$HTML_NS}->{loaded} = 1;
3104    
3105     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24