/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.56 - (hide annotations) (download)
Tue Feb 26 10:18:56 2008 UTC (16 years, 8 months ago) by wakaba
Branch: MAIN
Changes since 1.55: +170 -35 lines
++ whatpm/Whatpm/ContentChecker/ChangeLog	26 Feb 2008 10:18:47 -0000
	* HTML.pm: First version of Web Forms 2.0 support (it's weird,
	since the spec itself is frankensteinesque:-); note that though
	it is a weird spec from today's viewpoint, it is still a
	better-written spec than any W3C (Candidate or Proposed or full)
	Recommendation, sadly.

2008-02-26  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
6    
## Feature status constants for HTML5 and Web Forms 2.0 features.
## Each one is a bitwise OR of |Whatpm::ContentChecker| flag constants.

## HTML5 feature carrying the LC status flag; allowed.
sub FEATURE_HTML5_LC () {
  Whatpm::ContentChecker::FEATURE_ALLOWED |
  Whatpm::ContentChecker::FEATURE_STATUS_LC
}

## HTML5 feature marked "at risk"; same flag value as FEATURE_HTML5_WD.
sub FEATURE_HTML5_AT_RISK () {
  Whatpm::ContentChecker::FEATURE_ALLOWED |
  Whatpm::ContentChecker::FEATURE_STATUS_WD
}

## HTML5 feature carrying the WD status flag; allowed.
sub FEATURE_HTML5_WD () {
  Whatpm::ContentChecker::FEATURE_ALLOWED |
  Whatpm::ContentChecker::FEATURE_STATUS_WD
}

## HTML5 "FD" feature; same flag value as FEATURE_HTML5_WD.
sub FEATURE_HTML5_FD () {
  Whatpm::ContentChecker::FEATURE_ALLOWED |
  Whatpm::ContentChecker::FEATURE_STATUS_WD
}

## Default status for HTML5 features; same flag value as FEATURE_HTML5_WD.
sub FEATURE_HTML5_DEFAULT () {
  Whatpm::ContentChecker::FEATURE_ALLOWED |
  Whatpm::ContentChecker::FEATURE_STATUS_WD
}

## NOTE: Was part of HTML5, but was dropped.  (The ALLOWED flag is not set.)
sub FEATURE_HTML5_DROPPED () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD
}

## Web Forms 2.0 feature carrying the LC status flag; allowed.
sub FEATURE_WF2 () {
  Whatpm::ContentChecker::FEATURE_ALLOWED |
  Whatpm::ContentChecker::FEATURE_STATUS_LC
}

## Deprecated Web Forms 2.0 feature.  (The ALLOWED flag is not set.)
## NOTE: MUST NOT be used.
sub FEATURE_WF2_DEPRECATED () {
  Whatpm::ContentChecker::FEATURE_STATUS_LC
}
39 wakaba 1.49
## Feature status constants for XHTML Basic 1.1, XHTML m12n 1.0,
## XHTML 1.0 and HTML 4.  Each one is built from
## |Whatpm::ContentChecker| flag constants.

## NOTE: Only additions to M12N10_REC are marked.
sub FEATURE_XHTMLBASIC11_CR () {
  Whatpm::ContentChecker::FEATURE_STATUS_CR
}

## XHTML Basic 1.1 feature that additionally carries the deprecation flag.
sub FEATURE_XHTMLBASIC11_CR_DEPRECATED () {
  Whatpm::ContentChecker::FEATURE_DEPRECATED_INFO |
  Whatpm::ContentChecker::FEATURE_STATUS_CR
}

## NOTE: M12N10 status is based on its abstract module definition.
## It contains a number of problems.  (However, again, it's a REC!)
sub FEATURE_M12N10_REC () {
  ## NOTE: Oh, XHTML m12n 1.0 passed the CR phase!  W3C Process suck!
  Whatpm::ContentChecker::FEATURE_STATUS_REC
}

## XHTML m12n 1.0 feature that additionally carries the deprecation flag.
sub FEATURE_M12N10_REC_DEPRECATED () {
  Whatpm::ContentChecker::FEATURE_DEPRECATED_INFO |
  Whatpm::ContentChecker::FEATURE_STATUS_REC
}

## NOTE: XHTML10 status is based on its transitional and frameset DTDs
## (second edition).  Only missing attributes from M12N10 abstract
## definition are added.
## NOTE(review): Despite the _REC name this yields the CR status flag;
## confirm whether that is intentional.
sub FEATURE_XHTML10_REC () {
  Whatpm::ContentChecker::FEATURE_STATUS_CR
}

## NOTE: HTML4 status is based on its transitional and frameset DTDs (HTML
## 4.01).  Only missing attributes from XHTML10 are added.
## NOTE(review): Despite the _REC name this yields the WD status flag;
## confirm whether that is intentional.
sub FEATURE_HTML4_REC_RESERVED () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD
}
72    
73     ## TODO: According to HTML4 definition, authors SHOULD use style sheets
74     ## rather than presentational attributes (deprecated or not deprecated).
75 wakaba 1.48
76 wakaba 1.29 ## December 2007 HTML5 Classification
77    
## Metadata content: namespace URI => {local name => 1}.
my $HTMLMetadataContent = {
  $HTML_NS => {
    (map { ($_ => 1) } qw/
      title base link style script noscript event-source command
      datatemplate
    /),
    ## NOTE: A |meta| with no |name| element is not allowed as
    ## a metadata content other than |head| element.
    meta => 1,
    ## NOTE: Only when empty [WF2]
    form => 1,
  },
  ## NOTE: RDF is mentioned in the HTML5 spec.
  ## TODO: Other RDF elements?
  q<http://www.w3.org/1999/02/22-rdf-syntax-ns#> => {RDF => 1},
};
92    
## Prose content: namespace URI => {local name => 1}.
my $HTMLProseContent = {
  $HTML_NS => {
    (map { ($_ => 1) } qw/
      section nav article blockquote aside
      h1 h2 h3 h4 h5 h6 header
      footer address p hr dialog pre
      ol ul dl figure map table
      datatemplate
    /),
    details => 1, ## ISSUE: "Prose element" in spec.
    datagrid => 1, ## ISSUE: "Prose element" in spec.
                   ## ISSUE: "Interactive element" in the spec.
    div => 1, ## ISSUE: No category in spec.
    ## NOTE: |style| is only allowed if |scoped| attribute is specified.
    ## Additionally, it must be before any other element or
    ## non-inter-element-whitespace text node.
    style => 1,

    (map { ($_ => 1) } qw/
      br q cite em strong small mark
      dfn abbr time progress meter code
      var samp kbd sub sup span i
      b bdo script noscript event-source
      command font
      a
    /),
    ## NOTE: |area| is allowed only as a descendant of |map|.
    area => 1,

    ins => 1, del => 1,

    ## NOTE: If there is a |menu| ancestor, phrasing.  Otherwise, prose.
    menu => 1,

    (map { ($_ => 1) } qw/
      img iframe embed object video audio
      canvas
    /),
  },

  ## NOTE: Embedded
  q<http://www.w3.org/1998/Math/MathML> => {math => 1},
  q<http://www.w3.org/2000/svg> => {svg => 1},
};
131    
## Sectioning content: namespace URI => {local name => 1}.
my $HTMLSectioningContent = {
  $HTML_NS => {
    (map { ($_ => 1) } qw/section nav article blockquote aside/),
    ## NOTE: |body| is only allowed in |html| element.
    body => 1,
  },
};
139    
## Heading content: namespace URI => {local name => 1}.
my $HTMLHeadingContent = {
  $HTML_NS => {
    map { ($_ => 1) } qw/h1 h2 h3 h4 h5 h6 header/
  },
};
145    
## Phrasing content: namespace URI => {local name => 1}.
my $HTMLPhrasingContent = {
  ## NOTE: All phrasing content is also prose content.
  $HTML_NS => {
    (map { ($_ => 1) } qw/
      br q cite em strong small mark
      dfn abbr time progress meter code
      var samp kbd sub sup span i
      b bdo script noscript event-source
      command font
      a
    /),
    datagrid => 1, ## ISSUE: "Interactive element" in the spec.
    ## NOTE: |area| is allowed only as a descendant of |map|.
    area => 1,

    ## NOTE: Transparent.
    ins => 1, del => 1,

    ## NOTE: If there is a |menu| ancestor, phrasing.  Otherwise, prose.
    menu => 1,

    (map { ($_ => 1) } qw/
      img iframe embed object video audio
      canvas
    /),

    ## NOTE: WF2
    input => 1, ## NOTE: type=hidden
    datalist => 1, ## NOTE: block | where |select| allowed
  },

  ## NOTE: Embedded
  q<http://www.w3.org/1998/Math/MathML> => {math => 1},
  q<http://www.w3.org/2000/svg> => {svg => 1},

  ## NOTE: And non-inter-element-whitespace text nodes.
};
179    
180 wakaba 1.40 ## $HTMLEmbeddedContent: See Whatpm::ContentChecker.
181 wakaba 1.29
## Interactive content: namespace URI => {local name => 1}.
my $HTMLInteractiveContent = {
  $HTML_NS => {
    a => 1,
    ## ISSUE: Categorized as "Interactive element"
    datagrid => 1,
  },
};
188    
189 wakaba 1.36 ## NOTE: $HTMLTransparentElements: See Whatpm::ContentChecker.
190     ## NOTE: Semi-transparent elements: See Whatpm::ContentChecker.
191    
192     ## -- Common attribute syntax checkers
193    
194 wakaba 1.1 our $AttrChecker;
195    
## Returns a checker for an enumerated attribute.
##
## Argument: $states, a hash reference {value => conforming ? 1 : -1}
## keyed by lowercase attribute values.
##
## The returned checker takes ($self, $attr).  It reports nothing for a
## conforming value, |enumerated:non-conforming| for a value whose state
## is true but not positive, and |enumerated:invalid| for any other value.
my $GetHTMLEnumeratedAttrChecker = sub {
  my $states = shift; # {value => conforming ? 1 : -1}
  return sub {
    my ($self, $attr) = @_;
    my $value = lc $attr->value; ## TODO: ASCII case insensitivity?
    ## Unknown values have no entry; treat them as 0 so that the numeric
    ## comparison below never sees |undef|.
    my $state = $states->{$value} || 0;
    return if $state > 0;
    $self->{onerror}->(node => $attr,
                       type => $state ? 'enumerated:non-conforming'
                                      : 'enumerated:invalid');
  };
}; # $GetHTMLEnumeratedAttrChecker
210    
## Returns a checker for a boolean attribute.
##
## Argument: the attribute's local name.  The returned checker takes
## ($self, $attr) and reports |boolean:invalid| unless the value is the
## empty string or exactly equal to that local name.
my $GetHTMLBooleanAttrChecker = sub {
  my ($local_name) = @_;
  return sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    if ($value ne $local_name and $value ne '') {
      $self->{onerror}->(node => $attr, type => 'boolean:invalid');
    }
  };
}; # $GetHTMLBooleanAttrChecker
221    
## Unordered set of unique space-separated tokens.
##
## Takes ($self, $attr).  Reports |duplicate token:<token>| every time a
## token that has already been seen occurs again.
my $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker = sub {
  my ($self, $attr) = @_;
  my %seen;
  for my $token (split /[\x09-\x0D\x20]/, $attr->value) {
    next unless length $token;
    if ($seen{$token}) {
      $self->{onerror}->(node => $attr, type => 'duplicate token:'.$token);
    } else {
      $seen{$token} = 1;
    }
  }
}; # $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker
234 wakaba 1.8
## |rel| attribute (unordered set of space separated tokens,
## whose allowed values are defined by the section on link types).
##
## Arguments:
##   $a_or_area - index into each link type definition's |effect| array
##   $todo      - hash reference; |has_hyperlink_link_type| is set on it
##                when a token with the "hyperlink" effect is found
##   $self      - checker object (uses |onerror| and |has_link_type|)
##   $attr      - the attribute node being checked
my $HTMLLinkTypesAttrChecker = sub {
  my ($a_or_area, $todo, $self, $attr) = @_;

  ## Collect the distinct tokens.  A repeated token is an error, except
  ## for |up|, which may be repeated.
  my %word;
  for my $token (split /[\x09-\x0D\x20]/, $attr->value) {
    next unless length $token;
    if (not $word{$token}) {
      $word{$token} = 1;
    } elsif ($token ne 'up') {
      $self->{onerror}->(node => $attr, type => 'duplicate token:'.$token);
    }
  }

  ## NOTE: Case sensitive match (since HTML5 spec does not say link
  ## types are case-insensitive and it says "The value should not
  ## be confusingly similar to any other defined value (e.g.
  ## differing only in case).").
  ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
  ## "MAY"s and "only ... MAY" restrict non-standard non-registered
  ## values to be used conformingly.
  require Whatpm::_LinkTypeList;
  our $LinkType;
  for my $token (keys %word) {
    my $def = $LinkType->{$token};
    unless (defined $def) {
      $self->{onerror}->(node => $attr, level => 'unsupported',
                         type => 'link type:'.$token);
      next;
    }

    my $status = $def->{status};
    if ($status eq 'accepted' or $status eq 'proposal') {
      if ($status eq 'proposal') {
        $self->{onerror}->(node => $attr, level => 's',
                           type => 'link type:proposed:'.$token);
      }
      unless (defined $def->{effect}->[$a_or_area]) {
        $self->{onerror}->(node => $attr,
                           type => 'link type:bad context:'.$token);
      }
    } else { # rejected or synonym
      $self->{onerror}->(node => $attr,
                         type => 'link type:non-conforming:'.$token);
    }

    ## |alternate| is handled after the loop, since its effect depends
    ## on whether |stylesheet| is also present.
    my $effect = $def->{effect}->[$a_or_area];
    if (defined $effect and $token ne 'alternate' and
        $effect eq 'hyperlink') {
      $todo->{has_hyperlink_link_type} = 1;
    }

    if ($def->{unique}) {
      if ($self->{has_link_type}->{$token}) {
        $self->{onerror}->(node => $attr,
                           type => 'link type:duplicate:'.$token);
      } else {
        $self->{has_link_type}->{$token} = 1;
      }
    }
  }

  if ($word{alternate} and not $word{stylesheet}) {
    $todo->{has_hyperlink_link_type} = 1;
  }

  ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
  ## says that using both X-Pingback: header field and HTML
  ## <link rel=pingback> is deprecated and if both appears they
  ## SHOULD contain exactly the same value.
  ## ISSUE: Pingback 1.0 specification defines the exact representation
  ## of its link element, which cannot be tested by the current arch.
  ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
  ## include any string that matches to the pattern for the rel=pingback
  ## link, which again is impossible to test.
  ## ISSUE: rel=pingback href MUST NOT include entities other than
  ## predefined 4.

  ## NOTE: <link rel="up index"><link rel="up up index"> is not an error.
  ## NOTE: We can't check "If the page is part of multiple hierarchies,
  ## then they SHOULD be described in different paragraphs.".
}; # $HTMLLinkTypesAttrChecker
318 wakaba 1.20
319     ## TODO: "When an author uses a new type not defined by either this specification or the Wiki page, conformance checkers should offer to add the value to the Wiki, with the details described above, with the "proposal" status."
320 wakaba 1.1
## URI (or IRI).
##
## Takes ($self, $attr).  The attribute value is handed to
## |Whatpm::URIChecker->check_iri_reference|; whatever it reports is
## forwarded to |onerror| with a |URI::| type prefix.  |has_uri_attr|
## is then set on $self.
my $HTMLURIAttrChecker = sub {
  my ($self, $attr) = @_;
  ## ISSUE: Relative references are allowed?  (RFC 3987 "IRI" is an
  ## absolute reference with optional fragment identifier.)
  my $report = sub {
    my %opt = @_;
    my $suffix = defined $opt{position} ? ':'.$opt{position} : '';
    $self->{onerror}->(node => $attr, level => $opt{level},
                       type => 'URI::'.$opt{type}.$suffix);
  };
  Whatpm::URIChecker->check_iri_reference ($attr->value, $report);
  $self->{has_uri_attr} = 1; ## TODO: <html manifest>
}; # $HTMLURIAttrChecker
334    
## A space separated list of one or more URIs (or IRIs).
##
## Takes ($self, $attr).  Each whitespace-separated field of the value
## is handed to |Whatpm::URIChecker->check_iri_reference|; whatever it
## reports is forwarded to |onerror| with the field's index in the type.
## |has_uri_attr| is then set on $self.
my $HTMLSpaceURIsAttrChecker = sub {
  my ($self, $attr) = @_;
  my @uris = split /[\x09-\x0D\x20]+/, $attr->value;
  ## The callback reads this shared counter when it is invoked.
  my $i = 0;
  my $report = sub {
    my %opt = @_;
    my $suffix = defined $opt{position} ? ':'.$opt{position} : '';
    $self->{onerror}->(node => $attr, level => $opt{level},
                       type => 'URIs:'.':'.$opt{type}.':'.$i.$suffix);
  };
  for my $uri (@uris) {
    Whatpm::URIChecker->check_iri_reference ($uri, $report);
    $i++;
  }
  ## ISSUE: Relative references?
  ## ISSUE: Leading or trailing white spaces are conformant?
  ## ISSUE: A sequence of white space characters are conformant?
  ## ISSUE: A zero-length string is conformant?  (It does contain a
  ## relative reference, i.e. same as base URI.)
  ## NOTE: Duplication seems not an error.
  $self->{has_uri_attr} = 1;
}; # $HTMLSpaceURIsAttrChecker
356    
## Checks a date-and-time attribute value.
##
## Takes ($self, $attr).  A value that does not match the expected syntax
## yields |datetime:syntax error|.  Otherwise each component is range
## checked and |datetime:bad month|, |bad day|, |bad hour|, |bad minute|,
## |bad second|, |bad timezone hour| or |bad timezone minute| is reported
## as appropriate.
my $HTMLDatetimeAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;
  my $bad = sub {
    $self->{onerror}->(node => $attr, type => 'datetime:'.$_[0]);
  };

  ## ISSUE: "space", not "space character" (in parsing algorithm, "space character")
  unless ($value =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})(?>[\x09-\x0D\x20]+(?>T[\x09-\x0D\x20]*)?|T[\x09-\x0D\x20]*)([0-9]{2}):([0-9]{2})(?>:([0-9]{2}))?(?>\.([0-9]+))?[\x09-\x0D\x20]*(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
    $bad->('syntax error');
    return;
  }
  ## $s and $f are undefined when seconds or the fraction are omitted;
  ## $zh and $zm are undefined when the time zone is |Z|.
  my ($y, $M, $d, $h, $m, $s, $f, $zh, $zm)
      = ($1, $2, $3, $4, $5, $6, $7, $8, $9);

  if (0 < $M and $M < 13) { ## ISSUE: This is not explicitly specified (though in parsing algorithm)
    my $last_day = [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$M];
    my $is_leap = ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
    $last_day = 28 if $M == 2 and not $is_leap;
    $bad->('bad day') if $d < 1 or $d > $last_day;
  } else {
    $bad->('bad month');
  }
  $bad->('bad hour') if $h > 23;
  $bad->('bad minute') if $m > 59;
  $bad->('bad second') if defined $s and $s > 59;
  ## Guarded with |defined| so that a |Z| time zone does not lead to a
  ## numeric comparison against |undef|.
  $bad->('bad timezone hour') if defined $zh and $zh > 23;
  $bad->('bad timezone minute') if defined $zm and $zm > 59;
  ## ISSUE: Maybe timezone -00:00 should have same semantics as in RFC 3339.
}; # $HTMLDatetimeAttrChecker
387    
## Checks an integer attribute value.
##
## Takes ($self, $attr).  Reports |integer:syntax error| unless the value
## is an optional |-| followed by one or more ASCII digits.
my $HTMLIntegerAttrChecker = sub {
  my ($self, $attr) = @_;
  return if $attr->value =~ /\A-?[0-9]+\z/;
  $self->{onerror}->(node => $attr, type => 'integer:syntax error');
}; # $HTMLIntegerAttrChecker
395    
## Returns a checker for a non-negative integer attribute.
##
## Argument: $range_check, a code reference called with the numeric value;
## a false result means the value is out of range.
##
## The returned checker takes ($self, $attr) and reports
## |nninteger:syntax error| for a value that is not a run of ASCII
## digits, or |nninteger:out of range| when $range_check rejects it.
my $GetHTMLNonNegativeIntegerAttrChecker = sub {
  my ($range_check) = @_;
  return sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    unless ($value =~ /\A[0-9]+\z/) {
      $self->{onerror}->(node => $attr,
                         type => 'nninteger:syntax error');
      return;
    }
    return if $range_check->($value + 0);
    $self->{onerror}->(node => $attr, type => 'nninteger:out of range');
  };
}; # $GetHTMLNonNegativeIntegerAttrChecker
411    
## Returns a checker for a floating point number attribute.
##
## Argument: $range_check, a code reference called with the numeric value;
## a false result means the value is out of range.
##
## The returned checker takes ($self, $attr).  A syntactically valid value
## is an optional |-|, followed by ASCII digits with at most one |.|
## (before, between or after the digits) and at least one digit.  Anything
## else yields |float:syntax error|; a valid value rejected by
## $range_check yields |float:out of range|.
my $GetHTMLFloatingPointNumberAttrChecker = sub {
  my $range_check = shift;
  return sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    ## |tr/.//| counts the full stops: values such as "1.2.3" or "1..2"
    ## must not be accepted.
    if ($value =~ /\A-?[0-9.]+\z/ and $value =~ /[0-9]/ and
        ($value =~ tr/.//) <= 1) {
      unless ($range_check->($value + 0)) {
        $self->{onerror}->(node => $attr, type => 'float:out of range');
      }
    } else {
      $self->{onerror}->(node => $attr,
                         type => 'float:syntax error');
    }
  };
}; # $GetHTMLFloatingPointNumberAttrChecker
427    
428     ## "A valid MIME type, optionally with parameters. [RFC 2046]"
429     ## ISSUE: RFC 2046 does not define syntax of media types.
430     ## ISSUE: The definition of "a valid MIME type" is unknown.
431     ## Syntactical correctness?
## Checks an Internet media type attribute value.
##
## Takes ($self, $attr).  A value that does not match
## |type "/" subtype *(";" name "=" value)| yields |IMT:syntax error|.
## Otherwise the type, the subtype and the name/value pairs are passed to
## |Whatpm::IMTChecker->check_imt|, whose reports are forwarded to
## |onerror| with an |IMT:| type prefix.
my $HTMLIMTAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;
  ## ISSUE: RFC 2045 Content-Type header field allows insertion
  ## of LWS/comments between tokens.  Is it allowed in HTML?  Maybe no.
  ## ISSUE: RFC 2231 extension?  Maybe no.
  my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
  my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
  my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
  if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
    my @type = ($1, $2);
    my $param = $3;
    while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
      my ($name, $plain, $quoted) = ($1, $2, $3);
      if (defined $plain) {
        push @type, $name => $plain;
      } else {
        ## The value is a quoted-string, captured in $3 together with its
        ## quotation marks: drop the marks, then undo the quoted-pairs.
        $quoted =~ s/\A"//;
        $quoted =~ s/"\z//;
        $quoted =~ s/\\(.)/$1/gs;
        push @type, $name => $quoted;
      }
    }
    require Whatpm::IMTChecker;
    Whatpm::IMTChecker->check_imt (sub {
      my %opt = @_;
      $self->{onerror}->(node => $attr, level => $opt{level},
                         type => 'IMT:'.$opt{type});
    }, @type);
  } else {
    $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
  }
}; # $HTMLIMTAttrChecker
464    
## Checks a language tag attribute value.
##
## Takes ($self, $attr).  The value is handed to
## |Whatpm::LangTag->check_rfc3066_language_tag|; whatever it reports is
## forwarded to |onerror| with a |LangTag:| type prefix (plus the subtag,
## if one is given).
my $HTMLLanguageTagAttrChecker = sub {
  ## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm.

  my ($self, $attr) = @_;
  my $report = sub {
    my %opt = @_;
    my $type = 'LangTag:'.$opt{type};
    if (defined $opt{subtag}) {
      $type .= ':' . $opt{subtag};
    }
    $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
                       level => $opt{level});
  };
  require Whatpm::LangTag;
  Whatpm::LangTag->check_rfc3066_language_tag ($attr->value, $report);
  ## ISSUE: RFC 4646 (3066bis)?

  ## TODO: testdata
}; # $HTMLLanguageTagAttrChecker
482    
483     ## "A valid media query [MQ]"
## Media query attribute: checking is not implemented, so every such
## attribute is reported at the |unsupported| level.
## Takes ($self, $attr).
my $HTMLMQAttrChecker = sub {
  my ($self, $attr) = @_;
  ## ISSUE: What is "a valid media query"?
  my %report = (node => $attr, level => 'unsupported',
                type => 'media query');
  $self->{onerror}->(%report);
}; # $HTMLMQAttrChecker
490    
## Event handler attribute: checking is not implemented, so every such
## attribute is reported at the |unsupported| level.
## Takes ($self, $attr).
my $HTMLEventHandlerAttrChecker = sub {
  my ($self, $attr) = @_;
  ## TODO: MUST contain valid ECMAScript code matching the
  ## ECMAScript |FunctionBody| production.  [ECMA262]
  ## ISSUE: MUST be ES3?  E4X?  ES4?  JS1.x?
  ## ISSUE: Automatic semicolon insertion does not apply?
  ## ISSUE: Other script languages?
  my %report = (node => $attr, level => 'unsupported',
                type => 'event handler');
  $self->{onerror}->(%report);
}; # $HTMLEventHandlerAttrChecker
501    
## |usemap| attribute: MUST be a valid hashed ID reference to a |map|
## element.
##
## Takes ($self, $attr).  A value starting with |#| is recorded, without
## the |#|, as a [name, attribute] pair in |$self->{usemap}|; any other
## value yields |#idref:syntax error|.
my $HTMLUsemapAttrChecker = sub {
  my ($self, $attr) = @_;
  my $name = $attr->value;
  unless ($name =~ s/^#//) {
    $self->{onerror}->(node => $attr, type => '#idref:syntax error');
    return;
  }
  ## ISSUE: Is |usemap="#"| conformant?  (c.f. |id=""| is non-conformant.)
  push @{$self->{usemap}}, [$name => $attr];
  ## NOTE: Space characters in hashed ID references are conforming.
  ## ISSUE: UA algorithm for matching is case-insensitive; IDs only
  ## different in cases should be reported
}; # $HTMLUsemapAttrChecker
515    
## Browsing context name attribute (|target|).
##
## Takes ($self, $attr).  A value starting with |_| must be, compared
## in lowercase, one of |_self|, |_parent| or |_top|; otherwise
## |reserved browsing context name| is reported.  Any other value is
## accepted.
my $HTMLTargetAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;
  ## NOTE: An empty string is a valid browsing context name (same as _self).
  return unless $value =~ /^_/;

  my %keyword = (_self => 1, _parent => 1, _top => 1);
  $value = lc $value; ## ISSUE: ASCII case-insensitive?
  unless ($keyword{$value}) {
    $self->{onerror}->(node => $attr,
                       type => 'reserved browsing context name');
  }
}; # $HTMLTargetAttrChecker
531    
## Checks an attribute whose value is a group of selectors.
##
## Takes ($self, $attr).  The value is parsed by a
## |Whatpm::CSS::SelectorsParser| configured with the pseudo-classes and
## pseudo-elements listed below; parser errors are forwarded to |onerror|
## with a |selectors:| type prefix and the attribute as the node.
my $HTMLSelectorsAttrChecker = sub {
  my ($self, $attr) = @_;

  ## ISSUE: Namespace resolution?

  require Whatpm::CSS::SelectorsParser;
  my $parser = Whatpm::CSS::SelectorsParser->new;

  for my $name (qw/
    active checked disabled empty enabled first-child first-of-type
    focus hover indeterminate last-child last-of-type link only-child
    only-of-type root target visited
    lang nth-child nth-last-child nth-of-type nth-last-of-type not
    -manakai-contains -manakai-current
  /) {
    $parser->{pseudo_class}->{$name} = 1;
  }

  for my $name (qw/
    after before first-letter first-line
  /) {
    $parser->{pseudo_element}->{$name} = 1;
  }

  $parser->{must_level} = $self->{must_level};
  $parser->{onerror} = sub {
    my %opt = @_;
    $opt{type} = 'selectors:'.$opt{type};
    $self->{onerror}->(%opt, node => $attr);
  };
  $parser->parse_string ($attr->value);
}; # $HTMLSelectorsAttrChecker
561    
## Checkers for attributes available on every HTML element, keyed by
## attribute local name.  Each checker is called with ($self, $attr).
my $HTMLAttrChecker = {
  ## Records every non-empty ID in |$self->{id}| and reports duplicates,
  ## IDs containing space characters and empty values.
  id => sub {
    ## NOTE: |map| has its own variant of |id=""| checker
    my ($self, $attr) = @_;
    my $value = $attr->value;
    unless (length $value) {
      ## NOTE: MUST contain at least one character
      $self->{onerror}->(node => $attr, type => 'empty attribute value');
      return;
    }
    if (my $known = $self->{id}->{$value}) {
      $self->{onerror}->(node => $attr, type => 'duplicate ID');
      push @$known, $attr;
    } else {
      $self->{id}->{$value} = [$attr];
    }
    if ($value =~ /[\x09-\x0D\x20]/) {
      $self->{onerror}->(node => $attr, type => 'space in ID');
    }
  },
  title => sub {}, ## NOTE: No conformance criteria
  ## An empty value is accepted; any other value is checked as a
  ## language tag.  The attribute itself is an error in XML documents.
  lang => sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    if ($value ne '') {
      require Whatpm::LangTag;
      Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
        my %opt = @_;
        my $type = 'LangTag:'.$opt{type};
        if (defined $opt{subtag}) {
          $type .= ':' . $opt{subtag};
        }
        $self->{onerror}->(node => $attr, type => $type,
                           value => $opt{value}, level => $opt{level});
      });
    }
    ## ISSUE: RFC 4646 (3066bis)?
    if (not $attr->owner_document->manakai_is_html) {
      $self->{onerror}->(node => $attr, type => 'in XML:lang');
    }

    ## TODO: test data
  },
  dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
  ## Reports duplicate class names; the first occurrence of each name is
  ## recorded in |$self->{return}->{class}|.
  class => sub {
    my ($self, $attr) = @_;
    my %seen;
    for my $name (split /[\x09-\x0D\x20]/, $attr->value) {
      next unless length $name;
      if ($seen{$name}) {
        $self->{onerror}->(node => $attr, type => 'duplicate token:'.$name);
      } else {
        $seen{$name} = 1;
        push @{$self->{return}->{class}->{$name}||=[]}, $attr;
      }
    }
  },
  ## Only records the reference in |$self->{contextmenu}|.
  contextmenu => sub {
    my ($self, $attr) = @_;
    push @{$self->{contextmenu}}, [$attr->value => $attr];
    ## ISSUE: "The value must be the ID of a menu element in the DOM."
    ## What is "in the DOM"?  A menu Element node that is not part
    ## of the Document tree is in the DOM?  A menu Element node that
    ## belong to another Document tree is in the DOM?
  },
  irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'), ## TODO: status: Working Draft
  ## TODO: repeat, repeat-start, repeat-min, repeat-max, repeat-template ## TODO: global
  tabindex => $HTMLIntegerAttrChecker
  ## TODO: ref, template, registrationmark
};
632    
## Feature status of the attributes available on every HTML element,
## keyed by attribute local name and grouped here by status.
my %HTMLAttrStatus = (
  (map { ($_ => FEATURE_HTML5_DEFAULT) } qw/
    class contenteditable dir id lang tabindex title
  /),
  (map { ($_ => FEATURE_HTML5_WD) } qw/contextmenu irrelevant/),
  (map { ($_ => FEATURE_HTML5_AT_RISK) } qw/ref registrationmark template/),
  draggable => FEATURE_HTML5_LC,
);
648    
## Feature status of attributes that carry both the HTML5 default status
## and the XHTML m12n 1.0 REC status, keyed by attribute local name.
my %HTMLM12NCommonAttrStatus = (
  (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) } qw/
    class dir id
    onclick ondblclick
    onmousedown onmouseup onmouseover onmousemove onmouseout
    onkeypress onkeydown onkeyup
    title
  /),
  ## |style| additionally carries the XHTML Basic 1.1 deprecated status.
  style => FEATURE_HTML5_DEFAULT | FEATURE_XHTMLBASIC11_CR_DEPRECATED |
           FEATURE_M12N10_REC,
);
667    
## Register every event handler attribute: each gets the shared event
## handler checker and the default HTML5 feature status.
for my $event_attr (qw/
  onabort onbeforeunload onblur onchange onclick oncontextmenu
  ondblclick ondrag ondragend ondragenter ondragleave ondragover
  ondragstart ondrop onerror onfocus onkeydown onkeypress
  onkeyup onload onmessage onmousedown onmousemove onmouseout
  onmouseover onmouseup onmousewheel onresize onscroll onselect
  onsubmit onunload
/) {
  $HTMLAttrStatus{$event_attr} = FEATURE_HTML5_DEFAULT;
  $HTMLAttrChecker->{$event_attr} = $HTMLEventHandlerAttrChecker;
}
679    
## Returns a |check_attrs| handler for an element.
##
## Arguments:
##   $element_specific_checker - hash reference {local name => checker}
##       consulted before the common $HTMLAttrChecker table
##   $element_specific_status  - hash reference {local name => status}
##       passed to |_attr_status_info|
##
## The returned handler takes ($self, $item, $element_state) and checks
## every attribute of |$item->{node}|.  For a null-namespace attribute
## the checker comes from the element-specific table, then from
## $HTMLAttrChecker; failing that (and for any other namespace) from
## $AttrChecker, by local name and then by the |''| wildcard entry.
my $GetHTMLAttrsChecker = sub {
  my ($element_specific_checker, $element_specific_status) = @_;
  return sub {
    my ($self, $item, $element_state) = @_;
    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;
      my $in_null_ns = $attr_ns eq '';

      my $checker;
      if ($in_null_ns) {
        $checker = $element_specific_checker->{$attr_ln}
            || $HTMLAttrChecker->{$attr_ln};
      }
      $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
          || $AttrChecker->{$attr_ns}->{''};

      if ($checker) {
        $checker->($self, $attr, $item);
      } elsif (not $in_null_ns) {
        ## Unknown null-namespace attributes are not reported here.
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute');
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }

      if ($in_null_ns) {
        $self->_attr_status_info ($attr, $element_specific_status->{$attr_ln});
      }
      ## TODO: global attribute
    }
  };
}; # $GetHTMLAttrsChecker
712    
## Base checker for HTML elements: |%Whatpm::ContentChecker::AnyChecker|
## with |check_attrs| replaced by the common HTML attribute checker.
my %HTMLChecker = %Whatpm::ContentChecker::AnyChecker;
$HTMLChecker{check_attrs} = $GetHTMLAttrsChecker->({}, \%HTMLAttrStatus);
717    
## Checker for elements whose content model is empty.
my %HTMLEmptyChecker = (
  %HTMLChecker,
  ## Any child element is an error unless it is listed in
  ## |plus_elements| (and not in |minus_elements|).
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    my $reason;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $reason = 'element not allowed:minus';
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      return;
    } else {
      $reason = 'element not allowed:empty';
    }
    $self->{onerror}->(node => $child_el,
                       type => $reason,
                       level => $self->{must_level});
  },
  ## Text is an error only when $has_significant is true.
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    return unless $has_significant;
    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed:empty',
                       level => $self->{must_level});
  },
);
744    
## Checker for elements whose content model is text only.
my %HTMLTextChecker = (
  %HTMLChecker,
  ## Any child element is an error unless it is listed in
  ## |plus_elements| (and not in |minus_elements|).
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      return;
    }
    return if $self->{plus_elements}->{$child_nsuri}->{$child_ln};
    ## NOTE: Unlike the |minus| case, no |level| is passed here.
    $self->{onerror}->(node => $child_el, type => 'element not allowed');
  },
);
761    
## Checker for elements whose content model is prose content.
my %HTMLProseContentChecker = (
  %HTMLChecker,
  ## Children must be prose content.  A |style| child is only allowed
  ## with a |scoped| attribute and before any non-style content, which
  ## is tracked in |$element_state->{has_non_style}|.
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    my $not_allowed = sub {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:'.$_[0],
                         level => $self->{must_level});
    };

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $not_allowed->('minus');
      return;
    }
    return if $self->{plus_elements}->{$child_nsuri}->{$child_ln};

    if ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
      if ($element_state->{has_non_style} or
          not $child_el->has_attribute_ns (undef, 'scoped')) {
        $not_allowed->('prose style');
      }
      return;
    }

    if ($HTMLProseContent->{$child_nsuri}->{$child_ln}) {
      $element_state->{has_non_style} = 1 unless $child_is_transparent;
      return;
    }

    $element_state->{has_non_style} = 1;
    $not_allowed->('prose');
  },
  ## Significant text counts as non-style content.
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $element_state->{has_non_style} = 1 if $has_significant;
  },
  ## Propagates |has_significant| to the real parent's state; otherwise,
  ## unless the item is transparent, reports |no significant content|.
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if ($element_state->{has_significant}) {
      $item->{real_parent_state}->{has_significant} = 1;
      return;
    }
    return if $item->{transparent};
    $self->{onerror}->(node => $item->{node},
                       level => $self->{should_level},
                       type => 'no significant content');
  },
);
808    
## Checker table for elements whose content model is phrasing content.
my %HTMLPhrasingContentChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    my $error_type;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $error_type = 'element not allowed:minus';
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed by the context.
    } elsif (not $HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
      $error_type = 'element not allowed:phrasing';
    }
    if (defined $error_type) {
      $self->{onerror}->(node => $child_el,
                         type => $error_type,
                         level => $self->{must_level});
    }
  },
  ## The significant content rule is shared with the prose checker.
  check_end => $HTMLProseContentChecker{check_end},
  ## NOTE: The definition for |li| assumes that the only differences
  ## between prose and phrasing content checkers are |check_child_element|
  ## and |check_child_text|.
);
833    
## Transparent elements currently use a copy of the prose content
## checker table.
## ISSUE: Should the significant content rule be applied to a
## transparent element that has a parent?
my %HTMLTransparentChecker = (%HTMLProseContentChecker);
837 wakaba 1.40
## Element definition tables (package variables of
## |Whatpm::ContentChecker|).
our $Element;
our $ElementDefault;

## Definition registered under the empty local name in the HTML
## namespace: a plain copy of |%HTMLChecker|.
$Element->{$HTML_NS}->{''} = {%HTMLChecker};
844    
## The |html| element: one |head| followed by one |body|.
##
## |$element_state->{phase}| tracks the position within the content
## model: 'before head' -> 'after head' -> 'after body'.
$Element->{$HTML_NS}->{html} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  is_root => 1,
  check_attrs => $GetHTMLAttrsChecker->({
    manifest => $HTMLURIAttrChecker,
    xmlns => sub {
      my ($self, $attr) = @_;
      ## The only acceptable value is the XHTML namespace URI.
      if ($attr->value ne $HTML_NS) {
        $self->{onerror}->(node => $attr, type => 'invalid attribute value');
      }
      if (not $attr->owner_document->manakai_is_html) {
        $self->{onerror}->(node => $attr, type => 'in XML:xmlns');
        ## TODO: Test
      }
    },
  }, {
    %HTMLAttrStatus,
    dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    manifest => FEATURE_HTML5_DEFAULT,
    version => FEATURE_M12N10_REC,
    xmlns => FEATURE_HTML5_DEFAULT,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'before head';
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      return;
    }
    return if $self->{plus_elements}->{$child_nsuri}->{$child_ln};

    my $not_allowed = sub {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed');
    };
    my $phase = $element_state->{phase};
    if ($phase eq 'before head') {
      if ($child_nsuri ne $HTML_NS) {
        $not_allowed->();
      } elsif ($child_ln eq 'head') {
        $element_state->{phase} = 'after head';
      } elsif ($child_ln eq 'body') {
        ## |body| appears although no |head| has been seen.
        $self->{onerror}->(node => $child_el,
                           type => 'ps element missing:head');
        $element_state->{phase} = 'after body';
      } else {
        $not_allowed->();
      }
    } elsif ($phase eq 'after head') {
      if ($child_nsuri eq $HTML_NS and $child_ln eq 'body') {
        $element_state->{phase} = 'after body';
      } else {
        $not_allowed->();
      }
    } elsif ($phase eq 'after body') {
      $not_allowed->();
    } else {
      die "check_child_element: Bad |html| phase: $element_state->{phase}";
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    return unless $has_significant;
    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed');
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## Errors to report for each phase in which the element may end.
    my $missing_errors = {
      'after body' => [],
      'before head' => ['child element missing:head',
                        'child element missing:body'],
      'after head' => ['child element missing:body'],
    }->{$element_state->{phase}}
        or die "check_end: Bad |html| phase: $element_state->{phase}";
    for my $error_type (@$missing_errors) {
      $self->{onerror}->(node => $item->{node},
                         type => $error_type);
    }

    $HTMLChecker{check_end}->(@_);
  },
};
934 wakaba 1.25
## The |head| element: metadata content with exactly one |title| and
## no |style| carrying a |scoped| attribute.
##
## While the element is open, |$self->{flag}->{in_head}| is set to
## true; the value it had before is kept in
## |$element_state->{in_head_original}| and put back by |check_end|.
$Element->{$HTML_NS}->{head} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    profile => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'title') {
      unless ($element_state->{has_title}) {
        $element_state->{has_title} = 1;
      } else {
        ## Second or later |title|.
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:head title',
                           level => $self->{must_level});
      }
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
      if ($child_el->has_attribute_ns (undef, 'scoped')) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:head style',
                           level => $self->{must_level});
      }
    } elsif ($HTMLMetadataContent->{$child_nsuri}->{$child_ln}) {
      #

      ## NOTE: |meta| is a metadata content.  However, strictly speaking,
      ## a |meta| element with none of |charset|, |name|,
      ## or |http-equiv| attribute is not allowed.  It is non-conforming
      ## anyway.

      ## TODO: |form| MUST be empty and in XML [WF2].
    } else {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:metadata',
                         level => $self->{must_level});
    }

    ## Save the previous |in_head| flag only once.  This callback runs
    ## for every child element; saving unconditionally would, from the
    ## second child on, record the value 1 set just below and
    ## |check_end| would then never restore the real previous value.
    unless (exists $element_state->{in_head_original}) {
      $element_state->{in_head_original} = $self->{flag}->{in_head};
    }
    $self->{flag}->{in_head} = 1;
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $self->{onerror}->(node => $child_node, type => 'character not allowed');
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    unless ($element_state->{has_title}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing:title');
    }

    ## Restore the flag only if this element actually changed it (i.e.
    ## it had at least one child element).
    if (exists $element_state->{in_head_original}) {
      $self->{flag}->{in_head} = $element_state->{in_head_original};
    }

    $HTMLChecker{check_end}->(@_);
  },
};
1001    
## The |title| element: text-only content (|%HTMLTextChecker|) with
## its own attribute status table.
$Element->{$HTML_NS}->{title} = {
  %HTMLTextChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
};
1012 wakaba 1.1
## The |base| element: empty content; at most one per checker run;
## needs |href| or |target|; |href| must not follow a URI attribute and
## |target| must not follow a hyperlink element (as tracked by
## |$self->{has_uri_attr}| and |$self->{has_hyperlink_element}|).
$Element->{$HTML_NS}->{base} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  %HTMLEmptyChecker,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $base_el = $item->{node};
    my $report = sub {
      my $error_type = shift;
      $self->{onerror}->(node => $base_el,
                         type => $error_type);
    };

    if ($self->{has_base}) {
      $report->('element not allowed:base');
    } else {
      $self->{has_base} = 1;
    }

    my $has_href = $base_el->has_attribute_ns (undef, 'href');
    my $has_target = $base_el->has_attribute_ns (undef, 'target');

    if ($self->{has_uri_attr} and $has_href) {
      ## ISSUE: Are these examples conforming?
      ## <head profile="a b c"><base href> (except for |profile|'s
      ## non-conformance)
      ## <title xml:base="relative"/><base href/> (maybe it should be)
      ## <unknown xmlns="relative"/><base href/> (assuming that
      ## |{relative}:unknown| is allowed before XHTML |base| (unlikely, though))
      ## <style>@import 'relative';</style><base href>
      ## <script>location.href = 'relative';</script><base href>
      ## NOTE: <html manifest=".."><head><base href=""/> is conforming as
      ## an exception.
      $report->('basehref after URI attribute');
    }
    if ($self->{has_hyperlink_element} and $has_target) {
      ## ISSUE: Are these examples conforming?
      ## <head><title xlink:href=""/><base target="name"/></head>
      ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
      ## (assuming that |xbl:xbl| is allowed before |base|)
      ## NOTE: These are non-conformant anyway because of |head|'s content model:
      ## <link href=""/><base target="name"/>
      ## <link rel=unknown href=""><base target=name>
      $report->('basetarget after hyperlink');
    }

    unless ($has_href or $has_target) {
      $report->('attribute missing:href|target');
    }

    ## Finally, run the ordinary per-attribute checks.
    my $attrs_checker = $GetHTMLAttrsChecker->({
      href => $HTMLURIAttrChecker,
      target => $HTMLTargetAttrChecker,
    }, {
      %HTMLAttrStatus,
      href => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      target => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    });
    return $attrs_checker->($self, $item, $element_state);
  },
};
1071    
## The |link| element: empty content; both |href| and |rel| are
## required.  When |href| is present and the item has been marked with
## |has_hyperlink_link_type|, the checker records that a hyperlink
## element has been seen.
$Element->{$HTML_NS}->{link} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  %HTMLEmptyChecker,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $attrs_checker = $GetHTMLAttrsChecker->({
      href => $HTMLURIAttrChecker,
      rel => sub { $HTMLLinkTypesAttrChecker->(0, $item, @_) },
      media => $HTMLMQAttrChecker,
      hreflang => $HTMLLanguageTagAttrChecker,
      type => $HTMLIMTAttrChecker,
      ## NOTE: Though |title| has special semantics, it is
      ## syntactically the same as |title| as a global attribute.
    }, {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      charset => FEATURE_M12N10_REC,
      href => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      hreflang => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      media => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      rel => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      rev => FEATURE_M12N10_REC,
      target => FEATURE_M12N10_REC,
      type => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    });
    $attrs_checker->($self, $item, $element_state);

    if (not $item->{node}->has_attribute_ns (undef, 'href')) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing:href');
    } elsif ($item->{has_hyperlink_link_type}) {
      $self->{has_hyperlink_element} = 1;
    }

    if (not $item->{node}->has_attribute_ns (undef, 'rel')) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing:rel');
    }
  },
};
1110    
## The |meta| element: empty content.  |check_attrs| (1) walks all
## attributes, remembering |content|, |name|, |http-equiv| and
## |charset| and running the generic checker for any other attribute,
## (2) reports disallowed or missing combinations of those four
## attributes, and (3) validates character encoding declarations given
## by |charset| or by |http-equiv="content-type"|.
1111     $Element->{$HTML_NS}->{meta} = {
1112 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1113 wakaba 1.40 %HTMLEmptyChecker,
1114     check_attrs => sub {
1115     my ($self, $item, $element_state) = @_;
1116 wakaba 1.1 my $name_attr;
1117     my $http_equiv_attr;
1118     my $charset_attr;
1119     my $content_attr;
1120 wakaba 1.40 for my $attr (@{$item->{node}->attributes}) {
1121 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
1122     $attr_ns = '' unless defined $attr_ns;
1123     my $attr_ln = $attr->manakai_local_name;
1124     my $checker;
1125     if ($attr_ns eq '') {
## NOTE: For the four attributes below |$checker| is set to the
## non-reference value 1, so no per-attribute checker is invoked in
## this loop; they are examined after the loop instead.
1126     if ($attr_ln eq 'content') {
1127     $content_attr = $attr;
1128     $checker = 1;
1129     } elsif ($attr_ln eq 'name') {
1130     $name_attr = $attr;
1131     $checker = 1;
1132     } elsif ($attr_ln eq 'http-equiv') {
1133     $http_equiv_attr = $attr;
1134     $checker = 1;
1135     } elsif ($attr_ln eq 'charset') {
1136     $charset_attr = $attr;
1137     $checker = 1;
1138     } else {
1139     $checker = $HTMLAttrChecker->{$attr_ln}
1140     || $AttrChecker->{$attr_ns}->{$attr_ln}
1141     || $AttrChecker->{$attr_ns}->{''};
1142     }
1143     } else {
1144     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1145     || $AttrChecker->{$attr_ns}->{''};
1146     }
1147     if ($checker) {
1148     $checker->($self, $attr) if ref $checker;
1149 wakaba 1.49 } elsif ($attr_ns eq '') {
1150 wakaba 1.54 #
1151 wakaba 1.1 } else {
1152     $self->{onerror}->(node => $attr, level => 'unsupported',
1153     type => 'attribute');
1154 wakaba 1.49 ## ISSUE: No conformance criteria for unknown attributes in the spec
1155     }
1156    
## Status information is reported for null-namespace attributes only.
1157     if ($attr_ns eq '') {
1158     $self->_attr_status_info ($attr, {
1159     %HTMLAttrStatus,
1160 wakaba 1.50 charset => FEATURE_HTML5_DEFAULT,
1161     content => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1162     dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1163     'http-equiv' => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1164     id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
1165     lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
1166     name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1167 wakaba 1.49 scheme => FEATURE_M12N10_REC,
1168     }->{$attr_ln});
1169 wakaba 1.1 }
1170     }
1171    
## Attribute combination rules.  |name| takes precedence over
## |http-equiv|, which takes precedence over |charset|.
## NOTE: When |name| is present together with both |http-equiv| and
## |charset|, only |http-equiv| is reported (|elsif| below).
1172     if (defined $name_attr) {
1173     if (defined $http_equiv_attr) {
1174     $self->{onerror}->(node => $http_equiv_attr,
1175     type => 'attribute not allowed');
1176     } elsif (defined $charset_attr) {
1177     $self->{onerror}->(node => $charset_attr,
1178     type => 'attribute not allowed');
1179     }
## NOTE: |$metadata_name| and |$metadata_value| are not used further
## in this block (see "TODO: metadata conformance" below).
1180     my $metadata_name = $name_attr->value;
1181     my $metadata_value;
1182     if (defined $content_attr) {
1183     $metadata_value = $content_attr->value;
1184     } else {
1185 wakaba 1.40 $self->{onerror}->(node => $item->{node},
1186 wakaba 1.1 type => 'attribute missing:content');
1187     $metadata_value = '';
1188     }
1189     } elsif (defined $http_equiv_attr) {
1190     if (defined $charset_attr) {
1191     $self->{onerror}->(node => $charset_attr,
1192     type => 'attribute not allowed');
1193     }
1194     unless (defined $content_attr) {
1195 wakaba 1.40 $self->{onerror}->(node => $item->{node},
1196 wakaba 1.1 type => 'attribute missing:content');
1197     }
1198     } elsif (defined $charset_attr) {
1199     if (defined $content_attr) {
1200     $self->{onerror}->(node => $content_attr,
1201     type => 'attribute not allowed');
1202     }
1203     } else {
1204     if (defined $content_attr) {
1205     $self->{onerror}->(node => $content_attr,
1206     type => 'attribute not allowed');
1207 wakaba 1.40 $self->{onerror}->(node => $item->{node},
1208 wakaba 1.1 type => 'attribute missing:name|http-equiv');
1209     } else {
1210 wakaba 1.40 $self->{onerror}->(node => $item->{node},
1211 wakaba 1.1 type => 'attribute missing:name|http-equiv|charset');
1212     }
1213     }
1214    
## Checks the position of an encoding declaration: the element must be
## the first element child of the document's |head| element, and the
## document must be an HTML (not XML) document.
1215 wakaba 1.32 my $check_charset_decl = sub () {
1216 wakaba 1.40 my $parent = $item->{node}->manakai_parent_element;
1217 wakaba 1.29 if ($parent and $parent eq $parent->owner_document->manakai_head) {
1218     for my $el (@{$parent->child_nodes}) {
1219     next unless $el->node_type == 1; # ELEMENT_NODE
1220 wakaba 1.40 unless ($el eq $item->{node}) {
1221 wakaba 1.29 ## NOTE: Not the first child element.
1222 wakaba 1.40 $self->{onerror}->(node => $item->{node},
1223 wakaba 1.32 type => 'element not allowed:meta charset',
1224     level => $self->{must_level});
1225 wakaba 1.29 }
1226     last;
1227     ## NOTE: Entity references are not supported.
1228     }
1229     } else {
1230 wakaba 1.40 $self->{onerror}->(node => $item->{node},
1231 wakaba 1.32 type => 'element not allowed:meta charset',
1232     level => $self->{must_level});
1233 wakaba 1.29 }
1234    
1235 wakaba 1.40 unless ($item->{node}->owner_document->manakai_is_html) {
1236     $self->{onerror}->(node => $item->{node},
1237 wakaba 1.32 type => 'in XML:charset',
1238     level => $self->{must_level});
1239 wakaba 1.1 }
1240 wakaba 1.32 }; # $check_charset_decl
1241 wakaba 1.21
## Checks a declared charset name |$charset_value| (taken from the
## attribute |$attr|): agreement with the document's input encoding,
## syntactic validity, and preferred / registered / private status.
1242 wakaba 1.32 my $check_charset = sub ($$) {
1243     my ($attr, $charset_value) = @_;
1244 wakaba 1.21 ## NOTE: Though the case-sensitivity of |charset| attribute value
1245     ## is not explicitly spelled in the HTML5 spec, the Character Set
1246     ## registry of IANA, which is referenced from HTML5 spec, says that
1247     ## charset name is case-insensitive.
1248     $charset_value =~ tr/A-Z/a-z/; ## NOTE: ASCII Case-insensitive.
1249    
1250     require Message::Charset::Info;
1251     my $charset = $Message::Charset::Info::IANACharset->{$charset_value};
1252 wakaba 1.40 my $ic = $item->{node}->owner_document->input_encoding;
1253 wakaba 1.21 if (defined $ic) {
1254     ## TODO: Test for this case
1255     my $ic_charset = $Message::Charset::Info::IANACharset->{$ic};
## NOTE(review): This is a string comparison of the two table entries;
## it presumably relies on both names mapping to the same entry when
## they denote the same charset - confirm against
## |Message::Charset::Info|.
1256     if ($charset ne $ic_charset) {
1257 wakaba 1.32 $self->{onerror}->(node => $attr,
1258 wakaba 1.21 type => 'mismatched charset name:'.$ic.
1259 wakaba 1.32 ':'.$charset_value, ## TODO: This should be a |value| value.
1260     level => $self->{must_level});
1261 wakaba 1.21 }
1262     } else {
1263     ## NOTE: MUST, but not checkable, since the document is not originally
1264     ## in serialized form (or the parser does not preserve the input
1265     ## encoding information).
1266 wakaba 1.32 $self->{onerror}->(node => $attr,
1267     type => 'mismatched charset name::'.$charset_value, ## TODO: |value|
1268 wakaba 1.21 level => 'unsupported');
1269     }
1270    
1271     ## ISSUE: What is "valid character encoding name"? Syntactically valid?
1272     ## Syntactically valid and registered? What about x-charset names?
1273     unless (Message::Charset::Info::is_syntactically_valid_iana_charset_name
1274     ($charset_value)) {
1275 wakaba 1.32 $self->{onerror}->(node => $attr,
1276     type => 'charset:syntax error:'.$charset_value, ## TODO
1277     level => $self->{must_level});
1278 wakaba 1.21 }
1279    
1280     if ($charset) {
1281     ## ISSUE: What is "the preferred name for that encoding" (for a charset
1282     ## with no "preferred MIME name" label)?
1283     my $charset_status = $charset->{iana_names}->{$charset_value} || 0;
1284     if (($charset_status &
1285     Message::Charset::Info::PREFERRED_CHARSET_NAME ())
1286     != Message::Charset::Info::PREFERRED_CHARSET_NAME ()) {
1287 wakaba 1.32 $self->{onerror}->(node => $attr,
1288 wakaba 1.21 type => 'charset:not preferred:'.
1289 wakaba 1.32 $charset_value, ## TODO
1290     level => $self->{must_level});
1291 wakaba 1.21 }
1292     if (($charset_status &
1293     Message::Charset::Info::REGISTERED_CHARSET_NAME ())
1294     != Message::Charset::Info::REGISTERED_CHARSET_NAME ()) {
1295     if ($charset_value =~ /^x-/) {
1296 wakaba 1.32 $self->{onerror}->(node => $attr,
1297     type => 'charset:private:'.$charset_value, ## TODO
1298 wakaba 1.21 level => $self->{good_level});
1299     } else {
1300 wakaba 1.32 $self->{onerror}->(node => $attr,
1301 wakaba 1.21 type => 'charset:not registered:'.
1302 wakaba 1.32 $charset_value, ## TODO
1303 wakaba 1.21 level => $self->{good_level});
1304     }
1305     }
1306     } elsif ($charset_value =~ /^x-/) {
1307 wakaba 1.32 $self->{onerror}->(node => $attr,
1308     type => 'charset:private:'.$charset_value, ## TODO
1309 wakaba 1.21 level => $self->{good_level});
1310     } else {
1311 wakaba 1.32 $self->{onerror}->(node => $attr,
1312     type => 'charset:not registered:'.$charset_value, ## TODO
1313 wakaba 1.21 level => $self->{good_level});
1314     }
1315    
1316 wakaba 1.32 if ($attr->get_user_data ('manakai_has_reference')) {
1317     $self->{onerror}->(node => $attr,
1318 wakaba 1.22 type => 'character reference in charset',
1319     level => $self->{must_level});
1320     }
1321 wakaba 1.32 }; # $check_charset
1322    
1323     ## TODO: metadata conformance
1324    
1325     ## TODO: pragma conformance
1326     if (defined $http_equiv_attr) { ## An enumerated attribute
1327     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
1328     if ({
1329     'refresh' => 1,
1330     'default-style' => 1,
1331     }->{$keyword}) {
1332     #
1333 wakaba 1.33
1334     ## TODO: More than one occurrence is a MUST-level error (revision 1180).
1335 wakaba 1.32 } elsif ($keyword eq 'content-type') {
1336 wakaba 1.33 ## ISSUE: Though it is renamed as "Encoding declaration" state in rev
1337     ## 1221, there are still many occurrences of "Content-Type" state in
1338     ## the spec.
1339    
1340 wakaba 1.32 $check_charset_decl->();
1341     if ($content_attr) {
1342     my $content = $content_attr->value;
## Only the exact form |text/html;| + optional single SPACE +
## |charset=| + name is accepted; the name is everything that follows.
1343     if ($content =~ m!^text/html;\x20?charset=(.+)\z!s) {
1344     $check_charset->($content_attr, $1);
1345     } else {
1346     $self->{onerror}->(node => $content_attr,
1347     type => 'meta content-type syntax error',
1348     level => $self->{must_level});
1349     }
1350     }
1351     } else {
1352     $self->{onerror}->(node => $http_equiv_attr,
1353     type => 'enumerated:invalid');
1354     }
1355     }
1356    
1357     if (defined $charset_attr) {
1358     $check_charset_decl->();
1359     $check_charset->($charset_attr, $charset_attr->value);
1360 wakaba 1.1 }
1361     },
1362     };
1363    
## The |style| element.  When |type| is absent or is |text/css|
## (ASCII case-insensitive, optional folding white space around the
## tokens), child elements are rejected and the collected text is
## handed to |onsubdoc| as a CSS style sheet.  For any other type child
## elements are tolerated and the style sheet is reported as
## unsupported.
$Element->{$HTML_NS}->{style} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  %HTMLChecker,
  check_attrs => $GetHTMLAttrsChecker->({
    type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
    media => $HTMLMQAttrChecker,
    scoped => $GetHTMLBooleanAttrChecker->('scoped'),
    ## NOTE: |title| has special semantics for |style|s, but is
    ## syntactically not different.
  }, {
    %HTMLAttrStatus,
    dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    media => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    scoped => FEATURE_HTML5_DEFAULT,
    title => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    type => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## NOTE: |html:style| itself has no conformance criteria on content model.
    my $type = $item->{node}->get_attribute_ns (undef, 'type');
    my $is_css = (not defined $type or
        $type =~ m[\A(?>(?>\x0D\x0A)?[\x09\x20])*[Tt][Ee][Xx][Tt](?>(?>\x0D\x0A)?[\x09\x20])*/(?>(?>\x0D\x0A)?[\x09\x20])*[Cc][Ss][Ss](?>(?>\x0D\x0A)?[\x09\x20])*\z]);
    if ($is_css) {
      $element_state->{allow_element} = 0;
      $element_state->{style_type} = 'text/css';
    } else {
      $element_state->{allow_element} = 1; # unknown
      $element_state->{style_type} = $type; ## TODO: $type normalization
    }
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      return;
    }
    return if $self->{plus_elements}->{$child_nsuri}->{$child_ln};
    return if $element_state->{allow_element};

    $self->{onerror}->(node => $child_el, type => 'element not allowed');
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    ## Accumulate the style sheet source for |check_end|.
    $element_state->{text} .= $child_node->text_content;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    my $style_type = $element_state->{style_type};
    if ($style_type eq 'text/css') {
      $self->{onsubdoc}->({s => $element_state->{text},
                           container_node => $item->{node},
                           media_type => 'text/css', is_char_string => 1});
    } else {
      $self->{onerror}->(node => $item->{node}, level => 'unsupported',
                         type => 'style:'.$style_type);
    }

    $HTMLChecker{check_end}->(@_);
  },
};
1430 wakaba 1.25 ## ISSUE: Relationship to significant content check?
1431 wakaba 1.1
## The |body| element: prose content.  The presentational attributes
## are listed with the deprecated M12N status only.
$Element->{$HTML_NS}->{body} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      alink => FEATURE_M12N10_REC_DEPRECATED,
      background => FEATURE_M12N10_REC_DEPRECATED,
      bgcolor => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      link => FEATURE_M12N10_REC_DEPRECATED,
      onload => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onunload => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      text => FEATURE_M12N10_REC_DEPRECATED,
      vlink => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),
};
1449    
## |section|, |nav| and |article|: HTML5-only elements with prose
## content.  Each element gets its own definition hash.
for my $sectioning_ln (qw/section nav article/) {
  $Element->{$HTML_NS}->{$sectioning_ln} = {
    status => FEATURE_HTML5_DEFAULT,
    %HTMLProseContentChecker,
  };
}
1464    
## The |blockquote| element: prose content; |cite| is checked as a URI.
$Element->{$HTML_NS}->{blockquote} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  %HTMLProseContentChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      cite => $HTMLURIAttrChecker,
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      cite => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
};
1477    
## The |aside| element: HTML5-only, prose content.
$Element->{$HTML_NS}->{aside} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLProseContentChecker,
};
1482    
## The |h1| element: phrasing content.  Starting a heading sets the
## checker-wide |has_hn| flag, which the |header| definition inspects.
$Element->{$HTML_NS}->{h1} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->{flag}->{has_hn} = 1;
  },
};
1497    
## |h2| .. |h6| are shallow copies of the |h1| definition.
for my $heading_ln (qw/h2 h3 h4 h5 h6/) {
  $Element->{$HTML_NS}->{$heading_ln} = {%{$Element->{$HTML_NS}->{h1}}};
}
1507 wakaba 1.1
1508 wakaba 1.29 ## TODO: Explicit sectioning is "encouraged".
1509    
## The |header| element: prose content in which |header|, |footer| and
## sectioning content are excluded, and in which a heading must have
## been started by the time the element ends (tracked through the
## checker-wide |has_hn| flag).
$Element->{$HTML_NS}->{header} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLProseContentChecker,
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my $excluded_html = {$HTML_NS => {qw/header 1 footer 1/}};
    $self->_add_minus_elements ($element_state,
                                $excluded_html,
                                $HTMLSectioningContent);
    ## Stash the outer flag and start counting headings afresh.
    $element_state->{has_hn_original} = $self->{flag}->{has_hn};
    $self->{flag}->{has_hn} = 0;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);
    if (not $self->{flag}->{has_hn}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'element missing:hn');
      ## No heading inside: fall back to the value saved at the start.
      $self->{flag}->{has_hn} = $element_state->{has_hn_original};
    }

    $HTMLProseContentChecker{check_end}->(@_);
  },
  ## ISSUE: <header><del><h1>...</h1></del></header> is conforming?
};
1534    
## |footer|: prose content; |footer| itself plus the elements in
## |$HTMLSectioningContent| and |$HTMLHeadingContent| are added to
## the minus set for the duration of the element.
$Element->{$HTML_NS}{footer} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLProseContentChecker,
  check_start => sub {
    my ($checker, $item, $state) = @_;
    $checker->_add_minus_elements($state,
                                  {$HTML_NS => {footer => 1}},
                                  $HTMLSectioningContent,
                                  $HTMLHeadingContent);
  },
  check_end => sub {
    my ($checker, $item, $state) = @_;
    $checker->_remove_minus_elements($state);

    $HTMLProseContentChecker{check_end}->(@_);
  },
};
1551    
## |address|: prose content; |footer|, |address| and the elements in
## |$HTMLSectioningContent| / |$HTMLHeadingContent| are added to the
## minus set while inside.
$Element->{$HTML_NS}{address} = {
  %HTMLProseContentChecker,
  status      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  }),
  check_start => sub {
    my ($checker, $item, $state) = @_;
    $checker->_add_minus_elements($state,
                                  {$HTML_NS => {footer => 1, address => 1}},
                                  $HTMLSectioningContent,
                                  $HTMLHeadingContent);
  },
  check_end => sub {
    my ($checker, $item, $state) = @_;
    $checker->_remove_minus_elements($state);

    $HTMLProseContentChecker{check_end}->(@_);
  },
};
1573    
## |p|: phrasing content; no element-specific attribute checkers,
## only the attribute status table.
$Element->{$HTML_NS}{p} = {
  %HTMLPhrasingContentChecker,
  status      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_M12N10_REC_DEPRECATED,
    lang  => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  }),
};
1584    
## |hr|: checked with the empty-element checker.
$Element->{$HTML_NS}{hr} = {
  %HTMLEmptyChecker,
  status      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align   => FEATURE_M12N10_REC_DEPRECATED,
    lang    => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    noshade => FEATURE_M12N10_REC_DEPRECATED,
    size    => FEATURE_M12N10_REC_DEPRECATED,
    width   => FEATURE_M12N10_REC_DEPRECATED,
  }),
};
1598    
## |br|: checked with the empty-element checker.  Unlike most
## elements in this file its status table lists |class|, |id|,
## |style| and |title| individually instead of splicing
## |%HTMLM12NCommonAttrStatus|.
$Element->{$HTML_NS}{br} = {
  %HTMLEmptyChecker,
  status      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    class => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    clear => FEATURE_M12N10_REC_DEPRECATED,
    id    => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    style => FEATURE_XHTML10_REC,
    title => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  }),
  ## NOTE: Blank line MUST NOT be used for presentation purpose.
  ## (This requirement is semantic so that we cannot check.)
};
1613    
## |dialog|: children must strictly alternate |dt|, |dd|, |dt|, ...
## The element state's |phase| names the child expected next
## ('before dt' or 'before dd').
$Element->{$HTML_NS}{dialog} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLChecker,
  check_start => sub {
    my ($checker, $item, $state) = @_;
    $state->{phase} = 'before dt';
  },
  check_child_element => sub {
    my ($checker, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $state) = @_;
    if ($checker->{minus_elements}{$child_nsuri}{$child_ln}) {
      $checker->{onerror}->(node => $child_el,
                            type => 'element not allowed:minus',
                            level => $checker->{must_level});
    } elsif ($checker->{plus_elements}{$child_nsuri}{$child_ln}) {
      ## Listed in |plus_elements|: accepted without further checks.
    } else {
      ## Per-phase rule: the child that advances the phase, the
      ## out-of-turn sibling, and the error reported for the latter.
      my $rule = {
        'before dt' => {expected => 'dt', other => 'dd',
                        next => 'before dd',
                        missing => 'ps element missing:dt'},
        'before dd' => {expected => 'dd', other => 'dt',
                        next => 'before dt',
                        missing => 'ps element missing:dd'},
      }->{$state->{phase}}
          or die "check_child_element: Bad |dialog| phase: $state->{phase}";

      my $in_html_ns = $child_nsuri eq $HTML_NS;
      if ($in_html_ns and $child_ln eq $rule->{expected}) {
        $state->{phase} = $rule->{next};
      } elsif ($in_html_ns and $child_ln eq $rule->{other}) {
        ## Out-of-turn |dt|/|dd|: report it; the phase is unchanged.
        $checker->{onerror}->(node => $child_el, type => $rule->{missing});
      } else {
        $checker->{onerror}->(node => $child_el,
                              type => 'element not allowed');
      }
    }
  },
  check_child_text => sub {
    my ($checker, $item, $child_node, $has_significant, $state) = @_;
    $checker->{onerror}->(node => $child_node,
                          type => 'character not allowed')
        if $has_significant;
  },
  check_end => sub {
    my ($checker, $item, $state) = @_;
    ## Ending while a |dd| is still expected: last |dt| is unpaired.
    $checker->{onerror}->(node => $item->{node},
                          type => 'child element missing:dd')
        if $state->{phase} eq 'before dd';

    $HTMLChecker{check_end}->(@_);
  },
};
1670    
## |pre|: phrasing content.
$Element->{$HTML_NS}{pre} = {
  %HTMLPhrasingContentChecker,
  status      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang  => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    width => FEATURE_M12N10_REC_DEPRECATED,
  }),
};
1681    
## |ol|: only HTML |li| children (or elements listed in
## |plus_elements|) are accepted, and no significant text.
$Element->{$HTML_NS}{ol} = {
  %HTMLChecker,
  status      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    start    => $HTMLIntegerAttrChecker,
    reversed => $GetHTMLBooleanAttrChecker->('reversed'),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    compact  => FEATURE_M12N10_REC_DEPRECATED,
    lang     => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    reversed => FEATURE_HTML5_DEFAULT,
    #start => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC_DEPRECATED,
    start    => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    type     => FEATURE_M12N10_REC_DEPRECATED,
  }),
  check_child_element => sub {
    my ($checker, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $state) = @_;
    if ($checker->{minus_elements}{$child_nsuri}{$child_ln}) {
      $checker->{onerror}->(node => $child_el,
                            type => 'element not allowed:minus',
                            level => $checker->{must_level});
    } elsif (not ($checker->{plus_elements}{$child_nsuri}{$child_ln} or
                  ($child_nsuri eq $HTML_NS and $child_ln eq 'li'))) {
      $checker->{onerror}->(node => $child_el,
                            type => 'element not allowed');
    }
  },
  check_child_text => sub {
    my ($checker, $item, $child_node, $has_significant, $state) = @_;
    $checker->{onerror}->(node => $child_node,
                          type => 'character not allowed')
        if $has_significant;
  },
};
1720    
## |ul|: starts from a shallow copy of the |ol| definition (so the
## child checks are shared), then overrides |status| and |check_attrs|.
$Element->{$HTML_NS}{ul} = {
  %{$Element->{$HTML_NS}{ol}},
  status      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    compact => FEATURE_M12N10_REC_DEPRECATED,
    lang    => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    type    => FEATURE_M12N10_REC_DEPRECATED,
  }),
};
1732    
## |li|: content is checked as phrasing content when the |in_menu|
## flag is set and as prose content otherwise.
$Element->{$HTML_NS}{li} = {
  %HTMLProseContentChecker,
  status      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    ## |value| is reported when the parent element exists and is not
    ## an HTML |ol|; it is validated as an integer in every case.
    value => sub {
      my ($checker, $attr) = @_;
      my $parent = $attr->owner_element->manakai_parent_element;
      if (defined $parent) {
        my $ns = $parent->namespace_uri;
        $ns = '' if not defined $ns;
        my $ln = $parent->manakai_local_name;
        if ($ns ne $HTML_NS or $ln ne 'ol') {
          $checker->{onerror}->(node => $attr, level => 'unsupported',
                                type => 'attribute');
        }
      }
      $HTMLIntegerAttrChecker->($checker, $attr);
    }, ## TODO: test
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang  => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    type  => FEATURE_M12N10_REC_DEPRECATED,
    #value => FEATURE_HTML5_DEFAULT | FEATURE_XHTMLBASIC11_CR |
    #    FEATURE_M12N10_REC_DEPRECATED,
    value => FEATURE_HTML5_DEFAULT | FEATURE_XHTMLBASIC11_CR |
        FEATURE_M12N10_REC,
  }),
  check_child_element => sub {
    my ($checker, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $state) = @_;
    my $delegate = $checker->{flag}{in_menu}
        ? \%HTMLPhrasingContentChecker
        : \%HTMLProseContentChecker;
    $delegate->{check_child_element}->(@_);
  },
  check_child_text => sub {
    my ($checker, $item, $child_node, $has_significant, $state) = @_;
    my $delegate = $checker->{flag}{in_menu}
        ? \%HTMLPhrasingContentChecker
        : \%HTMLProseContentChecker;
    $delegate->{check_child_text}->(@_);
  },
};
1779    
## |dl|: groups of one or more |dt| followed by one or more |dd|.
## The element state's |phase| is 'before dt' (nothing seen yet),
## 'in dts' (last child was a |dt|) or 'in dds' (last was a |dd|).
$Element->{$HTML_NS}{dl} = {
  %HTMLChecker,
  status      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    compact => FEATURE_M12N10_REC_DEPRECATED,
    lang    => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    type    => FEATURE_M12N10_REC_DEPRECATED,
  }),
  check_start => sub {
    my ($checker, $item, $state) = @_;
    $state->{phase} = 'before dt';
  },
  check_child_element => sub {
    my ($checker, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $state) = @_;
    if ($checker->{minus_elements}{$child_nsuri}{$child_ln}) {
      $checker->{onerror}->(node => $child_el,
                            type => 'element not allowed:minus',
                            level => $checker->{must_level});
    } elsif ($checker->{plus_elements}{$child_nsuri}{$child_ln}) {
      ## Listed in |plus_elements|: accepted without further checks.
    } else {
      my $phase = $state->{phase};
      unless ($phase eq 'in dds' or $phase eq 'in dts' or
              $phase eq 'before dt') {
        die "check_child_element: Bad |dl| phase: $state->{phase}";
      }

      if ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
        ## A |dt| is acceptable in every phase.
        $state->{phase} = 'in dts';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
        ## A |dd| before any |dt| has no term to describe.
        $checker->{onerror}->(node => $child_el,
                              type => 'ps element missing:dt')
            if $phase eq 'before dt';
        $state->{phase} = 'in dds';
      } else {
        $checker->{onerror}->(node => $child_el,
                              type => 'element not allowed');
      }
    }
  },
  check_child_text => sub {
    my ($checker, $item, $child_node, $has_significant, $state) = @_;
    $checker->{onerror}->(node => $child_node,
                          type => 'character not allowed')
        if $has_significant;
  },
  check_end => sub {
    my ($checker, $item, $state) = @_;
    ## Ending right after a |dt|: that term has no description.
    $checker->{onerror}->(node => $item->{node},
                          type => 'child element missing:dd')
        if $state->{phase} eq 'in dts';

    $HTMLChecker{check_end}->(@_);
  },
};
1849    
## |dt|: phrasing content.
$Element->{$HTML_NS}{dt} = {
  %HTMLPhrasingContentChecker,
  status      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  }),
};
1859    
## |dd|: prose content.
$Element->{$HTML_NS}{dd} = {
  %HTMLProseContentChecker,
  status      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  }),
};
1869    
## |a|: phrasing content, with the elements in
## |$HTMLInteractiveContent| added to the minus set while inside.
##
## |check_attrs| validates the attributes, records whether the
## document has a hyperlink element, and raises the |in_a_href| flag
## when |href| is present.  The previous flag value is kept in the
## element state and put back by |check_end|, so the flag does not
## stay set after the end of the |a| element.
$Element->{$HTML_NS}->{a} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Checkers for the null-namespace attributes specific to |a|.
    ## Built once per element rather than once per attribute.
    my $a_attr_checker = {
      target => $HTMLTargetAttrChecker,
      href => $HTMLURIAttrChecker,
      ping => $HTMLSpaceURIsAttrChecker,
      rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
      media => $HTMLMQAttrChecker,
      hreflang => $HTMLLanguageTagAttrChecker,
      type => $HTMLIMTAttrChecker,
    };

    ## Status of each null-namespace attribute on |a|.
    my $a_attr_status = {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      accesskey => FEATURE_M12N10_REC,
      charset => FEATURE_M12N10_REC,
      coords => FEATURE_M12N10_REC,
      href => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      hreflang => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      media => FEATURE_HTML5_DEFAULT,
      name => FEATURE_M12N10_REC_DEPRECATED,
      onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      ping => FEATURE_HTML5_DEFAULT,
      rel => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      rev => FEATURE_M12N10_REC,
      shape => FEATURE_M12N10_REC,
      tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      target => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      type => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    };

    ## Attribute nodes of the |a|-specific attributes found so far,
    ## keyed by local name.
    my %attr;
    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;
      my $checker;
      if ($attr_ns eq '') {
        $checker = $a_attr_checker->{$attr_ln};
        if ($checker) {
          $attr{$attr_ln} = $attr;
        } else {
          $checker = $HTMLAttrChecker->{$attr_ln};
        }
      }
      $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
          || $AttrChecker->{$attr_ns}->{''};
      if ($checker) {
        $checker->($self, $attr) if ref $checker;
      } elsif ($attr_ns eq '') {
        #
      } else {
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute');
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }

      if ($attr_ns eq '') {
        $self->_attr_status_info ($attr, $a_attr_status->{$attr_ln});
      }
    }

    ## Saved so that |check_end| can restore the flag.
    $element_state->{in_a_href_original} = $self->{flag}->{in_a_href};
    if (defined $attr{href}) {
      $self->{has_hyperlink_element} = 1;
      $self->{flag}->{in_a_href} = 1;
    } else {
      ## These attributes are only allowed together with |href|.
      for (qw/target ping rel media hreflang type/) {
        if (defined $attr{$_}) {
          $self->{onerror}->(node => $attr{$_},
                             type => 'attribute not allowed');
        }
      }
    }
  },
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements ($element_state, $HTMLInteractiveContent);
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    ## Leave |in_a_href| set only if it was already set when this
    ## element started.
    delete $self->{flag}->{in_a_href}
        unless $element_state->{in_a_href_original};

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
1958    
## |q|: phrasing content; |cite| is checked as a URI attribute.
## NOTE: |status| precedes the spliced checker hash here.
$Element->{$HTML_NS}{q} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->({
    cite => $HTMLURIAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    cite => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  }),
};
1971    
## |cite|: phrasing content.
$Element->{$HTML_NS}{cite} = {
  %HTMLPhrasingContentChecker,
  status      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  }),
};
1981    
## |em|: phrasing content.
$Element->{$HTML_NS}{em} = {
  %HTMLPhrasingContentChecker,
  status      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  }),
};
1991    
## |strong|: phrasing content.
$Element->{$HTML_NS}{strong} = {
  %HTMLPhrasingContentChecker,
  status      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  }),
};
2001    
## |small|: phrasing content.
$Element->{$HTML_NS}{small} = {
  %HTMLPhrasingContentChecker,
  status      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  }),
};
2011    
## |big|: phrasing content; its status carries no HTML5 flag.
## This same hash is also installed for |tt| further down.
$Element->{$HTML_NS}{big} = {
  %HTMLPhrasingContentChecker,
  status      => FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  }),
};
2021    
## |mark|: status is HTML5 only; phrasing content.
## NOTE: |status| precedes the spliced checker hash here.
$Element->{$HTML_NS}{mark} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLPhrasingContentChecker,
};
2026    
## |dfn|: phrasing content; nested |dfn| is added to the minus set.
##
## |check_start| also determines the term being defined and reports
## 'duplicate term' when the same term was already defined in the
## document.  The term is, in order of preference:
##   1. the |title| attribute of the |dfn|;
##   2. the |title| attribute of the |abbr| child, provided that this
##      |abbr| is the only element child and there is no child text
##      other than inter-element whitespace;
##   3. the text content of the |dfn|.
$Element->{$HTML_NS}->{dfn} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements ($element_state, {$HTML_NS => {dfn => 1}});

    my $node = $item->{node};
    my $term = $node->get_attribute_ns (undef, 'title');
    unless (defined $term) {
      ## Tracks whether an element child has been seen, whatever its
      ## name, so that a second element child always disqualifies
      ## the |abbr| shortcut (case 2 above).
      my $has_element_child = 0;
      for my $child (@{$node->child_nodes}) {
        if ($child->node_type == 1) { # ELEMENT_NODE
          if ($has_element_child) {
            ## More than one element child: fall back to text content.
            undef $term;
            last;
          }
          $has_element_child = 1;
          if ($child->manakai_local_name eq 'abbr') {
            my $nsuri = $child->namespace_uri;
            if (defined $nsuri and $nsuri eq $HTML_NS) {
              my $attr = $child->get_attribute_node_ns (undef, 'title');
              if ($attr) {
                $term = $attr->value;
              }
            }
          }
        } elsif ($child->node_type == 3 or $child->node_type == 4) {
          ## TEXT_NODE or CDATA_SECTION_NODE
          if ($child->data =~ /\A[\x09-\x0D\x20]+\z/) { # Inter-element whitespace
            next;
          }
          ## Significant text: fall back to text content.
          undef $term;
          last;
        }
      }
      unless (defined $term) {
        $term = $node->text_content;
      }
    }
    if ($self->{term}->{$term}) {
      $self->{onerror}->(node => $node, type => 'duplicate term');
      push @{$self->{term}->{$term}}, $node;
    } else {
      $self->{term}->{$term} = [$node];
    }
    ## ISSUE: The HTML5 algorithm does not work with |ruby| unless |dfn|
    ## has |title|.
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
2085    
## |abbr|: phrasing content.
$Element->{$HTML_NS}{abbr} = {
  %HTMLPhrasingContentChecker,
  status      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  }),
};
2095    
## |acronym|: phrasing content; its status carries no HTML5 flag.
$Element->{$HTML_NS}{acronym} = {
  %HTMLPhrasingContentChecker,
  status      => FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  }),
};
2105    
## |time|: phrasing content.  The date/time string is validated at
## the end tag: it is taken from the |datetime| attribute if there is
## one, and from the element's text content otherwise.
##
## Accepted shapes (sp = skippable spaces, see |$reg_sp| below):
##   sp YYYY-MM-DD sp [T sp] hh:mm[:ss[.fff]] sp [Z sp | (+|-)hh:mm sp]
##   sp hh:mm[:ss[.fff]] sp
## The pattern itself accepts any number of digits per field; wrong
## field lengths and out-of-range values are reported afterwards as
## separate errors.
$Element->{$HTML_NS}->{time} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->({
    datetime => sub { 1 }, # checked in |checker|
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    datetime => FEATURE_HTML5_DEFAULT,
  }),
  ## TODO: Write tests
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    my $attr = $item->{node}->get_attribute_node_ns (undef, 'datetime');
    my $input;      # string to validate
    my $reg_sp;     # pattern for the spaces skipped in this mode
    my $input_node; # node that errors are reported against
    if ($attr) {
      $input = $attr->value;
      $reg_sp = qr/[\x09-\x0D\x20]*/;
      $input_node = $attr;
    } else {
      $input = $item->{node}->text_content;
      $reg_sp = qr/\p{Zs}*/;
      $input_node = $item->{node};

      ## ISSUE: What is the definition for "successfully extracts a date
      ## or time"?  If the algorithm says the string is invalid but
      ## return some date or time, is it "successfully"?
    }

    ## NOTE: |$reg_sp| is interpolated wherever spaces may be skipped,
    ## so that the attribute and content variants differ as set up
    ## above.
    if ($input =~ /
      \A
      $reg_sp
      ([0-9]+) # 1
      (?>
        -([0-9]+) # 2
        -([0-9]+) # 3
        $reg_sp
        (?>
          T
          $reg_sp
        )?
        ([0-9]+) # 4
        :([0-9]+) # 5
        (?>
          :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6
        )?
        $reg_sp
        (?>
          Z
          $reg_sp
        |
          [+-]([0-9]+):([0-9]+) # 7, 8
          $reg_sp
        )?
        \z
      |
        :([0-9]+) # 9
        (?>
          :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10
        )?
        $reg_sp\z
      )
    /x) {
      ## Copy the captures at once; |$lead| is the year in the
      ## date-and-time form and the hour in the time-only form.
      my ($lead, $month, $day, $dt_hour, $dt_minute, $dt_second,
          $tz_hour, $tz_minute, $t_minute, $t_second)
          = ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10);

      my $hour;
      my $minute;
      my $second;
      if (defined $month) { ## YYYY-MM-DD T? hh:mm
        if (length ($lead) != 4 or length ($month) != 2 or
            length ($day) != 2 or
            length ($dt_hour) != 2 or length ($dt_minute) != 2) {
          $self->{onerror}->(node => $input_node,
                             type => 'dateortime:syntax error');
        }

        if (1 <= $month and $month <= 12) {
          ## Days per month, indexed by month number (February as 29;
          ## non-leap years are rejected by the second test).
          $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
              if $day < 1 or
                  $day > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
                      ->[$month];
          $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
              if $month == 2 and $day == 29 and
                  not ($lead % 400 == 0 or
                       ($lead % 4 == 0 and $lead % 100 != 0));
        } else {
          $self->{onerror}->(node => $input_node,
                             type => 'datetime:bad month');
        }

        ($hour, $minute, $second) = ($dt_hour, $dt_minute, $dt_second);

        if (defined $tz_hour) { ## [+-]hh:mm
          if (length ($tz_hour) != 2 or length ($tz_minute) != 2) {
            $self->{onerror}->(node => $input_node,
                               type => 'dateortime:syntax error');
          }

          $self->{onerror}->(node => $input_node,
                             type => 'datetime:bad timezone hour')
              if $tz_hour > 23;
          $self->{onerror}->(node => $input_node,
                             type => 'datetime:bad timezone minute')
              if $tz_minute > 59;
        }
      } else { ## hh:mm
        if (length ($lead) != 2 or length ($t_minute) != 2) {
          $self->{onerror}->(node => $input_node,
                             type => qq'dateortime:syntax error');
        }

        ($hour, $minute, $second) = ($lead, $t_minute, $t_second);
      }

      $self->{onerror}->(node => $input_node, type => 'datetime:bad hour')
          if $hour > 23;
      $self->{onerror}->(node => $input_node, type => 'datetime:bad minute')
          if $minute > 59;

      if (defined $second) { ## s
        ## NOTE: Integer part of second don't have to have length of two.

        ## A seconds field that starts with "." has no integer part.
        if (substr ($second, 0, 1) eq '.') {
          $self->{onerror}->(node => $input_node,
                             type => 'dateortime:syntax error');
        }

        $self->{onerror}->(node => $input_node, type => 'datetime:bad second')
            if $second >= 60;
      }
    } else {
      $self->{onerror}->(node => $input_node,
                         type => 'dateortime:syntax error');
    }

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
2242    
## |meter|: phrasing content.  Each of its six attributes is checked
## as a floating-point number with a range callback that accepts
## every value.
## TODO: "The recommended way of giving the value is to include it as contents of the element"
$Element->{$HTML_NS}{meter} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->({
    value   => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
    min     => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
    low     => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
    high    => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
    max     => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
    optimum => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
  }, {
    %HTMLAttrStatus,
    high    => FEATURE_HTML5_DEFAULT,
    low     => FEATURE_HTML5_DEFAULT,
    max     => FEATURE_HTML5_DEFAULT,
    min     => FEATURE_HTML5_DEFAULT,
    optimum => FEATURE_HTML5_DEFAULT,
    value   => FEATURE_HTML5_DEFAULT,
  }),
};
2263    
## |progress|: phrasing content.  The range callbacks accept
## |value| >= 0 and |max| > 0.
## TODO: recommended to use content
$Element->{$HTML_NS}{progress} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->({
    value => $GetHTMLFloatingPointNumberAttrChecker->(sub { $_[0] >= 0 }),
    max   => $GetHTMLFloatingPointNumberAttrChecker->(sub { $_[0] > 0 }),
  }, {
    %HTMLAttrStatus,
    max   => FEATURE_HTML5_DEFAULT,
    value => FEATURE_HTML5_DEFAULT,
  }),
};
2276    
## |code|: phrasing content.
$Element->{$HTML_NS}{code} = {
  %HTMLPhrasingContentChecker,
  status      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  }),
};
2286    
## |var|: phrasing content.
$Element->{$HTML_NS}{var} = {
  %HTMLPhrasingContentChecker,
  status      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  }),
};
2296    
## |samp|: phrasing content.
$Element->{$HTML_NS}{samp} = {
  %HTMLPhrasingContentChecker,
  status      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  }),
};
2306    
## |kbd|: phrasing content.
$Element->{$HTML_NS}{kbd} = {
  %HTMLPhrasingContentChecker,
  status      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  }),
};
2316    
## |sub|: phrasing content.
$Element->{$HTML_NS}{sub} = {
  %HTMLPhrasingContentChecker,
  status      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  }),
};

## |sup| shares the very same definition hash as |sub| (an alias,
## not a copy).
$Element->{$HTML_NS}{sup} = $Element->{$HTML_NS}{sub};
2328 wakaba 1.1
## |span|: phrasing content.  The |data*| attributes are listed with
## the HTML4 "reserved" status.
$Element->{$HTML_NS}{span} = {
  %HTMLPhrasingContentChecker,
  status      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    datafld      => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datasrc      => FEATURE_HTML4_REC_RESERVED,
    lang         => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  }),
};
2341    
## |i| element definition: %HTMLPhrasingContentChecker with |status| and
## |check_attrs| replaced.
$Element->{$HTML_NS}{i} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},  ## no element-specific attribute checkers
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      ## Overrides the |lang| entry of the merged tables.
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
};

## Aliases: |b| reuses the |i| definition and |tt| reuses the |big|
## definition (same hash references, not copies).
@{$Element->{$HTML_NS}}{qw(b tt)} = @{$Element->{$HTML_NS}}{qw(i big)};
2355    
## |s|, |strike| and |u| all point at one shared definition hash whose
## element status is FEATURE_M12N10_REC_DEPRECATED.
{
  my %s_attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    ## Overrides the |lang| entry of the merged tables.
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  );

  my $s_definition = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_M12N10_REC_DEPRECATED,
    check_attrs => $GetHTMLAttrsChecker->({}, \%s_attr_status),
  };

  $Element->{$HTML_NS}{$_} = $s_definition for qw(s strike u);
}
2369    
## |bdo| element definition.  Runs the generic attribute checks and then
## requires the |dir| attribute to be present.
$Element->{$HTML_NS}{bdo} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## The status table lists attributes individually instead of merging
    ## %HTMLM12NCommonAttrStatus.
    my $check_generic = $GetHTMLAttrsChecker->({}, {
      %HTMLAttrStatus,
      class => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      id => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      style => FEATURE_XHTML10_REC,
      title => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    });
    $check_generic->($self, $item, $element_state);

    ## |dir| is mandatory on |bdo|.
    my $bdo_el = $item->{node};
    $bdo_el->has_attribute_ns (undef, 'dir')
        or $self->{onerror}->(node => $bdo_el,
                              type => 'attribute missing:dir');
  },
  ## ISSUE: The spec does not directly say that |dir| is an enumerated attr.
};
2391    
2392 wakaba 1.29 =pod
2393    
2394     ## TODO:
2395    
2396     +
2397     + <p>Partly because of the confusion described above, authors are
2398     + strongly recommended to always mark up all paragraphs with the
2399     + <code>p</code> element, and to not have any <code>ins</code> or
2400     + <code>del</code> elements that cross across any <span
2401     + title="paragraph">implied paragraphs</span>.</p>
2402     +
2403     (An informative note)
2404    
2405     <p><code>ins</code> elements should not cross <span
2406     + title="paragraph">implied paragraph</span> boundaries.</p>
2407     (normative)
2408    
2409     + <p><code>del</code> elements should not cross <span
2410     + title="paragraph">implied paragraph</span> boundaries.</p>
2411     (normative)
2412    
2413     =cut
2414    
## |ins| element definition: %HTMLTransparentChecker plus checkers and
## status entries for the |cite| and |datetime| attributes.
{
  my %ins_attr_checker = (
    cite => $HTMLURIAttrChecker,
    datetime => $HTMLDatetimeAttrChecker,
  );

  my %ins_attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) }
         qw(cite datetime)),
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  );

  $Element->{$HTML_NS}{ins} = {
    %HTMLTransparentChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->(\%ins_attr_checker,
                                          \%ins_attr_status),
  };
}
2429    
## |del| element definition: like |ins|, with its own |check_end| that
## warns (at "should" level) about missing significant content.
{
  my %del_attr_checker = (
    cite => $HTMLURIAttrChecker,
    datetime => $HTMLDatetimeAttrChecker,
  );

  my %del_attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) }
         qw(cite datetime)),
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  );

  $Element->{$HTML_NS}{del} = {
    %HTMLTransparentChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->(\%del_attr_checker,
                                          \%del_attr_status),
    check_end => sub {
      my ($self, $item, $element_state) = @_;
      ## NOTE: Significantness flag does not propagate.
      ## Nothing is reported when the element had significant content or
      ## when the item is marked transparent.
      unless ($element_state->{has_significant} or $item->{transparent}) {
        $self->{onerror}->(node => $item->{node},
                           level => $self->{should_level},
                           type => 'no significant content');
      }
    },
  };
}
2456    
## |figure| element definition.
##
## State kept in |$element_state|:
##   has_legend_at_first - a |legend| was seen before any other content
##   has_legend          - the |legend| element node seen after other content
##   has_non_legend      - non-|legend| content seen since the last |legend|
$Element->{$HTML_NS}{figure} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_FD,
  ## NOTE: legend, Prose | Prose, legend?
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      $element_state->{has_non_legend} = 1;
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed here; nothing to do.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'legend') {
      ## Work out which |legend| (if any) is in error, updating the state
      ## as we go, and report afterwards.
      my $misplaced_legend;
      if ($element_state->{has_legend_at_first}) {
        ## A leading |legend| exists already: this one is extra.
        $misplaced_legend = $child_el;
      } elsif ($element_state->{has_legend}) {
        ## A later |legend| replaces the earlier trailing one, which is
        ## the one reported.
        $misplaced_legend = $element_state->{has_legend};
        $element_state->{has_legend} = $child_el;
      } elsif ($element_state->{has_non_legend}) {
        ## First |legend| after other content: candidate trailing legend.
        $element_state->{has_legend} = $child_el;
      } else {
        $element_state->{has_legend_at_first} = 1;
      }

      if (defined $misplaced_legend) {
        $self->{onerror}->(node => $misplaced_legend,
                           type => 'element not allowed:figure legend',
                           level => $self->{must_level});
      }
      delete $element_state->{has_non_legend};
    } else {
      $HTMLProseContentChecker{check_child_element}->(@_);
      $element_state->{has_non_legend} = 1 unless $child_is_transparent;
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    ## Only significant text counts as non-|legend| content.
    $element_state->{has_non_legend} = 1 if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## A trailing |legend| followed by more content is an error, unless a
    ## leading |legend| was present (that case is handled when the extra
    ## |legend| is encountered).
    if (not $element_state->{has_legend_at_first}
        and $element_state->{has_legend}
        and $element_state->{has_non_legend}) {
      $self->{onerror}->(node => $element_state->{has_legend},
                         type => 'element not allowed:figure legend',
                         level => $self->{must_level});
    }

    $HTMLProseContentChecker{check_end}->(@_);
    ## ISSUE: |<figure><legend>aa</legend></figure>| should be an error?
  },
};
2515 wakaba 1.8 ## TODO: Test for <nest/> in <figure/>
2516 wakaba 1.1
## |img| element definition.
##
## |check_attrs| runs the generic attribute checks with |img|-specific
## checkers and status entries, then reports a missing |alt| (at "should"
## level) and a missing |src|.
$Element->{$HTML_NS}->{img} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    $GetHTMLAttrsChecker->({
      alt => sub { }, ## NOTE: No syntactical requirement
      src => $HTMLURIAttrChecker,
      usemap => $HTMLUsemapAttrChecker,
      ismap => sub {
        my ($self, $attr, $parent_item) = @_;
        ## |ismap| is reported unless the |in_a_href| flag is set.
        if (not $self->{flag}->{in_a_href}) {
          $self->{onerror}->(node => $attr,
                             type => 'attribute not allowed:ismap');
        }
        ## The boolean-attribute syntax is checked in either case.
        $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_item);
      },
      ## TODO: height
      ## TODO: width
    }, {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      alt => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      border => FEATURE_M12N10_REC_DEPRECATED,
      height => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      hspace => FEATURE_M12N10_REC_DEPRECATED,
      ismap => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      longdesc => FEATURE_M12N10_REC,
      name => FEATURE_M12N10_REC_DEPRECATED,
      src => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      usemap => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      vspace => FEATURE_M12N10_REC_DEPRECATED,
      width => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    ## |$element_state| is forwarded as well, as the |bdo|, |source| and
    ## |map| definitions do when invoking the generated checker.
    })->($self, $item, $element_state);
    unless ($item->{node}->has_attribute_ns (undef, 'alt')) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing:alt',
                         level => $self->{should_level});
    }
    unless ($item->{node}->has_attribute_ns (undef, 'src')) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing:src');
    }
  },
};
2564    
## |iframe| element definition: %HTMLTextChecker with a URI checker for
## |src| and an |iframe|-specific attribute status table.
{
  my %iframe_attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_XHTML10_REC,
    name => FEATURE_M12N10_REC_DEPRECATED,
    (map { ($_ => FEATURE_M12N10_REC) }
         qw(frameborder height longdesc marginheight marginwidth
            scrolling width)),
    (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) }
         qw(class id src title)),
  );

  $Element->{$HTML_NS}{iframe} = {
    %HTMLTextChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    ## NOTE: Not part of M12N10 Strict
    check_attrs => $GetHTMLAttrsChecker->({
      src => $HTMLURIAttrChecker,
    }, \%iframe_attr_status),
  };
}
2589    
## |embed| element definition.
##
## |check_attrs| walks the attributes itself (instead of using
## |$GetHTMLAttrsChecker|) because any null-namespace attribute is accepted
## on |embed|.  It reports a missing |src| at the end.
$Element->{$HTML_NS}->{embed} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Status table for null-namespace attributes.  It does not depend on
    ## the attribute being examined, so it is built once per element rather
    ## than once per attribute.
    my %status_of = (
      %HTMLAttrStatus,
      height => FEATURE_HTML5_DEFAULT,
      src => FEATURE_HTML5_DEFAULT,
      type => FEATURE_HTML5_DEFAULT,
      width => FEATURE_HTML5_DEFAULT,
    );

    my $has_src;
    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;
      my $checker;
      if ($attr_ns eq '') {
        if ($attr_ln eq 'src') {
          $checker = $HTMLURIAttrChecker;
          $has_src = 1;
        } elsif ($attr_ln eq 'type') {
          $checker = $HTMLIMTAttrChecker;
        } else {
          ## TODO: height
          ## TODO: width
          $checker = $HTMLAttrChecker->{$attr_ln}
              || sub { }; ## NOTE: Any local attribute is ok.
        }
      }
      ## Namespaced attributes: exact match first, then the per-namespace
      ## fallback registered under the empty local name.
      $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
          || $AttrChecker->{$attr_ns}->{''};
      if ($checker) {
        $checker->($self, $attr);
      } elsif ($attr_ns eq '') {
        ## Null-namespace attributes always get a checker above.
      } else {
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute');
        ## ISSUE: No conformance criteria for global attributes in the spec
      }

      if ($attr_ns eq '') {
        my $status = $status_of{$attr_ln};
        $self->_attr_status_info ($attr, $status) if $status;
      }
    }

    unless ($has_src) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing:src');
    }
  },
};
2644    
2645 wakaba 1.49 ## TODO:
2646     ## {applet} FEATURE_M12N10_REC_DEPRECATED
2647     ## class, id, title, alt, archive, code, codebase, height, object, width name style,hspace,vspace(xhtml10)
2648    
## |object| element definition.
##
## check_attrs:         generic attribute checks, then requires at least one
##                      of |data| and |type|.
## check_child_element: |param| children are accepted only before any other
##                      content; everything else goes to the prose checker.
## check_child_text:    significant text ends the |param| run.
## check_end:           propagates significance to the real parent state, or
##                      warns when a parentless |object| has no significant
##                      content.
##
## State kept in |$element_state|:
##   has_non_param - set once anything other than |param| has been seen
$Element->{$HTML_NS}->{object} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    $GetHTMLAttrsChecker->({
      data => $HTMLURIAttrChecker,
      type => $HTMLIMTAttrChecker,
      usemap => $HTMLUsemapAttrChecker,
      ## TODO: width
      ## TODO: height
    }, {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align => FEATURE_XHTML10_REC,
      archive => FEATURE_M12N10_REC,
      border => FEATURE_XHTML10_REC,
      classid => FEATURE_M12N10_REC,
      codebase => FEATURE_M12N10_REC,
      codetype => FEATURE_M12N10_REC,
      data => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      datafld => FEATURE_HTML4_REC_RESERVED,
      dataformatas => FEATURE_HTML4_REC_RESERVED,
      datasrc => FEATURE_HTML4_REC_RESERVED,
      declare => FEATURE_M12N10_REC,
      height => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      hspace => FEATURE_XHTML10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      name => FEATURE_M12N10_REC,
      standby => FEATURE_M12N10_REC,
      tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      type => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      usemap => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      vspace => FEATURE_XHTML10_REC,
      width => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    ## |$element_state| is forwarded as well, as the |bdo|, |source| and
    ## |map| definitions do when invoking the generated checker.
    })->($self, $item, $element_state);
    unless ($item->{node}->has_attribute_ns (undef, 'data')) {
      unless ($item->{node}->has_attribute_ns (undef, 'type')) {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:data|type');
      }
    }
  },
  ## NOTE: param*, transparent (Prose)
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      ## A disallowed element is still non-|param| content, so a |param|
      ## following it must be reported.  (This used to set the unrelated
      ## |has_non_legend| flag, which nothing in |object| reads.)
      $element_state->{has_non_param} = 1;
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'param') {
      if ($element_state->{has_non_param}) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:prose',
                           level => $self->{must_level});
      }
    } else {
      $HTMLProseContentChecker{check_child_element}->(@_);
      $element_state->{has_non_param} = 1;
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $element_state->{has_non_param} = 1;
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if ($element_state->{has_significant}) {
      $item->{real_parent_state}->{has_significant} = 1;
    } elsif ($item->{node}->manakai_parent_element) {
      ## NOTE: Transparent.
    } else {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{should_level},
                         type => 'no significant content');
    }
  },
  ## TODO: Tests for <nest/> in <object/>
};
2734 wakaba 1.41 ## ISSUE: Is |<menu><object data><li>aa</li></object></menu>| conforming?
2735     ## What about |<section><object data><style scoped></style>x</object></section>|?
2736     ## |<section><ins></ins><object data><style scoped></style>x</object></section>|?
2737 wakaba 1.1
## |param| element definition.
##
## |check_attrs| runs the generic attribute checks (|name| and |value| have
## no syntactic constraints) and then requires both |name| and |value| to
## be present.
$Element->{$HTML_NS}->{param} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    $GetHTMLAttrsChecker->({
      name => sub { },
      value => sub { },
    }, {
      %HTMLAttrStatus,
      id => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      type => FEATURE_M12N10_REC,
      value => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      valuetype => FEATURE_M12N10_REC,
    ## |$element_state| is forwarded as well, as the |bdo|, |source| and
    ## |map| definitions do when invoking the generated checker.
    })->($self, $item, $element_state);
    unless ($item->{node}->has_attribute_ns (undef, 'name')) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing:name');
    }
    unless ($item->{node}->has_attribute_ns (undef, 'value')) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing:value');
    }
  },
};
2764    
## |video| element definition.
##
## State kept in |$element_state|:
##   allow_source - true while a |source| child is still acceptable (no
##                  |src| attribute and nothing but |source| seen so far)
##   has_source   - 1 once a |source| child has been seen; -1 when one is
##                  required but none has been seen yet
$Element->{$HTML_NS}{video} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->({
    src => $HTMLURIAttrChecker,
    ## TODO: start, loopstart, loopend, end
    ## ISSUE: they MUST be "value time offset"s.  Value?
    ## ISSUE: playcount has no conformance criteria
    autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
    controls => $GetHTMLBooleanAttrChecker->('controls'),
    poster => $HTMLURIAttrChecker, ## TODO: not for audio!
    ## TODO: width, height
  }, {
    %HTMLAttrStatus,
    (map { ($_ => FEATURE_HTML5_LC) }
         qw(autoplay controls end height loopend loopstart playcount
            poster src start width)),
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    ## |source| children are allowed only when there is no |src| attribute.
    my $lacks_src = ! $item->{node}->has_attribute_ns (undef, 'src');
    $element_state->{allow_source} = $lacks_src;
    ## NOTE: It might be set true by |check_element|.
    $element_state->{has_source} ||= $lacks_src * -1;
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      delete $element_state->{allow_source};
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed here; nothing to do.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'source') {
      ## A |source| is an error once it is no longer allowed, but it is
      ## recorded as seen either way.
      if (not $element_state->{allow_source}) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:prose',
                           level => $self->{must_level});
      }
      $element_state->{has_source} = 1;
    } else {
      ## Any other element ends the run of |source| children.
      delete $element_state->{allow_source};
      $HTMLProseContentChecker{check_child_element}->(@_);
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    ## Significant text ends the run of |source| children.
    delete $element_state->{allow_source} if $has_significant;
    $HTMLProseContentChecker{check_child_text}->(@_);
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## -1 means a |source| was required but never seen.
    if ($element_state->{has_source} == -1) {
      $self->{onerror}->(node => $item->{node},
                         type => 'element missing:source',
                         level => $self->{must_level});
    }

    ## The significant-content handling is shared with |object|.
    $Element->{$HTML_NS}->{object}->{check_end}->(@_);
  },
};
2838    
## |audio| element definition: a copy of the |video| definition with
## |status| and |check_attrs| replaced.  The |audio| attribute tables lack
## the |poster|, |height| and |width| entries of |video|.
{
  my %audio_attr_checker = (
    src => $HTMLURIAttrChecker,
    ## TODO: start, loopstart, loopend, end
    ## ISSUE: they MUST be "value time offset"s.  Value?
    ## ISSUE: playcount has no conformance criteria
    autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
    controls => $GetHTMLBooleanAttrChecker->('controls'),
  );

  my %audio_attr_status = (
    %HTMLAttrStatus,
    (map { ($_ => FEATURE_HTML5_LC) }
         qw(autoplay controls end loopend loopstart playcount src start)),
  );

  $Element->{$HTML_NS}{audio} = {
    %{$Element->{$HTML_NS}{video}},
    status => FEATURE_HTML5_LC,
    check_attrs => $GetHTMLAttrsChecker->(\%audio_attr_checker,
                                          \%audio_attr_status),
  };
}
2861    
## |source| element definition.  Runs the generic attribute checks with
## checkers for |src|, |type| and |media|, then requires |src|.
$Element->{$HTML_NS}{source} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    my $check_generic = $GetHTMLAttrsChecker->({
      src => $HTMLURIAttrChecker,
      type => $HTMLIMTAttrChecker,
      media => $HTMLMQAttrChecker,
    }, {
      %HTMLAttrStatus,
      (map { ($_ => FEATURE_HTML5_DEFAULT) } qw(media src type)),
    });
    $check_generic->($self, $item, $element_state);

    ## |src| is mandatory on |source|.
    my $source_el = $item->{node};
    $source_el->has_attribute_ns (undef, 'src')
        or $self->{onerror}->(node => $source_el,
                              type => 'attribute missing:src');
  },
};
2883    
## |canvas| element definition: %HTMLTransparentChecker plus non-negative
## integer checkers for |height| and |width|.
{
  ## Each attribute gets its own checker instance; the callback passed to
  ## the generator always returns true.
  my %canvas_attr_checker = (
    height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
    width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
  );

  my %canvas_attr_status = (
    %HTMLAttrStatus,
    (map { ($_ => FEATURE_HTML5_LC) } qw(height width)),
  );

  $Element->{$HTML_NS}{canvas} = {
    %HTMLTransparentChecker,
    status => FEATURE_HTML5_LC,
    check_attrs => $GetHTMLAttrsChecker->(\%canvas_attr_checker,
                                          \%canvas_attr_status),
  };
}
2896    
## |map| element definition.  The |id| attribute is mandatory and is
## registered both in |$self->{id}| and in |$self->{map}|.
$Element->{$HTML_NS}{map} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $has_id;  ## set by the |id| checker below when |id| is present

    my %map_attr_checker = (
      id => sub {
        ## NOTE: same as global |id=""|, with |$self->{map}| registration
        my ($self, $attr) = @_;
        my $id = $attr->value;
        if (length ($id) == 0) {
          ## NOTE: MUST contain at least one character
          $self->{onerror}->(node => $attr, type => 'empty attribute value');
        } elsif ($self->{id}->{$id}) {
          $self->{onerror}->(node => $attr, type => 'duplicate ID');
          push @{$self->{id}->{$id}}, $attr;
        } else {
          $self->{id}->{$id} = [$attr];
        }
        $self->{onerror}->(node => $attr, type => 'space in ID')
            if $id =~ /[\x09-\x0D\x20]/;
        ## The first |map| seen with a given ID keeps the registration.
        $self->{map}->{$id} ||= $attr;
        $has_id = 1;
      },
    );

    my %map_attr_status = (
      %HTMLAttrStatus,
      (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) }
           qw(class dir id)),
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      name => FEATURE_M12N10_REC_DEPRECATED,
      (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) }
           qw(onclick ondblclick onmousedown onmouseup onmouseover
              onmousemove onmouseout onkeypress onkeydown onkeyup title)),
    );

    $GetHTMLAttrsChecker->(\%map_attr_checker, \%map_attr_status)
        ->($self, $item, $element_state);

    $has_id
        or $self->{onerror}->(node => $item->{node},
                              type => 'attribute missing:id');
  },
};
2948    
## |area| element definition.
##
## |check_attrs| does three things:
##   1. checks every attribute, remembering the |area|-specific ones;
##   2. checks which attributes are required or forbidden depending on
##      whether |href| is present;
##   3. checks |coords| against the shape selected by |shape|.
$Element->{$HTML_NS}->{area} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my %attr;    ## |area|-specific null-namespace attributes seen, by name
    my $coords;  ## array ref of |coords| values; undef on syntax error

    ## The two tables below do not depend on the attribute being examined,
    ## so they are built once per element rather than once per attribute.
    my $area_attr_checker = {
      alt => sub { },
      ## NOTE: |alt| value has no conformance criteria.
      shape => $GetHTMLEnumeratedAttrChecker->({
        circ => -1, circle => 1,
        default => 1,
        poly => 1, polygon => -1,
        rect => 1, rectangle => -1,
      }),
      coords => sub {
        my ($self, $attr) = @_;
        my $value = $attr->value;
        ## Comma-separated list of optionally negative integers.
        if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
          $coords = [split /,/, $value];
        } else {
          $self->{onerror}->(node => $attr,
                             type => 'coords:syntax error');
        }
      },
      target => $HTMLTargetAttrChecker,
      href => $HTMLURIAttrChecker,
      ping => $HTMLSpaceURIsAttrChecker,
      rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
      media => $HTMLMQAttrChecker,
      hreflang => $HTMLLanguageTagAttrChecker,
      type => $HTMLIMTAttrChecker,
    };
    my $area_attr_status = {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      accesskey => FEATURE_M12N10_REC,
      alt => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      coords => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      href => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      hreflang => FEATURE_HTML5_DEFAULT,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      media => FEATURE_HTML5_DEFAULT,
      nohref => FEATURE_M12N10_REC,
      onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      ping => FEATURE_HTML5_DEFAULT,
      rel => FEATURE_HTML5_DEFAULT,
      shape => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      target => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      type => FEATURE_HTML5_DEFAULT,
    };

    ## 1. Per-attribute checks.
    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;
      my $checker;
      if ($attr_ns eq '') {
        $checker = $area_attr_checker->{$attr_ln};
        if ($checker) {
          $attr{$attr_ln} = $attr;
        } else {
          $checker = $HTMLAttrChecker->{$attr_ln};
        }
      }
      $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
          || $AttrChecker->{$attr_ns}->{''};
      if ($checker) {
        $checker->($self, $attr) if ref $checker;
      } elsif ($attr_ns eq '') {
        #
      } else {
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute');
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }

      if ($attr_ns eq '') {
        $self->_attr_status_info ($attr, $area_attr_status->{$attr_ln});
      }
    }

    ## 2. |href|-dependent requirements.
    if (defined $attr{href}) {
      $self->{has_hyperlink_element} = 1;
      unless (defined $attr{alt}) {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:alt');
      }
    } else {
      ## Without |href| none of these attributes may be specified.
      for (qw/target ping rel media hreflang type alt/) {
        if (defined $attr{$_}) {
          $self->{onerror}->(node => $attr{$_},
                             type => 'attribute not allowed');
        }
      }
    }

    ## 3. |coords| versus |shape|.  A missing or unrecognized |shape| is
    ## treated as a rectangle.
    my $shape = 'rectangle';
    if (defined $attr{shape}) {
      $shape = {
        circ => 'circle', circle => 'circle',
        default => 'default',
        poly => 'polygon', polygon => 'polygon',
        rect => 'rectangle', rectangle => 'rectangle',
      }->{lc $attr{shape}->value} || 'rectangle';
      ## TODO: ASCII lowercase?
    }

    if ($shape eq 'default') {
      ## The default shape takes no coordinates.
      if (defined $attr{coords}) {
        $self->{onerror}->(node => $attr{coords},
                           type => 'attribute not allowed');
      }
    } elsif (not defined $attr{coords}) {
      ## Circle, polygon and rectangle all require |coords|.
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing:coords');
    } elsif (not defined $coords) {
      ## NOTE: A syntax error has been reported.
    } elsif ($shape eq 'circle') {
      ## x, y, radius; the radius must not be negative.
      if (@$coords == 3) {
        if ($coords->[2] < 0) {
          $self->{onerror}->(node => $attr{coords},
                             type => 'coords:out of range:2');
        }
      } else {
        $self->{onerror}->(node => $attr{coords},
                           type => 'coords:number:3:'.@$coords);
      }
    } elsif ($shape eq 'polygon') {
      ## At least three points, i.e. an even count of six or more values.
      if (@$coords >= 6) {
        unless (@$coords % 2 == 0) {
          $self->{onerror}->(node => $attr{coords},
                             type => 'coords:number:even:'.@$coords);
        }
      } else {
        $self->{onerror}->(node => $attr{coords},
                           type => 'coords:number:>=6:'.@$coords);
      }
    } elsif ($shape eq 'rectangle') {
      ## Four values; the first pair must be strictly less than the
      ## second pair, component by component.
      if (@$coords == 4) {
        unless ($coords->[0] < $coords->[2]) {
          $self->{onerror}->(node => $attr{coords},
                             type => 'coords:out of range:0');
        }
        unless ($coords->[1] < $coords->[3]) {
          $self->{onerror}->(node => $attr{coords},
                             type => 'coords:out of range:1');
        }
      } else {
        $self->{onerror}->(node => $attr{coords},
                           type => 'coords:number:4:'.@$coords);
      }
    }
  },
};
3126     ## TODO: only in map
3127    
3128     $Element->{$HTML_NS}->{table} = { ## Checker definition for the HTML |table| element.
3129 wakaba 1.40 %HTMLChecker,
3130 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3131     check_attrs => $GetHTMLAttrsChecker->({}, { ## First hash: per-attribute checkers (none here); second hash: per-attribute status flags.
3132     %HTMLAttrStatus,
3133     %HTMLM12NCommonAttrStatus,
3134     align => FEATURE_M12N10_REC_DEPRECATED,
3135     bgcolor => FEATURE_M12N10_REC_DEPRECATED,
3136     border => FEATURE_M12N10_REC,
3137     cellpadding => FEATURE_M12N10_REC,
3138     cellspacing => FEATURE_M12N10_REC,
3139     datafld => FEATURE_HTML4_REC_RESERVED,
3140     dataformatas => FEATURE_HTML4_REC_RESERVED,
3141     datapagesize => FEATURE_M12N10_REC,
3142     datasrc => FEATURE_HTML4_REC_RESERVED,
3143     frame => FEATURE_M12N10_REC,
3144 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3145 wakaba 1.49 rules => FEATURE_M12N10_REC,
3146     summary => FEATURE_M12N10_REC,
3147     width => FEATURE_M12N10_REC,
3148     }),
3149 wakaba 1.40 check_start => sub { ## Every |table| starts in the 'before caption' phase.
3150     my ($self, $item, $element_state) = @_;
3151     $element_state->{phase} = 'before caption';
3152     },
3153     check_child_element => sub { ## Child-order state machine driven by $element_state->{phase}; out-of-order children are reported as 'element not allowed'.
3154     my ($self, $item, $child_el, $child_nsuri, $child_ln,
3155     $child_is_transparent, $element_state) = @_;
3156     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
3157     $self->{onerror}->(node => $child_el,
3158     type => 'element not allowed:minus',
3159     level => $self->{must_level});
3160     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
3161     #
3162     } elsif ($element_state->{phase} eq 'in tbodys') {
3163     if ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
3164     #$element_state->{phase} = 'in tbodys';
3165     } elsif (not $element_state->{has_tfoot} and
3166     $child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
3167     $element_state->{phase} = 'after tfoot'; ## A |tfoot| after the bodies is final: the 'after tfoot' phase rejects every further child.
3168     $element_state->{has_tfoot} = 1;
3169     } else {
3170     $self->{onerror}->(node => $child_el, type => 'element not allowed');
3171     }
3172     } elsif ($element_state->{phase} eq 'in trs') {
3173     if ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
3174     #$element_state->{phase} = 'in trs';
3175     } elsif (not $element_state->{has_tfoot} and
3176     $child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
3177     $element_state->{phase} = 'after tfoot';
3178     $element_state->{has_tfoot} = 1;
3179     } else {
3180     $self->{onerror}->(node => $child_el, type => 'element not allowed');
3181     }
3182     } elsif ($element_state->{phase} eq 'after thead') {
3183     if ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
3184     $element_state->{phase} = 'in tbodys';
3185     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
3186     $element_state->{phase} = 'in trs';
3187     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
3188     $element_state->{phase} = 'in tbodys'; ## A |tfoot| placed before the body: |tbody| children may still follow, but has_tfoot blocks a second |tfoot|.
3189     $element_state->{has_tfoot} = 1;
3190     } else {
3191     $self->{onerror}->(node => $child_el, type => 'element not allowed');
3192     }
3193     } elsif ($element_state->{phase} eq 'in colgroup') {
3194     if ($child_nsuri eq $HTML_NS and $child_ln eq 'colgroup') {
3195     $element_state->{phase} = 'in colgroup';
3196     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'thead') {
3197     $element_state->{phase} = 'after thead';
3198     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
3199     $element_state->{phase} = 'in tbodys';
3200     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
3201     $element_state->{phase} = 'in trs';
3202     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
3203     $element_state->{phase} = 'in tbodys';
3204     $element_state->{has_tfoot} = 1;
3205     } else {
3206     $self->{onerror}->(node => $child_el, type => 'element not allowed');
3207     }
3208     } elsif ($element_state->{phase} eq 'before caption') {
3209     if ($child_nsuri eq $HTML_NS and $child_ln eq 'caption') {
3210     $element_state->{phase} = 'in colgroup'; ## After the caption, the same children are accepted as in the 'in colgroup' phase.
3211     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'colgroup') {
3212     $element_state->{phase} = 'in colgroup';
3213     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'thead') {
3214     $element_state->{phase} = 'after thead';
3215     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
3216     $element_state->{phase} = 'in tbodys';
3217     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
3218     $element_state->{phase} = 'in trs';
3219     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
3220     $element_state->{phase} = 'in tbodys';
3221     $element_state->{has_tfoot} = 1;
3222     } else {
3223     $self->{onerror}->(node => $child_el, type => 'element not allowed');
3224     }
3225     } elsif ($element_state->{phase} eq 'after tfoot') {
3226     $self->{onerror}->(node => $child_el, type => 'element not allowed'); ## Nothing is accepted once the 'after tfoot' phase is reached.
3227     } else {
3228     die "check_child_element: Bad |table| phase: $element_state->{phase}"; ## Unknown phase name: internal error, not a document error.
3229     }
3230     },
3231     check_child_text => sub { ## Text flagged as significant by the caller is not allowed directly in |table|.
3232     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
3233     if ($has_significant) {
3234     $self->{onerror}->(node => $child_node, type => 'character not allowed');
3235 wakaba 1.1 }
3236 wakaba 1.40 },
3237     check_end => sub {
3238     my ($self, $item, $element_state) = @_;
3239 wakaba 1.1 
3240     ## Table model errors
3241     require Whatpm::HTMLTable;
3242 wakaba 1.40 Whatpm::HTMLTable->form_table ($item->{node}, sub { ## The callback relays each reported error with a 'table:' prefix on its type.
3243 wakaba 1.1 my %opt = @_;
3244     $self->{onerror}->(type => 'table:'.$opt{type}, node => $opt{node});
3245     });
3246 wakaba 1.40 push @{$self->{return}->{table}}, $item->{node}; ## Record this table node in the checker's return data.
3247 wakaba 1.1 
3248 wakaba 1.40 $HTMLChecker{check_end}->(@_); ## Then run the |check_end| taken from %HTMLChecker with the same arguments.
3249 wakaba 1.1 },
3250     };
3251    
3252     $Element->{$HTML_NS}->{caption} = { ## Checker definition for the HTML |caption| element; content handling comes from %HTMLPhrasingContentChecker.
3253 wakaba 1.40 %HTMLPhrasingContentChecker,
3254 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3255     check_attrs => $GetHTMLAttrsChecker->({}, { ## No element-specific attribute checkers; the second hash lists per-attribute status flags.
3256     %HTMLAttrStatus,
3257     %HTMLM12NCommonAttrStatus,
3258     align => FEATURE_M12N10_REC_DEPRECATED,
3259 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3260 wakaba 1.49 }),
3261 wakaba 1.1 };
3262    
3263     $Element->{$HTML_NS}->{colgroup} = { ## Checker definition for the HTML |colgroup| element.
3264 wakaba 1.40 %HTMLEmptyChecker, ## Base entries; the keys defined below replace any same-named entries from this hash.
3265 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3266 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
3267 wakaba 1.1 span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }), ## The predicate handed to the checker factory is true only for values greater than zero.
3268     ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
3269     ## TODO: "attribute not supported" if |col|.
3270     ## ISSUE: MUST NOT if any |col|?
3271     ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
3272 wakaba 1.49 }, {
3273     %HTMLAttrStatus,
3274     %HTMLM12NCommonAttrStatus,
3275     align => FEATURE_M12N10_REC,
3276     char => FEATURE_M12N10_REC,
3277     charoff => FEATURE_M12N10_REC,
3278 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3279     span => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3280 wakaba 1.49 valign => FEATURE_M12N10_REC,
3281     width => FEATURE_M12N10_REC,
3282 wakaba 1.1 }),
3283 wakaba 1.40 check_child_element => sub { ## Accepts only HTML |col| children (plus anything listed in plus_elements); everything else is an error.
3284     my ($self, $item, $child_el, $child_nsuri, $child_ln,
3285     $child_is_transparent, $element_state) = @_;
3286     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
3287     $self->{onerror}->(node => $child_el,
3288     type => 'element not allowed:minus',
3289     level => $self->{must_level});
3290     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
3291     #
3292     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'col') {
3293     #
3294     } else {
3295     $self->{onerror}->(node => $child_el, type => 'element not allowed');
3296     }
3297     },
3298     check_child_text => sub { ## Text flagged as significant by the caller is reported as 'character not allowed'.
3299     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
3300     if ($has_significant) {
3301     $self->{onerror}->(node => $child_node, type => 'character not allowed');
3302 wakaba 1.1 }
3303     },
3304     };
3305    
3306     $Element->{$HTML_NS}->{col} = { ## Checker definition for the HTML |col| element; content handling comes from %HTMLEmptyChecker.
3307 wakaba 1.40 %HTMLEmptyChecker,
3308 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3309 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
3310 wakaba 1.1 span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }), ## The predicate handed to the checker factory is true only for values greater than zero.
3311 wakaba 1.49 }, {
3312     %HTMLAttrStatus,
3313     %HTMLM12NCommonAttrStatus,
3314     align => FEATURE_M12N10_REC,
3315     char => FEATURE_M12N10_REC,
3316     charoff => FEATURE_M12N10_REC,
3317 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3318     span => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3319 wakaba 1.49 valign => FEATURE_M12N10_REC,
3320     width => FEATURE_M12N10_REC,
3321 wakaba 1.1 }),
3322     };
3323    
3324     $Element->{$HTML_NS}->{tbody} = { ## Checker definition for the HTML |tbody| element; |thead| and |tfoot| copy this definition.
3325 wakaba 1.40 %HTMLChecker,
3326 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3327     check_attrs => $GetHTMLAttrsChecker->({}, { ## No element-specific attribute checkers; the second hash lists per-attribute status flags.
3328     %HTMLAttrStatus,
3329     %HTMLM12NCommonAttrStatus,
3330     align => FEATURE_M12N10_REC,
3331     char => FEATURE_M12N10_REC,
3332     charoff => FEATURE_M12N10_REC,
3333 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3334 wakaba 1.49 valign => FEATURE_M12N10_REC,
3335     }),
3336 wakaba 1.40 check_child_element => sub { ## Accepts only HTML |tr| children (plus anything listed in plus_elements).
3337     my ($self, $item, $child_el, $child_nsuri, $child_ln,
3338     $child_is_transparent, $element_state) = @_;
3339     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
3340     $self->{onerror}->(node => $child_el,
3341     type => 'element not allowed:minus',
3342     level => $self->{must_level});
3343     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
3344     #
3345     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
3346     $element_state->{has_tr} = 1; ## Remembered so that |check_end| can report a row group without any |tr|.
3347     } else {
3348     $self->{onerror}->(node => $child_el, type => 'element not allowed');
3349     }
3350     },
3351     check_child_text => sub { ## Text flagged as significant by the caller is reported as 'character not allowed'.
3352     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
3353     if ($has_significant) {
3354     $self->{onerror}->(node => $child_node, type => 'character not allowed');
3355 wakaba 1.1 }
3356 wakaba 1.40 },
3357     check_end => sub { ## Requires at least one |tr| child, then runs the |check_end| from %HTMLChecker.
3358     my ($self, $item, $element_state) = @_;
3359     unless ($element_state->{has_tr}) {
3360     $self->{onerror}->(node => $item->{node},
3361     type => 'child element missing:tr');
3362 wakaba 1.1 }
3363 wakaba 1.40 
3364     $HTMLChecker{check_end}->(@_);
3365 wakaba 1.1 },
3366     };
3367    
3368     $Element->{$HTML_NS}->{thead} = { ## |thead| is checked with a shallow copy of the |tbody| definition.
3369 wakaba 1.40 %{$Element->{$HTML_NS}->{tbody}},
3370 wakaba 1.1 };
3371    
3372     $Element->{$HTML_NS}->{tfoot} = { ## |tfoot| is checked with a shallow copy of the |tbody| definition.
3373 wakaba 1.40 %{$Element->{$HTML_NS}->{tbody}},
3374 wakaba 1.1 };
3375    
3376     $Element->{$HTML_NS}->{tr} = { ## Checker definition for the HTML |tr| element.
3377 wakaba 1.40 %HTMLChecker,
3378 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3379     check_attrs => $GetHTMLAttrsChecker->({}, { ## No element-specific attribute checkers; the second hash lists per-attribute status flags.
3380     %HTMLAttrStatus,
3381     %HTMLM12NCommonAttrStatus,
3382     align => FEATURE_M12N10_REC,
3383     bgcolor => FEATURE_M12N10_REC_DEPRECATED,
3384     char => FEATURE_M12N10_REC,
3385     charoff => FEATURE_M12N10_REC,
3386 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3387 wakaba 1.49 valign => FEATURE_M12N10_REC,
3388     }),
3389 wakaba 1.40 check_child_element => sub { ## Accepts only HTML |td| and |th| children (plus anything listed in plus_elements).
3390     my ($self, $item, $child_el, $child_nsuri, $child_ln,
3391     $child_is_transparent, $element_state) = @_;
3392     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
3393     $self->{onerror}->(node => $child_el,
3394     type => 'element not allowed:minus',
3395     level => $self->{must_level});
3396     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
3397     #
3398     } elsif ($child_nsuri eq $HTML_NS and
3399     ($child_ln eq 'td' or $child_ln eq 'th')) {
3400     $element_state->{has_cell} = 1; ## Remembered so that |check_end| can report a row without any cell.
3401     } else {
3402     $self->{onerror}->(node => $child_el, type => 'element not allowed');
3403     }
3404     },
3405     check_child_text => sub { ## Text flagged as significant by the caller is reported as 'character not allowed'.
3406     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
3407     if ($has_significant) {
3408     $self->{onerror}->(node => $child_node, type => 'character not allowed');
3409 wakaba 1.1 }
3410 wakaba 1.40 },
3411     check_end => sub { ## Requires at least one |td| or |th| child, then runs the |check_end| from %HTMLChecker.
3412     my ($self, $item, $element_state) = @_;
3413     unless ($element_state->{has_cell}) {
3414     $self->{onerror}->(node => $item->{node},
3415     type => 'child element missing:td|th');
3416 wakaba 1.1 }
3417 wakaba 1.40 
3418     $HTMLChecker{check_end}->(@_);
3419 wakaba 1.1 },
3420     };
3421    
3422     $Element->{$HTML_NS}->{td} = { ## Checker definition for the HTML |td| element; content handling comes from %HTMLProseContentChecker.
3423 wakaba 1.40 %HTMLProseContentChecker,
3424 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3425 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
3426 wakaba 1.1 colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }), ## The predicate handed to the checker factory is true only for values greater than zero.
3427     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
3428 wakaba 1.49 }, {
3429     %HTMLAttrStatus,
3430     %HTMLM12NCommonAttrStatus,
3431     abbr => FEATURE_M12N10_REC,
3432     align => FEATURE_M12N10_REC,
3433     axis => FEATURE_M12N10_REC,
3434     bgcolor => FEATURE_M12N10_REC_DEPRECATED,
3435     char => FEATURE_M12N10_REC,
3436     charoff => FEATURE_M12N10_REC,
3437 wakaba 1.50 colspan => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3438 wakaba 1.49 headers => FEATURE_M12N10_REC,
3439     height => FEATURE_M12N10_REC_DEPRECATED,
3440 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3441 wakaba 1.49 nowrap => FEATURE_M12N10_REC_DEPRECATED,
3442 wakaba 1.50 rowspan => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3443 wakaba 1.49 scope => FEATURE_M12N10_REC,
3444     valign => FEATURE_M12N10_REC,
3445     width => FEATURE_M12N10_REC_DEPRECATED,
3446 wakaba 1.1 }),
3447     };
3448    
3449     $Element->{$HTML_NS}->{th} = { ## Checker definition for the HTML |th| element; content handling comes from %HTMLPhrasingContentChecker.
3450 wakaba 1.40 %HTMLPhrasingContentChecker,
3451 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3452 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
3453 wakaba 1.1 colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }), ## The predicate handed to the checker factory is true only for values greater than zero.
3454     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
3455     scope => $GetHTMLEnumeratedAttrChecker ## Enumerated attribute: only the four keywords listed below are registered.
3456     ->({row => 1, col => 1, rowgroup => 1, colgroup => 1}),
3457 wakaba 1.49 }, {
3458     %HTMLAttrStatus,
3459     %HTMLM12NCommonAttrStatus,
3460     abbr => FEATURE_M12N10_REC,
3461     align => FEATURE_M12N10_REC,
3462     axis => FEATURE_M12N10_REC,
3463     bgcolor => FEATURE_M12N10_REC_DEPRECATED,
3464     char => FEATURE_M12N10_REC,
3465     charoff => FEATURE_M12N10_REC,
3466 wakaba 1.50 colspan => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3467 wakaba 1.49 headers => FEATURE_M12N10_REC,
3468     height => FEATURE_M12N10_REC_DEPRECATED,
3469 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3470 wakaba 1.49 nowrap => FEATURE_M12N10_REC_DEPRECATED,
3471 wakaba 1.50 rowspan => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3472     scope => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3473 wakaba 1.49 valign => FEATURE_M12N10_REC,
3474     width => FEATURE_M12N10_REC_DEPRECATED,
3475 wakaba 1.1 }),
3476     };
3477    
3478 wakaba 1.52 my $AttrCheckerNotImplemented = sub { ## Placeholder attribute checker: performs no validation and reports the attribute at level 'unsupported'.
3479     my ($self, $attr) = @_;
3480     $self->{onerror}->(node => $attr, level => 'unsupported',
3481     type => 'attribute');
3482     };
3483    
3484     $Element->{$HTML_NS}->{form} = { ## Checker definition for the HTML |form| element (HTML4/M12N attributes plus Web Forms 2.0 additions).
3485 wakaba 1.56 %HTMLProseContentChecker, ## NOTE: Flow* [WF2]
3486     ## TODO: form in form is allowed in XML [WF2]
3487 wakaba 1.52 status => FEATURE_WF2 | FEATURE_M12N10_REC,
3488     check_attrs => $GetHTMLAttrsChecker->({
3489 wakaba 1.56 accept => $AttrCheckerNotImplemented, ## TODO: ContentTypes [WF2]
3490 wakaba 1.52 'accept-charset' => $AttrCheckerNotImplemented, ## TODO: Charsets
3491     action => $HTMLURIAttrChecker, ## TODO: "User agent behavior for a value other than HTTP URI is undefined" [HTML4]
3492 wakaba 1.56 data => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
3493     enctype => $HTMLIMTAttrChecker, ## TODO: "multipart/form-data" should be used when type=file is used [HTML4] ## TODO: MUST NOT parameter [WF2]
3494     method => $GetHTMLEnumeratedAttrChecker->({
3495     get => 1, post => 1, put => 1, delete => 1,
3496     }),
3497 wakaba 1.52 ## NOTE: "get" SHOULD be used for idempotent submission,
3498     ## "post" SHOULD be used otherwise [HTML4]. This cannot be tested.
3499     name => sub { }, # CDATA in HTML4 ## TODO: must be same as |id| (informative!) [XHTML10]
3500 wakaba 1.56 onreceived => $HTMLEventHandlerAttrChecker,
3501     replace => $GetHTMLEnumeratedAttrChecker->({document => 1, values => 1}),
3502 wakaba 1.52 target => $HTMLTargetAttrChecker,
3503     ## TODO: Warn for combination whose behavior is not defined.
3504     }, { ## Second hash: per-attribute status flags.
3505     %HTMLAttrStatus,
3506     %HTMLM12NCommonAttrStatus,
3507 wakaba 1.56 accept => FEATURE_WF2 | FEATURE_M12N10_REC,
3508 wakaba 1.52 'accept-charset' => FEATURE_M12N10_REC,
3509 wakaba 1.56 action => FEATURE_WF2 | FEATURE_M12N10_REC,
3510     data => FEATURE_WF2,
3511     enctype => FEATURE_WF2 | FEATURE_M12N10_REC,
3512 wakaba 1.52 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3513 wakaba 1.56 method => FEATURE_WF2 | FEATURE_M12N10_REC,
3514 wakaba 1.52 name => FEATURE_M12N10_REC_DEPRECATED,
3515 wakaba 1.56 onreceived => FEATURE_WF2,
3516 wakaba 1.52 onreset => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3517     onsubmit => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3518 wakaba 1.56 replace => FEATURE_WF2,
3519 wakaba 1.52 target => FEATURE_M12N10_REC,
3520     }),
3521     ## TODO: Tests
3522     ## TODO: Tests for <nest/> in <form>
3523     };
3524    
3525     $Element->{$HTML_NS}->{fieldset} = { ## Checker definition for the HTML |fieldset| element.
3526     %HTMLProseContentChecker, ## NOTE: legend, %Flow; ## TODO: legend
3527     status => FEATURE_WF2 | FEATURE_M12N10_REC,
3528 wakaba 1.56 check_attrs => $GetHTMLAttrsChecker->({
3529     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
3530     ## TODO: form [WF2]
3531     }, { ## Second hash: per-attribute status flags (|form| has a status but, per the TODO above, no checker yet).
3532 wakaba 1.52 %HTMLAttrStatus,
3533     %HTMLM12NCommonAttrStatus,
3534 wakaba 1.56 disabled => FEATURE_WF2,
3535     form => FEATURE_WF2,
3536 wakaba 1.52 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3537     }),
3538     ## TODO: Tests
3539     ## TODO: Tests for <nest/> in <fieldset>
3540     };
3541    
3542     $Element->{$HTML_NS}->{input} = { ## Checker definition for the HTML |input| element (HTML4/M12N attributes plus Web Forms 2.0 additions).
3543 wakaba 1.56 %HTMLEmptyChecker, ## MUST [WF2]
3544 wakaba 1.52 status => FEATURE_WF2 | FEATURE_M12N10_REC,
3545     check_attrs => $GetHTMLAttrsChecker->({
3546 wakaba 1.56 accept => $AttrCheckerNotImplemented, ## TODO: ContentTypes [WF2]
3547 wakaba 1.52 accesskey => $AttrCheckerNotImplemented, ## TODO: Character
3548     ## TODO: "Note. Authors should consider the input method of the expected reader when specifying an accesskey." [HTML4]
3549     ## "We recommend that authors include the access key in label text or wherever the access key is to apply." [HTML4]
3550 wakaba 1.56 action => $HTMLURIAttrChecker,
3551 wakaba 1.52 align => $GetHTMLEnumeratedAttrChecker->({
3552     top => 1, middle => 1, bottom => 1, left => 1, right => 1,
3553     }),
3554     alt => sub {}, ## NOTE: Text [M12N] ## TODO: |alt| should be provided for |type=image| [HTML4]
3555     ## NOTE: HTML4 has a "should" for accessibility, which cannot be tested
3556     ## here.
3557 wakaba 1.56 autocomplete => $GetHTMLEnumeratedAttrChecker->({on => 1, off => 1}),
3558     autofocus => $GetHTMLBooleanAttrChecker->('autofocus'),
3559 wakaba 1.52 checked => $GetHTMLBooleanAttrChecker->('checked'),
3560     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
3561 wakaba 1.56 enctype => $HTMLIMTAttrChecker,
3562     ## TODO: form [WF2]
3563     ## TODO: inputmode [WF2]
3564 wakaba 1.52 ismap => $GetHTMLBooleanAttrChecker->('ismap'),
3565 wakaba 1.56 ## TODO: list [WF2]
3566     ## TODO: max [WF2]
3567 wakaba 1.52 maxlength => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
3568 wakaba 1.56 method => $GetHTMLEnumeratedAttrChecker->({
3569     get => 1, post => 1, put => 1, delete => 1,
3570     }),
3571     ## TODO: min [WF2]
3572 wakaba 1.52 name => sub {}, ## NOTE: CDATA [M12N]
3573     readonly => $GetHTMLBooleanAttrChecker->('readonly'),
3574 wakaba 1.56 replace => $GetHTMLEnumeratedAttrChecker->({document => 1, values => 1}),
3575     required => $GetHTMLBooleanAttrChecker->('required'),
3576 wakaba 1.52 size => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
3577     src => $HTMLURIAttrChecker,
3578 wakaba 1.56 ## TODO: step [WF2]
3579     target => $HTMLTargetAttrChecker,
3580     ## TODO: template
3581 wakaba 1.52 type => $GetHTMLEnumeratedAttrChecker->({
3582     text => 1, password => 1, checkbox => 1, radio => 1, submit => 1,
3583     reset => 1, file => 1, hidden => 1, image => 1, button => 1,
3584 wakaba 1.56 ## [WF2]
3585     datetime => 1, 'datetime-local' => 1, date => 1, month => 1, week => 1, ## The WF2 keyword is |datetime| (previously misspelled "datatime" here).
3586     time => 1, number => 1, range => 1, email => 1, url => 1,
3587     add => 1, remove => 1, 'move-up' => 1, 'move-down' => 1,
3588 wakaba 1.52 }),
3589     usemap => $HTMLUsemapAttrChecker,
3590 wakaba 1.56 value => sub {}, ## NOTE: CDATA [M12N] ## TODO: "optional except when the type attribute has the value "radio" or "checkbox"" [HTML4] ## TODO: constraints [WF2]
3591     ## TODO: "authors should ensure that in each set of radio buttons that one is initially "on"." [HTML4] [WF2]
3592 wakaba 1.52 }, { ## NOTE(review): |enctype|, |replace| and |target| have checkers above but no status entry below (|button| lists all three) -- confirm whether intentional.
3593     %HTMLAttrStatus,
3594     %HTMLM12NCommonAttrStatus,
3595 wakaba 1.56 accept => FEATURE_WF2 | FEATURE_M12N10_REC,
3596 wakaba 1.52 accesskey => FEATURE_M12N10_REC,
3597 wakaba 1.56 action => FEATURE_WF2,
3598 wakaba 1.52 align => FEATURE_M12N10_REC_DEPRECATED,
3599     alt => FEATURE_M12N10_REC,
3600 wakaba 1.56 autocomplete => FEATURE_WF2,
3601     autofocus => FEATURE_WF2,
3602 wakaba 1.52 checked => FEATURE_M12N10_REC,
3603     datafld => FEATURE_HTML4_REC_RESERVED,
3604     dataformatas => FEATURE_HTML4_REC_RESERVED,
3605     datasrc => FEATURE_HTML4_REC_RESERVED,
3606 wakaba 1.56 disabled => FEATURE_WF2 | FEATURE_M12N10_REC,
3607     form => FEATURE_WF2,
3608     inputmode => FEATURE_WF2 | FEATURE_XHTMLBASIC11_CR,
3609 wakaba 1.52 ismap => FEATURE_M12N10_REC,
3610     lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3611 wakaba 1.56 list => FEATURE_WF2,
3612     max => FEATURE_WF2,
3613     maxlength => FEATURE_WF2 | FEATURE_M12N10_REC,
3614     method => FEATURE_WF2,
3615     min => FEATURE_WF2,
3616 wakaba 1.52 name => FEATURE_M12N10_REC,
3617     onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3618     onchange => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3619     onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3620     onselect => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3621 wakaba 1.56 readonly => FEATURE_WF2 | FEATURE_M12N10_REC,
3622     required => FEATURE_WF2,
3623     size => FEATURE_WF2_DEPRECATED | FEATURE_M12N10_REC,
3624 wakaba 1.52 src => FEATURE_M12N10_REC,
3625 wakaba 1.56 step => FEATURE_WF2,
3626 wakaba 1.52 tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3627 wakaba 1.56 template => FEATURE_WF2,
3628 wakaba 1.52 type => FEATURE_M12N10_REC,
3629     usemap => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
3630     value => FEATURE_M12N10_REC,
3631     }),
3632     ## TODO: Tests
3633     ## TODO: Tests for <nest/> in <input>
3634     };
3635    
3636 wakaba 1.56 ## TODO: Form |name| attributes: MUST NOT conflict with RFC 3106 [WF2]
3637    
3638 wakaba 1.52 $Element->{$HTML_NS}->{button} = { ## Checker definition for the HTML |button| element (HTML4/M12N attributes plus Web Forms 2.0 additions).
3639     %HTMLProseContentChecker, ## NOTE: %Flow; - something [XHTML10]
3640     ## TODO: -A|%formctrl;|form|fieldset [HTML4]
3641     ## TODO: image map (img) in |button| is "illegal" [HTML4].
3642     status => FEATURE_WF2 | FEATURE_M12N10_REC,
3643     check_attrs => $GetHTMLAttrsChecker->({
3644     accesskey => $AttrCheckerNotImplemented, ## TODO: Character
3645 wakaba 1.56 action => $HTMLURIAttrChecker,
3646     autofocus => $GetHTMLBooleanAttrChecker->('autofocus'),
3647 wakaba 1.52 disabled => $GetHTMLBooleanAttrChecker->('disabled'),
3648 wakaba 1.56 ## TODO: form [WF2]
3649     method => $GetHTMLEnumeratedAttrChecker->({
3650     get => 1, post => 1, put => 1, delete => 1,
3651     }),
3652 wakaba 1.52 name => sub {}, ## NOTE: CDATA [M12N]
3653 wakaba 1.56 oninvalid => $HTMLEventHandlerAttrChecker,
3654     replace => $GetHTMLEnumeratedAttrChecker->({document => 1, values => 1}),
3655     target => $HTMLTargetAttrChecker,
3656     ## TODO: template [WF2]
3657 wakaba 1.52 type => $GetHTMLEnumeratedAttrChecker->({
3658     button => 1, submit => 1, reset => 1,
3659     }),
3660     value => sub {}, ## NOTE: CDATA [M12N]
3661     }, { ## Second hash: per-attribute status flags.
3662     %HTMLAttrStatus,
3663     %HTMLM12NCommonAttrStatus,
3664     accesskey => FEATURE_M12N10_REC,
3665 wakaba 1.56 action => FEATURE_WF2,
3666     autofocus => FEATURE_WF2,
3667 wakaba 1.52 datafld => FEATURE_HTML4_REC_RESERVED,
3668     dataformatas => FEATURE_HTML4_REC_RESERVED,
3669     datasrc => FEATURE_HTML4_REC_RESERVED,
3670 wakaba 1.56 disabled => FEATURE_WF2 | FEATURE_M12N10_REC,
3671     enctype => FEATURE_WF2, ## NOTE(review): no |enctype| checker is registered in the first hash (|input| uses $HTMLIMTAttrChecker) -- confirm whether intentional.
3672     form => FEATURE_WF2,
3673 wakaba 1.52 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3674 wakaba 1.56 method => FEATURE_WF2,
3675 wakaba 1.52 name => FEATURE_M12N10_REC,
3676     onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3677     onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3678 wakaba 1.56 oninvalid => FEATURE_WF2,
3679     replace => FEATURE_WF2,
3680 wakaba 1.52 tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3681 wakaba 1.56 target => FEATURE_WF2,
3682     template => FEATURE_WF2,
3683 wakaba 1.52 type => FEATURE_M12N10_REC,
3684     value => FEATURE_M12N10_REC,
3685     }),
3686     ## TODO: Tests
3687     ## TODO: Tests for <nest/> in <button>
3688     };
3689    
3690     $Element->{$HTML_NS}->{label} = { ## Checker definition for the HTML |label| element.
3691     %HTMLPhrasingContentChecker, ## NOTE: %Inline - label [XHTML10] ## TODO: -label
3692 wakaba 1.56 ## TODO: At most one form control [WF2]
3693 wakaba 1.52 status => FEATURE_WF2 | FEATURE_M12N10_REC,
3694     check_attrs => $GetHTMLAttrsChecker->({ ## Both attributes use the placeholder checker, so they are reported as 'unsupported' rather than validated.
3695     accesskey => $AttrCheckerNotImplemented, ## TODO: Character
3696     for => $AttrCheckerNotImplemented, ## TODO: IDREF ## TODO: Must be |id| of control [HTML4] ## TODO: Or, "may only contain one control element"
3697     }, {
3698     %HTMLAttrStatus,
3699     %HTMLM12NCommonAttrStatus,
3700 wakaba 1.56 accesskey => FEATURE_WF2 | FEATURE_M12N10_REC,
3701 wakaba 1.52 for => FEATURE_M12N10_REC,
3702     lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3703     onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3704     onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3705     }),
3706     ## TODO: Tests
3707     ## TODO: Tests for <nest/> in <label>
3708     };
3709    
3710     $Element->{$HTML_NS}->{select} = { ## Checker definition for the HTML |select| element (HTML4/M12N attributes plus Web Forms 2.0 additions).
3711 wakaba 1.56 %HTMLProseContentChecker, ## TODO: (optgroup|option)* [HTML4] + [WF2] ## TODO: SHOULD avoid empty and visible [WF2]
3712 wakaba 1.52 ## TODO: author should SELECTED at least one OPTION in non-MULTIPLE case [HTML4].
3713     ## TODO: more than one OPTION with SELECTED in non-MULTIPLE case is "error" [HTML4]
3714     status => FEATURE_WF2 | FEATURE_M12N10_REC,
3715 wakaba 1.56 is_root => 1, ## TODO: SHOULD NOT in application/xhtml+xml [WF2]
3716 wakaba 1.52 check_attrs => $GetHTMLAttrsChecker->({
3717 wakaba 1.56 ## TODO: accesskey [WF2]
3718     autofocus => $GetHTMLBooleanAttrChecker->('autofocus'),
3719 wakaba 1.52 disabled => $GetHTMLBooleanAttrChecker->('disabled'),
3720 wakaba 1.56 data => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
3721     ## TODO: form [WF2]
3722 wakaba 1.52 multiple => $GetHTMLBooleanAttrChecker->('multiple'),
3723     name => sub {}, ## NOTE: CDATA [M12N]
3724 wakaba 1.56 oninvalid => $HTMLEventHandlerAttrChecker,
3725     ## TODO: pattern [WF2] ## TODO: |title| semantics
3726 wakaba 1.52 size => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }), ## The predicate is always true, so it adds no extra constraint of its own.
3727     }, { ## Second hash: per-attribute status flags (|accesskey|, |form| and |pattern| have a status but no checker yet, per the TODOs above).
3728     %HTMLAttrStatus,
3729     %HTMLM12NCommonAttrStatus,
3730 wakaba 1.56 accesskey => FEATURE_WF2,
3731     autofocus => FEATURE_WF2,
3732     data => FEATURE_WF2,
3733 wakaba 1.52 datafld => FEATURE_HTML4_REC_RESERVED,
3734     dataformatas => FEATURE_HTML4_REC_RESERVED,
3735     datasrc => FEATURE_HTML4_REC_RESERVED,
3736 wakaba 1.56 disabled => FEATURE_WF2 | FEATURE_M12N10_REC,
3737     form => FEATURE_WF2,
3738 wakaba 1.52 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3739     multiple => FEATURE_M12N10_REC,
3740     name => FEATURE_M12N10_REC,
3741     onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3742     onchange => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3743     onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3744 wakaba 1.56 oninvalid => FEATURE_WF2,
3745     pattern => FEATURE_WF2,
3746 wakaba 1.52 size => FEATURE_M12N10_REC,
3747     tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3748     }),
3749     ## TODO: Tests
3750     ## TODO: Tests for <nest/> in <select>
3751     };
3752 wakaba 1.1
3753 wakaba 1.52 $Element->{$HTML_NS}->{datalist} = { ## Checker definition for the Web Forms 2.0 |datalist| element (status is FEATURE_WF2 only).
3754 wakaba 1.56 %HTMLProseContentChecker, ## TODO: (transparent | option)*
3755     ## TODO: |option| child MUST be empty [WF2]
3756 wakaba 1.52 status => FEATURE_WF2,
3757 wakaba 1.56 check_attrs => $GetHTMLAttrsChecker->({
3758     data => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
3759     }, { ## Second hash: per-attribute status flags; unlike the other form elements here, %HTMLM12NCommonAttrStatus is not included.
3760 wakaba 1.52 %HTMLAttrStatus,
3761 wakaba 1.56 data => FEATURE_WF2,
3762 wakaba 1.52 }),
3763     ## TODO: Tests
3764     ## TODO: Tests for <nest/> in <datalist>
3765     };
3766 wakaba 1.49
3767 wakaba 1.52 $Element->{$HTML_NS}->{optgroup} = { ## Checker definition for the HTML |optgroup| element.
3768 wakaba 1.56 %HTMLProseContentChecker, ## TODO: (option|optgroup)* [HTML4] + [WF2] SHOULD avoid empty and visible [WF2]
3769 wakaba 1.52 status => FEATURE_WF2 | FEATURE_M12N10_REC,
3770     check_attrs => $GetHTMLAttrsChecker->({
3771     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
3772     label => sub {}, ## NOTE: Text [M12N] ## TODO: required
3773     }, { ## Second hash: per-attribute status flags.
3774     %HTMLAttrStatus,
3775     %HTMLM12NCommonAttrStatus,
3776 wakaba 1.56 disabled => FEATURE_WF2 | FEATURE_M12N10_REC,
3777 wakaba 1.52 label => FEATURE_M12N10_REC,
3778     lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3779     }),
3780     ## TODO: Tests
3781     ## TODO: Tests for <nest/> in <optgroup>
3782     };
3783    
3784     $Element->{$HTML_NS}->{option} = { ## Checker definition for the HTML |option| element; content handling comes from %HTMLTextChecker.
3785     %HTMLTextChecker,
3786     status => FEATURE_WF2 | FEATURE_M12N10_REC,
3787     check_attrs => $GetHTMLAttrsChecker->({
3788     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
3789     label => sub {}, ## NOTE: Text [M12N]
3790     selected => $GetHTMLBooleanAttrChecker->('selected'),
3791     value => sub {}, ## NOTE: CDATA [M12N]
3792     }, { ## Second hash: per-attribute status flags.
3793     %HTMLAttrStatus,
3794     %HTMLM12NCommonAttrStatus,
3795 wakaba 1.56 disabled => FEATURE_WF2 | FEATURE_M12N10_REC, ## Flags are OR-ed; a comma here would shift every following key/value pair of this hash.
3796 wakaba 1.52 label => FEATURE_M12N10_REC,
3797     lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3798     selected => FEATURE_M12N10_REC,
3799     value => FEATURE_M12N10_REC,
3800     }),
3801     ## TODO: Tests
3802     ## TODO: Tests for <nest/> in <option>
3803     };
3804 wakaba 1.49
## Checker definition for the |textarea| element [WF2] [M12N].
{
  ## Value checkers, keyed by attribute name.  Attributes that only
  ## appear in the status table below have no value checker yet.
  my %attr_checker = (
    accept => $HTMLIMTAttrChecker, ## TODO: MUST be a text-based type
    accesskey => $AttrCheckerNotImplemented, ## TODO: Character
    autofocus => $GetHTMLBooleanAttrChecker->('autofocus'),
    ## TODO: SHOULD if wrap=hard [WF2]
    cols => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    ## TODO: form [WF2]
    ## TODO: inputmode [WF2]
    maxlength => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
    name => sub {}, ## NOTE: CDATA [M12N]
    ## TODO: pattern [WF2]
    ## TODO: |title| special semantics
    readonly => $GetHTMLBooleanAttrChecker->('readonly'),
    required => $GetHTMLBooleanAttrChecker->('required'),
    rows => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
    oninvalid => $HTMLEventHandlerAttrChecker,
    wrap => $GetHTMLEnumeratedAttrChecker->({soft => 1, hard => 1}),
  );

  ## Feature status, keyed by attribute name.  The element-specific
  ## entries are listed after the shared tables so that they win.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accept => FEATURE_WF2,
    accesskey => FEATURE_M12N10_REC,
    autofocus => FEATURE_WF2,
    cols => FEATURE_M12N10_REC,
    datafld => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datasrc => FEATURE_HTML4_REC_RESERVED,
    disabled => FEATURE_WF2 | FEATURE_M12N10_REC,
    form => FEATURE_WF2,
    inputmode => FEATURE_WF2 | FEATURE_XHTMLBASIC11_CR,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    maxlength => FEATURE_WF2,
    name => FEATURE_M12N10_REC,
    onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onchange => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    oninvalid => FEATURE_WF2,
    onselect => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    pattern => FEATURE_WF2,
    readonly => FEATURE_WF2 | FEATURE_M12N10_REC,
    required => FEATURE_WF2,
    rows => FEATURE_M12N10_REC,
    tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    wrap => FEATURE_WF2,
  );

  $Element->{$HTML_NS}->{textarea} = {
    %HTMLTextChecker,
    status => FEATURE_WF2 | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
    ## TODO: Tests
    ## TODO: Tests for <nest/> in <textarea>
  };
}
3855 wakaba 1.49
## Checker definition for the |output| element [WF2].
{
  ## No attribute value checker is implemented yet.
  my %attr_checker = (
    ## TODO: for [WF2]
    ## TODO: form [WF2]
    ## TODO: name [WF2]
    ## onformchange[WF2]
    ## onforminput[WF2]
  );

  ## Feature status, keyed by attribute name.
  my %attr_status = (
    %HTMLAttrStatus,
    for => FEATURE_WF2,
    form => FEATURE_WF2,
    name => FEATURE_WF2,
    onchange => FEATURE_HTML5_DEFAULT | FEATURE_WF2,
    onformchange => FEATURE_WF2,
    onforminput => FEATURE_WF2,
  );

  $Element->{$HTML_NS}->{output} = {
    %HTMLPhrasingContentChecker, ## Inline [WF2]
    status => FEATURE_WF2,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
    ## TODO: Tests
    ## TODO: Tests for <nest/> in <output>
    ## NOTE: "The output element should be used when ..." [WF2]
  };
}
3878    
3879     ## TODO: repetition template
3880    
## Checker definition for the |isindex| element, which is deprecated
## in [HTML4].
{
  ## Value checkers, keyed by attribute name.
  my %attr_checker = (
    prompt => sub {}, ## NOTE: Text [M12N]
  );

  ## Feature status, keyed by attribute name.
  my %attr_status = (
    %HTMLAttrStatus,
    class => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    prompt => FEATURE_M12N10_REC_DEPRECATED,
    style => FEATURE_XHTML10_REC,
    title => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  );

  $Element->{$HTML_NS}->{isindex} = {
    %HTMLEmptyChecker,
    status => FEATURE_M12N10_REC_DEPRECATED |
        Whatpm::ContentChecker::FEATURE_DEPRECATED_SHOULD, ## [HTML4]
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
    ## TODO: Tests
    ## TODO: Tests for <nest/> in <isindex>
  };
}
3900 wakaba 1.49
## Checker definition for the |script| element.
##
## Element state used by the callbacks:
##   must_be_empty - set when the element has a |src| attribute; any
##                   non-|plus| child element or significant text is
##                   then reported.
##   script_type   - set otherwise; the script type derived from the
##                   |type| and |language| attributes.
$Element->{$HTML_NS}->{script} = {
  %HTMLChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    src => $HTMLURIAttrChecker,
    defer => $GetHTMLBooleanAttrChecker->('defer'),
    async => $GetHTMLBooleanAttrChecker->('async'),
    type => $HTMLIMTAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    async => FEATURE_HTML5_DEFAULT,
    charset => FEATURE_M12N10_REC,
    defer => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    event => FEATURE_HTML4_REC_RESERVED,
    for => FEATURE_HTML4_REC_RESERVED,
    id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    language => FEATURE_M12N10_REC_DEPRECATED,
    src => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    type => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my $node = $item->{node};

    if ($node->has_attribute_ns (undef, 'src')) {
      $element_state->{must_be_empty} = 1;
    } else {
      ## NOTE: No content model conformance in HTML5 spec.
      my $type = $node->get_attribute_ns (undef, 'type');
      my $language = $node->get_attribute_ns (undef, 'language');
      my $has_empty_type = (defined $type and $type eq '');
      my $has_empty_language = (defined $language and $language eq '');

      ## Start from the default and override it only when neither
      ## attribute is empty and at least one of them is specified;
      ## |type| takes precedence over |language|.
      my $script_type = 'text/javascript';
      unless ($has_empty_type or $has_empty_language) {
        if (defined $type) {
          $script_type = $type;
        } elsif (defined $language) {
          $script_type = 'text/' . $language;
        }
      }
      ## TODO: $type normalization
      $element_state->{script_type} = $script_type;
    }
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif (not $self->{plus_elements}->{$child_nsuri}->{$child_ln} and
             $element_state->{must_be_empty}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed');
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed')
        if $has_significant and $element_state->{must_be_empty};
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## Nothing is reported at the end of a |script| that has |src|.
    return if $element_state->{must_be_empty};

    ## The inline script itself is not checked; report it as such.
    $self->{onerror}->(node => $item->{node}, level => 'unsupported',
                       type => 'script:'.$element_state->{script_type});
    ## TODO: text/javascript support

    $HTMLChecker{check_end}->(@_);
  },
};
## ISSUE: Significant check and text child node
3980 wakaba 1.1
## Checker definition for the |noscript| element.
## NOTE: When script is disabled.
##
## The behavior depends on |$self->{flag}->{in_head}|: when it is set,
## only |link|, |style| without |scoped|, and |meta| with an
## |http-equiv| other than |content-type| are accepted as children and
## significant text is reported; otherwise the callbacks of
## |%HTMLTransparentChecker| are used.
$Element->{$HTML_NS}->{noscript} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my $node = $item->{node};

    ## |noscript| is reported when the document is not an HTML document.
    $self->{onerror}->(node => $node, type => 'in XML:noscript')
        unless $node->owner_document->manakai_is_html;

    ## Outside of |head|, |noscript| must not be nested.
    $self->_add_minus_elements ($element_state,
                                {$HTML_NS => {noscript => 1}})
        unless $self->{flag}->{in_head};
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    return $HTMLTransparentChecker{check_child_element}->(@_)
        unless $self->{flag}->{in_head};

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      return;
    }
    return if $self->{plus_elements}->{$child_nsuri}->{$child_ln};

    ## Decide whether the child is one of the elements accepted in a
    ## |noscript| in |head|.
    my $is_allowed = 0;
    if ($child_nsuri eq $HTML_NS) {
      if ($child_ln eq 'link') {
        $is_allowed = 1;
      } elsif ($child_ln eq 'style') {
        $is_allowed = not $child_el->has_attribute_ns (undef, 'scoped');
      } elsif ($child_ln eq 'meta') {
        my $http_equiv_attr
            = $child_el->get_attribute_node_ns (undef, 'http-equiv');
        ## TODO: case
        $is_allowed = ($http_equiv_attr and
                       lc ($http_equiv_attr->value) ne 'content-type');
      }
    }

    $self->{onerror}->(node => $child_el,
                       type => 'element not allowed:head noscript',
                       level => $self->{must_level})
        unless $is_allowed;
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    return $HTMLTransparentChecker{check_child_text}->(@_)
        unless $self->{flag}->{in_head};

    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed')
        if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    my $end_checker = $self->{flag}->{in_head}
        ? $HTMLChecker{check_end}
        : $HTMLPhrasingContentChecker{check_end};
    $end_checker->(@_);
  },
};
## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
4068 wakaba 1.1
## Checker definition for the |event-source| element.
{
  ## Value checkers, keyed by attribute name.
  my %attr_checker = (
    src => $HTMLURIAttrChecker,
  );

  ## Feature status, keyed by attribute name.
  my %attr_status = (
    %HTMLAttrStatus,
    src => FEATURE_HTML5_LC,
  );

  $Element->{$HTML_NS}->{'event-source'} = {
    %HTMLEmptyChecker,
    status => FEATURE_HTML5_LC,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
  };
}
4079    
## Checker definition for the |details| element.
##
## Element state used by the callbacks:
##   has_legend     - set once a |legend| child has been seen.
##   has_non_legend - set once any child other than a |plus| element or
##                    a transparent element, or any significant text,
##                    has been seen; a |legend| seen after that is
##                    reported.
$Element->{$HTML_NS}->{details} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->({
    open => $GetHTMLBooleanAttrChecker->('open'),
  }, {
    %HTMLAttrStatus,
    open => FEATURE_HTML5_WD,
  }),
  ## NOTE: legend, Prose
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      $element_state->{has_non_legend} = 1;
      return;
    }
    return if $self->{plus_elements}->{$child_nsuri}->{$child_ln};

    if ($child_nsuri eq $HTML_NS and $child_ln eq 'legend') {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:details legend',
                         level => $self->{must_level})
          if $element_state->{has_non_legend};
      ## A |legend| also counts as content, so that a second |legend|
      ## is reported.
      @{$element_state}{qw(has_legend has_non_legend)} = (1, 1);
      return;
    }

    $HTMLProseContentChecker{check_child_element}->(@_);
    $element_state->{has_non_legend} = 1 unless $child_is_transparent;
    ## ISSUE: |<details><object><legend>xx</legend></object>..</details>|
    ## is conforming?
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $element_state->{has_non_legend} = 1 if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    $self->{onerror}->(node => $item->{node},
                       type => 'element missing:legend',
                       level => $self->{must_level})
        unless $element_state->{has_legend};

    $HTMLProseContentChecker{check_end}->(@_);
    ## ISSUE: |<details><legend>aa</legend></details>| error?
  },
};
4134    
## Checker definition for the |datagrid| element.
##
## Content model:
##   Prose -(text* table Prose*) | table | select | datalist | Empty
##
## Element state used by the callbacks:
##   phase       - 'any' (nothing seen yet), 'prose' (prose content
##                 model chosen) or 'none' (a |table|, |select| or
##                 |datalist| has been seen; nothing more is allowed).
##   has_element - set once a child element has been seen in the prose
##                 content; a |table| before that is reported.
$Element->{$HTML_NS}->{datagrid} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->({
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    multiple => $GetHTMLBooleanAttrChecker->('multiple'),
  }, {
    %HTMLAttrStatus,
    disabled => FEATURE_HTML5_WD,
    multiple => FEATURE_HTML5_WD,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    $self->_add_minus_elements ($element_state,
                                {$HTML_NS => {a => 1, datagrid => 1}});
    $element_state->{phase} = 'any';
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      return;
    }
    return if $self->{plus_elements}->{$child_nsuri}->{$child_ln};

    ## Every phase reports a disallowed child in the same way.
    my $reject = sub {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed');
    };

    my $phase = $element_state->{phase};
    if ($phase eq 'prose') {
      ## A |table| is not allowed as the first element of the prose
      ## content.
      my $is_leading_table = (not $element_state->{has_element} and
                              $child_nsuri eq $HTML_NS and
                              $child_ln eq 'table');
      $reject->()
          if not $HTMLProseContent->{$child_nsuri}->{$child_ln} or
             $is_leading_table;
      $element_state->{has_element} = 1;
    } elsif ($phase eq 'any') {
      if ($child_nsuri eq $HTML_NS and
          {table => 1, select => 1, datalist => 1}->{$child_ln}) {
        $element_state->{phase} = 'none';
      } elsif ($HTMLProseContent->{$child_nsuri}->{$child_ln}) {
        $element_state->{has_element} = 1;
        $element_state->{phase} = 'prose';
        ## TODO: transparent?
      } else {
        $reject->();
      }
    } elsif ($phase eq 'none') {
      $reject->();
    } else {
      die "check_child_element: Bad |datagrid| phase: $element_state->{phase}";
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    return unless $has_significant;

    my $phase = $element_state->{phase};
    if ($phase eq 'any') {
      $element_state->{phase} = 'prose';
    } elsif ($phase ne 'prose') {
      $self->{onerror}->(node => $child_node,
                         type => 'character not allowed');
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    my $end_checker = $element_state->{phase} eq 'none'
        ? $HTMLChecker{check_end}
        : $HTMLPhrasingContentChecker{check_end};
    $end_checker->(@_);
  },
  ## ISSUE: "xxx<table/>" is disallowed; "<select/>aaa" and "<datalist/>aa"
  ## are not disallowed (assuming that form control contents are also
  ## prose content).
};
4224    
## Checker definition for the |command| element.
{
  ## Values accepted for the |type| attribute (compared as is).
  my %is_command_type = (command => 1, checkbox => 1, radio => 1);

  ## Value checkers, keyed by attribute name.
  my %attr_checker = (
    checked => $GetHTMLBooleanAttrChecker->('checked'),
    default => $GetHTMLBooleanAttrChecker->('default'),
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    hidden => $GetHTMLBooleanAttrChecker->('hidden'),
    icon => $HTMLURIAttrChecker,
    label => sub { }, ## NOTE: No conformance criteria
    radiogroup => sub { }, ## NOTE: No conformance criteria
    type => sub {
      my ($self, $attr) = @_;
      $self->{onerror}->(node => $attr, type => 'attribute value not allowed')
          unless $is_command_type{$attr->value};
    },
  );

  ## Feature status, keyed by attribute name.
  my %attr_status = (
    %HTMLAttrStatus,
    checked => FEATURE_HTML5_WD,
    default => FEATURE_HTML5_WD,
    disabled => FEATURE_HTML5_WD,
    hidden => FEATURE_HTML5_WD,
    icon => FEATURE_HTML5_WD,
    label => FEATURE_HTML5_WD,
    radiogroup => FEATURE_HTML5_WD,
    type => FEATURE_HTML5_WD,
  );

  $Element->{$HTML_NS}->{command} = {
    %HTMLEmptyChecker,
    status => FEATURE_HTML5_WD,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
  };
}
4255    
## Checker definition for the |menu| element.
##
## Element state used by the callbacks:
##   phase            - 'li or phrasing' (nothing seen yet), 'li'
##                      (only |li| children allowed) or 'phrasing'
##                      (only phrasing content allowed).
##   in_menu_original - value of |$self->{flag}->{in_menu}| before this
##                      element was entered.
$Element->{$HTML_NS}->{menu} = {
  %HTMLPhrasingContentChecker,
  #status => FEATURE_M12N10_REC_DEPRECATED | FEATURE_HTML5_WD,
  status => FEATURE_M12N10_REC | FEATURE_HTML5_WD,
  ## NOTE: We don't want any |menu| element warned as deprecated.
  check_attrs => $GetHTMLAttrsChecker->({
    autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
    id => sub {
      ## NOTE: same as global |id=""|, with |$self->{menu}| registration
      my ($self, $attr) = @_;
      my $value = $attr->value;

      if (length ($value) > 0) {
        my $same_id_attrs = $self->{id}->{$value};
        if ($same_id_attrs) {
          $self->{onerror}->(node => $attr, type => 'duplicate ID');
          push @$same_id_attrs, $attr;
        } else {
          $self->{id}->{$value} = [$attr];
        }
      } else {
        ## NOTE: MUST contain at least one character
        $self->{onerror}->(node => $attr, type => 'empty attribute value');
      }

      $self->{onerror}->(node => $attr, type => 'space in ID')
          if $value =~ /[\x09-\x0D\x20]/;

      ## Only the first |menu| with a given ID is remembered.
      $self->{menu}->{$value} ||= $attr;
      ## ISSUE: <menu id=""><p contextmenu=""> match?
    },
    label => sub { }, ## NOTE: No conformance criteria
    type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    autosubmit => FEATURE_HTML5_WD,
    compat => FEATURE_M12N10_REC_DEPRECATED,
    label => FEATURE_HTML5_WD,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    type => FEATURE_HTML5_WD,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my $flag = $self->{flag};

    $element_state->{phase} = 'li or phrasing';
    $element_state->{in_menu_original} = $flag->{in_menu};
    $flag->{in_menu} = 1;
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      return;
    }
    return if $self->{plus_elements}->{$child_nsuri}->{$child_ln};

    ## The phase this child belongs to; |undef| if the child is
    ## neither |li| nor phrasing content.
    my $wanted_phase
        = ($child_nsuri eq $HTML_NS and $child_ln eq 'li') ? 'li'
        : $HTMLPhrasingContent->{$child_nsuri}->{$child_ln} ? 'phrasing'
        : undef;

    my $phase = $element_state->{phase};
    if (defined $wanted_phase and $phase eq 'li or phrasing') {
      ## The first child decides the content model.
      $element_state->{phase} = $wanted_phase;
    } elsif (not defined $wanted_phase or $phase ne $wanted_phase) {
      $self->{onerror}->(node => $child_el, type => 'element not allowed');
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    return unless $has_significant;

    my $phase = $element_state->{phase};
    if ($phase eq 'li or phrasing') {
      $element_state->{phase} = 'phrasing';
    } elsif ($phase ne 'phrasing') {
      $self->{onerror}->(node => $child_node,
                         type => 'character not allowed');
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Clear the flag only if it was not already set by an ancestor.
    delete $self->{flag}->{in_menu}
        unless $element_state->{in_menu_original};

    ## Any phase other than 'li' is 'phrasing' or 'li or phrasing'.
    my $end_checker = $element_state->{phase} eq 'li'
        ? $HTMLChecker{check_end}
        : $HTMLPhrasingContentChecker{check_end};
    $end_checker->(@_);
  },
};
4354    
## Checker definition for the |datatemplate| element: only |rule|
## children (and |plus| elements) are accepted, and significant text
## is reported.
$Element->{$HTML_NS}->{datatemplate} = {
  %HTMLChecker,
  status => FEATURE_HTML5_AT_RISK,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif (not $self->{plus_elements}->{$child_nsuri}->{$child_ln} and
             not ($child_nsuri eq $HTML_NS and $child_ln eq 'rule')) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:datatemplate');
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node, type => 'character not allowed')
        if $has_significant;
  },
  is_xml_root => 1,
};
4382    
## Checker definition for the |rule| element.
##
## NOTE: "MAY be anything that, when the parent |datatemplate|
## is applied to some conforming data, results in a conforming DOM tree.":
## We don't check against this, so the child callbacks are no-ops.
{
  ## Value checkers, keyed by attribute name.
  my %attr_checker = (
    condition => $HTMLSelectorsAttrChecker,
    mode => $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker,
  );

  ## Feature status, keyed by attribute name.
  my %attr_status = (
    %HTMLAttrStatus,
    condition => FEATURE_HTML5_AT_RISK,
    mode => FEATURE_HTML5_AT_RISK,
  );

  $Element->{$HTML_NS}->{rule} = {
    %HTMLChecker,
    status => FEATURE_HTML5_AT_RISK,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
    check_start => sub {
      my ($self, $item, $element_state) = @_;
      ## |nest| is registered as a |plus| element while inside |rule|.
      $self->_add_plus_elements ($element_state, {$HTML_NS => {nest => 1}});
    },
    check_child_element => sub { },
    check_child_text => sub { },
    check_end => sub {
      my ($self, $item, $element_state) = @_;
      $self->_remove_plus_elements ($element_state);
      $HTMLChecker{check_end}->(@_);
    },
  };
}
4409    
## Checker definition for the |nest| element.
{
  ## Value checkers, keyed by attribute name.
  my %attr_checker = (
    filter => $HTMLSelectorsAttrChecker,
    ## The value must be non-empty and must not contain any of the
    ## characters U+0009..U+000D and U+0020.
    mode => sub {
      my ($self, $attr) = @_;
      $self->{onerror}->(node => $attr, type => 'mode:syntax error')
          unless $attr->value =~ /\A[^\x09-\x0D\x20]+\z/;
    },
  );

  ## Feature status, keyed by attribute name.
  my %attr_status = (
    %HTMLAttrStatus,
    filter => FEATURE_HTML5_AT_RISK,
    mode => FEATURE_HTML5_AT_RISK,
  );

  $Element->{$HTML_NS}->{nest} = {
    %HTMLEmptyChecker,
    status => FEATURE_HTML5_AT_RISK,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
  };
}
4428    
## Checker definition for the |legend| element.
{
  ## No attribute value checker is enabled; the entries below are
  ## kept commented out.
  my %attr_checker = (
    # accesskey => $AttrCheckerNotImplemented, ## TODO: Character ## TODO: This attribute is not part of HTML5
    # align => $GetHTMLEnumeratedAttrChecker->({
    #   top => 1, bottom => 1, left => 1, right => 1,
    # }),
  );

  ## Feature status, keyed by attribute name.  The element-specific
  ## entries are listed after the shared tables so that they win.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accesskey => FEATURE_M12N10_REC,
    align => FEATURE_M12N10_REC_DEPRECATED,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  );

  $Element->{$HTML_NS}->{legend} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
  };
}
4445    
## Checker definition for the |div| element.
{
  ## Feature status, keyed by attribute name.  The element-specific
  ## entries are listed after the shared tables so that they win.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_M12N10_REC_DEPRECATED,
    datafld => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datasrc => FEATURE_HTML4_REC_RESERVED,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  );

  $Element->{$HTML_NS}->{div} = {
    %HTMLProseContentChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    ## No element-specific attribute value checker.
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
}
4459    
## Checker definition for the |font| element.
{
  ## Feature status, keyed by attribute name.
  my %attr_status = (
    %HTMLAttrStatus,
    class => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    color => FEATURE_M12N10_REC_DEPRECATED,
    dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    face => FEATURE_M12N10_REC_DEPRECATED,
    id => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    size => FEATURE_M12N10_REC_DEPRECATED,
    style => FEATURE_HTML5_AT_RISK | FEATURE_XHTML10_REC,
    title => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  );

  $Element->{$HTML_NS}->{font} = {
    %HTMLTransparentChecker,
    status => FEATURE_HTML5_AT_RISK | FEATURE_M12N10_REC_DEPRECATED,
    ## TODO: attribute value checkers
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
}
4477 wakaba 1.49
4478     ## TODO: frameset FEATURE_M12N10_REC
4479     ## class title id cols rows onload onunload style(x10)
4480     ## frame frameborder longdesc marginheight marginwidth noresize scrolling src name(deprecated) class,id,title,style(x10)
4481     ## noframes Common, lang(xhtml10)
4482    
4483     ## TODO: deprecated:
4484     ## basefont color face id size
4485     ## center Common lang(xhtml10)
4486     ## dir Common compat lang(xhtml10)
4487    
4488     ## TODO: CR: ruby rb rt rp rbc rtc @rbspan
4489 wakaba 1.56
4490     =pod
4491    
4492     WF2: Documents MUST comply with [CHARMOD].
4493     WF2: Vendor extensions MUST NOT be used.
4494    
4495     =cut
4496 wakaba 1.1
## Record that the definitions for the HTML namespace have been loaded.
## NOTE(review): presumably consulted by |Whatpm::ContentChecker| to
## avoid loading this module twice - confirm against the caller.
$Whatpm::ContentChecker::Namespace->{$HTML_NS}{loaded} = 1;

## A module file must end with a true value.
1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24