/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.60 - (hide annotations) (download)
Sun Mar 2 11:16:34 2008 UTC (17 years, 4 months ago) by wakaba
Branch: MAIN
Changes since 1.59: +6 -1 lines
++ whatpm/t/ChangeLog	2 Mar 2008 11:10:02 -0000
	* content-model-atom-1.dat: New test data are added.

2008-03-02  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	2 Mar 2008 11:16:26 -0000
	* ContentChecker.pm: Typo fixed.  Don't raise "character encoding"
	and related errors unless it is an HTML document (though the spec
	is unclear on whether it is applied to XHTML document).

	* HTML.pm (%HTMLAttrStatus): WF2 repetition model attributes
	are added.

2008-03-02  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ContentChecker/ChangeLog	2 Mar 2008 11:11:31 -0000
	* Atom.pm: Reimplemented.

2008-03-02  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
6    
## HTML5 feature status constants.  Each constant is built from the
## FEATURE_STATUS_* / FEATURE_ALLOWED values of Whatpm::ContentChecker.

sub FEATURE_HTML5_ROLE () {
  ## TODO: svg:*/@role
  Whatpm::ContentChecker::FEATURE_STATUS_WD()
}

sub FEATURE_HTML5_LC () {
  Whatpm::ContentChecker::FEATURE_ALLOWED() |
  Whatpm::ContentChecker::FEATURE_STATUS_LC()
}

sub FEATURE_HTML5_AT_RISK () {
  Whatpm::ContentChecker::FEATURE_ALLOWED() |
  Whatpm::ContentChecker::FEATURE_STATUS_WD()
}

sub FEATURE_HTML5_WD () {
  Whatpm::ContentChecker::FEATURE_ALLOWED() |
  Whatpm::ContentChecker::FEATURE_STATUS_WD()
}

sub FEATURE_HTML5_FD () {
  Whatpm::ContentChecker::FEATURE_ALLOWED() |
  Whatpm::ContentChecker::FEATURE_STATUS_WD()
}

sub FEATURE_HTML5_DEFAULT () {
  Whatpm::ContentChecker::FEATURE_ALLOWED() |
  Whatpm::ContentChecker::FEATURE_STATUS_WD()
}

sub FEATURE_HTML5_DROPPED () {
  ## NOTE: Was part of HTML5, but was dropped.
  Whatpm::ContentChecker::FEATURE_STATUS_WD()
}
## Web Forms 2.0 feature status constants.

sub FEATURE_WF2 () {
  Whatpm::ContentChecker::FEATURE_ALLOWED() |
  Whatpm::ContentChecker::FEATURE_STATUS_LC()
}

sub FEATURE_WF2_DEPRECATED () {
  ## NOTE: MUST NOT be used.
  Whatpm::ContentChecker::FEATURE_STATUS_LC()
}
44 wakaba 1.49
45 wakaba 1.58 ## TODO: RDFa LC
46    
47     ## NOTE: XHTML Role LCWD has almost no information on how the |role|
48     ## attribute can be used; the only requirement in that respect is:
49     ## "the attribute MUST be referenced using its namespace-qualified form" (and
50     ## this is a host language conformance requirement!).
51    
## XHTML Basic 1.1 feature status constants.
## NOTE: Only additions to M12N10_REC are marked.

sub FEATURE_XHTMLBASIC11_CR () {
  Whatpm::ContentChecker::FEATURE_STATUS_CR()
}

sub FEATURE_XHTMLBASIC11_CR_DEPRECATED () {
  Whatpm::ContentChecker::FEATURE_DEPRECATED_INFO() |
  Whatpm::ContentChecker::FEATURE_STATUS_CR()
}
60    
## NOTE: M12N10 status is based on its abstract module definition.
## It contains a number of problems. (However, again, it's a REC!)

sub FEATURE_M12N10_REC () {
  ## NOTE: Oh, XHTML m12n 1.0 passed the CR phase! W3C Process suck!
  Whatpm::ContentChecker::FEATURE_STATUS_REC()
}

sub FEATURE_M12N10_REC_DEPRECATED () {
  Whatpm::ContentChecker::FEATURE_DEPRECATED_INFO() |
  Whatpm::ContentChecker::FEATURE_STATUS_REC()
}
71 wakaba 1.58 ## NOTE: XHTML M12N 1.1 is a LC at the time of writing and no
72     ## addition from 1.0.
73 wakaba 1.49
## NOTE: XHTML10 status is based on its transitional and frameset DTDs
## (second edition). Only missing attributes from M12N10 abstract
## definition are added.
## NOTE(review): despite the _REC suffix, the value used here is
## FEATURE_STATUS_CR - confirm that this is intended.
sub FEATURE_XHTML10_REC () {
  Whatpm::ContentChecker::FEATURE_STATUS_CR()
}
80    
81 wakaba 1.58 ## TODO: ISO-HTML
82    
## NOTE: HTML4 status is based on its transitional and frameset DTDs (HTML
## 4.01). Only missing attributes from XHTML10 are added.
sub FEATURE_HTML4_REC_RESERVED () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD()
}
88    
89     ## TODO: According to HTML4 definition, authors SHOULD use style sheets
90     ## rather than presentational attributes (deprecated or not deprecated).
91 wakaba 1.48
92 wakaba 1.58 ## TODO: HTML 3.2 REC
93     ## TODO: HTML 2.x RFC
94     ## TODO: HTML 2.0 RFC
95     ## TODO: Other HTML RFCs
96    
97 wakaba 1.29 ## December 2007 HTML5 Classification
98    
## Metadata content: namespace URI => {local name => 1}.
my $HTMLMetadataContent = {
  $HTML_NS => {
    map { ($_ => 1) } (
      qw/title base link style script noscript event-source command
         datatemplate/,
      ## NOTE: A |meta| with no |name| element is not allowed as
      ## a metadata content other than |head| element.
      'meta',
      ## NOTE: Only when empty [WF2]
      'form',
    ),
  },
  ## NOTE: RDF is mentioned in the HTML5 spec.
  ## TODO: Other RDF elements?
  q<http://www.w3.org/1999/02/22-rdf-syntax-ns#> => {RDF => 1},
};
113    
## Prose content: namespace URI => {local name => 1}.
my $HTMLProseContent = {
  $HTML_NS => {
    map { ($_ => 1) } (
      qw/section nav article blockquote aside
         h1 h2 h3 h4 h5 h6 header
         footer address p hr dialog pre
         ol ul dl figure map table/,
      'details', ## ISSUE: "Prose element" in spec.
      'datagrid', ## ISSUE: "Prose element" in spec.
      'datatemplate',
      'div', ## ISSUE: No category in spec.
      ## NOTE: |style| is only allowed if |scoped| attribute is specified.
      ## Additionally, it must be before any other element or
      ## non-inter-element-whitespace text node.
      'style',

      qw/br q cite em strong small mark
         dfn abbr time progress meter code
         var samp kbd sub sup span i
         b bdo script noscript event-source
         command font
         a/,
      'datagrid', ## ISSUE: "Interactive element" in the spec.
      ## NOTE: |area| is allowed only as a descendant of |map|.
      'area',

      qw/ins del/,

      ## NOTE: If there is a |menu| ancestor, phrasing. Otherwise, prose.
      'menu',

      qw/img iframe embed object video audio
         canvas/,
    ),
  },

  ## NOTE: Embedded
  q<http://www.w3.org/1998/Math/MathML> => {math => 1},
  q<http://www.w3.org/2000/svg> => {svg => 1},
};
152    
## Sectioning content: namespace URI => {local name => 1}.
my $HTMLSectioningContent = {
  $HTML_NS => {
    map { ($_ => 1) } (
      qw/section nav article aside/,
      ## NOTE: |body| is only allowed in |html| element.
      'body',
    ),
  },
};
160    
## Sectioning roots: namespace URI => {local name => 1}.
my $HTMLSectioningRoot = {
  $HTML_NS => {
    map { ($_ => 1) } qw/blockquote datagrid figure td/
  },
};
166    
## Heading content: namespace URI => {local name => 1}.
my $HTMLHeadingContent = {
  $HTML_NS => {
    map { ($_ => 1) } qw/h1 h2 h3 h4 h5 h6 header/
  },
};
172    
## Phrasing content: namespace URI => {local name => 1}.
my $HTMLPhrasingContent = {
  ## NOTE: All phrasing content is also prose content.
  $HTML_NS => {
    map { ($_ => 1) } (
      qw/br q cite em strong small mark
         dfn abbr time progress meter code
         var samp kbd sub sup span i
         b bdo script noscript event-source
         command font
         a/,
      'datagrid', ## ISSUE: "Interactive element" in the spec.
      ## NOTE: |area| is allowed only as a descendant of |map|.
      'area',

      ## NOTE: Transparent.
      qw/ins del/,

      ## NOTE: If there is a |menu| ancestor, phrasing. Otherwise, prose.
      'menu',

      qw/img iframe embed object video audio
         canvas/,

      ## NOTE: WF2
      'input', ## NOTE: type=hidden
      'datalist', ## NOTE: block | where |select| allowed
    ),
  },

  ## NOTE: Embedded
  q<http://www.w3.org/1998/Math/MathML> => {math => 1},
  q<http://www.w3.org/2000/svg> => {svg => 1},

  ## NOTE: And non-inter-element-whitespace text nodes.
};
206    
207 wakaba 1.40 ## $HTMLEmbeddedContent: See Whatpm::ContentChecker.
208 wakaba 1.29
## Interactive content: namespace URI => {local name => 1}.
my $HTMLInteractiveContent = {
  $HTML_NS => {
    map { ($_ => 1) } (
      'a',
      'datagrid', ## ISSUE: Categorized as "Interactive element"
    ),
  },
};
215    
216 wakaba 1.36 ## NOTE: $HTMLTransparentElements: See Whatpm::ContentChecker.
217     ## NOTE: Semi-transparent elements: See Whatpm::ContentChecker.
218    
219     ## -- Common attribute syntax checkers
220    
221 wakaba 1.1 our $AttrChecker;
222    
## Returns a checker for an enumerated attribute.  |$states| maps each
## known (lowercase) keyword to a positive number if it is conforming
## or to a negative number if it is known but non-conforming.
my $GetHTMLEnumeratedAttrChecker = sub {
  my $states = shift; # {value => conforming ? 1 : -1}
  return sub {
    my ($self, $attr) = @_;
    my $keyword = lc $attr->value; ## TODO: ASCII case insensitibility?
    my $state = $states->{$keyword} || 0;

    ## Conforming keyword: nothing to report.
    return if $state > 0;

    my $type = $state ? 'enumerated:non-conforming' : 'enumerated:invalid';
    $self->{onerror}->(node => $attr, type => $type);
  };
}; # $GetHTMLEnumeratedAttrChecker
237    
## Returns a checker for the boolean attribute named |$local_name|.
## The only values accepted are the empty string and the attribute's
## own name (compared case-sensitively).
my $GetHTMLBooleanAttrChecker = sub {
  my $local_name = shift;
  return sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    return if $value eq '' or $value eq $local_name;
    $self->{onerror}->(node => $attr, type => 'boolean:invalid');
  };
}; # $GetHTMLBooleanAttrChecker
248    
## Unordered set of space-separated tokens
##
## Reports every repeated occurrence of a token; empty tokens produced
## by adjacent space characters are ignored.
my $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker = sub {
  my ($self, $attr) = @_;
  my %seen;
  for my $token (split /[\x09-\x0D\x20]/, $attr->value) {
    next unless length $token;
    next unless $seen{$token}++; # first occurrence is fine
    $self->{onerror}->(node => $attr, type => 'duplicate token:'.$token);
  }
}; # $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker
261 wakaba 1.8
## |rel| attribute (unordered set of space separated tokens,
## whose allowed values are defined by the section on link types)
##
## Arguments:
##   $a_or_area - index into each link type's |effect| array
##   $todo      - hash in which |has_hyperlink_link_type| is recorded
##   $self      - checker object (|onerror|, |has_link_type| are used)
##   $attr      - the attribute node to check
my $HTMLLinkTypesAttrChecker = sub {
  my ($a_or_area, $todo, $self, $attr) = @_;

  ## Collect the distinct tokens.  A repeated token is reported, with
  ## the exception of |up|, which may be repeated.
  my %seen;
  for my $token (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
    if (not $seen{$token}) {
      $seen{$token} = 1;
    } elsif ($token ne 'up') {
      $self->{onerror}->(node => $attr, type => 'duplicate token:'.$token);
    }
  }

  ## NOTE: Case sensitive match (since HTML5 spec does not say link
  ## types are case-insensitive and it says "The value should not
  ## be confusingly similar to any other defined value (e.g.
  ## differing only in case).").
  ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
  ## "MAY"s and "only ... MAY" restrict non-standard non-registered
  ## values to be used conformingly.
  require Whatpm::_LinkTypeList;
  our $LinkType;
  for my $token (keys %seen) {
    my $def = $LinkType->{$token};
    unless (defined $def) {
      $self->{onerror}->(node => $attr, level => 'unsupported',
                         type => 'link type:'.$token);
      next;
    }

    my $effect = $def->{effect}->[$a_or_area];
    my $status = $def->{status};
    if ($status eq 'accepted' or $status eq 'proposal') {
      if ($status eq 'proposal') {
        $self->{onerror}->(node => $attr, level => 's',
                           type => 'link type:proposed:'.$token);
      }
      unless (defined $effect) {
        $self->{onerror}->(node => $attr,
                           type => 'link type:bad context:'.$token);
      }
    } else { # rejected or synonym
      $self->{onerror}->(node => $attr,
                         type => 'link type:non-conforming:'.$token);
    }

    ## |alternate| is decided after the loop, since its effect depends
    ## on whether |stylesheet| is also specified.
    if (defined $effect and $token ne 'alternate' and
        $effect eq 'hyperlink') {
      $todo->{has_hyperlink_link_type} = 1;
    }

    if ($def->{unique}) {
      if ($self->{has_link_type}->{$token}) {
        $self->{onerror}->(node => $attr,
                           type => 'link type:duplicate:'.$token);
      } else {
        $self->{has_link_type}->{$token} = 1;
      }
    }
  }

  $todo->{has_hyperlink_link_type} = 1
      if $seen{alternate} and not $seen{stylesheet};

  ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
  ## says that using both X-Pingback: header field and HTML
  ## <link rel=pingback> is deprecated and if both appears they
  ## SHOULD contain exactly the same value.
  ## ISSUE: Pingback 1.0 specification defines the exact representation
  ## of its link element, which cannot be tested by the current arch.
  ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
  ## include any string that matches to the pattern for the rel=pingback link,
  ## which again is impossible to test.
  ## ISSUE: rel=pingback href MUST NOT include entities other than predefined 4.

  ## NOTE: <link rel="up index"><link rel="up up index"> is not an error.
  ## NOTE: We can't check "If the page is part of multiple hierarchies,
  ## then they SHOULD be described in different paragraphs.".
}; # $HTMLLinkTypesAttrChecker
345 wakaba 1.20
346     ## TODO: "When an author uses a new type not defined by either this specification or the Wiki page, conformance checkers should offer to add the value to the Wiki, with the details described above, with the "proposal" status."
347 wakaba 1.1
## URI (or IRI)
##
## Passes the attribute value to |Whatpm::URIChecker->check_iri_reference|
## and forwards each reported problem to |onerror| with the type
## prefixed by |URI::|.  Also sets the |has_uri_attr| flag on the checker.
my $HTMLURIAttrChecker = sub {
  my ($self, $attr) = @_;
  ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
  my $value = $attr->value;
  my $report = sub {
    my %opt = @_;
    my $type = 'URI::'.$opt{type};
    $type .= ':'.$opt{position} if defined $opt{position};
    $self->{onerror}->(node => $attr, level => $opt{level}, type => $type);
  };
  Whatpm::URIChecker->check_iri_reference ($value, $report);
  $self->{has_uri_attr} = 1; ## TODO: <html manifest>
}; # $HTMLURIAttrChecker
361    
## A space separated list of one or more URIs (or IRIs)
##
## Each item is checked with |Whatpm::URIChecker->check_iri_reference|;
## reported problems are forwarded to |onerror| with a type of the form
## |URIs::<type>:<item index>[:<position>]|.
my $HTMLSpaceURIsAttrChecker = sub {
  my ($self, $attr) = @_;
  my @items = split /[\x09-\x0D\x20]+/, $attr->value;
  my $i = 0;
  for my $item (@items) {
    my $report = sub {
      my %opt = @_;
      my $type = 'URIs::'.$opt{type}.':'.$i;
      $type .= ':'.$opt{position} if defined $opt{position};
      $self->{onerror}->(node => $attr, level => $opt{level}, type => $type);
    };
    Whatpm::URIChecker->check_iri_reference ($item, $report);
    $i++;
  }
  ## ISSUE: Relative references?
  ## ISSUE: Leading or trailing white spaces are conformant?
  ## ISSUE: A sequence of white space characters are conformant?
  ## ISSUE: A zero-length string is conformant? (It does contain a relative reference, i.e. same as base URI.)
  ## NOTE: Duplication seems not an error.
  $self->{has_uri_attr} = 1;
}; # $HTMLSpaceURIsAttrChecker
383    
## Checks a date-and-time value of the form
## |YYYY-MM-DD|, separator (spaces and/or |T|), |hh:mm[:ss][.fraction]|,
## optional spaces, then |Z| or |+hh:mm| / |-hh:mm|.
## A value that does not match is reported as a syntax error; otherwise
## each out-of-range component is reported separately.
my $HTMLDatetimeAttrChecker = sub {
  my ($self, $attr) = @_;
  my $report = sub {
    $self->{onerror}->(node => $attr, type => $_[0]);
  };

  ## ISSUE: "space", not "space character" (in parsing algorihtm, "space character")
  my @field = $attr->value =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})(?>[\x09-\x0D\x20]+(?>T[\x09-\x0D\x20]*)?|T[\x09-\x0D\x20]*)([0-9]{2}):([0-9]{2})(?>:([0-9]{2}))?(?>\.([0-9]+))?[\x09-\x0D\x20]*(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/;
  unless (@field) {
    $report->('datetime:syntax error');
    return;
  }
  my ($y, $M, $d, $h, $m, $s, $f, $zh, $zm) = @field;

  ## Index 0 is unused; February is given its leap-year length and
  ## February 29 of non-leap years is rejected separately below.
  my @last_day = (0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31);
  if ($M < 1 or $M > 12) { ## ISSUE: This is not explicitly specified (though in parsing algorithm)
    $report->('datetime:bad month');
  } else {
    $report->('datetime:bad day') if $d < 1 or $d > $last_day[$M];
    my $is_leap_year = ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
    $report->('datetime:bad day')
        if $M == 2 and $d == 29 and not $is_leap_year;
  }

  $report->('datetime:bad hour') if $h > 23;
  $report->('datetime:bad minute') if $m > 59;
  $report->('datetime:bad second') if defined $s and $s > 59;
  ## $zh and $zm are undefined when the time zone is |Z|.
  $report->('datetime:bad timezone hour') if $zh > 23;
  $report->('datetime:bad timezone minute') if $zm > 59;
  ## ISSUE: Maybe timezone -00:00 should have same semantics as in RFC 3339.
}; # $HTMLDatetimeAttrChecker
414    
## Checks that the value is an optional |-| followed by one or more
## ASCII digits.
my $HTMLIntegerAttrChecker = sub {
  my ($self, $attr) = @_;
  return if $attr->value =~ /\A-?[0-9]+\z/;
  $self->{onerror}->(node => $attr, type => 'integer:syntax error');
}; # $HTMLIntegerAttrChecker
422    
## Returns a checker for a non-negative integer attribute.
## |$range_check| is called with the numeric value and must return
## true if the value is within the allowed range.
my $GetHTMLNonNegativeIntegerAttrChecker = sub {
  my $range_check = shift;
  return sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    unless ($value =~ /\A[0-9]+\z/) {
      $self->{onerror}->(node => $attr,
                         type => 'nninteger:syntax error');
      return;
    }
    return if $range_check->($value + 0);
    $self->{onerror}->(node => $attr, type => 'nninteger:out of range');
  };
}; # $GetHTMLNonNegativeIntegerAttrChecker
438    
## Returns a checker for a floating point number attribute.
## |$range_check| is called with the numeric value and must return
## true if the value is within the allowed range.
##
## The accepted syntax is an optional |-|, followed by digits with at
## most one |.| among them, with at least one digit in total (so |1|,
## |1.5|, |.5| and |1.| are accepted).  Values containing more than one
## |.| (e.g. |1.2.3|) are syntax errors; they used to be accepted and
## then range-checked against a silently truncated number.
my $GetHTMLFloatingPointNumberAttrChecker = sub {
  my $range_check = shift;
  return sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    if ($value =~ /\A-?[0-9]*\.?[0-9]*\z/ and $value =~ /[0-9]/) {
      unless ($range_check->($value + 0)) {
        $self->{onerror}->(node => $attr, type => 'float:out of range');
      }
    } else {
      $self->{onerror}->(node => $attr,
                         type => 'float:syntax error');
    }
  };
}; # $GetHTMLFloatingPointNumberAttrChecker
454    
## "A valid MIME type, optionally with parameters. [RFC 2046]"
## ISSUE: RFC 2046 does not define syntax of media types.
## ISSUE: The definition of "a valid MIME type" is unknown.
## Syntactical correctness?
##
## Parses the value as |type/subtype| followed by zero or more
## |;name=value| parameters, then passes
## (type, subtype, name1 => value1, ...) to
## |Whatpm::IMTChecker->check_imt|.  A value that does not match this
## syntax is reported as |IMT:syntax error|.
my $HTMLIMTAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;
  ## ISSUE: RFC 2045 Content-Type header field allows insertion
  ## of LWS/comments between tokens. Is it allowed in HTML? Maybe no.
  ## ISSUE: RFC 2231 extension? Maybe no.
  my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
  my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
  my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
  if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
    my @type = ($1, $2);
    my $param = $3;
    while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
      ## Exactly one of $tok / $quoted is defined for each parameter.
      my ($name, $tok, $quoted) = ($1, $2, $3);
      if (defined $tok) {
        push @type, $name => $tok;
      } else {
        ## Quoted-string value: drop the delimiting quotation marks,
        ## then resolve quoted-pairs (|\x| -> |x|).
        my $v = substr $quoted, 1, -1;
        $v =~ s/\\(.)/$1/gs;
        push @type, $name => $v;
      }
    }
    require Whatpm::IMTChecker;
    Whatpm::IMTChecker->check_imt (sub {
      my %opt = @_;
      $self->{onerror}->(node => $attr, level => $opt{level},
                         type => 'IMT:'.$opt{type});
    }, @type);
  } else {
    $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
  }
}; # $HTMLIMTAttrChecker
491    
## Checks the value with |Whatpm::LangTag->check_rfc3066_language_tag|
## and forwards each reported problem to |onerror| with a type of the
## form |LangTag:<type>[:<subtag>]|.
my $HTMLLanguageTagAttrChecker = sub {
  ## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm.

  my ($self, $attr) = @_;
  my $report = sub {
    my %opt = @_;
    my @part = ('LangTag', $opt{type});
    push @part, $opt{subtag} if defined $opt{subtag};
    $self->{onerror}->(node => $attr, type => join (':', @part),
                       value => $opt{value}, level => $opt{level});
  };
  my $value = $attr->value;
  require Whatpm::LangTag;
  Whatpm::LangTag->check_rfc3066_language_tag ($value, $report);
  ## ISSUE: RFC 4646 (3066bis)?

  ## TODO: testdata
}; # $HTMLLanguageTagAttrChecker
509    
## "A valid media query [MQ]"
##
## Media queries are not checked; the attribute is always reported at
## the |unsupported| level.
my $HTMLMQAttrChecker = sub {
  my ($self, $attr) = @_;
  my %report = (node => $attr, level => 'unsupported',
                type => 'media query');
  $self->{onerror}->(%report);
  ## ISSUE: What is "a valid media query"?
}; # $HTMLMQAttrChecker
517    
## Event handler content attributes are not checked; the attribute is
## always reported at the |unsupported| level.
my $HTMLEventHandlerAttrChecker = sub {
  my ($self, $attr) = @_;
  my %report = (node => $attr, level => 'unsupported',
                type => 'event handler');
  $self->{onerror}->(%report);
  ## TODO: MUST contain valid ECMAScript code matching the
  ## ECMAScript |FunctionBody| production. [ECMA262]
  ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
  ## ISSUE: Automatic semicolon insertion does not apply?
  ## ISSUE: Other script languages?
}; # $HTMLEventHandlerAttrChecker
528    
## Checks a hashed ID reference (|#name|).  The name (without the
## leading |#|) is queued, together with the attribute node, in
## |$self->{usemap}|; a value that does not start with |#| is reported.
my $HTMLUsemapAttrChecker = sub {
  my ($self, $attr) = @_;
  ## MUST be a valid hashed ID reference to a |map| element
  my $value = $attr->value;
  unless ($value =~ /^#/) {
    $self->{onerror}->(node => $attr, type => '#idref:syntax error');
    return;
  }
  ## ISSUE: Is |usemap="#"| conformant? (c.f. |id=""| is non-conformant.)
  push @{$self->{usemap}}, [substr ($value, 1) => $attr];
  ## NOTE: Space characters in hashed ID references are conforming.
  ## ISSUE: UA algorithm for matching is case-insensitive; IDs only different in cases should be reported
}; # $HTMLUsemapAttrChecker
542    
## Checks a browsing context name or keyword.  Values beginning with
## |_| must be one of |_self|, |_parent| and |_top| (compared after
## lowercasing); any other value is accepted.
my $HTMLTargetAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;

  ## NOTE: An empty string is a valid browsing context name (same as _self).
  return unless $value =~ /^_/;

  my %keyword = map { ($_ => 1) } qw/_self _parent _top/;
  return if $keyword{lc $value}; ## ISSUE: ASCII case-insentitive?

  $self->{onerror}->(node => $attr,
                     type => 'reserved browsing context name');
}; # $HTMLTargetAttrChecker
558    
## Parses the value with |Whatpm::CSS::SelectorsParser|, after
## registering the pseudo-classes and pseudo-elements listed below with
## the parser.  Problems reported by the parser are forwarded to
## |onerror| with the type prefixed by |selectors:|.
my $HTMLSelectorsAttrChecker = sub {
  my ($self, $attr) = @_;

  ## ISSUE: Namespace resolution?

  my $value = $attr->value;

  require Whatpm::CSS::SelectorsParser;
  my $parser = Whatpm::CSS::SelectorsParser->new;

  my @pseudo_class = qw/
    active checked disabled empty enabled first-child first-of-type
    focus hover indeterminate last-child last-of-type link only-child
    only-of-type root target visited
    lang nth-child nth-last-child nth-of-type nth-last-of-type not
    -manakai-contains -manakai-current
  /;
  for my $name (@pseudo_class) {
    $parser->{pseudo_class}->{$name} = 1;
  }

  my @pseudo_element = qw/
    after before first-letter first-line
  /;
  for my $name (@pseudo_element) {
    $parser->{pseudo_element}->{$name} = 1;
  }

  $parser->{must_level} = $self->{must_level};
  $parser->{onerror} = sub {
    my %opt = @_;
    $opt{type} = 'selectors:'.$opt{type};
    $self->{onerror}->(%opt, node => $attr);
  };
  $parser->parse_string ($value);
}; # $HTMLSelectorsAttrChecker
588    
## Checkers for the attributes that are checked the same way on every
## HTML element: attribute local name => checker code reference.
## Checkers are invoked as |$checker->($self, $attr, $item)|.
my $HTMLAttrChecker = {
  ## TODO: aria-* ## TODO: svg:*/@aria-* [HTML5ROLE] -> [STATES]
  id => sub {
    ## NOTE: |map| has its own variant of |id=""| checker
    my ($self, $attr) = @_;
    my $value = $attr->value;
    unless (length $value > 0) {
      ## NOTE: MUST contain at least one character
      $self->{onerror}->(node => $attr, type => 'empty attribute value');
      return;
    }

    ## Record every attribute node carrying this ID; a second or later
    ## occurrence of the same ID is reported.
    my $known = $self->{id}->{$value};
    if ($known) {
      $self->{onerror}->(node => $attr, type => 'duplicate ID');
      push @$known, $attr;
    } else {
      $self->{id}->{$value} = [$attr];
    }

    if ($value =~ /[\x09-\x0D\x20]/) {
      $self->{onerror}->(node => $attr, type => 'space in ID');
    }
  },
  title => sub {}, ## NOTE: No conformance criteria
  lang => sub {
    my ($self, $attr) = @_;
    ## The empty string is allowed; any other value is checked as a
    ## language tag.
    $HTMLLanguageTagAttrChecker->($self, $attr) unless $attr->value eq '';
    ## ISSUE: RFC 4646 (3066bis)?
    unless ($attr->owner_document->manakai_is_html) {
      $self->{onerror}->(node => $attr, type => 'in XML:lang');
    }

    ## TODO: test data
  },
  dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
  class => sub {
    my ($self, $attr) = @_;
    my %seen;
    for my $token (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
      if ($seen{$token}++) {
        $self->{onerror}->(node => $attr, type => 'duplicate token:'.$token);
      } else {
        ## Record the attribute under each distinct class name.
        push @{$self->{return}->{class}->{$token}||=[]}, $attr;
      }
    }
  },
  contextmenu => sub {
    my ($self, $attr) = @_;
    push @{$self->{contextmenu}}, [$attr->value => $attr];
    ## ISSUE: "The value must be the ID of a menu element in the DOM."
    ## What is "in the DOM"?  A menu Element node that is not part
    ## of the Document tree is in the DOM?  A menu Element node that
    ## belong to another Document tree is in the DOM?
  },
  irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
  ## TODO: repeat, repeat-start, repeat-min, repeat-max, repeat-template ## TODO: global
  ## TODO: role [HTML5ROLE] ## TODO: global @role [XHTML1ROLE]
  tabindex => $HTMLIntegerAttrChecker,
  ## TODO: ref, template, registrationmark
};
661    
## Feature status of the attributes in the table: attribute local
## name => FEATURE_* value.  Attributes are grouped by status below.
my %HTMLAttrStatus = (
  (map { ($_ => FEATURE_HTML5_DEFAULT) } qw/
    class contenteditable dir id lang tabindex title
  /),
  (map { ($_ => FEATURE_HTML5_WD) } qw/
    contextmenu irrelevant
  /),
  (map { ($_ => FEATURE_HTML5_LC) } qw/
    draggable
  /),
  (map { ($_ => FEATURE_HTML5_AT_RISK) } qw/
    ref registrationmark template
  /),
  (map { ($_ => FEATURE_WF2) } qw/
    repeat repeat-max repeat-min repeat-start repeat-template
  /),
  (map { ($_ => FEATURE_HTML5_ROLE) } qw/
    role
  /),
);
683    
## Feature status of the attributes in the table: attribute local
## name => FEATURE_* value.  Every attribute except |style| has the
## same status.
my %HTMLM12NCommonAttrStatus = (
  (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) } qw/
    class dir id
    onclick ondblclick
    onmousedown onmouseup onmouseover onmousemove onmouseout
    onkeypress onkeydown onkeyup
    title
  /),
  style => FEATURE_HTML5_DEFAULT | FEATURE_XHTMLBASIC11_CR_DEPRECATED |
           FEATURE_M12N10_REC,
);
702    
## Register the event handler content attributes: each one is checked
## by $HTMLEventHandlerAttrChecker and has the HTML5 default status.
for my $event_attr_name (qw/
  onabort onbeforeunload onblur onchange onclick oncontextmenu
  ondblclick ondrag ondragend ondragenter ondragleave ondragover
  ondragstart ondrop onerror onfocus onkeydown onkeypress
  onkeyup onload onmessage onmousedown onmousemove onmouseout
  onmouseover onmouseup onmousewheel onresize onscroll onselect
  onsubmit onunload
/) {
  $HTMLAttrChecker->{$event_attr_name} = $HTMLEventHandlerAttrChecker;
  $HTMLAttrStatus{$event_attr_name} = FEATURE_HTML5_DEFAULT;
}
714    
## Returns a |check_attrs| handler for an element.
##
##   $element_specific_checker - {attribute local name => checker};
##     takes precedence over $HTMLAttrChecker
##   $element_specific_status  - {attribute local name => status};
##     passed to |_attr_status_info|
##
## For attributes in no namespace, the checker is looked up first in the
## element specific table and then in $HTMLAttrChecker.  If none is
## found (or the attribute is in a namespace), $AttrChecker is consulted,
## falling back to its per-namespace |''| entry.
my $GetHTMLAttrsChecker = sub {
  my ($element_specific_checker, $element_specific_status) = @_;
  return sub {
    my ($self, $item, $element_state) = @_;
    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;
      my $in_null_ns = $attr_ns eq '';

      my $checker;
      $checker = $element_specific_checker->{$attr_ln}
          || $HTMLAttrChecker->{$attr_ln}
          if $in_null_ns;
      $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
          || $AttrChecker->{$attr_ns}->{''};

      if ($checker) {
        $checker->($self, $attr, $item);
      } elsif (not $in_null_ns) {
        ## An unknown attribute in no namespace is not reported here;
        ## only namespaced attributes without any checker are.
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute');
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }

      $self->_attr_status_info ($attr, $element_specific_status->{$attr_ln})
          if $in_null_ns;
      ## TODO: global attribute
    }
  };
}; # $GetHTMLAttrsChecker
747    
## Base checker for HTML elements: the generic
## %Whatpm::ContentChecker::AnyChecker handlers, with |check_attrs|
## replaced by a checker using the common HTML attribute tables.
my %HTMLChecker = (
  %Whatpm::ContentChecker::AnyChecker,
  check_attrs => $GetHTMLAttrsChecker->({}, \%HTMLAttrStatus),
);
752    
## Checker for elements whose content model is empty: every child
## element is reported unless it is listed in |plus_elements| (and not
## in |minus_elements|), and significant text is reported.
my %HTMLEmptyChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    my $type;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $type = 'element not allowed:minus';
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      return;
    } else {
      $type = 'element not allowed:empty';
    }
    $self->{onerror}->(node => $child_el,
                       type => $type,
                       level => $self->{must_level});
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    return unless $has_significant;
    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed:empty',
                       level => $self->{must_level});
  },
);
779    
## Checker for elements that may contain only text: child elements are
## errors unless listed in |plus_elements|.
my %HTMLTextChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    ## Only the |minus| error carries an explicit level; the plain
    ## "not allowed" error is raised without one.
    my @error;
    if ($self->{minus_elements}{$child_nsuri}{$child_ln}) {
      @error = (type => 'element not allowed:minus',
                level => $self->{must_level});
    } elsif (not $self->{plus_elements}{$child_nsuri}{$child_ln}) {
      @error = (type => 'element not allowed');
    }
    $self->{onerror}->(node => $child_el, @error) if @error;
  },
);
796    
## TODO: Rename as "FlowContent" (HTML5 revision 1261)
##
## Checker for elements whose content model is prose content.  A |style|
## child is accepted only if it has the |scoped| attribute and precedes
## every non-|style| child (tracked via |has_non_style|).
my %HTMLProseContentChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    my $error_type;
    if ($self->{minus_elements}{$child_nsuri}{$child_ln}) {
      $error_type = 'element not allowed:minus';
    } elsif ($self->{plus_elements}{$child_nsuri}{$child_ln}) {
      ## Explicitly included: nothing to check.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
      $error_type = 'element not allowed:prose style'
          if $element_state->{has_non_style}
              or not $child_el->has_attribute_ns(undef, 'scoped');
    } elsif ($HTMLProseContent->{$child_nsuri}{$child_ln}) {
      ## A transparent child does not count as non-|style| content.
      $element_state->{has_non_style} = 1 unless $child_is_transparent;
    } else {
      $element_state->{has_non_style} = 1;
      $error_type = 'element not allowed:prose';
    }
    $self->{onerror}->(node => $child_el,
                       type => $error_type,
                       level => $self->{must_level})
        if defined $error_type;
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $element_state->{has_non_style} = 1 if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if ($element_state->{has_significant}) {
      ## Propagate the "has significant content" mark upwards.
      $item->{real_parent_state}{has_significant} = 1;
    } elsif (not $item->{transparent}) {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{should_level},
                         type => 'no significant content');
    }
  },
);
844    
## Checker for elements whose content model is phrasing content.
my %HTMLPhrasingContentChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    my $error_type;
    if ($self->{minus_elements}{$child_nsuri}{$child_ln}) {
      $error_type = 'element not allowed:minus';
    } elsif (not ($self->{plus_elements}{$child_nsuri}{$child_ln}
                  or $HTMLPhrasingContent->{$child_nsuri}{$child_ln})) {
      $error_type = 'element not allowed:phrasing';
    }
    $self->{onerror}->(node => $child_el,
                       type => $error_type,
                       level => $self->{must_level})
        if defined $error_type;
  },
  ## The significant-content check is shared with prose content.
  check_end => $HTMLProseContentChecker{check_end},
  ## NOTE: The definition for |li| assumes that the only differences
  ## between prose and phrasing content checkers are |check_child_element|
  ## and |check_child_text|.
);
869    
## Transparent elements currently reuse the prose content checker as is.
## ISSUE: Should the significant content rule be applied to a transparent
## element that has a parent?
my %HTMLTransparentChecker = (%HTMLProseContentChecker);
873 wakaba 1.40
## Package variables holding the element definition tables; they are only
## declared here, not initialised.
our ($Element, $ElementDefault);

## Entry stored under the empty local name in the HTML namespace.
$Element->{$HTML_NS}{''} = {%HTMLChecker};
880    
## |html|: the root element.  Its children must be exactly one |head|
## followed by one |body|; progress is tracked in |$element_state->{phase}|
## ('before head' -> 'after head' -> 'after body').
$Element->{$HTML_NS}{html} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  is_root => 1,
  check_attrs => $GetHTMLAttrsChecker->({
    manifest => $HTMLURIAttrChecker,
    xmlns => sub {
      my ($self, $attr) = @_;
      ## The only permitted value is the XHTML namespace URI.
      $self->{onerror}->(node => $attr, type => 'invalid attribute value')
          unless $attr->value eq $HTML_NS;
      $self->{onerror}->(node => $attr, type => 'in XML:xmlns')
          unless $attr->owner_document->manakai_is_html;
      ## TODO: Test
    },
  }, {
    %HTMLAttrStatus,
    dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    manifest => FEATURE_HTML5_DEFAULT,
    version => FEATURE_M12N10_REC,
    xmlns => FEATURE_HTML5_DEFAULT,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'before head';
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}{$child_nsuri}{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}{$child_nsuri}{$child_ln}) {
      ## Explicitly included: nothing to check.
    } else {
      my $phase = $element_state->{phase};
      die "check_child_element: Bad |html| phase: $phase"
          unless $phase eq 'before head'
              or $phase eq 'after head'
              or $phase eq 'after body';

      ## True if the child is the HTML element with the given local name.
      my $is_html_child = sub {
        return ($child_nsuri eq $HTML_NS and $child_ln eq $_[0]);
      };

      if ($phase eq 'before head' and $is_html_child->('head')) {
        $element_state->{phase} = 'after head';
      } elsif ($phase eq 'before head' and $is_html_child->('body')) {
        ## |body| without a preceding |head|.
        $self->{onerror}->(node => $child_el,
                           type => 'ps element missing:head');
        $element_state->{phase} = 'after body';
      } elsif ($phase eq 'after head' and $is_html_child->('body')) {
        $element_state->{phase} = 'after body';
      } else {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed');
      }
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed')
        if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## Errors to report for each phase the element can end in.
    my %missing_for = (
      'before head' => ['child element missing:head',
                        'child element missing:body'],
      'after head' => ['child element missing:body'],
      'after body' => [],
    );
    my $missing = $missing_for{$element_state->{phase}}
        or die "check_end: Bad |html| phase: $element_state->{phase}";
    for my $error_type (@$missing) {
      $self->{onerror}->(node => $item->{node},
                         type => $error_type);
    }

    $HTMLChecker{check_end}->(@_);
  },
};
970 wakaba 1.25
## |head|: metadata content with exactly one |title|; a |style| child
## must not have the |scoped| attribute.  While the children of |head|
## are being processed, |$self->{flag}->{in_head}| is set to true; the
## previous value is restored by |check_end|.
$Element->{$HTML_NS}->{head} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    profile => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'title') {
      ## Only the first |title| is allowed.
      unless ($element_state->{has_title}) {
        $element_state->{has_title} = 1;
      } else {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:head title',
                           level => $self->{must_level});
      }
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
      if ($child_el->has_attribute_ns (undef, 'scoped')) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:head style',
                           level => $self->{must_level});
      }
    } elsif ($HTMLMetadataContent->{$child_nsuri}->{$child_ln}) {
      #

      ## NOTE: |meta| is a metadata content.  However, strictly speaking,
      ## a |meta| element with none of |charset|, |name|,
      ## or |http-equiv| attribute is not allowed.  It is non-conforming
      ## anyway.

      ## TODO: |form| MUST be empty and in XML [WF2].
    } else {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:metadata',
                         level => $self->{must_level});
    }

    ## Save the outer |in_head| flag only once.  Saving it again for
    ## every child would record the value 1 set below for the previous
    ## child, and |check_end| would then leave the flag set after the
    ## |head| element has ended.
    unless (exists $element_state->{in_head_original}) {
      $element_state->{in_head_original} = $self->{flag}->{in_head};
    }
    $self->{flag}->{in_head} = 1;
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $self->{onerror}->(node => $child_node, type => 'character not allowed');
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    unless ($element_state->{has_title}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing:title');
    }
    ## Restore the flag only if this element actually changed it (i.e. it
    ## had at least one child element).
    if (exists $element_state->{in_head_original}) {
      $self->{flag}->{in_head} = $element_state->{in_head_original};
    }

    $HTMLChecker{check_end}->(@_);
  },
};
1037    
## |title|: text-only content; no element-specific attribute checkers.
$Element->{$HTML_NS}{title} = {
  %HTMLTextChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
};
1048 wakaba 1.1
## |base|: empty element.  At most one per document (tracked in
## |$self->{has_base}|); at least one of |href| and |target| is required;
## |href| must come before any URI attribute has been seen and |target|
## before any hyperlink element has been seen.
$Element->{$HTML_NS}{base} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  %HTMLEmptyChecker,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $base_el = $item->{node};

    ## A second |base| element is an error; the first one sets the mark.
    $self->{onerror}->(node => $base_el,
                       type => 'element not allowed:base')
        if $self->{has_base};
    $self->{has_base} ||= 1;

    my $has_href = $base_el->has_attribute_ns(undef, 'href');
    my $has_target = $base_el->has_attribute_ns(undef, 'target');

    if ($self->{has_uri_attr} and $has_href) {
      ## ISSUE: Are these examples conforming?
      ## <head profile="a b c"><base href> (except for |profile|'s
      ## non-conformance)
      ## <title xml:base="relative"/><base href/> (maybe it should be)
      ## <unknown xmlns="relative"/><base href/> (assuming that
      ## |{relative}:unknown| is allowed before XHTML |base| (unlikely, though))
      ## <style>@import 'relative';</style><base href>
      ## <script>location.href = 'relative';</script><base href>
      ## NOTE: <html manifest=".."><head><base href=""/> is conforming as
      ## an exception.
      $self->{onerror}->(node => $base_el,
                         type => 'basehref after URI attribute');
    }
    if ($self->{has_hyperlink_element} and $has_target) {
      ## ISSUE: Are these examples conforming?
      ## <head><title xlink:href=""/><base target="name"/></head>
      ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
      ## (assuming that |xbl:xbl| is allowed before |base|)
      ## NOTE: These are non-conformant anyway because of |head|'s content model:
      ## <link href=""/><base target="name"/>
      ## <link rel=unknown href=""><base target=name>
      $self->{onerror}->(node => $base_el,
                         type => 'basetarget after hyperlink');
    }

    $self->{onerror}->(node => $base_el,
                       type => 'attribute missing:href|target')
        unless $has_href or $has_target;

    ## Finally run the per-attribute checks.
    my $attr_checker = $GetHTMLAttrsChecker->({
      href => $HTMLURIAttrChecker,
      target => $HTMLTargetAttrChecker,
    }, {
      %HTMLAttrStatus,
      href => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      target => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    });
    return $attr_checker->($self, $item, $element_state);
  },
};
1107    
## |link|: empty element; both |href| and |rel| are required.  A |link|
## with |href| whose |rel| checker marked the item with
## |has_hyperlink_link_type| sets |$self->{has_hyperlink_element}|.
$Element->{$HTML_NS}{link} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  %HTMLEmptyChecker,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $link_el = $item->{node};

    my $attr_checker = $GetHTMLAttrsChecker->({
      href => $HTMLURIAttrChecker,
      ## The |rel| checker receives |$item| ahead of its usual arguments.
      rel => sub { $HTMLLinkTypesAttrChecker->(0, $item, @_) },
      media => $HTMLMQAttrChecker,
      hreflang => $HTMLLanguageTagAttrChecker,
      type => $HTMLIMTAttrChecker,
      ## NOTE: Though |title| has special semantics,
      ## syntactically same as the |title| as global attribute.
    }, {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      charset => FEATURE_M12N10_REC,
      href => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      hreflang => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      media => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      rel => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      rev => FEATURE_M12N10_REC,
      target => FEATURE_M12N10_REC,
      type => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    });
    $attr_checker->($self, $item, $element_state);

    if (not $link_el->has_attribute_ns(undef, 'href')) {
      $self->{onerror}->(node => $link_el,
                         type => 'attribute missing:href');
    } elsif ($item->{has_hyperlink_link_type}) {
      $self->{has_hyperlink_element} = 1;
    }
    $self->{onerror}->(node => $link_el,
                       type => 'attribute missing:rel')
        unless $link_el->has_attribute_ns(undef, 'rel');
  },
};
1146    
## |meta|: empty element.  Exactly one of |name|, |http-equiv| and
## |charset| must be specified; |content| is required with |name| and
## |http-equiv| and forbidden otherwise.  Character encoding declarations
## (|charset|, or |http-equiv="content-type"|) get additional placement
## and charset-name checks.
$Element->{$HTML_NS}->{meta} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  %HTMLEmptyChecker,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $name_attr;
    my $http_equiv_attr;
    my $charset_attr;
    my $content_attr;

    ## Null-namespace attributes that are validated by the cross-attribute
    ## checks below rather than by a per-attribute checker.
    my %own_attr = (
      content => \$content_attr,
      name => \$name_attr,
      'http-equiv' => \$http_equiv_attr,
      charset => \$charset_attr,
    );

    ## Built once per element rather than once per attribute.
    my $attr_status = {
      %HTMLAttrStatus,
      charset => FEATURE_HTML5_DEFAULT,
      content => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      'http-equiv' => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      scheme => FEATURE_M12N10_REC,
    };

    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;
      my $checker;
      if ($attr_ns eq '' and $own_attr{$attr_ln}) {
        ${$own_attr{$attr_ln}} = $attr;
        $checker = 1; # known attribute, but nothing to call here
      } elsif ($attr_ns eq '') {
        $checker = $HTMLAttrChecker->{$attr_ln}
            || $AttrChecker->{$attr_ns}->{$attr_ln}
            || $AttrChecker->{$attr_ns}->{''};
      } else {
        $checker = $AttrChecker->{$attr_ns}->{$attr_ln}
            || $AttrChecker->{$attr_ns}->{''};
      }

      if ($checker) {
        ## |$item| is passed for consistency with |$GetHTMLAttrsChecker|,
        ## which invokes the same checkers with it.
        $checker->($self, $attr, $item) if ref $checker;
      } elsif ($attr_ns eq '') {
        #
      } else {
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute');
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }

      if ($attr_ns eq '') {
        $self->_attr_status_info ($attr, $attr_status->{$attr_ln});
      }
    }

    ## Cross-attribute constraints.
    if (defined $name_attr) {
      if (defined $http_equiv_attr) {
        $self->{onerror}->(node => $http_equiv_attr,
                           type => 'attribute not allowed');
      } elsif (defined $charset_attr) {
        $self->{onerror}->(node => $charset_attr,
                           type => 'attribute not allowed');
      }
      unless (defined $content_attr) {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:content');
      }
    } elsif (defined $http_equiv_attr) {
      if (defined $charset_attr) {
        $self->{onerror}->(node => $charset_attr,
                           type => 'attribute not allowed');
      }
      unless (defined $content_attr) {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:content');
      }
    } elsif (defined $charset_attr) {
      if (defined $content_attr) {
        $self->{onerror}->(node => $content_attr,
                           type => 'attribute not allowed');
      }
    } else {
      if (defined $content_attr) {
        $self->{onerror}->(node => $content_attr,
                           type => 'attribute not allowed');
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:name|http-equiv');
      } else {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:name|http-equiv|charset');
      }
    }

    ## Checks the placement of a character encoding declaration: it must
    ## be the first element child of the document's |head|, and it is
    ## not allowed in an XML document.
    my $check_charset_decl = sub {
      my $parent = $item->{node}->manakai_parent_element;
      if ($parent and $parent eq $parent->owner_document->manakai_head) {
        for my $el (@{$parent->child_nodes}) {
          next unless $el->node_type == 1; # ELEMENT_NODE
          unless ($el eq $item->{node}) {
            ## NOTE: Not the first child element.
            $self->{onerror}->(node => $item->{node},
                               type => 'element not allowed:meta charset',
                               level => $self->{must_level});
          }
          last; # only the first element child matters
          ## NOTE: Entity references are not supported.
        }
      } else {
        $self->{onerror}->(node => $item->{node},
                           type => 'element not allowed:meta charset',
                           level => $self->{must_level});
      }

      unless ($item->{node}->owner_document->manakai_is_html) {
        $self->{onerror}->(node => $item->{node},
                           type => 'in XML:charset',
                           level => $self->{must_level});
      }
    }; # $check_charset_decl

    ## Checks a charset name |$charset_value| taken from attribute |$attr|
    ## against the document's input encoding and the IANA registry data.
    my $check_charset = sub {
      my ($attr, $charset_value) = @_;
      ## NOTE: Though the case-sensitivity of |charset| attribute value
      ## is not explicitly spelled in the HTML5 spec, the Character Set
      ## registry of IANA, which is referenced from HTML5 spec, says that
      ## charset name is case-insensitive.
      $charset_value =~ tr/A-Z/a-z/; ## NOTE: ASCII Case-insensitive.

      require Message::Charset::Info;
      my $charset = $Message::Charset::Info::IANACharset->{$charset_value};
      my $ic = $item->{node}->owner_document->input_encoding;
      if (defined $ic) {
        ## TODO: Test for this case
        my $ic_charset = $Message::Charset::Info::IANACharset->{$ic};
        if ($charset ne $ic_charset) {
          $self->{onerror}->(node => $attr,
                             type => 'mismatched charset name:'.$ic.
                                 ':'.$charset_value, ## TODO: This should be a |value| value.
                             level => $self->{must_level});
        }
      } else {
        ## NOTE: MUST, but not checkable, since the document is not originally
        ## in serialized form (or the parser does not preserve the input
        ## encoding information).
        $self->{onerror}->(node => $attr,
                           type => 'mismatched charset name::'.$charset_value, ## TODO: |value|
                           level => 'unsupported');
      }

      ## ISSUE: What is "valid character encoding name"?  Syntactically valid?
      ## Syntactically valid and registered?  What about x-charset names?
      unless (Message::Charset::Info::is_syntactically_valid_iana_charset_name
                  ($charset_value)) {
        $self->{onerror}->(node => $attr,
                           type => 'charset:syntax error:'.$charset_value, ## TODO
                           level => $self->{must_level});
      }

      if ($charset) {
        ## ISSUE: What is "the preferred name for that encoding" (for a charset
        ## with no "preferred MIME name" label)?
        my $charset_status = $charset->{iana_names}->{$charset_value} || 0;
        if (($charset_status &
             Message::Charset::Info::PREFERRED_CHARSET_NAME ())
                != Message::Charset::Info::PREFERRED_CHARSET_NAME ()) {
          $self->{onerror}->(node => $attr,
                             type => 'charset:not preferred:'.
                                 $charset_value, ## TODO
                             level => $self->{must_level});
        }
        if (($charset_status &
             Message::Charset::Info::REGISTERED_CHARSET_NAME ())
                != Message::Charset::Info::REGISTERED_CHARSET_NAME ()) {
          if ($charset_value =~ /^x-/) {
            $self->{onerror}->(node => $attr,
                               type => 'charset:private:'.$charset_value, ## TODO
                               level => $self->{good_level});
          } else {
            $self->{onerror}->(node => $attr,
                               type => 'charset:not registered:'.
                                   $charset_value, ## TODO
                               level => $self->{good_level});
          }
        }
      } elsif ($charset_value =~ /^x-/) {
        $self->{onerror}->(node => $attr,
                           type => 'charset:private:'.$charset_value, ## TODO
                           level => $self->{good_level});
      } else {
        $self->{onerror}->(node => $attr,
                           type => 'charset:not registered:'.$charset_value, ## TODO
                           level => $self->{good_level});
      }

      if ($attr->get_user_data ('manakai_has_reference')) {
        $self->{onerror}->(node => $attr,
                           type => 'character reference in charset',
                           level => $self->{must_level});
      }
    }; # $check_charset

    ## TODO: metadata conformance (checks on the |name| / |content| pair)

    ## TODO: pragma conformance
    if (defined $http_equiv_attr) { ## An enumerated attribute
      ## Keywords are compared ASCII case-insensitively, in the same way
      ## as charset names above.
      my $keyword = $http_equiv_attr->value;
      $keyword =~ tr/A-Z/a-z/;
      if ({
        'refresh' => 1,
        'default-style' => 1,
      }->{$keyword}) {
        #

        ## TODO: More than one occurrence is a MUST-level error (revision 1180).
      } elsif ($keyword eq 'content-type') {
        ## TODO: refs in "text/html; charset=" are not disallowed since rev.1275.

        $check_charset_decl->();
        if ($content_attr) {
          my $content = $content_attr->value;
          if ($content =~ m!^[Tt][Ee][Xx][Tt]/[Hh][Tt][Mm][Ll];
                            [\x09-\x0D\x20]*[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
                            =(.+)\z!sx) {
            $check_charset->($content_attr, $1);
          } else {
            $self->{onerror}->(node => $content_attr,
                               type => 'meta content-type syntax error',
                               level => $self->{must_level});
          }
        }
      } else {
        $self->{onerror}->(node => $http_equiv_attr,
                           type => 'enumerated:invalid');
      }
    }

    if (defined $charset_attr) {
      $check_charset_decl->();
      $check_charset->($charset_attr, $charset_attr->value);
    }
  },
};
1399    
## |style|: the text content is collected and, if the styling language is
## |text/css| (the default when |type| is absent), handed to
## |$self->{onsubdoc}| as a sub-document.  Child elements are allowed only
## when the styling language is unknown.
$Element->{$HTML_NS}->{style} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  %HTMLChecker,
  check_attrs => $GetHTMLAttrsChecker->({
    type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
    media => $HTMLMQAttrChecker,
    scoped => $GetHTMLBooleanAttrChecker->('scoped'),
    ## NOTE: |title| has special semantics for |style|s, but is syntactically
    ## not different
  }, {
    %HTMLAttrStatus,
    dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    media => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    scoped => FEATURE_HTML5_DEFAULT,
    title => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    type => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Start from the empty string so that a |style| element without any
    ## text child still yields a defined (empty) style sheet in
    ## |check_end|.
    $element_state->{text} = '';

    ## NOTE: |html:style| itself has no conformance criteria on content model.
    my $type = $item->{node}->get_attribute_ns (undef, 'type');
    if (not defined $type or
        $type =~ m[\A(?>(?>\x0D\x0A)?[\x09\x20])*[Tt][Ee][Xx][Tt](?>(?>\x0D\x0A)?[\x09\x20])*/(?>(?>\x0D\x0A)?[\x09\x20])*[Cc][Ss][Ss](?>(?>\x0D\x0A)?[\x09\x20])*\z]) {
      $element_state->{allow_element} = 0;
      $element_state->{style_type} = 'text/css';
    } else {
      $element_state->{allow_element} = 1; # unknown
      $element_state->{style_type} = $type; ## TODO: $type normalization
    }
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($element_state->{allow_element}) {
      #
    } else {
      $self->{onerror}->(node => $child_el, type => 'element not allowed');
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    ## Accumulate the style sheet source across all text children.
    $element_state->{text} .= $child_node->text_content;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if ($element_state->{style_type} eq 'text/css') {
      $self->{onsubdoc}->({s => $element_state->{text},
                           container_node => $item->{node},
                           media_type => 'text/css', is_char_string => 1});
    } else {
      $self->{onerror}->(node => $item->{node}, level => 'unsupported',
                         type => 'style:'.$element_state->{style_type});
    }

    $HTMLChecker{check_end}->(@_);
  },
};
## ISSUE: Relationship to significant content check?
1467 wakaba 1.1
## |body|: prose content.  The presentational attributes are recorded
## only with their (deprecated) M12N status.
$Element->{$HTML_NS}{body} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      alink => FEATURE_M12N10_REC_DEPRECATED,
      background => FEATURE_M12N10_REC_DEPRECATED,
      bgcolor => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      link => FEATURE_M12N10_REC_DEPRECATED,
      onload => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onunload => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      text => FEATURE_M12N10_REC_DEPRECATED,
      vlink => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),
};
1485    
## |section|, |nav| and |article|: HTML5 elements with prose content and
## no element-specific attributes.  Each gets its own definition hash.
for my $local_name (qw/section nav article/) {
  $Element->{$HTML_NS}{$local_name} = {
    status => FEATURE_HTML5_DEFAULT,
    %HTMLProseContentChecker,
  };
}
1500    
## |blockquote|: prose content; |cite| is checked as a URI.
$Element->{$HTML_NS}{blockquote} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  %HTMLProseContentChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    {cite => $HTMLURIAttrChecker},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      cite => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
};
1513    
## |aside|: HTML5 element with prose content and no element-specific
## attributes.
$Element->{$HTML_NS}{aside} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLProseContentChecker,
};
1518    
## |h1|: phrasing content.  Starting a heading sets
## |$self->{flag}->{has_hn}|.
$Element->{$HTML_NS}{h1} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->{flag}{has_hn} = 1;
  },
};
1533    
## |h2| .. |h6| each receive their own shallow copy of the |h1| definition
## hash (the values, including the code references, are shared).
for my $level (2 .. 6) {
  $Element->{$HTML_NS}->{'h' . $level} = {%{$Element->{$HTML_NS}->{h1}}};
}
1543 wakaba 1.1
1544 wakaba 1.29 ## TODO: Explicit sectioning is "encouraged".
1545    
## |header|: members of |%HTMLProseContentChecker| with |check_start| and
## |check_end| replaced.  An error of type |element missing:hn| is reported
## when the |has_hn| flag has not been raised by the time the element ends.
$Element->{$HTML_NS}{header} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLProseContentChecker,
  check_start => sub {
    my ($self, undef, $element_state) = @_;

    ## Register |header|, |footer| and |$HTMLSectioningContent| as
    ## "minus" elements for the duration of this element.
    my $own_minus = {$HTML_NS => {qw/header 1 footer 1/}};
    $self->_add_minus_elements ($element_state,
                                $own_minus,
                                $HTMLSectioningContent);

    ## Remember the enclosing value of the flag, then clear it so that
    ## |check_end| only sees what was set while inside this |header|.
    $element_state->{has_hn_original} = $self->{flag}->{has_hn};
    $self->{flag}->{has_hn} = 0;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    $self->{onerror}->(node => $item->{node},
                       type => 'element missing:hn')
        if not $self->{flag}->{has_hn};

    ## Restore: the flag stays true if it was true before this |header|.
    $self->{flag}->{has_hn} ||= $element_state->{has_hn_original};

    $HTMLProseContentChecker{check_end}->(@_);
  },
  ## ISSUE: <header><del><h1>...</h1></del></header> is conforming?
};
1570    
## |footer|: members of |%HTMLProseContentChecker| with |check_start| and
## |check_end| replaced so that |footer|, |$HTMLSectioningContent| and
## |$HTMLHeadingContent| are registered as "minus" elements while the
## element is open.
$Element->{$HTML_NS}{footer} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLProseContentChecker,
  check_start => sub {
    my ($self, undef, $element_state) = @_;
    my $own_minus = {$HTML_NS => {footer => 1}};
    $self->_add_minus_elements ($element_state,
                                $own_minus,
                                $HTMLSectioningContent,
                                $HTMLHeadingContent);
  },
  check_end => sub {
    my ($self, undef, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    ## Delegate to the generic end-of-element check with the same arguments.
    $HTMLProseContentChecker{check_end}->(@_);
  },
};
1588    
## |address|: members of |%HTMLProseContentChecker|, overridden below.
## While the element is open, |footer|, |address|,
## |$HTMLSectioningContent| and |$HTMLHeadingContent| are registered as
## "minus" elements.
{
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  );

  $Element->{$HTML_NS}{address} = {
    %HTMLProseContentChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
    check_start => sub {
      my ($self, undef, $element_state) = @_;
      my $own_minus = {$HTML_NS => {footer => 1, address => 1}};
      $self->_add_minus_elements ($element_state,
                                  $own_minus,
                                  $HTMLSectioningContent,
                                  $HTMLHeadingContent);
    },
    check_end => sub {
      my ($self, undef, $element_state) = @_;
      $self->_remove_minus_elements ($element_state);

      $HTMLProseContentChecker{check_end}->(@_);
    },
  };
}
1610    
## |p|: members of |%HTMLPhrasingContentChecker| plus status information
## for its attributes (|align| is marked deprecated).
{
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_M12N10_REC_DEPRECATED,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  );

  $Element->{$HTML_NS}{p} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
}
1621    
## |hr|: members of |%HTMLEmptyChecker| plus status information for its
## attributes; |align|, |noshade|, |size| and |width| are marked deprecated.
{
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    (map { ($_ => FEATURE_M12N10_REC_DEPRECATED) }
         qw/align noshade size width/),
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  );

  $Element->{$HTML_NS}{hr} = {
    %HTMLEmptyChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
}
1635    
## |br|: members of |%HTMLEmptyChecker|.  Unlike most neighbouring
## definitions, the attribute status table lists |class|, |id|, |style|
## and |title| individually instead of including
## |%HTMLM12NCommonAttrStatus|.
{
  my %attr_status = (
    %HTMLAttrStatus,
    (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) }
         qw/class id title/),
    clear => FEATURE_M12N10_REC_DEPRECATED,
    style => FEATURE_XHTML10_REC,
  );

  $Element->{$HTML_NS}{br} = {
    %HTMLEmptyChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
    ## NOTE: Blank line MUST NOT be used for presentation purpose.
    ## (This requirement is semantic so that we cannot check.)
  };
}
1650    
## |dialog|: children are expected to alternate |dt|, |dd|, |dt|, |dd|, ...
## The current position in that sequence is kept in
## |$element_state->{phase}| ('before dt' or 'before dd').
{
  ## For each phase: the child expected next, and the error type reported
  ## when the other member of the |dt|/|dd| pair is found instead.
  my %phase_rule = (
    'before dt' => {expects => 'dt', error => 'ps element missing:dt'},
    'before dd' => {expects => 'dd', error => 'ps element missing:dd'},
  );

  ## Phase entered after a |dt| or |dd| child, expected or not.
  my %phase_after = (dt => 'before dd', dd => 'before dt');

  $Element->{$HTML_NS}{dialog} = {
    status => FEATURE_HTML5_DEFAULT,
    %HTMLChecker,
    check_start => sub {
      my (undef, undef, $element_state) = @_;
      $element_state->{phase} = 'before dt';
    },
    check_child_element => sub {
      my ($self, $item, $child_el, $child_nsuri, $child_ln,
          $child_is_transparent, $element_state) = @_;
      if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:minus',
                           level => $self->{must_level});
      } elsif (not $self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
        ## "Plus" elements are accepted silently; everything else goes
        ## through the phase machine.
        my $rule = $phase_rule{$element_state->{phase}};
        die "check_child_element: Bad |dialog| phase: $element_state->{phase}"
            unless $rule;

        ## Local name if the child is in the HTML namespace, '' otherwise.
        my $html_ln = $child_nsuri eq $HTML_NS ? $child_ln : '';
        if ($phase_after{$html_ln}) { # |dt| or |dd|
          $self->{onerror}->(node => $child_el, type => $rule->{error})
              unless $html_ln eq $rule->{expects};
          $element_state->{phase} = $phase_after{$html_ln};
        } else {
          $self->{onerror}->(node => $child_el, type => 'element not allowed');
        }
      }
    },
    check_child_text => sub {
      my ($self, $item, $child_node, $has_significant, $element_state) = @_;
      $self->{onerror}->(node => $child_node, type => 'character not allowed')
          if $has_significant;
    },
    check_end => sub {
      my ($self, $item, $element_state) = @_;

      ## A trailing |dt| has no matching |dd|.
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing:dd')
          if $element_state->{phase} eq 'before dd';

      $HTMLChecker{check_end}->(@_);
    },
  };
}
1707    
## |pre|: members of |%HTMLPhrasingContentChecker| plus status information
## for its attributes (|width| is marked deprecated).
{
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    width => FEATURE_M12N10_REC_DEPRECATED,
  );

  $Element->{$HTML_NS}{pre} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
}
1718    
## |ol|: members of |%HTMLChecker|, overridden below.  Only HTML |li|
## children (and "plus" elements) are accepted, and significant text is
## rejected.  The |ul| definition copies this hash.
{
  my %attr_checker = (
    start => $HTMLIntegerAttrChecker,
    reversed => $GetHTMLBooleanAttrChecker->('reversed'),
  );
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    compact => FEATURE_M12N10_REC_DEPRECATED,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    reversed => FEATURE_HTML5_DEFAULT,
    #start => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC_DEPRECATED,
    start => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    type => FEATURE_M12N10_REC_DEPRECATED,
  );

  $Element->{$HTML_NS}{ol} = {
    %HTMLChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
    check_child_element => sub {
      my ($self, $item, $child_el, $child_nsuri, $child_ln,
          $child_is_transparent, $element_state) = @_;
      if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:minus',
                           level => $self->{must_level});
      } elsif (not $self->{plus_elements}->{$child_nsuri}->{$child_ln}
               and not ($child_nsuri eq $HTML_NS and $child_ln eq 'li')) {
        ## Neither a "plus" element nor an HTML |li|.
        $self->{onerror}->(node => $child_el, type => 'element not allowed');
      }
    },
    check_child_text => sub {
      my ($self, $item, $child_node, $has_significant, $element_state) = @_;
      $self->{onerror}->(node => $child_node, type => 'character not allowed')
          if $has_significant;
    },
  };
}
1757    
## |ul|: starts from a shallow copy of the |ol| definition (so it reuses
## the |ol| child element and child text checks) and replaces |status| and
## |check_attrs|.  No attribute-specific checkers are registered here.
{
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    compact => FEATURE_M12N10_REC_DEPRECATED,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    type => FEATURE_M12N10_REC_DEPRECATED,
  );

  $Element->{$HTML_NS}{ul} = {
    %{$Element->{$HTML_NS}{ol}},
    status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
}
1769    
## |li|: members of |%HTMLProseContentChecker|, overridden below.
##   - |value| is reported as unsupported when the parent element exists
##     and is not an HTML |ol|; it is then always passed to
##     |$HTMLIntegerAttrChecker|.
##   - Children and text are checked as phrasing content when the
##     |in_menu| flag is set, and as prose content otherwise.
{
  my %attr_checker = (
    value => sub {
      my ($self, $attr) = @_;
      my $parent = $attr->owner_element->manakai_parent_element;
      if (defined $parent) {
        my $parent_ns = $parent->namespace_uri;
        my $parent_ln = $parent->manakai_local_name;
        my $parent_is_ol = (defined $parent_ns and
                            $parent_ns eq $HTML_NS and
                            $parent_ln eq 'ol');
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute')
            unless $parent_is_ol;
      }
      $HTMLIntegerAttrChecker->($self, $attr);
    }, ## TODO: test
  );
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    type => FEATURE_M12N10_REC_DEPRECATED,
    #value => FEATURE_HTML5_DEFAULT | FEATURE_XHTMLBASIC11_CR |
    #    FEATURE_M12N10_REC_DEPRECATED,
    value => FEATURE_HTML5_DEFAULT | FEATURE_XHTMLBASIC11_CR |
        FEATURE_M12N10_REC,
  );

  $Element->{$HTML_NS}{li} = {
    %HTMLProseContentChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
    check_child_element => sub {
      my ($self) = @_;
      my $content_checker = $self->{flag}->{in_menu}
          ? \%HTMLPhrasingContentChecker
          : \%HTMLProseContentChecker;
      $content_checker->{check_child_element}->(@_);
    },
    check_child_text => sub {
      my ($self) = @_;
      my $content_checker = $self->{flag}->{in_menu}
          ? \%HTMLPhrasingContentChecker
          : \%HTMLProseContentChecker;
      $content_checker->{check_child_text}->(@_);
    },
  };
}
1816    
## |dl|: children are groups of one or more |dt| followed by one or more
## |dd|.  The position is tracked in |$element_state->{phase}|, one of
## 'before dt' (nothing seen yet), 'in dts' or 'in dds'.
{
  my %known_phase = map { ($_ => 1) } 'before dt', 'in dts', 'in dds';

  ## Phase entered after a |dt| or |dd| child, whatever the phase before.
  my %phase_after = (dt => 'in dts', dd => 'in dds');

  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    compact => FEATURE_M12N10_REC_DEPRECATED,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    type => FEATURE_M12N10_REC_DEPRECATED,
  );

  $Element->{$HTML_NS}{dl} = {
    %HTMLChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
    check_start => sub {
      my (undef, undef, $element_state) = @_;
      $element_state->{phase} = 'before dt';
    },
    check_child_element => sub {
      my ($self, $item, $child_el, $child_nsuri, $child_ln,
          $child_is_transparent, $element_state) = @_;
      if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:minus',
                           level => $self->{must_level});
      } elsif (not $self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
        ## "Plus" elements are accepted silently; everything else goes
        ## through the phase machine.
        my $phase = $element_state->{phase};
        die "check_child_element: Bad |dl| phase: $element_state->{phase}"
            unless $known_phase{$phase};

        ## Local name if the child is in the HTML namespace, '' otherwise.
        my $html_ln = $child_nsuri eq $HTML_NS ? $child_ln : '';
        if ($phase_after{$html_ln}) { # |dt| or |dd|
          ## The only ordering error: a |dd| before any |dt|.
          $self->{onerror}
              ->(node => $child_el, type => 'ps element missing:dt')
              if $phase eq 'before dt' and $html_ln eq 'dd';
          $element_state->{phase} = $phase_after{$html_ln};
        } else {
          $self->{onerror}->(node => $child_el, type => 'element not allowed');
        }
      }
    },
    check_child_text => sub {
      my ($self, $item, $child_node, $has_significant, $element_state) = @_;
      $self->{onerror}->(node => $child_node, type => 'character not allowed')
          if $has_significant;
    },
    check_end => sub {
      my ($self, $item, $element_state) = @_;

      ## The list ended with |dt| elements that have no |dd|.
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing:dd')
          if $element_state->{phase} eq 'in dts';

      $HTMLChecker{check_end}->(@_);
    },
  };
}
1886    
## |dt| and |dd| differ only in the content checker they start from:
## |dt| uses |%HTMLPhrasingContentChecker|, |dd| uses
## |%HTMLProseContentChecker|.  Status and attribute status are the same.
for my $definition ([dt => \%HTMLPhrasingContentChecker],
                    [dd => \%HTMLProseContentChecker]) {
  my ($local_name, $content_checker) = @$definition;
  $Element->{$HTML_NS}->{$local_name} = {
    %$content_checker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    }),
  };
}
1906    
## |a|: members of |%HTMLPhrasingContentChecker|, overridden below.
##
## |check_attrs| is written out by hand (rather than built with
## |$GetHTMLAttrsChecker|) because it must remember which hyperlink
## attributes are present: |target|, |ping|, |rel|, |media|, |hreflang|
## and |type| are reported as not allowed when |href| is absent.
$Element->{$HTML_NS}->{a} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Both tables are the same for every attribute of this element, so
    ## they are built once per call instead of once per attribute.
    ## (|rel| closes over |$item|, hence not once per file.)
    my $hyperlink_checker = {
      target => $HTMLTargetAttrChecker,
      href => $HTMLURIAttrChecker,
      ping => $HTMLSpaceURIsAttrChecker,
      rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
      media => $HTMLMQAttrChecker,
      hreflang => $HTMLLanguageTagAttrChecker,
      type => $HTMLIMTAttrChecker,
    };
    my $attr_status = {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      accesskey => FEATURE_M12N10_REC,
      charset => FEATURE_M12N10_REC,
      coords => FEATURE_M12N10_REC,
      href => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      hreflang => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      media => FEATURE_HTML5_DEFAULT,
      name => FEATURE_M12N10_REC_DEPRECATED,
      onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      ping => FEATURE_HTML5_DEFAULT,
      rel => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      rev => FEATURE_M12N10_REC,
      shape => FEATURE_M12N10_REC,
      tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      target => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      type => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    };

    ## Hyperlink attributes (null namespace) found on this element,
    ## keyed by local name.
    my %hyperlink_attr;

    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;

      ## Checker lookup order: |a|-specific table, then |$HTMLAttrChecker|
      ## (both null namespace only), then |$AttrChecker| by exact name,
      ## then the |$AttrChecker| namespace-wide entry.
      my $checker;
      if ($attr_ns eq '') {
        $checker = $hyperlink_checker->{$attr_ln};
        if ($checker) {
          $hyperlink_attr{$attr_ln} = $attr;
        } else {
          $checker = $HTMLAttrChecker->{$attr_ln};
        }
      }
      $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
          || $AttrChecker->{$attr_ns}->{''};

      if ($checker) {
        ## A true but non-reference entry means "known, nothing to check".
        $checker->($self, $attr) if ref $checker;
      } elsif ($attr_ns eq '') {
        #
      } else {
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute');
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }

      if ($attr_ns eq '') {
        $self->_attr_status_info ($attr, $attr_status->{$attr_ln});
      }
    }

    ## Save the enclosing |in_a_href| flag so that |check_end| can tell
    ## whether it has to clear it again.
    $element_state->{in_a_href_original} = $self->{flag}->{in_a_href};
    if (defined $hyperlink_attr{href}) {
      $self->{has_hyperlink_element} = 1;
      $self->{flag}->{in_a_href} = 1;
    } else {
      for (qw/target ping rel media hreflang type/) {
        if (defined $hyperlink_attr{$_}) {
          $self->{onerror}->(node => $hyperlink_attr{$_},
                             type => 'attribute not allowed');
        }
      }
    }
  },
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements ($element_state, $HTMLInteractiveContent);
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);
    delete $self->{flag}->{in_a_href}
        unless $element_state->{in_a_href_original};

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
1997    
## |q|: members of |%HTMLPhrasingContentChecker| plus an attribute checker
## in which |cite| is validated by |$HTMLURIAttrChecker|.
{
  my %attr_checker = (cite => $HTMLURIAttrChecker);
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    cite => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  );

  $Element->{$HTML_NS}{q} = {
    status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    %HTMLPhrasingContentChecker,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
  };
}
2010    
## |cite|, |em|, |strong| and |small| are defined identically: members of
## |%HTMLPhrasingContentChecker|, HTML5 + M12N status, and no
## attribute-specific checkers.  Each element gets its own hash and its
## own attribute checker instance.
for my $local_name (qw/cite em strong small/) {
  $Element->{$HTML_NS}->{$local_name} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    }),
  };
}
2050    
## |big|: members of |%HTMLPhrasingContentChecker|.  Its status is
## FEATURE_M12N10_REC only, without an HTML5 status flag.
{
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  );

  $Element->{$HTML_NS}{big} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
}
2060    
## |mark|: FEATURE_HTML5_DEFAULT status followed by the members of
## |%HTMLPhrasingContentChecker|.
$Element->{$HTML_NS}{mark}
    = {status => FEATURE_HTML5_DEFAULT, %HTMLPhrasingContentChecker};
2065    
## |dfn|: members of |%HTMLPhrasingContentChecker|, overridden below.
##
## |check_start| registers |dfn| as a "minus" element (no nested |dfn|),
## determines the term being defined and records it in |$self->{term}|,
## reporting |duplicate term| when the same term was already defined.
##
## The term is, in order of preference:
##   1. the element's own |title| attribute;
##   2. the |title| of an HTML |abbr| child, if that |abbr| is the only
##      element child and there is no text child other than inter-element
##      whitespace;
##   3. the element's text content.
$Element->{$HTML_NS}->{dfn} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements ($element_state, {$HTML_NS => {dfn => 1}});

    my $node = $item->{node};
    my $term = $node->get_attribute_ns (undef, 'title');
    unless (defined $term) {
      my $seen_element = 0;
      for my $child (@{$node->child_nodes}) {
        my $node_type = $child->node_type;
        if ($node_type == 1) { # ELEMENT_NODE
          if ($seen_element) {
            ## More than one element child: rule 2 does not apply.
            undef $term;
            last;
          }
          $seen_element = 1;

          ## The single element child has to be an HTML |abbr| carrying
          ## a |title|; anything else falls back to the text content.
          ## (Previously a non-|abbr| first element was skipped, so an
          ## |abbr| following it was wrongly accepted.)
          my $nsuri = $child->namespace_uri;
          last unless defined $nsuri and $nsuri eq $HTML_NS and
              $child->manakai_local_name eq 'abbr';
          my $attr = $child->get_attribute_node_ns (undef, 'title');
          last unless $attr;
          $term = $attr->value;
        } elsif ($node_type == 3 or $node_type == 4) {
          ## TEXT_NODE or CDATA_SECTION_NODE
          if ($child->data =~ /\A[\x09-\x0D\x20]+\z/) { # Inter-element whitespace
            next;
          }
          undef $term;
          last;
        }
      }
      unless (defined $term) {
        $term = $node->text_content;
      }
    }
    if ($self->{term}->{$term}) {
      $self->{onerror}->(node => $node, type => 'duplicate term');
      push @{$self->{term}->{$term}}, $node;
    } else {
      $self->{term}->{$term} = [$node];
    }
    ## ISSUE: The HTML5 algorithm does not work with |ruby| unless |dfn|
    ## has |title|.
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
2124    
## |abbr| and |acronym| differ only in their status: |abbr| carries both
## the HTML5 and the M12N flags, |acronym| the M12N flag alone.
for my $definition ([abbr => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC],
                    [acronym => FEATURE_M12N10_REC]) {
  my ($local_name, $status) = @$definition;
  $Element->{$HTML_NS}->{$local_name} = {
    %HTMLPhrasingContentChecker,
    status => $status,
    check_attrs => $GetHTMLAttrsChecker->({}, {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    }),
  };
}
2144    
## |time|: members of |%HTMLPhrasingContentChecker|, overridden below.
##
## The date or time is taken from the |datetime| attribute when present,
## and from the element's text content otherwise.  |check_end| validates
## it against two accepted shapes:
##
##   YYYY-MM-DD [T] hh:mm[:ss[.fff]] [Z | (+|-)hh:mm]
##   hh:mm[:ss[.fff]]
##
## Optional spacing is matched with |$reg_sp|, which depends on where the
## input came from: the characters U+0009..U+000D and U+0020 for the
## attribute, Unicode class Zs for text content.  (|$reg_sp| used to be
## computed but ignored, so text content was matched with the attribute
## rule.)
$Element->{$HTML_NS}->{time} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->({
    datetime => sub { 1 }, # the value is validated in |check_end|
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    datetime => FEATURE_HTML5_DEFAULT,
  }),
  ## TODO: Write tests
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    my $attr = $item->{node}->get_attribute_node_ns (undef, 'datetime');
    my $input;      # string to validate
    my $reg_sp;     # pattern for optional spacing in |$input|
    my $input_node; # node that errors are reported against
    if ($attr) {
      $input = $attr->value;
      $reg_sp = qr/[\x09-\x0D\x20]*/;
      $input_node = $attr;
    } else {
      $input = $item->{node}->text_content;
      $reg_sp = qr/\p{Zs}*/;
      $input_node = $item->{node};

      ## ISSUE: What is the definition for "successfully extracts a date
      ## or time"?  If the algorithm says the string is invalid but
      ## return some date or time, is it "successfully"?
    }

    my $report = sub {
      $self->{onerror}->(node => $input_node, type => $_[0]);
    };

    if ($input =~ /
      \A
      $reg_sp
      ([0-9]+) # 1
      (?>
        -([0-9]+) # 2
        -([0-9]+) # 3
        $reg_sp
        (?>
          T
          $reg_sp
        )?
        ([0-9]+) # 4
        :([0-9]+) # 5
        (?>
          :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6
        )?
        $reg_sp
        (?>
          Z
          $reg_sp
        |
          [+-]([0-9]+):([0-9]+) # 7, 8
          $reg_sp
        )?
        \z
      |
        :([0-9]+) # 9
        (?>
          :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10
        )?
        $reg_sp
        \z
      )
    /x) {
      ## Copy the captures at once; the first group is the year in the
      ## date-and-time form and the hour in the time-only form.
      my ($year_or_hour, $month, $day, $dt_hour, $dt_minute, $dt_second,
          $tz_hour, $tz_minute, $t_minute, $t_second)
          = ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10);

      my $hour;
      my $minute;
      my $second;
      if (defined $month) { ## YYYY-MM-DD T? hh:mm
        my $year = $year_or_hour;
        if (length ($year) != 4 or length ($month) != 2 or
            length ($day) != 2 or
            length ($dt_hour) != 2 or length ($dt_minute) != 2) {
          $report->('dateortime:syntax error');
        }

        if (1 <= $month and $month <= 12) {
          ## Index 0 is a placeholder so that months index directly.
          my @last_day = (0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31);
          $report->('datetime:bad day')
              if $day < 1 or $day > $last_day[$month];
          ## February 29 outside of a leap year.
          $report->('datetime:bad day')
              if $month == 2 and $day == 29 and
                 not ($year % 400 == 0 or
                      ($year % 4 == 0 and $year % 100 != 0));
        } else {
          $report->('datetime:bad month');
        }

        ($hour, $minute, $second) = ($dt_hour, $dt_minute, $dt_second);

        if (defined $tz_hour) { ## [+-]hh:mm
          if (length ($tz_hour) != 2 or length ($tz_minute) != 2) {
            $report->('dateortime:syntax error');
          }

          $report->('datetime:bad timezone hour') if $tz_hour > 23;
          $report->('datetime:bad timezone minute') if $tz_minute > 59;
        }
      } else { ## hh:mm
        if (length ($year_or_hour) != 2 or length ($t_minute) != 2) {
          $report->('dateortime:syntax error');
        }

        ($hour, $minute, $second) = ($year_or_hour, $t_minute, $t_second);
      }

      $report->('datetime:bad hour') if $hour > 23;
      $report->('datetime:bad minute') if $minute > 59;

      if (defined $second) { ## s
        ## NOTE: The integer part of the second does not have to be two
        ## digits long.

        ## A fraction with no integer part (".5") is matched by the
        ## pattern above but is not valid syntax.
        if (substr ($second, 0, 1) eq '.') {
          $report->('dateortime:syntax error');
        }

        $report->('datetime:bad second') if $second >= 60;
      }
    } else {
      $report->('dateortime:syntax error');
    }

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
2281    
## |meter|: all six numeric attributes use a floating point number checker
## created with a range predicate that accepts every value; each attribute
## gets its own checker instance.
## TODO: "The recommended way of giving the value is to include it as contents of the element"
{
  my %attr_checker = map {
    ($_ => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }))
  } qw/value min low high max optimum/;
  my %attr_status = (
    %HTMLAttrStatus,
    (map { ($_ => FEATURE_HTML5_DEFAULT) }
         qw/high low max min optimum value/),
  );

  $Element->{$HTML_NS}{meter} = {
    status => FEATURE_HTML5_DEFAULT,
    %HTMLPhrasingContentChecker,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
  };
}
2302    
## |progress|: |value| and |max| use floating point number checkers; the
## predicate given for |value| is ">= 0" and the one for |max| is "> 0".
## TODO: recommended to use content
{
  my %attr_checker = (
    value => $GetHTMLFloatingPointNumberAttrChecker->(sub { $_[0] >= 0 }),
    max => $GetHTMLFloatingPointNumberAttrChecker->(sub { $_[0] > 0 }),
  );
  my %attr_status = (
    %HTMLAttrStatus,
    max => FEATURE_HTML5_DEFAULT,
    value => FEATURE_HTML5_DEFAULT,
  );

  $Element->{$HTML_NS}{progress} = {
    status => FEATURE_HTML5_DEFAULT,
    %HTMLPhrasingContentChecker,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
  };
}
2315    
## |code|, |var|, |samp|, |kbd| and |sub| are defined identically: members
## of |%HTMLPhrasingContentChecker|, HTML5 + M12N status, and no
## attribute-specific checkers.  Each element gets its own hash and its
## own attribute checker instance.
for my $local_name (qw/code var samp kbd sub/) {
  $Element->{$HTML_NS}->{$local_name} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    }),
  };
}

## |sup| is an alias: it refers to the very same hash as |sub|, not a copy.
$Element->{$HTML_NS}->{sup} = $Element->{$HTML_NS}->{sub};
2367 wakaba 1.1
## |span| element: phrasing content, no element-specific attribute checkers.
## Besides the common status tables, the |datafld|, |dataformatas| and
## |datasrc| attributes are recorded with the FEATURE_HTML4_REC_RESERVED
## status.
2368     $Element->{$HTML_NS}->{span} = {
2369 wakaba 1.40 %HTMLPhrasingContentChecker,
2370 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2371     check_attrs => $GetHTMLAttrsChecker->({}, {
2372     %HTMLAttrStatus,
2373     %HTMLM12NCommonAttrStatus,
2374     datafld => FEATURE_HTML4_REC_RESERVED,
2375     dataformatas => FEATURE_HTML4_REC_RESERVED,
2376     datasrc => FEATURE_HTML4_REC_RESERVED,
## Listed last so that it overrides any |lang| entry from the spread hashes.
2377 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
2378 wakaba 1.49 }),
2379 wakaba 1.1 };
2380    
## |i| element: phrasing content, no element-specific attribute checkers.
2381     $Element->{$HTML_NS}->{i} = {
2382 wakaba 1.40 %HTMLPhrasingContentChecker,
2383 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2384     check_attrs => $GetHTMLAttrsChecker->({}, {
2385     %HTMLAttrStatus,
2386     %HTMLM12NCommonAttrStatus,
2387 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
2388 wakaba 1.49 }),
2389 wakaba 1.1 };
2390    
## |b| reuses the |i| definition hash (same reference, not a copy).
2391 wakaba 1.51 $Element->{$HTML_NS}->{b} = $Element->{$HTML_NS}->{i};
2392    
## |tt| reuses the |big| definition hash.
## NOTE(review): |big| is not assigned anywhere in this part of the file; the
## alias is only meaningful if $Element->{$HTML_NS}->{big} was set earlier --
## otherwise |tt| ends up undefined.  Confirm.
2393     $Element->{$HTML_NS}->{tt} = $Element->{$HTML_NS}->{big};
2394    
## |s| element: phrasing content.  Unlike the neighbouring phrasing elements,
## its element-level status is FEATURE_M12N10_REC_DEPRECATED only, with no
## HTML5 status flag combined in.
2395     $Element->{$HTML_NS}->{s} = {
2396 wakaba 1.40 %HTMLPhrasingContentChecker,
2397 wakaba 1.51 status => FEATURE_M12N10_REC_DEPRECATED,
2398 wakaba 1.49 check_attrs => $GetHTMLAttrsChecker->({}, {
2399     %HTMLAttrStatus,
2400     %HTMLM12NCommonAttrStatus,
2401 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
2402 wakaba 1.49 }),
2403 wakaba 1.1 };
2404    
## |strike| and |u| both reuse the |s| definition hash (same reference).
2405 wakaba 1.51 $Element->{$HTML_NS}->{strike} = $Element->{$HTML_NS}->{s};
2406    
2407     $Element->{$HTML_NS}->{u} = $Element->{$HTML_NS}->{s};
2408    
## |bdo| element: phrasing content whose |dir| attribute is required.
## |check_attrs| is a wrapper: it first runs the generic attribute checker
## (no element-specific checkers, explicit status table), then reports
## 'attribute missing:dir' when the element has no null-namespace |dir|
## attribute.  No |level| is passed with that error.
2409 wakaba 1.1 $Element->{$HTML_NS}->{bdo} = {
2410 wakaba 1.40 %HTMLPhrasingContentChecker,
2411 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2412 wakaba 1.40 check_attrs => sub {
2413     my ($self, $item, $element_state) = @_;
## The generic checker is built on every call and invoked immediately.
2414 wakaba 1.49 $GetHTMLAttrsChecker->({}, {
2415     %HTMLAttrStatus,
2416 wakaba 1.50 class => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2417     dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2418     id => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2419 wakaba 1.49 style => FEATURE_XHTML10_REC,
2420 wakaba 1.50 title => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2421     lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
2422 wakaba 1.49 })->($self, $item, $element_state);
2423 wakaba 1.40 unless ($item->{node}->has_attribute_ns (undef, 'dir')) {
2424     $self->{onerror}->(node => $item->{node},
2425     type => 'attribute missing:dir');
2426 wakaba 1.1 }
2427     },
2428     ## ISSUE: The spec does not directly say that |dir| is an enumerated attr.
2429     };
2430    
2431 wakaba 1.29 =pod
2432    
2433     ## TODO:
2434    
2435     +
2436     + <p>Partly because of the confusion described above, authors are
2437     + strongly recommended to always mark up all paragraphs with the
2438     + <code>p</code> element, and to not have any <code>ins</code> or
2439     + <code>del</code> elements that cross across any <span
2440     + title="paragraph">implied paragraphs</span>.</p>
2441     +
2442     (An informative note)
2443    
2444     <p><code>ins</code> elements should not cross <span
2445     + title="paragraph">implied paragraph</span> boundaries.</p>
2446     (normative)
2447    
2448     + <p><code>del</code> elements should not cross <span
2449     + title="paragraph">implied paragraph</span> boundaries.</p>
2450     (normative)
2451    
2452     =cut
2453    
## |ins| element: transparent content model (%HTMLTransparentChecker) with
## two element-specific attributes: |cite| is checked by $HTMLURIAttrChecker
## and |datetime| by $HTMLDatetimeAttrChecker.
2454 wakaba 1.1 $Element->{$HTML_NS}->{ins} = {
2455 wakaba 1.40 %HTMLTransparentChecker,
2456 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2457 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
2458 wakaba 1.1 cite => $HTMLURIAttrChecker,
2459     datetime => $HTMLDatetimeAttrChecker,
2460 wakaba 1.49 }, {
## Status table; the explicit entries override same-named entries from the
## two spread hashes.
2461     %HTMLAttrStatus,
2462     %HTMLM12NCommonAttrStatus,
2463 wakaba 1.50 cite => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2464     datetime => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2465     lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
2466 wakaba 1.1 }),
2467     };
2468    
## |del| element: same attribute handling as |ins| (|cite| and |datetime|),
## but with its own |check_end| that replaces the one supplied by
## %HTMLTransparentChecker (later keys in the hash literal win).
2469     $Element->{$HTML_NS}->{del} = {
2470 wakaba 1.40 %HTMLTransparentChecker,
2471 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2472 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
2473 wakaba 1.1 cite => $HTMLURIAttrChecker,
2474     datetime => $HTMLDatetimeAttrChecker,
2475 wakaba 1.49 }, {
2476     %HTMLAttrStatus,
2477     %HTMLM12NCommonAttrStatus,
2478 wakaba 1.50 cite => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2479     datetime => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2480     lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
2481 wakaba 1.1 }),
## Reports 'no significant content' (at the "should" level) only when the
## element has no significant content AND $item->{transparent} is false.
## Nothing is written to the parent's state in any branch.
2482 wakaba 1.40 check_end => sub {
2483     my ($self, $item, $element_state) = @_;
2484     if ($element_state->{has_significant}) {
2485     ## NOTE: Significantness flag does not propagate.
2486     } elsif ($item->{transparent}) {
2487     #
2488     } else {
2489     $self->{onerror}->(node => $item->{node},
2490     level => $self->{should_level},
2491     type => 'no significant content');
2492     }
2493 wakaba 1.1 },
2494     };
2495    
## |figure| element: prose content plus at most one |legend|, which must be
## either the first or the last child.
##
## State kept in $element_state while the children are visited:
##   has_legend_at_first - true once a |legend| was seen before any other
##                         (non-transparent / significant) content;
##   has_legend          - the |legend| element node seen after other
##                         content (candidate "trailing" legend);
##   has_non_legend      - true while non-legend content has been seen since
##                         the last |legend| (it is deleted after each
##                         |legend|).
2496 wakaba 1.35 $Element->{$HTML_NS}->{figure} = {
2497 wakaba 1.40 %HTMLProseContentChecker,
2498 wakaba 1.48 status => FEATURE_HTML5_FD,
2499 wakaba 1.53 ## NOTE: legend, Prose | Prose, legend?
2500 wakaba 1.41 check_child_element => sub {
2501     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2502     $child_is_transparent, $element_state) = @_;
## Explicitly excluded element: report it, but still count it as content.
2503     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
2504     $self->{onerror}->(node => $child_el,
2505     type => 'element not allowed:minus',
2506     level => $self->{must_level});
2507     $element_state->{has_non_legend} = 1;
## Explicitly included element: accepted without further checks and without
## touching the legend bookkeeping.
2508     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2509     #
2510     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'legend') {
## A leading legend already exists: any further legend is an error.
2511     if ($element_state->{has_legend_at_first}) {
2512     $self->{onerror}->(node => $child_el,
2513     type => 'element not allowed:figure legend',
2514     level => $self->{must_level});
## A non-leading legend was already remembered: the error is reported on that
## EARLIER legend, and the current one becomes the new candidate.
2515     } elsif ($element_state->{has_legend}) {
2516     $self->{onerror}->(node => $element_state->{has_legend},
2517     type => 'element not allowed:figure legend',
2518     level => $self->{must_level});
2519     $element_state->{has_legend} = $child_el;
## First legend, but content precedes it: remember it as a trailing candidate.
2520     } elsif ($element_state->{has_non_legend}) {
2521     $element_state->{has_legend} = $child_el;
## First legend and nothing before it: it is the leading legend.
2522     } else {
2523     $element_state->{has_legend_at_first} = 1;
2524 wakaba 1.35 }
## Reset so that |check_end| can tell whether content FOLLOWS this legend.
2525 wakaba 1.41 delete $element_state->{has_non_legend};
2526     } else {
## Anything else is validated as prose content; transparent children do not
## count as non-legend content by themselves.
2527     $HTMLProseContentChecker{check_child_element}->(@_);
2528 wakaba 1.43 $element_state->{has_non_legend} = 1 unless $child_is_transparent;
2529 wakaba 1.41 }
2530     },
## Significant text counts as non-legend content.
## NOTE(review): unlike |video|'s |check_child_text|, this handler does not
## delegate to $HTMLProseContentChecker{check_child_text} -- confirm that is
## intended.
2531     check_child_text => sub {
2532     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2533     if ($has_significant) {
2534     $element_state->{has_non_legend} = 1;
2535 wakaba 1.35 }
2536 wakaba 1.41 },
2537     check_end => sub {
2538     my ($self, $item, $element_state) = @_;
2539 wakaba 1.35
## A remembered non-leading legend is only valid if nothing came after it;
## |has_non_legend| being set here means content followed it.
2540 wakaba 1.41 if ($element_state->{has_legend_at_first}) {
2541     #
2542     } elsif ($element_state->{has_legend}) {
2543     if ($element_state->{has_non_legend}) {
2544     $self->{onerror}->(node => $element_state->{has_legend},
2545 wakaba 1.35 type => 'element not allowed:figure legend',
2546     level => $self->{must_level});
2547     }
2548     }
2549 wakaba 1.41
## Finish with the generic prose end-of-element processing.
2550     $HTMLProseContentChecker{check_end}->(@_);
2551     ## ISSUE: |<figure><legend>aa</legend></figure>| should be an error?
2552 wakaba 1.35 },
2553     };
2554 wakaba 1.8 ## TODO: Test for <nest/> in <figure/>
2555 wakaba 1.1
## |img| element: empty content model.  |check_attrs| runs the generic
## attribute checker with element-specific checkers for |alt|, |src|,
## |usemap| and |ismap|, then reports missing |alt| (at the "should" level)
## and missing |src| (no explicit level).
2556     $Element->{$HTML_NS}->{img} = {
2557 wakaba 1.40 %HTMLEmptyChecker,
2558 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2559 wakaba 1.40 check_attrs => sub {
2560     my ($self, $item, $element_state) = @_;
2561 wakaba 1.1 $GetHTMLAttrsChecker->({
2562     alt => sub { }, ## NOTE: No syntactical requirement
2563     src => $HTMLURIAttrChecker,
2564     usemap => $HTMLUsemapAttrChecker,
## |ismap| is reported as not allowed unless the |in_a_href| flag is set on
## the checker; the boolean-attribute syntax check runs in either case.
2565     ismap => sub {
2566 wakaba 1.40 my ($self, $attr, $parent_item) = @_;
2567     if (not $self->{flag}->{in_a_href}) {
2568 wakaba 1.15 $self->{onerror}->(node => $attr,
2569 wakaba 1.59 type => 'attribute not allowed:ismap',
2570     level => $self->{must_level});
2571 wakaba 1.1 }
2572 wakaba 1.40 $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_item);
2573 wakaba 1.1 },
2574     ## TODO: height
2575     ## TODO: width
2576 wakaba 1.49 }, {
## Status table; explicit entries override the spread hashes.
2577     %HTMLAttrStatus,
2578     %HTMLM12NCommonAttrStatus,
2579     align => FEATURE_M12N10_REC_DEPRECATED,
2580 wakaba 1.50 alt => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2581 wakaba 1.49 border => FEATURE_M12N10_REC_DEPRECATED,
2582 wakaba 1.50 height => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2583 wakaba 1.49 hspace => FEATURE_M12N10_REC_DEPRECATED,
2584 wakaba 1.50 ismap => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2585     lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
2586 wakaba 1.49 longdesc => FEATURE_M12N10_REC,
2587     name => FEATURE_M12N10_REC_DEPRECATED,
2588 wakaba 1.50 src => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2589     usemap => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2590 wakaba 1.49 vspace => FEATURE_M12N10_REC_DEPRECATED,
2591 wakaba 1.50 width => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
## NOTE(review): $element_state is not forwarded here, whereas |bdo|,
## |source| and |map| do forward it -- confirm whether the generic checker
## needs it.
2592 wakaba 1.40 })->($self, $item);
2593     unless ($item->{node}->has_attribute_ns (undef, 'alt')) {
2594     $self->{onerror}->(node => $item->{node},
2595 wakaba 1.37 type => 'attribute missing:alt',
2596     level => $self->{should_level});
2597 wakaba 1.1 }
2598 wakaba 1.40 unless ($item->{node}->has_attribute_ns (undef, 'src')) {
2599     $self->{onerror}->(node => $item->{node},
2600     type => 'attribute missing:src');
2601 wakaba 1.1 }
2602     },
2603     };
2604    
## |iframe| element: uses %HTMLTextChecker for its content model and checks
## only |src| (as a URI) specifically; every other attribute listed below
## appears solely in the status table.
2605     $Element->{$HTML_NS}->{iframe} = {
2606 wakaba 1.40 %HTMLTextChecker,
2607 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2608     ## NOTE: Not part of M12N10 Strict
2609 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
2610 wakaba 1.1 src => $HTMLURIAttrChecker,
2611 wakaba 1.49 }, {
## Status table; explicit entries override the spread hashes.
2612     %HTMLAttrStatus,
2613     %HTMLM12NCommonAttrStatus,
2614     align => FEATURE_XHTML10_REC,
2615 wakaba 1.50 class => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2616 wakaba 1.49 frameborder => FEATURE_M12N10_REC,
2617     height => FEATURE_M12N10_REC,
2618 wakaba 1.50 id => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2619 wakaba 1.49 longdesc => FEATURE_M12N10_REC,
2620     marginheight => FEATURE_M12N10_REC,
2621     marginwidth => FEATURE_M12N10_REC,
2622     name => FEATURE_M12N10_REC_DEPRECATED,
2623     scrolling => FEATURE_M12N10_REC,
2624 wakaba 1.50 src => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2625     title => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2626 wakaba 1.49 width => FEATURE_M12N10_REC,
2627 wakaba 1.1 }),
2628 wakaba 1.40 };
2629    
## |embed| element: empty content model.  Attributes are walked by hand
## instead of going through $GetHTMLAttrsChecker because any null-namespace
## attribute is accepted on this element.  |src| is required.
2630 wakaba 1.1 $Element->{$HTML_NS}->{embed} = {
2631 wakaba 1.40 %HTMLEmptyChecker,
2632 wakaba 1.48 status => FEATURE_HTML5_DEFAULT,
2633 wakaba 1.40 check_attrs => sub {
2634     my ($self, $item, $element_state) = @_;
2635 wakaba 1.1 my $has_src;
2636 wakaba 1.40 for my $attr (@{$item->{node}->attributes}) {
2637 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
## Normalize "no namespace" to the empty string so it can be used as a hash
## key and compared with |eq|.
2638     $attr_ns = '' unless defined $attr_ns;
2639     my $attr_ln = $attr->manakai_local_name;
2640     my $checker;
2641     if ($attr_ns eq '') {
2642     if ($attr_ln eq 'src') {
2643     $checker = $HTMLURIAttrChecker;
2644     $has_src = 1;
2645     } elsif ($attr_ln eq 'type') {
2646     $checker = $HTMLIMTAttrChecker;
2647     } else {
2648     ## TODO: height
2649     ## TODO: width
2650     $checker = $HTMLAttrChecker->{$attr_ln}
2651     || sub { }; ## NOTE: Any local attribute is ok.
2652     }
2653     }
## Namespaced attributes fall back to the per-namespace checker table, then
## to that namespace's wildcard ('') entry.
2654     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2655     || $AttrChecker->{$attr_ns}->{''};
2656     if ($checker) {
2657     $checker->($self, $attr);
## NOTE(review): because of the |sub { }| fallback above, null-namespace
## attributes presumably always have a checker, which would make this branch
## unreachable -- confirm.
2658 wakaba 1.50 } elsif ($attr_ns eq '') {
2659 wakaba 1.54 #
2660 wakaba 1.1 } else {
2661     $self->{onerror}->(node => $attr, level => 'unsupported',
2662     type => 'attribute');
2663 wakaba 1.50 ## ISSUE: No conformance criteria for global attributes in the spec
2664     }
2665    
## Report the feature status of known null-namespace attributes.  The lookup
## table is rebuilt for every attribute visited; attributes absent from it
## yield no status report.
2666     if ($attr_ns eq '') {
2667     my $status = {
2668     %HTMLAttrStatus,
2669     height => FEATURE_HTML5_DEFAULT,
2670     src => FEATURE_HTML5_DEFAULT,
2671     type => FEATURE_HTML5_DEFAULT,
2672     width => FEATURE_HTML5_DEFAULT,
2673     }->{$attr_ln};
2674     $self->_attr_status_info ($attr, $status) if $status;
2675 wakaba 1.1 }
2676     }
2677    
2678     unless ($has_src) {
2679 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2680 wakaba 1.1 type => 'attribute missing:src');
2681     }
2682     },
2683     };
2684    
2685 wakaba 1.49 ## TODO:
2686     ## {applet} FEATURE_M12N10_REC_DEPRECATED
2687     ## class, id, title, alt, archive, code, codebase, height, object, width name style,hspace,vspace(xhtml10)
2688    
## |object| element: zero or more |param| children followed by transparent
## (prose) content.  At least one of the |data| and |type| attributes is
## required.
##
## State kept in $element_state while the children are visited:
##   has_non_param - true once any non-|param| content (element or
##                   significant text) has been seen; a |param| after that
##                   point is reported as 'element not allowed:prose'.
2689 wakaba 1.1 $Element->{$HTML_NS}->{object} = {
2690 wakaba 1.40 %HTMLTransparentChecker,
2691 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2692 wakaba 1.40 check_attrs => sub {
2693     my ($self, $item, $element_state) = @_;
2694 wakaba 1.1 $GetHTMLAttrsChecker->({
2695     data => $HTMLURIAttrChecker,
2696     type => $HTMLIMTAttrChecker,
2697     usemap => $HTMLUsemapAttrChecker,
2698     ## TODO: width
2699     ## TODO: height
2700 wakaba 1.49 }, {
## Status table; explicit entries override the spread hashes.
2701     %HTMLAttrStatus,
2702     %HTMLM12NCommonAttrStatus,
2703     align => FEATURE_XHTML10_REC,
2704     archive => FEATURE_M12N10_REC,
2705     border => FEATURE_XHTML10_REC,
2706     classid => FEATURE_M12N10_REC,
2707     codebase => FEATURE_M12N10_REC,
2708     codetype => FEATURE_M12N10_REC,
2709 wakaba 1.50 data => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2710 wakaba 1.49 datafld => FEATURE_HTML4_REC_RESERVED,
2711     dataformatas => FEATURE_HTML4_REC_RESERVED,
2712     datasrc => FEATURE_HTML4_REC_RESERVED,
2713     declare => FEATURE_M12N10_REC,
2714 wakaba 1.50 height => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2715 wakaba 1.49 hspace => FEATURE_XHTML10_REC,
2716 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
2717 wakaba 1.49 name => FEATURE_M12N10_REC,
2718     standby => FEATURE_M12N10_REC,
2719 wakaba 1.50 tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2720     type => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2721     usemap => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2722 wakaba 1.49 vspace => FEATURE_XHTML10_REC,
2723 wakaba 1.50 width => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2724 wakaba 1.40 })->($self, $item);
## The error is raised only when BOTH |data| and |type| are absent.
2725     unless ($item->{node}->has_attribute_ns (undef, 'data')) {
2726     unless ($item->{node}->has_attribute_ns (undef, 'type')) {
2727     $self->{onerror}->(node => $item->{node},
2728 wakaba 1.1 type => 'attribute missing:data|type');
2729     }
2730     }
2731     },
2732 wakaba 1.41 ## NOTE: param*, transparent (Prose)
2733     check_child_element => sub {
2734     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2735     $child_is_transparent, $element_state) = @_;
2736     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
2737     $self->{onerror}->(node => $child_el,
2738     type => 'element not allowed:minus',
2739     level => $self->{must_level});
## A disallowed child still counts as non-|param| content.  This previously
## set |has_non_legend| (a key used by |figure|, never read here), so a
## |param| following such a child escaped the ordering check below.
2740     $element_state->{has_non_param} = 1;
2741     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2742     #
2743     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'param') {
## |param| is only allowed before any other content.
2744     if ($element_state->{has_non_param}) {
2745     $self->{onerror}->(node => $child_el,
2746     type => 'element not allowed:prose',
2747     level => $self->{must_level});
2748 wakaba 1.39 }
2749 wakaba 1.41 } else {
## Everything else is validated as prose content and closes the |param| run.
2750     $HTMLProseContentChecker{check_child_element}->(@_);
2751     $element_state->{has_non_param} = 1;
2752 wakaba 1.39 }
2753 wakaba 1.25 },
## Significant text also closes the |param| run.
2754 wakaba 1.41 check_child_text => sub {
2755     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2756     if ($has_significant) {
2757     $element_state->{has_non_param} = 1;
2758     }
2759 wakaba 1.42 },
## With significant content, the flag is propagated to the real parent's
## state.  Without it, an error is reported only when the node has no parent
## element; otherwise the element is treated as transparent and nothing is
## reported here.
2760     check_end => sub {
2761     my ($self, $item, $element_state) = @_;
2762     if ($element_state->{has_significant}) {
2763 wakaba 1.46 $item->{real_parent_state}->{has_significant} = 1;
2764 wakaba 1.42 } elsif ($item->{node}->manakai_parent_element) {
2765     ## NOTE: Transparent.
2766     } else {
2767     $self->{onerror}->(node => $item->{node},
2768     level => $self->{should_level},
2769     type => 'no significant content');
2770     }
2771     },
2772 wakaba 1.8 ## TODO: Tests for <nest/> in <object/>
2773 wakaba 1.1 };
2774 wakaba 1.41 ## ISSUE: Is |<menu><object data><li>aa</li></object></menu>| conforming?
2775     ## What about |<section><object data><style scoped></style>x</object></section>|?
2776     ## |<section><ins></ins><object data><style scoped></style>x</object></section>|?
2777 wakaba 1.1
## |param| element: empty content model.  |name| and |value| have no-op
## value checkers (any value is accepted) but both attributes are required;
## a missing one is reported without an explicit |level|.
2778     $Element->{$HTML_NS}->{param} = {
2779 wakaba 1.40 %HTMLEmptyChecker,
2780 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2781 wakaba 1.40 check_attrs => sub {
2782     my ($self, $item, $element_state) = @_;
2783 wakaba 1.1 $GetHTMLAttrsChecker->({
2784     name => sub { },
2785     value => sub { },
2786 wakaba 1.49 }, {
## Status table; explicit entries override %HTMLAttrStatus.
2787     %HTMLAttrStatus,
2788 wakaba 1.50 id => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2789     name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2790 wakaba 1.49 type => FEATURE_M12N10_REC,
2791 wakaba 1.50 value => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2792 wakaba 1.49 valuetype => FEATURE_M12N10_REC,
2793 wakaba 1.40 })->($self, $item);
2794     unless ($item->{node}->has_attribute_ns (undef, 'name')) {
2795     $self->{onerror}->(node => $item->{node},
2796 wakaba 1.1 type => 'attribute missing:name');
2797     }
2798 wakaba 1.40 unless ($item->{node}->has_attribute_ns (undef, 'value')) {
2799     $self->{onerror}->(node => $item->{node},
2800 wakaba 1.1 type => 'attribute missing:value');
2801     }
2802     },
2803     };
2804    
## |video| element: transparent content, optionally preceded by |source|
## children.  |source| children are only allowed when the element has no
## |src| attribute, and only before any other content.
##
## State kept in $element_state:
##   allow_source - true while a |source| child is still acceptable; set in
##                  |check_start| from the absence of |src| and deleted as
##                  soon as other content is seen;
##   has_source   - 1 once a |source| child was seen; -1 when there is no
##                  |src| attribute and no |source| child seen (yet);
##                  otherwise false.
2805     $Element->{$HTML_NS}->{video} = {
2806 wakaba 1.40 %HTMLTransparentChecker,
2807 wakaba 1.48 status => FEATURE_HTML5_LC,
2808 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
2809 wakaba 1.1 src => $HTMLURIAttrChecker,
2810     ## TODO: start, loopstart, loopend, end
2811     ## ISSUE: they MUST be "value time offset"s. Value?
2812 wakaba 1.11 ## ISSUE: playcount has no conformance criteria
2813 wakaba 1.1 autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
2814     controls => $GetHTMLBooleanAttrChecker->('controls'),
2815 wakaba 1.59 poster => $HTMLURIAttrChecker,
2816 wakaba 1.42 ## TODO: width, height
2817 wakaba 1.50 }, {
## Status table; explicit entries override %HTMLAttrStatus.
2818     %HTMLAttrStatus,
2819     autoplay => FEATURE_HTML5_LC,
2820     controls => FEATURE_HTML5_LC,
2821     end => FEATURE_HTML5_LC,
2822     height => FEATURE_HTML5_LC,
2823     loopend => FEATURE_HTML5_LC,
2824     loopstart => FEATURE_HTML5_LC,
2825     playcount => FEATURE_HTML5_LC,
2826     poster => FEATURE_HTML5_LC,
2827     src => FEATURE_HTML5_LC,
2828     start => FEATURE_HTML5_LC,
2829     width => FEATURE_HTML5_LC,
2830 wakaba 1.1 }),
2831 wakaba 1.42 check_start => sub {
2832     my ($self, $item, $element_state) = @_;
2833     $element_state->{allow_source}
2834     = not $item->{node}->has_attribute_ns (undef, 'src');
## Boolean-to-sentinel trick: true * -1 is -1 ("a |source| is still needed"),
## false * -1 is zero.  |\|\|=| keeps an already-true value untouched.
2835     $element_state->{has_source} ||= $element_state->{allow_source} * -1;
2836     ## NOTE: It might be set true by |check_element|.
2837     },
2838     check_child_element => sub {
2839     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2840     $child_is_transparent, $element_state) = @_;
2841     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
2842     $self->{onerror}->(node => $child_el,
2843     type => 'element not allowed:minus',
2844     level => $self->{must_level});
## A disallowed child still ends the run of |source| children.
2845     delete $element_state->{allow_source};
2846     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2847     #
2848     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'source') {
## A |source| is an error when |src| is present or other content preceded it.
2849 wakaba 1.45 unless ($element_state->{allow_source}) {
2850 wakaba 1.42 $self->{onerror}->(node => $child_el,
2851     type => 'element not allowed:prose',
2852     level => $self->{must_level});
2853     }
## Recorded even for a misplaced |source|, so 'element missing:source' is not
## reported in addition.
2854 wakaba 1.45 $element_state->{has_source} = 1;
2855 wakaba 1.1 } else {
2856 wakaba 1.42 delete $element_state->{allow_source};
2857     $HTMLProseContentChecker{check_child_element}->(@_);
2858     }
2859     },
## Significant text ends the run of |source| children; the text itself is
## then handled by the prose checker.
2860     check_child_text => sub {
2861     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2862     if ($has_significant) {
2863     delete $element_state->{allow_source};
2864     }
2865     $HTMLProseContentChecker{check_child_text}->(@_);
2866     },
2867     check_end => sub {
2868     my ($self, $item, $element_state) = @_;
## Still at the -1 sentinel: neither a |src| attribute nor a |source| child.
2869     if ($element_state->{has_source} == -1) {
2870     $self->{onerror}->(node => $item->{node},
2871     type => 'element missing:source',
2872     level => $self->{must_level});
2873 wakaba 1.1 }
2874 wakaba 1.42
## The significant-content handling is shared with |object|.
2875     $Element->{$HTML_NS}->{object}->{check_end}->(@_);
2876 wakaba 1.1 },
2877     };
2878    
## |audio| element: starts from a shallow copy of the |video| definition
## (so it inherits |video|'s check_start / check_child_element /
## check_child_text / check_end handlers) and overrides |status| and
## |check_attrs|.  Compared with |video|, the attribute tables here have no
## |poster|, |width| or |height| entries.
2879     $Element->{$HTML_NS}->{audio} = {
2880 wakaba 1.40 %{$Element->{$HTML_NS}->{video}},
2881 wakaba 1.48 status => FEATURE_HTML5_LC,
2882 wakaba 1.42 check_attrs => $GetHTMLAttrsChecker->({
2883     src => $HTMLURIAttrChecker,
2884     ## TODO: start, loopstart, loopend, end
2885     ## ISSUE: they MUST be "value time offset"s. Value?
2886     ## ISSUE: playcount has no conformance criteria
2887     autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
2888     controls => $GetHTMLBooleanAttrChecker->('controls'),
2889 wakaba 1.50 }, {
## Status table; explicit entries override %HTMLAttrStatus.
2890     %HTMLAttrStatus,
2891     autoplay => FEATURE_HTML5_LC,
2892     controls => FEATURE_HTML5_LC,
2893     end => FEATURE_HTML5_LC,
2894     loopend => FEATURE_HTML5_LC,
2895     loopstart => FEATURE_HTML5_LC,
2896     playcount => FEATURE_HTML5_LC,
2897     src => FEATURE_HTML5_LC,
2898     start => FEATURE_HTML5_LC,
2899 wakaba 1.42 }),
2900 wakaba 1.1 };
2901    
## |source| element: empty content model.  |src| (URI), |type| (checked by
## $HTMLIMTAttrChecker) and |media| (checked by $HTMLMQAttrChecker) have
## element-specific checkers; |src| is required and its absence is reported
## without an explicit |level|.
2902     $Element->{$HTML_NS}->{source} = {
2903 wakaba 1.40 %HTMLEmptyChecker,
2904 wakaba 1.48 status => FEATURE_HTML5_DEFAULT,
2905 wakaba 1.40 check_attrs => sub {
2906     my ($self, $item, $element_state) = @_;
2907 wakaba 1.1 $GetHTMLAttrsChecker->({
2908     src => $HTMLURIAttrChecker,
2909     type => $HTMLIMTAttrChecker,
2910     media => $HTMLMQAttrChecker,
2911 wakaba 1.50 }, {
## Status table; explicit entries override %HTMLAttrStatus.
2912     %HTMLAttrStatus,
2913     media => FEATURE_HTML5_DEFAULT,
2914     src => FEATURE_HTML5_DEFAULT,
2915     type => FEATURE_HTML5_DEFAULT,
2916 wakaba 1.40 })->($self, $item, $element_state);
2917     unless ($item->{node}->has_attribute_ns (undef, 'src')) {
2918     $self->{onerror}->(node => $item->{node},
2919 wakaba 1.1 type => 'attribute missing:src');
2920     }
2921     },
2922     };
2923    
## |canvas| element: transparent content model.  |height| and |width| go
## through the non-negative-integer checker factory; the callback passed to
## it always returns true, so presumably no extra range restriction is
## applied beyond what the factory itself enforces -- confirm against
## $GetHTMLNonNegativeIntegerAttrChecker.
2924     $Element->{$HTML_NS}->{canvas} = {
2925 wakaba 1.40 %HTMLTransparentChecker,
2926 wakaba 1.48 status => FEATURE_HTML5_LC,
2927 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
2928 wakaba 1.1 height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2929     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2930 wakaba 1.50 }, {
## Status table; explicit entries override %HTMLAttrStatus.
2931     %HTMLAttrStatus,
2932     height => FEATURE_HTML5_LC,
2933     width => FEATURE_HTML5_LC,
2934 wakaba 1.1 }),
2935     };
2936    
## |map| element: prose content with a required |id| attribute.  The |id|
## checker below both validates the ID and registers the map in
## $self->{map}.  While the element is open, $self->{flag}->{in_map} is set.
2937     $Element->{$HTML_NS}->{map} = {
2938 wakaba 1.40 %HTMLProseContentChecker,
2939 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2940 wakaba 1.40 check_attrs => sub {
2941     my ($self, $item, $element_state) = @_;
## Set by the |id| checker closure below; tested after the generic checker
## has run to detect a missing |id|.
2942 wakaba 1.4 my $has_id;
2943     $GetHTMLAttrsChecker->({
2944     id => sub {
2945     ## NOTE: same as global |id=""|, with |$self->{map}| registration
2946     my ($self, $attr) = @_;
2947     my $value = $attr->value;
2948     if (length $value > 0) {
## $self->{id} maps each ID value to the list of attribute nodes using it;
## a second use of the same value is reported as a duplicate.
2949     if ($self->{id}->{$value}) {
2950     $self->{onerror}->(node => $attr, type => 'duplicate ID');
2951     push @{$self->{id}->{$value}}, $attr;
2952     } else {
2953     $self->{id}->{$value} = [$attr];
2954     }
2955 wakaba 1.1 } else {
2956 wakaba 1.4 ## NOTE: MUST contain at least one character
2957     $self->{onerror}->(node => $attr, type => 'empty attribute value');
2958 wakaba 1.1 }
2959 wakaba 1.4 if ($value =~ /[\x09-\x0D\x20]/) {
2960     $self->{onerror}->(node => $attr, type => 'space in ID');
2961     }
## First registration wins (|\|\|=|).  Note that this runs for empty values
## too, so an empty |id| is registered under the key ''.
2962     $self->{map}->{$value} ||= $attr;
2963     $has_id = 1;
2964     },
2965 wakaba 1.49 }, {
## Status table; explicit entries override %HTMLAttrStatus.
2966     %HTMLAttrStatus,
2967 wakaba 1.50 class => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2968     dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2969     id => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2970     lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
2971 wakaba 1.49 name => FEATURE_M12N10_REC_DEPRECATED,
2972 wakaba 1.50 onclick => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2973     ondblclick => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2974     onmousedown => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2975     onmouseup => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2976     onmouseover => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2977     onmousemove => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2978     onmouseout => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2979     onkeypress => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2980     onkeydown => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2981     onkeyup => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2982     title => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2983 wakaba 1.40 })->($self, $item, $element_state);
2984     $self->{onerror}->(node => $item->{node}, type => 'attribute missing:id')
2985 wakaba 1.4 unless $has_id;
2986     },
## Remember the previous |in_map| flag so that nested |map| elements restore
## it correctly on exit.
2987 wakaba 1.59 check_start => sub {
2988     my ($self, $item, $element_state) = @_;
2989     $element_state->{in_map_original} = $self->{flag}->{in_map};
2990     $self->{flag}->{in_map} = 1;
2991     },
## Clear the flag only if it was not already set when this element started,
## then run the generic prose end-of-element processing.
2992     check_end => sub {
2993     my ($self, $item, $element_state) = @_;
2994     delete $self->{flag}->{in_map} unless $element_state->{in_map_original};
2995     $HTMLProseContentChecker{check_end}->(@_);
2996     },
2997 wakaba 1.1 };
2998    
2999     $Element->{$HTML_NS}->{area} = {
3000 wakaba 1.40 %HTMLEmptyChecker,
3001 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3002 wakaba 1.40 check_attrs => sub {
3003     my ($self, $item, $element_state) = @_;
3004 wakaba 1.1 my %attr;
3005     my $coords;
3006 wakaba 1.40 for my $attr (@{$item->{node}->attributes}) {
3007 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
3008     $attr_ns = '' unless defined $attr_ns;
3009     my $attr_ln = $attr->manakai_local_name;
3010     my $checker;
3011     if ($attr_ns eq '') {
3012     $checker = {
3013     alt => sub { },
3014     ## NOTE: |alt| value has no conformance creteria.
3015     shape => $GetHTMLEnumeratedAttrChecker->({
3016     circ => -1, circle => 1,
3017     default => 1,
3018     poly => 1, polygon => -1,
3019     rect => 1, rectangle => -1,
3020     }),
3021     coords => sub {
3022     my ($self, $attr) = @_;
3023     my $value = $attr->value;
3024     if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
3025     $coords = [split /,/, $value];
3026     } else {
3027     $self->{onerror}->(node => $attr,
3028     type => 'coords:syntax error');
3029     }
3030     },
3031     target => $HTMLTargetAttrChecker,
3032     href => $HTMLURIAttrChecker,
3033     ping => $HTMLSpaceURIsAttrChecker,
3034 wakaba 1.40 rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
3035 wakaba 1.1 media => $HTMLMQAttrChecker,
3036     hreflang => $HTMLLanguageTagAttrChecker,
3037     type => $HTMLIMTAttrChecker,
3038     }->{$attr_ln};
3039     if ($checker) {
3040     $attr{$attr_ln} = $attr;
3041     } else {
3042     $checker = $HTMLAttrChecker->{$attr_ln};
3043     }
3044     }
3045     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
3046     || $AttrChecker->{$attr_ns}->{''};
3047     if ($checker) {
3048     $checker->($self, $attr) if ref $checker;
3049 wakaba 1.49 } elsif ($attr_ns eq '') {
3050 wakaba 1.54 #
3051 wakaba 1.1 } else {
3052     $self->{onerror}->(node => $attr, level => 'unsupported',
3053     type => 'attribute');
3054     ## ISSUE: No comformance createria for unknown attributes in the spec
3055     }
3056 wakaba 1.49
3057     if ($attr_ns eq '') {
3058     $self->_attr_status_info ($attr, {
3059     %HTMLAttrStatus,
3060     %HTMLM12NCommonAttrStatus,
3061     accesskey => FEATURE_M12N10_REC,
3062 wakaba 1.50 alt => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3063     coords => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3064     href => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3065 wakaba 1.54 hreflang => FEATURE_HTML5_DEFAULT,
3066 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3067     media => FEATURE_HTML5_DEFAULT,
3068 wakaba 1.49 nohref => FEATURE_M12N10_REC,
3069 wakaba 1.50 onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3070     onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3071     ping => FEATURE_HTML5_DEFAULT,
3072     rel => FEATURE_HTML5_DEFAULT,
3073     shape => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3074     tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3075     target => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3076     type => FEATURE_HTML5_DEFAULT,
3077 wakaba 1.49 }->{$attr_ln});
3078     }
3079 wakaba 1.1 }
3080    
3081     if (defined $attr{href}) {
3082 wakaba 1.4 $self->{has_hyperlink_element} = 1;
3083 wakaba 1.1 unless (defined $attr{alt}) {
3084 wakaba 1.40 $self->{onerror}->(node => $item->{node},
3085 wakaba 1.1 type => 'attribute missing:alt');
3086     }
3087     } else {
3088     for (qw/target ping rel media hreflang type alt/) {
3089     if (defined $attr{$_}) {
3090     $self->{onerror}->(node => $attr{$_},
3091     type => 'attribute not allowed');
3092     }
3093     }
3094     }
3095    
3096     my $shape = 'rectangle';
3097     if (defined $attr{shape}) {
3098     $shape = {
3099     circ => 'circle', circle => 'circle',
3100     default => 'default',
3101     poly => 'polygon', polygon => 'polygon',
3102     rect => 'rectangle', rectangle => 'rectangle',
3103     }->{lc $attr{shape}->value} || 'rectangle';
3104     ## TODO: ASCII lowercase?
3105     }
3106    
3107     if ($shape eq 'circle') {
3108     if (defined $attr{coords}) {
3109     if (defined $coords) {
3110     if (@$coords == 3) {
3111     if ($coords->[2] < 0) {
3112     $self->{onerror}->(node => $attr{coords},
3113     type => 'coords:out of range:2');
3114     }
3115     } else {
3116     $self->{onerror}->(node => $attr{coords},
3117     type => 'coords:number:3:'.@$coords);
3118     }
3119     } else {
3120     ## NOTE: A syntax error has been reported.
3121     }
3122     } else {
3123 wakaba 1.40 $self->{onerror}->(node => $item->{node},
3124 wakaba 1.1 type => 'attribute missing:coords');
3125     }
3126     } elsif ($shape eq 'default') {
3127     if (defined $attr{coords}) {
3128     $self->{onerror}->(node => $attr{coords},
3129     type => 'attribute not allowed');
3130     }
3131     } elsif ($shape eq 'polygon') {
3132     if (defined $attr{coords}) {
3133     if (defined $coords) {
3134     if (@$coords >= 6) {
3135     unless (@$coords % 2 == 0) {
3136     $self->{onerror}->(node => $attr{coords},
3137     type => 'coords:number:even:'.@$coords);
3138     }
3139     } else {
3140     $self->{onerror}->(node => $attr{coords},
3141     type => 'coords:number:>=6:'.@$coords);
3142     }
3143     } else {
3144     ## NOTE: A syntax error has been reported.
3145     }
3146     } else {
3147 wakaba 1.40 $self->{onerror}->(node => $item->{node},
3148 wakaba 1.1 type => 'attribute missing:coords');
3149     }
3150     } elsif ($shape eq 'rectangle') {
3151     if (defined $attr{coords}) {
3152     if (defined $coords) {
3153     if (@$coords == 4) {
3154     unless ($coords->[0] < $coords->[2]) {
3155     $self->{onerror}->(node => $attr{coords},
3156     type => 'coords:out of range:0');
3157     }
3158     unless ($coords->[1] < $coords->[3]) {
3159     $self->{onerror}->(node => $attr{coords},
3160     type => 'coords:out of range:1');
3161     }
3162     } else {
3163     $self->{onerror}->(node => $attr{coords},
3164     type => 'coords:number:4:'.@$coords);
3165     }
3166     } else {
3167     ## NOTE: A syntax error has been reported.
3168     }
3169     } else {
3170 wakaba 1.40 $self->{onerror}->(node => $item->{node},
3171 wakaba 1.1 type => 'attribute missing:coords');
3172     }
3173     }
3174     },
3175 wakaba 1.59 check_start => sub {
3176     my ($self, $item, $element_state) = @_;
3177     unless ($self->{flag}->{in_map} or
3178     not $item->{node}->manakai_parent_element) {
3179     $self->{onerror}->(node => $item->{node},
3180     type => 'element not allowed:area',
3181     level => $self->{must_level});
3182     }
3183     },
3184 wakaba 1.1 };
3185    
## |table| element definition.
##
## Child elements are validated with a small state machine whose current
## state is kept in |$element_state->{phase}|.  The phases used below are
## 'before caption' (initial), 'in colgroup', 'after thead', 'in tbodys',
## 'in trs' and 'after tfoot'.  |$element_state->{has_tfoot}| records that
## a |tfoot| has already been seen so that a second one is rejected.
## Significant text children are always reported.  At the end of the
## element the table model is checked by |Whatpm::HTMLTable->form_table|
## and the node is appended to |$self->{return}->{table}|.
3186     $Element->{$HTML_NS}->{table} = {
3187 wakaba 1.40 %HTMLChecker,
3188 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
## No element-specific attribute checkers; only per-attribute status.
3189     check_attrs => $GetHTMLAttrsChecker->({}, {
3190     %HTMLAttrStatus,
3191     %HTMLM12NCommonAttrStatus,
3192     align => FEATURE_M12N10_REC_DEPRECATED,
3193     bgcolor => FEATURE_M12N10_REC_DEPRECATED,
3194     border => FEATURE_M12N10_REC,
3195     cellpadding => FEATURE_M12N10_REC,
3196     cellspacing => FEATURE_M12N10_REC,
3197     datafld => FEATURE_HTML4_REC_RESERVED,
3198     dataformatas => FEATURE_HTML4_REC_RESERVED,
3199     datapagesize => FEATURE_M12N10_REC,
3200     datasrc => FEATURE_HTML4_REC_RESERVED,
3201     frame => FEATURE_M12N10_REC,
3202 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3203 wakaba 1.49 rules => FEATURE_M12N10_REC,
3204     summary => FEATURE_M12N10_REC,
3205     width => FEATURE_M12N10_REC,
3206     }),
## Every |table| starts in the 'before caption' phase.
3207 wakaba 1.40 check_start => sub {
3208     my ($self, $item, $element_state) = @_;
3209     $element_state->{phase} = 'before caption';
3210     },
## Advances the phase according to the child element.  A child that does
## not fit the current phase is reported as 'element not allowed'.
3211     check_child_element => sub {
3212     my ($self, $item, $child_el, $child_nsuri, $child_ln,
3213     $child_is_transparent, $element_state) = @_;
## Entries in |minus_elements| are always errors; entries in
## |plus_elements| are accepted without touching the phase.
3214     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
3215     $self->{onerror}->(node => $child_el,
3216     type => 'element not allowed:minus',
3217     level => $self->{must_level});
3218     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
3219     #
3220     } elsif ($element_state->{phase} eq 'in tbodys') {
## More |tbody| elements, or the first |tfoot|, may follow.
3221     if ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
3222     #$element_state->{phase} = 'in tbodys';
3223     } elsif (not $element_state->{has_tfoot} and
3224     $child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
3225     $element_state->{phase} = 'after tfoot';
3226     $element_state->{has_tfoot} = 1;
3227     } else {
3228     $self->{onerror}->(node => $child_el, type => 'element not allowed');
3229     }
3230     } elsif ($element_state->{phase} eq 'in trs') {
## More |tr| elements, or the first |tfoot|, may follow.
3231     if ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
3232     #$element_state->{phase} = 'in trs';
3233     } elsif (not $element_state->{has_tfoot} and
3234     $child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
3235     $element_state->{phase} = 'after tfoot';
3236     $element_state->{has_tfoot} = 1;
3237     } else {
3238     $self->{onerror}->(node => $child_el, type => 'element not allowed');
3239     }
3240     } elsif ($element_state->{phase} eq 'after thead') {
3241     if ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
3242     $element_state->{phase} = 'in tbodys';
3243     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
3244     $element_state->{phase} = 'in trs';
3245     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
## NOTE(review): A |tfoot| that precedes the body content moves to
## 'in tbodys', where a following |tr| child is reported as not
## allowed (only |tbody| is accepted there) -- confirm this is intended.
## The same applies to the two |tfoot| branches further below.
3246     $element_state->{phase} = 'in tbodys';
3247     $element_state->{has_tfoot} = 1;
3248     } else {
3249     $self->{onerror}->(node => $child_el, type => 'element not allowed');
3250     }
3251     } elsif ($element_state->{phase} eq 'in colgroup') {
3252     if ($child_nsuri eq $HTML_NS and $child_ln eq 'colgroup') {
3253     $element_state->{phase} = 'in colgroup';
3254     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'thead') {
3255     $element_state->{phase} = 'after thead';
3256     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
3257     $element_state->{phase} = 'in tbodys';
3258     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
3259     $element_state->{phase} = 'in trs';
3260     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
3261     $element_state->{phase} = 'in tbodys';
3262     $element_state->{has_tfoot} = 1;
3263     } else {
3264     $self->{onerror}->(node => $child_el, type => 'element not allowed');
3265     }
3266     } elsif ($element_state->{phase} eq 'before caption') {
## A |caption| moves directly to 'in colgroup', so a second |caption|
## is rejected by that phase.
3267     if ($child_nsuri eq $HTML_NS and $child_ln eq 'caption') {
3268     $element_state->{phase} = 'in colgroup';
3269     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'colgroup') {
3270     $element_state->{phase} = 'in colgroup';
3271     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'thead') {
3272     $element_state->{phase} = 'after thead';
3273     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
3274     $element_state->{phase} = 'in tbodys';
3275     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
3276     $element_state->{phase} = 'in trs';
3277     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
3278     $element_state->{phase} = 'in tbodys';
3279     $element_state->{has_tfoot} = 1;
3280     } else {
3281     $self->{onerror}->(node => $child_el, type => 'element not allowed');
3282     }
3283     } elsif ($element_state->{phase} eq 'after tfoot') {
## Nothing may follow a trailing |tfoot|.
3284     $self->{onerror}->(node => $child_el, type => 'element not allowed');
3285     } else {
## Unknown phase names indicate a programming error in this checker.
3286     die "check_child_element: Bad |table| phase: $element_state->{phase}";
3287     }
3288     },
## Significant text directly inside |table| is an error.
3289     check_child_text => sub {
3290     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
3291     if ($has_significant) {
3292     $self->{onerror}->(node => $child_node, type => 'character not allowed');
3293 wakaba 1.1 }
3294 wakaba 1.40 },
3295     check_end => sub {
3296     my ($self, $item, $element_state) = @_;
3297 wakaba 1.1
3298     ## Table model errors
## Errors reported by |form_table| are forwarded to |onerror| with
## their type prefixed by 'table:'.
3299     require Whatpm::HTMLTable;
3300 wakaba 1.40 Whatpm::HTMLTable->form_table ($item->{node}, sub {
3301 wakaba 1.1 my %opt = @_;
3302     $self->{onerror}->(type => 'table:'.$opt{type}, node => $opt{node});
3303     });
## Record the table node in the result, then run the generic end check.
3304 wakaba 1.40 push @{$self->{return}->{table}}, $item->{node};
3305 wakaba 1.1
3306 wakaba 1.40 $HTMLChecker{check_end}->(@_);
3307 wakaba 1.1 },
3308     };
3309    
## |caption| element definition.  All checker callbacks come from
## |%HTMLPhrasingContentChecker|; only |status| and |check_attrs| are
## overridden.  No element-specific attribute checkers are registered,
## just the per-attribute status table.
3310     $Element->{$HTML_NS}->{caption} = {
3311 wakaba 1.40 %HTMLPhrasingContentChecker,
3312 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3313     check_attrs => $GetHTMLAttrsChecker->({}, {
3314     %HTMLAttrStatus,
3315     %HTMLM12NCommonAttrStatus,
3316     align => FEATURE_M12N10_REC_DEPRECATED,
3317 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3318 wakaba 1.49 }),
3319 wakaba 1.1 };
3320    
## |colgroup| element definition.  Starts from |%HTMLEmptyChecker| but
## overrides the child callbacks: HTML |col| children are accepted, any
## other child element and any significant text is reported.
3321     $Element->{$HTML_NS}->{colgroup} = {
3322 wakaba 1.40 %HTMLEmptyChecker,
3323 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3324 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
## The callback returns true only for values greater than zero
## (presumably the range test applied to the parsed integer -- see
## |$GetHTMLNonNegativeIntegerAttrChecker|).
3325 wakaba 1.1 span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
3326     ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
3327     ## TODO: "attribute not supported" if |col|.
3328     ## ISSUE: MUST NOT if any |col|?
3329     ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
3330 wakaba 1.49 }, {
3331     %HTMLAttrStatus,
3332     %HTMLM12NCommonAttrStatus,
3333     align => FEATURE_M12N10_REC,
3334     char => FEATURE_M12N10_REC,
3335     charoff => FEATURE_M12N10_REC,
3336 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3337     span => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3338 wakaba 1.49 valign => FEATURE_M12N10_REC,
3339     width => FEATURE_M12N10_REC,
3340 wakaba 1.1 }),
## Accepts |plus_elements| entries and HTML |col|; rejects
## |minus_elements| entries (at |must_level|) and everything else.
3341 wakaba 1.40 check_child_element => sub {
3342     my ($self, $item, $child_el, $child_nsuri, $child_ln,
3343     $child_is_transparent, $element_state) = @_;
3344     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
3345     $self->{onerror}->(node => $child_el,
3346     type => 'element not allowed:minus',
3347     level => $self->{must_level});
3348     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
3349     #
3350     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'col') {
3351     #
3352     } else {
3353     $self->{onerror}->(node => $child_el, type => 'element not allowed');
3354     }
3355     },
## Significant text directly inside |colgroup| is an error.
3356     check_child_text => sub {
3357     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
3358     if ($has_significant) {
3359     $self->{onerror}->(node => $child_node, type => 'character not allowed');
3360 wakaba 1.1 }
3361     },
3362     };
3363    
## |col| element definition.  Uses |%HTMLEmptyChecker| unchanged for
## content; |span| gets the same checker construction as on |colgroup|
## (callback true only for values greater than zero).
3364     $Element->{$HTML_NS}->{col} = {
3365 wakaba 1.40 %HTMLEmptyChecker,
3366 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3367 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
3368 wakaba 1.1 span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
3369 wakaba 1.49 }, {
3370     %HTMLAttrStatus,
3371     %HTMLM12NCommonAttrStatus,
3372     align => FEATURE_M12N10_REC,
3373     char => FEATURE_M12N10_REC,
3374     charoff => FEATURE_M12N10_REC,
3375 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3376     span => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3377 wakaba 1.49 valign => FEATURE_M12N10_REC,
3378     width => FEATURE_M12N10_REC,
3379 wakaba 1.1 }),
3380     };
3381    
## |tbody| element definition (also copied for |thead| and |tfoot|).
## Only HTML |tr| children are accepted; significant text is reported;
## at the end an error is raised if no |tr| child was seen.
3382     $Element->{$HTML_NS}->{tbody} = {
3383 wakaba 1.40 %HTMLChecker,
3384 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3385     check_attrs => $GetHTMLAttrsChecker->({}, {
3386     %HTMLAttrStatus,
3387     %HTMLM12NCommonAttrStatus,
3388     align => FEATURE_M12N10_REC,
3389     char => FEATURE_M12N10_REC,
3390     charoff => FEATURE_M12N10_REC,
3391 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3392 wakaba 1.49 valign => FEATURE_M12N10_REC,
3393     }),
## Remembers in |$element_state->{has_tr}| that a |tr| child was seen.
## |plus_elements| entries are accepted but do not count as a row.
3394 wakaba 1.40 check_child_element => sub {
3395     my ($self, $item, $child_el, $child_nsuri, $child_ln,
3396     $child_is_transparent, $element_state) = @_;
3397     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
3398     $self->{onerror}->(node => $child_el,
3399     type => 'element not allowed:minus',
3400     level => $self->{must_level});
3401     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
3402     #
3403     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
3404     $element_state->{has_tr} = 1;
3405     } else {
3406     $self->{onerror}->(node => $child_el, type => 'element not allowed');
3407     }
3408     },
## Significant text directly inside the row group is an error.
3409     check_child_text => sub {
3410     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
3411     if ($has_significant) {
3412     $self->{onerror}->(node => $child_node, type => 'character not allowed');
3413 wakaba 1.1 }
3414 wakaba 1.40 },
## Reports a missing |tr| child, then runs the generic end check.
3415     check_end => sub {
3416     my ($self, $item, $element_state) = @_;
3417     unless ($element_state->{has_tr}) {
3418     $self->{onerror}->(node => $item->{node},
3419     type => 'child element missing:tr');
3420 wakaba 1.1 }
3421 wakaba 1.40
3422     $HTMLChecker{check_end}->(@_);
3423 wakaba 1.1 },
3424     };
3425    
## |thead| element definition: a shallow copy of the |tbody| definition,
## so it shares the same status, attribute checker and callbacks.
3426     $Element->{$HTML_NS}->{thead} = {
3427 wakaba 1.40 %{$Element->{$HTML_NS}->{tbody}},
3428 wakaba 1.1 };
3429    
## |tfoot| element definition: a shallow copy of the |tbody| definition,
## so it shares the same status, attribute checker and callbacks.
3430     $Element->{$HTML_NS}->{tfoot} = {
3431 wakaba 1.40 %{$Element->{$HTML_NS}->{tbody}},
3432 wakaba 1.1 };
3433    
## |tr| element definition.  Only HTML |td| and |th| children are
## accepted; significant text is reported; at the end an error is raised
## if no cell child was seen.
3434     $Element->{$HTML_NS}->{tr} = {
3435 wakaba 1.40 %HTMLChecker,
3436 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3437     check_attrs => $GetHTMLAttrsChecker->({}, {
3438     %HTMLAttrStatus,
3439     %HTMLM12NCommonAttrStatus,
3440     align => FEATURE_M12N10_REC,
3441     bgcolor => FEATURE_M12N10_REC_DEPRECATED,
3442     char => FEATURE_M12N10_REC,
3443     charoff => FEATURE_M12N10_REC,
3444 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3445 wakaba 1.49 valign => FEATURE_M12N10_REC,
3446     }),
## Remembers in |$element_state->{has_cell}| that a |td| or |th| child
## was seen.  |plus_elements| entries are accepted but do not count as
## a cell.
3447 wakaba 1.40 check_child_element => sub {
3448     my ($self, $item, $child_el, $child_nsuri, $child_ln,
3449     $child_is_transparent, $element_state) = @_;
3450     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
3451     $self->{onerror}->(node => $child_el,
3452     type => 'element not allowed:minus',
3453     level => $self->{must_level});
3454     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
3455     #
3456     } elsif ($child_nsuri eq $HTML_NS and
3457     ($child_ln eq 'td' or $child_ln eq 'th')) {
3458     $element_state->{has_cell} = 1;
3459     } else {
3460     $self->{onerror}->(node => $child_el, type => 'element not allowed');
3461     }
3462     },
## Significant text directly inside |tr| is an error.
3463     check_child_text => sub {
3464     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
3465     if ($has_significant) {
3466     $self->{onerror}->(node => $child_node, type => 'character not allowed');
3467 wakaba 1.1 }
3468 wakaba 1.40 },
## Reports a missing cell child, then runs the generic end check.
3469     check_end => sub {
3470     my ($self, $item, $element_state) = @_;
3471     unless ($element_state->{has_cell}) {
3472     $self->{onerror}->(node => $item->{node},
3473     type => 'child element missing:td|th');
3474 wakaba 1.1 }
3475 wakaba 1.40
3476     $HTMLChecker{check_end}->(@_);
3477 wakaba 1.1 },
3478     };
3479    
## |td| element definition.  Content callbacks come from
## |%HTMLProseContentChecker|.  |colspan| and |rowspan| are checked with
## the non-negative integer checker (callback true only for values greater
## than zero).  The remaining attributes listed in the status table,
## including |scope|, have no element-specific checker in this list.
3480     $Element->{$HTML_NS}->{td} = {
3481 wakaba 1.40 %HTMLProseContentChecker,
3482 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3483 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
3484 wakaba 1.1 colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
3485     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
3486 wakaba 1.49 }, {
3487     %HTMLAttrStatus,
3488     %HTMLM12NCommonAttrStatus,
3489     abbr => FEATURE_M12N10_REC,
3490     align => FEATURE_M12N10_REC,
3491     axis => FEATURE_M12N10_REC,
3492     bgcolor => FEATURE_M12N10_REC_DEPRECATED,
3493     char => FEATURE_M12N10_REC,
3494     charoff => FEATURE_M12N10_REC,
3495 wakaba 1.50 colspan => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3496 wakaba 1.49 headers => FEATURE_M12N10_REC,
3497     height => FEATURE_M12N10_REC_DEPRECATED,
3498 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3499 wakaba 1.49 nowrap => FEATURE_M12N10_REC_DEPRECATED,
3500 wakaba 1.50 rowspan => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3501 wakaba 1.49 scope => FEATURE_M12N10_REC,
3502     valign => FEATURE_M12N10_REC,
3503     width => FEATURE_M12N10_REC_DEPRECATED,
3504 wakaba 1.1 }),
3505     };
3506    
## |th| element definition.  Content callbacks come from
## |%HTMLPhrasingContentChecker| (unlike |td|, which uses the prose
## content checker).  |colspan| and |rowspan| are checked as on |td|;
## |scope| is restricted to the keywords row, col, rowgroup and colgroup.
3507     $Element->{$HTML_NS}->{th} = {
3508 wakaba 1.40 %HTMLPhrasingContentChecker,
3509 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3510 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
3511 wakaba 1.1 colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
3512     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
3513     scope => $GetHTMLEnumeratedAttrChecker
3514     ->({row => 1, col => 1, rowgroup => 1, colgroup => 1}),
3515 wakaba 1.49 }, {
3516     %HTMLAttrStatus,
3517     %HTMLM12NCommonAttrStatus,
3518     abbr => FEATURE_M12N10_REC,
3519     align => FEATURE_M12N10_REC,
3520     axis => FEATURE_M12N10_REC,
3521     bgcolor => FEATURE_M12N10_REC_DEPRECATED,
3522     char => FEATURE_M12N10_REC,
3523     charoff => FEATURE_M12N10_REC,
3524 wakaba 1.50 colspan => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3525 wakaba 1.49 headers => FEATURE_M12N10_REC,
3526     height => FEATURE_M12N10_REC_DEPRECATED,
3527 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3528 wakaba 1.49 nowrap => FEATURE_M12N10_REC_DEPRECATED,
3529 wakaba 1.50 rowspan => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3530     scope => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3531 wakaba 1.49 valign => FEATURE_M12N10_REC,
3532     width => FEATURE_M12N10_REC_DEPRECATED,
3533 wakaba 1.1 }),
3534     };
3535    
## Placeholder attribute checker for attributes whose value syntax is not
## validated yet.  It performs no validation; it only reports the
## attribute node through |onerror| with level 'unsupported' and type
## 'attribute'.
##   $self - checker object providing the |onerror| callback
##   $attr - attribute node being checked
3536 wakaba 1.52 my $AttrCheckerNotImplemented = sub {
3537     my ($self, $attr) = @_;
3538     $self->{onerror}->(node => $attr, level => 'unsupported',
3539     type => 'attribute');
3540     };
3541    
## |form| element definition (Web Forms 2.0 / HTML4 modularization).
## Content callbacks come from |%HTMLProseContentChecker|.  The first
## hash passed to |$GetHTMLAttrsChecker| maps attribute names to value
## checkers; the second maps attribute names to their status flags.
## |accept| and |accept-charset| are only reported as unsupported;
## |name| accepts any value.
3542     $Element->{$HTML_NS}->{form} = {
3543 wakaba 1.56 %HTMLProseContentChecker, ## NOTE: Flow* [WF2]
3544     ## TODO: form in form is allowed in XML [WF2]
3545 wakaba 1.52 status => FEATURE_WF2 | FEATURE_M12N10_REC,
3546     check_attrs => $GetHTMLAttrsChecker->({
3547 wakaba 1.56 accept => $AttrCheckerNotImplemented, ## TODO: ContentTypes [WF2]
3548 wakaba 1.52 'accept-charset' => $AttrCheckerNotImplemented, ## TODO: Charsets
3549     action => $HTMLURIAttrChecker, ## TODO: "User agent behavior for a value other than HTTP URI is undefined" [HTML4]
3550 wakaba 1.56 data => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
3551     enctype => $HTMLIMTAttrChecker, ## TODO: "multipart/form-data" should be used when type=file is used [HTML4] ## TODO: MUST NOT parameter [WF2]
3552     method => $GetHTMLEnumeratedAttrChecker->({
3553     get => 1, post => 1, put => 1, delete => 1,
3554     }),
3555 wakaba 1.52 ## NOTE: "get" SHOULD be used for idempotent submission,
3556     ## "post" SHOULD be used otherwise [HTML4]. This cannot be tested.
3557     name => sub { }, # CDATA in HTML4 ## TODO: must be same as |id| (informative!) [XHTML10]
3558 wakaba 1.56 onreceived => $HTMLEventHandlerAttrChecker,
3559     replace => $GetHTMLEnumeratedAttrChecker->({document => 1, values => 1}),
3560 wakaba 1.52 target => $HTMLTargetAttrChecker,
3561     ## TODO: Warn for combination whose behavior is not defined.
3562     }, {
3563     %HTMLAttrStatus,
3564     %HTMLM12NCommonAttrStatus,
3565 wakaba 1.56 accept => FEATURE_WF2 | FEATURE_M12N10_REC,
3566 wakaba 1.52 'accept-charset' => FEATURE_M12N10_REC,
3567 wakaba 1.56 action => FEATURE_WF2 | FEATURE_M12N10_REC,
3568     data => FEATURE_WF2,
3569     enctype => FEATURE_WF2 | FEATURE_M12N10_REC,
3570 wakaba 1.52 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3571 wakaba 1.56 method => FEATURE_WF2 | FEATURE_M12N10_REC,
3572 wakaba 1.52 name => FEATURE_M12N10_REC_DEPRECATED,
3573 wakaba 1.56 onreceived => FEATURE_WF2,
3574 wakaba 1.52 onreset => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3575     onsubmit => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3576 wakaba 1.56 replace => FEATURE_WF2,
3577 wakaba 1.52 target => FEATURE_M12N10_REC,
3578     }),
3579     ## TODO: Tests
3580     ## TODO: Tests for <nest/> in <form>
3581     };
3582    
## |fieldset| element definition.  Content callbacks come from
## |%HTMLProseContentChecker|; the |legend| requirement is not checked
## yet (see TODO).  Only |disabled| has a value checker; |form| has a
## status entry but no checker yet.
3583     $Element->{$HTML_NS}->{fieldset} = {
3584     %HTMLProseContentChecker, ## NOTE: legend, %Flow; ## TODO: legend
3585     status => FEATURE_WF2 | FEATURE_M12N10_REC,
3586 wakaba 1.56 check_attrs => $GetHTMLAttrsChecker->({
3587     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
3588     ## TODO: form [WF2]
3589     }, {
3590 wakaba 1.52 %HTMLAttrStatus,
3591     %HTMLM12NCommonAttrStatus,
3592 wakaba 1.56 disabled => FEATURE_WF2,
3593     form => FEATURE_WF2,
3594 wakaba 1.52 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3595     }),
3596     ## TODO: Tests
3597     ## TODO: Tests for <nest/> in <fieldset>
3598     };
3599    
## |input| element definition (Web Forms 2.0 / HTML4 modularization).
## Content callbacks come from |%HTMLEmptyChecker|.  The first hash
## passed to |$GetHTMLAttrsChecker| maps attribute names to value
## checkers; the second maps attribute names to their status flags.
## |accept| and |accesskey| are only reported as unsupported; |alt|,
## |name| and |value| accept any value.
3600     $Element->{$HTML_NS}->{input} = {
3601 wakaba 1.56 %HTMLEmptyChecker, ## MUST [WF2]
3602 wakaba 1.52 status => FEATURE_WF2 | FEATURE_M12N10_REC,
3603     check_attrs => $GetHTMLAttrsChecker->({
3604 wakaba 1.56 accept => $AttrCheckerNotImplemented, ## TODO: ContentTypes [WF2]
3605 wakaba 1.52 accesskey => $AttrCheckerNotImplemented, ## TODO: Character
3606     ## TODO: "Note. Authors should consider the input method of the expected reader when specifying an accesskey." [HTML4]
3607     ## "We recommend that authors include the access key in label text or wherever the access key is to apply." [HTML4]
3608 wakaba 1.56 action => $HTMLURIAttrChecker,
3609 wakaba 1.52 align => $GetHTMLEnumeratedAttrChecker->({
3610     top => 1, middle => 1, bottom => 1, left => 1, right => 1,
3611     }),
3612     alt => sub {}, ## NOTE: Text [M12N] ## TODO: |alt| should be provided for |type=image| [HTML4]
3613     ## NOTE: HTML4 has a "should" for accessibility, which cannot be tested
3614     ## here.
3615 wakaba 1.56 autocomplete => $GetHTMLEnumeratedAttrChecker->({on => 1, off => 1}),
3616     autofocus => $GetHTMLBooleanAttrChecker->('autofocus'),
3617 wakaba 1.52 checked => $GetHTMLBooleanAttrChecker->('checked'),
3618     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
3619 wakaba 1.56 enctype => $HTMLIMTAttrChecker,
3620     ## TODO: form [WF2]
3621     ## TODO: inputmode [WF2]
3622 wakaba 1.52 ismap => $GetHTMLBooleanAttrChecker->('ismap'),
3623 wakaba 1.56 ## TODO: list [WF2]
3624     ## TODO: max [WF2]
3625 wakaba 1.52 maxlength => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
3626 wakaba 1.56 method => $GetHTMLEnumeratedAttrChecker->({
3627     get => 1, post => 1, put => 1, delete => 1,
3628     }),
3629     ## TODO: min [WF2]
3630 wakaba 1.52 name => sub {}, ## NOTE: CDATA [M12N]
3631     readonly => $GetHTMLBooleanAttrChecker->('readonly'),
3632 wakaba 1.56 replace => $GetHTMLEnumeratedAttrChecker->({document => 1, values => 1}),
3633     required => $GetHTMLBooleanAttrChecker->('required'),
3634 wakaba 1.52 size => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
3635     src => $HTMLURIAttrChecker,
3636 wakaba 1.56 ## TODO: step [WF2]
3637     target => $HTMLTargetAttrChecker,
3638     ## TODO: template
3639 wakaba 1.52 type => $GetHTMLEnumeratedAttrChecker->({
3640     text => 1, password => 1, checkbox => 1, radio => 1, submit => 1,
3641     reset => 1, file => 1, hidden => 1, image => 1, button => 1,
3642 wakaba 1.56 ## [WF2]
## NOTE: The WF2 keyword is |datetime|.
3643     datetime => 1, 'datetime-local' => 1, date => 1, month => 1, week => 1,
3644     time => 1, number => 1, range => 1, email => 1, url => 1,
3645     add => 1, remove => 1, 'move-up' => 1, 'move-down' => 1,
3646 wakaba 1.52 }),
3647     usemap => $HTMLUsemapAttrChecker,
3648 wakaba 1.56 value => sub {}, ## NOTE: CDATA [M12N] ## TODO: "optional except when the type attribute has the value "radio" or "checkbox"" [HTML4] ## TODO: constraints [WF2]
3649     ## TODO: "authors should ensure that in each set of radio buttons that one is initially "on"." [HTML4] [WF2]
3650 wakaba 1.52 }, {
3651     %HTMLAttrStatus,
3652     %HTMLM12NCommonAttrStatus,
3653 wakaba 1.56 accept => FEATURE_WF2 | FEATURE_M12N10_REC,
3654 wakaba 1.52 accesskey => FEATURE_M12N10_REC,
3655 wakaba 1.56 action => FEATURE_WF2,
3656 wakaba 1.52 align => FEATURE_M12N10_REC_DEPRECATED,
3657     alt => FEATURE_M12N10_REC,
3658 wakaba 1.56 autocomplete => FEATURE_WF2,
3659     autofocus => FEATURE_WF2,
3660 wakaba 1.52 checked => FEATURE_M12N10_REC,
3661     datafld => FEATURE_HTML4_REC_RESERVED,
3662     dataformatas => FEATURE_HTML4_REC_RESERVED,
3663     datasrc => FEATURE_HTML4_REC_RESERVED,
3664 wakaba 1.56 disabled => FEATURE_WF2 | FEATURE_M12N10_REC,
## |enctype|, |replace| and |target| have value checkers above, so they
## also need status entries; the values match those used on |button|.
enctype => FEATURE_WF2,
3665     form => FEATURE_WF2,
3666     inputmode => FEATURE_WF2 | FEATURE_XHTMLBASIC11_CR,
3667 wakaba 1.52 ismap => FEATURE_M12N10_REC,
3668     lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3669 wakaba 1.56 list => FEATURE_WF2,
3670     max => FEATURE_WF2,
3671     maxlength => FEATURE_WF2 | FEATURE_M12N10_REC,
3672     method => FEATURE_WF2,
3673     min => FEATURE_WF2,
3674 wakaba 1.52 name => FEATURE_M12N10_REC,
3675     onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3676     onchange => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3677     onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3678     onselect => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3679 wakaba 1.56 readonly => FEATURE_WF2 | FEATURE_M12N10_REC,
replace => FEATURE_WF2,
3680     required => FEATURE_WF2,
3681     size => FEATURE_WF2_DEPRECATED | FEATURE_M12N10_REC,
3682 wakaba 1.52 src => FEATURE_M12N10_REC,
3683 wakaba 1.56 step => FEATURE_WF2,
3684 wakaba 1.52 tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
target => FEATURE_WF2,
3685 wakaba 1.56 template => FEATURE_WF2,
3686 wakaba 1.52 type => FEATURE_M12N10_REC,
3687     usemap => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
3688     value => FEATURE_M12N10_REC,
3689     }),
3690     ## TODO: Tests
3691     ## TODO: Tests for <nest/> in <input>
3692     };
3693    
3694 wakaba 1.56 ## TODO: Form |name| attributes: MUST NOT conflict with RFC 3106 [WF2]
3695    
## |button| element definition (Web Forms 2.0 / HTML4 modularization).
## Content callbacks come from |%HTMLProseContentChecker|.  The first
## hash passed to |$GetHTMLAttrsChecker| maps attribute names to value
## checkers; the second maps attribute names to their status flags.
## |accesskey| is only reported as unsupported; |name| and |value|
## accept any value.
3696 wakaba 1.52 $Element->{$HTML_NS}->{button} = {
3697     %HTMLProseContentChecker, ## NOTE: %Flow; - something [XHTML10]
3698     ## TODO: -A|%formctrl;|form|fieldset [HTML4]
3699     ## TODO: image map (img) in |button| is "illegal" [HTML4].
3700     status => FEATURE_WF2 | FEATURE_M12N10_REC,
3701     check_attrs => $GetHTMLAttrsChecker->({
3702     accesskey => $AttrCheckerNotImplemented, ## TODO: Character
3703 wakaba 1.56 action => $HTMLURIAttrChecker,
3704     autofocus => $GetHTMLBooleanAttrChecker->('autofocus'),
3705 wakaba 1.52 disabled => $GetHTMLBooleanAttrChecker->('disabled'),
## |enctype| has a status entry below, so it is validated the same way
## as on |form| and |input|.
enctype => $HTMLIMTAttrChecker,
3706 wakaba 1.56 ## TODO: form [WF2]
3707     method => $GetHTMLEnumeratedAttrChecker->({
3708     get => 1, post => 1, put => 1, delete => 1,
3709     }),
3710 wakaba 1.52 name => sub {}, ## NOTE: CDATA [M12N]
3711 wakaba 1.56 oninvalid => $HTMLEventHandlerAttrChecker,
3712     replace => $GetHTMLEnumeratedAttrChecker->({document => 1, values => 1}),
3713     target => $HTMLTargetAttrChecker,
3714     ## TODO: template [WF2]
3715 wakaba 1.52 type => $GetHTMLEnumeratedAttrChecker->({
3716     button => 1, submit => 1, reset => 1,
3717     }),
3718     value => sub {}, ## NOTE: CDATA [M12N]
3719     }, {
3720     %HTMLAttrStatus,
3721     %HTMLM12NCommonAttrStatus,
3722     accesskey => FEATURE_M12N10_REC,
3723 wakaba 1.56 action => FEATURE_WF2,
3724     autofocus => FEATURE_WF2,
3725 wakaba 1.52 datafld => FEATURE_HTML4_REC_RESERVED,
3726     dataformatas => FEATURE_HTML4_REC_RESERVED,
3727     datasrc => FEATURE_HTML4_REC_RESERVED,
3728 wakaba 1.56 disabled => FEATURE_WF2 | FEATURE_M12N10_REC,
3729     enctype => FEATURE_WF2,
3730     form => FEATURE_WF2,
3731 wakaba 1.52 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3732 wakaba 1.56 method => FEATURE_WF2,
3733 wakaba 1.52 name => FEATURE_M12N10_REC,
3734     onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3735     onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3736 wakaba 1.56 oninvalid => FEATURE_WF2,
3737     replace => FEATURE_WF2,
3738 wakaba 1.52 tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3739 wakaba 1.56 target => FEATURE_WF2,
3740     template => FEATURE_WF2,
3741 wakaba 1.52 type => FEATURE_M12N10_REC,
3742     value => FEATURE_M12N10_REC,
3743     }),
3744     ## TODO: Tests
3745     ## TODO: Tests for <nest/> in <button>
3746     };
3747    
## |label| element definition.  Content callbacks come from
## |%HTMLPhrasingContentChecker|; the exclusion of nested |label| is not
## checked yet (see TODO).  Both registered attributes, |accesskey| and
## |for|, are only reported as unsupported.
3748     $Element->{$HTML_NS}->{label} = {
3749     %HTMLPhrasingContentChecker, ## NOTE: %Inline - label [XHTML10] ## TODO: -label
3750 wakaba 1.56 ## TODO: At most one form control [WF2]
3751 wakaba 1.52 status => FEATURE_WF2 | FEATURE_M12N10_REC,
3752     check_attrs => $GetHTMLAttrsChecker->({
3753     accesskey => $AttrCheckerNotImplemented, ## TODO: Character
3754     for => $AttrCheckerNotImplemented, ## TODO: IDREF ## TODO: Must be |id| of control [HTML4] ## TODO: Or, "may only contain one control element"
3755     }, {
3756     %HTMLAttrStatus,
3757     %HTMLM12NCommonAttrStatus,
3758 wakaba 1.56 accesskey => FEATURE_WF2 | FEATURE_M12N10_REC,
3759 wakaba 1.52 for => FEATURE_M12N10_REC,
3760     lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3761     onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3762     onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3763     }),
3764     ## TODO: Tests
3765     ## TODO: Tests for <nest/> in <label>
3766     };
3767    
## |select| element definition.  Content callbacks currently come from
## |%HTMLProseContentChecker|; the (optgroup|option)* content model is
## not enforced yet (see TODO).  |accesskey|, |form| and |pattern| have
## status entries but no value checker yet; |name| accepts any value.
3768     $Element->{$HTML_NS}->{select} = {
3769 wakaba 1.56 %HTMLProseContentChecker, ## TODO: (optgroup|option)* [HTML4] + [WF2] ## TODO: SHOULD avoid empty and visible [WF2]
3770 wakaba 1.52 ## TODO: author should SELECTED at least one OPTION in non-MULTIPLE case [HTML4].
3771     ## TODO: more than one OPTION with SELECTED in non-MULTIPLE case is "error" [HTML4]
3772     status => FEATURE_WF2 | FEATURE_M12N10_REC,
## NOTE(review): |is_root| presumably marks the element as allowed as a
## document root -- confirm against the consumer of this flag.
3773 wakaba 1.56 is_root => 1, ## TODO: SHOULD NOT in application/xhtml+xml [WF2]
3774 wakaba 1.52 check_attrs => $GetHTMLAttrsChecker->({
3775 wakaba 1.56 ## TODO: accesskey [WF2]
3776     autofocus => $GetHTMLBooleanAttrChecker->('autofocus'),
3777 wakaba 1.52 disabled => $GetHTMLBooleanAttrChecker->('disabled'),
3778 wakaba 1.56 data => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
3779     ## TODO: form [WF2]
3780 wakaba 1.52 multiple => $GetHTMLBooleanAttrChecker->('multiple'),
3781     name => sub {}, ## NOTE: CDATA [M12N]
3782 wakaba 1.56 oninvalid => $HTMLEventHandlerAttrChecker,
3783     ## TODO: pattern [WF2] ## TODO: |title| semantics
3784 wakaba 1.52 size => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
3785     }, {
3786     %HTMLAttrStatus,
3787     %HTMLM12NCommonAttrStatus,
3788 wakaba 1.56 accesskey => FEATURE_WF2,
3789     autofocus => FEATURE_WF2,
3790     data => FEATURE_WF2,
3791 wakaba 1.52 datafld => FEATURE_HTML4_REC_RESERVED,
3792     dataformatas => FEATURE_HTML4_REC_RESERVED,
3793     datasrc => FEATURE_HTML4_REC_RESERVED,
3794 wakaba 1.56 disabled => FEATURE_WF2 | FEATURE_M12N10_REC,
3795     form => FEATURE_WF2,
3796 wakaba 1.52 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3797     multiple => FEATURE_M12N10_REC,
3798     name => FEATURE_M12N10_REC,
3799     onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3800     onchange => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3801     onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3802 wakaba 1.56 oninvalid => FEATURE_WF2,
3803     pattern => FEATURE_WF2,
3804 wakaba 1.52 size => FEATURE_M12N10_REC,
3805     tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3806     }),
3807     ## TODO: Tests
3808     ## TODO: Tests for <nest/> in <select>
3809     };
3810 wakaba 1.1
## |datalist| [WF2].  Content is currently checked as prose content.
## TODO: the content model should be (transparent | option)*
## TODO: an |option| child MUST be empty [WF2]
## TODO: Tests
## TODO: Tests for <nest/> in <datalist>
{
  ## Attribute name => value checker.
  my %attr_checker = (
    data => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
  );

  ## Attribute name => feature status flags.
  my %attr_status = (
    %HTMLAttrStatus,
    data => FEATURE_WF2,
  );

  $Element->{$HTML_NS}{datalist} = {
    %HTMLProseContentChecker,
    status      => FEATURE_WF2,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
  };
}
3824 wakaba 1.49
## |optgroup| [HTML4] [WF2].  Content is currently checked as prose content.
## TODO: the content model should be (option|optgroup)* [HTML4] + [WF2];
## SHOULD avoid empty and visible [WF2]
## TODO: Tests
## TODO: Tests for <nest/> in <optgroup>
{
  ## Attribute name => value checker.
  my %attr_checker = (
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    label    => sub {}, ## NOTE: Text [M12N] ## TODO: required
  );

  ## Attribute name => feature status flags.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    disabled => FEATURE_WF2 | FEATURE_M12N10_REC,
    label    => FEATURE_M12N10_REC,
    lang     => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  );

  $Element->{$HTML_NS}{optgroup} = {
    %HTMLProseContentChecker,
    status      => FEATURE_WF2 | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
  };
}
3841    
## |option| [HTML4] [WF2].  Content is checked by the text-only checker.
##
## The first hash given to |$GetHTMLAttrsChecker| maps attribute names
## to value checkers; the second maps attribute names to feature
## status flags.
$Element->{$HTML_NS}->{option} = {
  %HTMLTextChecker,
  status => FEATURE_WF2 | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    label => sub {}, ## NOTE: Text [M12N]
    selected => $GetHTMLBooleanAttrChecker->('selected'),
    value => sub {}, ## NOTE: CDATA [M12N]
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    ## NOTE: The two flags MUST be combined with |, not separated by a
    ## comma.  A comma would add a stray list element and shift every
    ## following key/value pair of this hash by one position.
    disabled => FEATURE_WF2 | FEATURE_M12N10_REC,
    label => FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    selected => FEATURE_M12N10_REC,
    value => FEATURE_M12N10_REC,
  }),
  ## TODO: Tests
  ## TODO: Tests for <nest/> in <option>
};
3862 wakaba 1.49
## |textarea| [HTML4] [WF2].  Content is checked by the text-only checker.
## TODO: Tests
## TODO: Tests for <nest/> in <textarea>
{
  ## Attribute name => value checker.
  my %attr_checker = (
    accept    => $HTMLIMTAttrChecker, ## TODO: MUST be a text-based type
    accesskey => $AttrCheckerNotImplemented, ## TODO: Character
    autofocus => $GetHTMLBooleanAttrChecker->('autofocus'),
    ## TODO: |cols| SHOULD be specified if wrap=hard [WF2]
    cols      => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
    disabled  => $GetHTMLBooleanAttrChecker->('disabled'),
    ## TODO: form [WF2]
    ## TODO: inputmode [WF2]
    maxlength => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
    name      => sub {}, ## NOTE: CDATA [M12N]
    ## TODO: pattern [WF2] ## TODO: |title| special semantics
    readonly  => $GetHTMLBooleanAttrChecker->('readonly'),
    required  => $GetHTMLBooleanAttrChecker->('required'),
    rows      => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
    oninvalid => $HTMLEventHandlerAttrChecker,
    wrap      => $GetHTMLEnumeratedAttrChecker->({soft => 1, hard => 1}),
  );

  ## Attribute name => feature status flags.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accept       => FEATURE_WF2,
    accesskey    => FEATURE_M12N10_REC,
    autofocus    => FEATURE_WF2,
    cols         => FEATURE_M12N10_REC,
    datafld      => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datasrc      => FEATURE_HTML4_REC_RESERVED,
    disabled     => FEATURE_WF2 | FEATURE_M12N10_REC,
    form         => FEATURE_WF2,
    inputmode    => FEATURE_WF2 | FEATURE_XHTMLBASIC11_CR,
    lang         => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    maxlength    => FEATURE_WF2,
    name         => FEATURE_M12N10_REC,
    onblur       => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onchange     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onfocus      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    oninvalid    => FEATURE_WF2,
    onselect     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    pattern      => FEATURE_WF2,
    readonly     => FEATURE_WF2 | FEATURE_M12N10_REC,
    required     => FEATURE_WF2,
    rows         => FEATURE_M12N10_REC,
    tabindex     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    wrap         => FEATURE_WF2,
  );

  $Element->{$HTML_NS}{textarea} = {
    %HTMLTextChecker,
    status      => FEATURE_WF2 | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
  };
}
3913 wakaba 1.49
## |output| [WF2].  Content is checked as phrasing content (Inline [WF2]).
## NOTE: "The output element should be used when ..." [WF2]
## TODO: Tests
## TODO: Tests for <nest/> in <output>
{
  ## No attribute-specific value checkers are implemented yet.
  ## TODO: for [WF2]
  ## TODO: form [WF2]
  ## TODO: name [WF2]
  ## onformchange [WF2]
  ## onforminput [WF2]
  my %attr_checker = ();

  ## Attribute name => feature status flags.
  my %attr_status = (
    %HTMLAttrStatus,
    for          => FEATURE_WF2,
    form         => FEATURE_WF2,
    name         => FEATURE_WF2,
    onchange     => FEATURE_HTML5_DEFAULT | FEATURE_WF2,
    onformchange => FEATURE_WF2,
    onforminput  => FEATURE_WF2,
  );

  $Element->{$HTML_NS}{output} = {
    %HTMLPhrasingContentChecker,
    status      => FEATURE_WF2,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
  };
}
3936    
3937     ## TODO: repetition template
3938    
## |isindex| [HTML4]: deprecated; checked as an empty element.
## TODO: Tests
## TODO: Tests for <nest/> in <isindex>
{
  ## Attribute name => value checker.
  my %attr_checker = (
    prompt => sub {}, ## NOTE: Text [M12N]
  );

  ## Attribute name => feature status flags.
  my %attr_status = (
    %HTMLAttrStatus,
    class  => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    dir    => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    id     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    lang   => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    prompt => FEATURE_M12N10_REC_DEPRECATED,
    style  => FEATURE_XHTML10_REC,
    title  => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  );

  $Element->{$HTML_NS}{isindex} = {
    %HTMLEmptyChecker,
    status      => FEATURE_M12N10_REC_DEPRECATED
                 | Whatpm::ContentChecker::FEATURE_DEPRECATED_SHOULD, ## [HTML4]
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
  };
}
3958 wakaba 1.49
## |script| [HTML5] [HTML4].
##
## With a |src| attribute the element must be empty: any child element
## (other than a "plus" element) or significant text is reported.
## Without |src|, the script type is derived from the |type| and
## |language| attributes and reported at the end of the element as an
## "unsupported" item, because script content itself is not checked.
$Element->{$HTML_NS}->{script} = {
  %HTMLChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    src => $HTMLURIAttrChecker,
    defer => $GetHTMLBooleanAttrChecker->('defer'),
    async => $GetHTMLBooleanAttrChecker->('async'),
    type => $HTMLIMTAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    async => FEATURE_HTML5_DEFAULT,
    charset => FEATURE_M12N10_REC,
    defer => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    event => FEATURE_HTML4_REC_RESERVED,
    for => FEATURE_HTML4_REC_RESERVED,
    id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    language => FEATURE_M12N10_REC_DEPRECATED,
    src => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    type => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  }),
  ## Decides between "external script, must be empty" and "inline
  ## script of some type", recording the result in |$element_state|.
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    if ($item->{node}->has_attribute_ns (undef, 'src')) {
      $element_state->{must_be_empty} = 1;
    } else {
      ## NOTE: No content model conformance in HTML5 spec.
      my $type = $item->{node}->get_attribute_ns (undef, 'type');
      my $language = $item->{node}->get_attribute_ns (undef, 'language');
      if (defined $type) {
        ## |type| takes precedence over |language|.  An empty |type|
        ## selects the default type; a non-empty |type| is used as is,
        ## whatever the value of |language| (even the empty string).
        $type = 'text/javascript' if $type eq '';
      } elsif (defined $language and $language ne '') {
        ## |language| is consulted only when |type| is absent.
        $type = 'text/' . $language;
      } else {
        ## Neither attribute, or no |type| and an empty |language|.
        $type = 'text/javascript';
      }
      $element_state->{script_type} = $type; ## TODO: $type normalization
    }
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } else {
      if ($element_state->{must_be_empty}) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed');
      }
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant and
        $element_state->{must_be_empty}) {
      $self->{onerror}->(node => $child_node,
                         type => 'character not allowed');
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    unless ($element_state->{must_be_empty}) {
      ## Inline script: report its type as unsupported, since the
      ## script content is not checked here.
      $self->{onerror}->(node => $item->{node}, level => 'unsupported',
                         type => 'script:'.$element_state->{script_type});
      ## TODO: text/javascript support

      $HTMLChecker{check_end}->(@_);
    }
  },
};
4037 wakaba 1.25 ## ISSUE: Significant check and text child node
4038 wakaba 1.1
4039     ## NOTE: When script is disabled.
## |noscript| [HTML5] [HTML4].
##
## Inside |head| (|$self->{flag}->{in_head}| is true) only |link|,
## unscoped |style| and |meta| with an |http-equiv| other than
## Content-Type are accepted as children, and significant text is an
## error.  Elsewhere the element is checked as a transparent element
## in which nested |noscript| is forbidden.
{
  ## Attribute name => feature status flags.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  );

  $Element->{$HTML_NS}{noscript} = {
    %HTMLTransparentChecker,
    status      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),

    check_start => sub {
      my ($self, $item, $element_state) = @_;
      my $el = $item->{node};

      ## Reported whenever the owner document is not an HTML document.
      $self->{onerror}->(node => $el, type => 'in XML:noscript')
          unless $el->owner_document->manakai_is_html;

      unless ($self->{flag}->{in_head}) {
        $self->_add_minus_elements ($element_state,
                                    {$HTML_NS => {noscript => 1}});
      }
    },

    check_child_element => sub {
      my ($self, $item, $child_el, $child_nsuri, $child_ln,
          $child_is_transparent, $element_state) = @_;

      ## Outside |head|: plain transparent-element rules.
      return $HTMLTransparentChecker{check_child_element}->(@_)
          unless $self->{flag}->{in_head};

      ## Reports a child that is not allowed in |head| |noscript|.
      my $forbid = sub {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:head noscript',
                           level => $self->{must_level});
      };

      if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:minus',
                           level => $self->{must_level});
      } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
        #
      } elsif ($child_nsuri ne $HTML_NS) {
        $forbid->();
      } elsif ($child_ln eq 'link') {
        #
      } elsif ($child_ln eq 'style') {
        $forbid->() if $child_el->has_attribute_ns (undef, 'scoped');
      } elsif ($child_ln eq 'meta') {
        ## |meta| needs an |http-equiv| that is not Content-Type.
        my $pragma = $child_el->get_attribute_node_ns (undef, 'http-equiv');
        ## TODO: case
        my $rejected = $pragma ? lc ($pragma->value) eq 'content-type' : 1;
        $forbid->() if $rejected;
      } else {
        $forbid->();
      }
    },

    check_child_text => sub {
      my ($self, $item, $child_node, $has_significant, $element_state) = @_;

      return $HTMLTransparentChecker{check_child_text}->(@_)
          unless $self->{flag}->{in_head};

      if ($has_significant) {
        $self->{onerror}->(node => $child_node,
                           type => 'character not allowed');
      }
    },

    check_end => sub {
      my ($self, $item, $element_state) = @_;
      $self->_remove_minus_elements ($element_state);

      ## Pick the end-of-element check matching the current context.
      my $base = $self->{flag}->{in_head}
          ? \%HTMLChecker : \%HTMLPhrasingContentChecker;
      $base->{check_end}->(@_);
    },
  };
}
4125 wakaba 1.3 ## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
4126 wakaba 1.1
## |event-source| [HTML5]: checked as an empty element with a URI
## |src| attribute.
{
  my %attr_checker = (src => $HTMLURIAttrChecker);
  my %attr_status  = (%HTMLAttrStatus, src => FEATURE_HTML5_LC);

  $Element->{$HTML_NS}{'event-source'} = {
    %HTMLEmptyChecker,
    status      => FEATURE_HTML5_LC,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
  };
}
4137    
## |details| [HTML5]: a |legend| followed by prose content.
##
## |$element_state->{has_legend}| records that a |legend| child was
## seen; |$element_state->{has_non_legend}| records that something
## which must not precede the |legend| was seen (including an earlier
## |legend|).
{
  my %attr_checker = (open => $GetHTMLBooleanAttrChecker->('open'));
  my %attr_status  = (%HTMLAttrStatus, open => FEATURE_HTML5_WD);

  $Element->{$HTML_NS}{details} = {
    %HTMLProseContentChecker,
    status      => FEATURE_HTML5_WD,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),

    ## NOTE: legend, Prose
    check_child_element => sub {
      my ($self, $item, $child_el, $child_nsuri, $child_ln,
          $child_is_transparent, $element_state) = @_;
      if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:minus',
                           level => $self->{must_level});
        $element_state->{has_non_legend} = 1;
      } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
        #
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'legend') {
        ## A |legend| is only allowed before any other content.
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:details legend',
                           level => $self->{must_level})
            if $element_state->{has_non_legend};
        $element_state->{has_legend} = 1;
        $element_state->{has_non_legend} = 1;
      } else {
        $HTMLProseContentChecker{check_child_element}->(@_);
        if (not $child_is_transparent) {
          $element_state->{has_non_legend} = 1;
        }
        ## ISSUE: |<details><object><legend>xx</legend></object>..</details>|
        ## is conforming?
      }
    },

    check_child_text => sub {
      my ($self, $item, $child_node, $has_significant, $element_state) = @_;
      $element_state->{has_non_legend} = 1 if $has_significant;
    },

    check_end => sub {
      my ($self, $item, $element_state) = @_;

      $self->{onerror}->(node => $item->{node},
                         type => 'element missing:legend',
                         level => $self->{must_level})
          unless $element_state->{has_legend};

      $HTMLProseContentChecker{check_end}->(@_);
      ## ISSUE: |<details><legend>aa</legend></details>| error?
    },
  };
}
4192    
## |datagrid| [HTML5].
##
## Content model:
##   Prose -(text* table Prose*) | table | select | datalist | Empty
##
## |$element_state->{phase}| is one of:
##   'any'   - nothing significant seen yet;
##   'prose' - prose content has started;
##   'none'  - a sole |table|, |select| or |datalist| child was seen,
##             so nothing more is allowed.
##
## ISSUE: "xxx<table/>" is disallowed; "<select/>aaa" and "<datalist/>aa"
## are not disallowed (assuming that form control contents are also
## prose content).
{
  ## HTML children that, when they come first, end the content.
  my %sole_child = (table => 1, select => 1, datalist => 1);

  my %attr_checker = (
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    multiple => $GetHTMLBooleanAttrChecker->('multiple'),
  );
  my %attr_status = (
    %HTMLAttrStatus,
    disabled => FEATURE_HTML5_WD,
    multiple => FEATURE_HTML5_WD,
  );

  $Element->{$HTML_NS}{datagrid} = {
    %HTMLProseContentChecker,
    status      => FEATURE_HTML5_WD,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),

    check_start => sub {
      my ($self, $item, $element_state) = @_;

      ## |a| and nested |datagrid| are forbidden inside the element.
      $self->_add_minus_elements ($element_state,
                                  {$HTML_NS => {a => 1, datagrid => 1}});
      $element_state->{phase} = 'any';
    },

    check_child_element => sub {
      my ($self, $item, $child_el, $child_nsuri, $child_ln,
          $child_is_transparent, $element_state) = @_;

      ## Reports the child as not allowed at this position.
      my $reject = sub {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed');
      };

      if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:minus',
                           level => $self->{must_level});
      } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
        #
      } elsif ($element_state->{phase} eq 'prose') {
        my $is_prose = $HTMLProseContent->{$child_nsuri}->{$child_ln};
        ## A |table| must not be the first element child when only
        ## text preceded it.
        my $leading_table = ($is_prose and
                             not $element_state->{has_element} and
                             $child_nsuri eq $HTML_NS and
                             $child_ln eq 'table');
        $reject->() if not $is_prose or $leading_table;
        $element_state->{has_element} = 1;
      } elsif ($element_state->{phase} eq 'any') {
        if ($child_nsuri eq $HTML_NS and $sole_child{$child_ln}) {
          $element_state->{phase} = 'none';
        } elsif ($HTMLProseContent->{$child_nsuri}->{$child_ln}) {
          $element_state->{has_element} = 1;
          $element_state->{phase} = 'prose';
          ## TODO: transparent?
        } else {
          $reject->();
        }
      } elsif ($element_state->{phase} eq 'none') {
        $reject->();
      } else {
        die "check_child_element: Bad |datagrid| phase: $element_state->{phase}";
      }
    },

    check_child_text => sub {
      my ($self, $item, $child_node, $has_significant, $element_state) = @_;
      return unless $has_significant;

      if ($element_state->{phase} eq 'prose') {
        #
      } elsif ($element_state->{phase} eq 'any') {
        $element_state->{phase} = 'prose';
      } else {
        $self->{onerror}->(node => $child_node,
                           type => 'character not allowed');
      }
    },

    check_end => sub {
      my ($self, $item, $element_state) = @_;
      $self->_remove_minus_elements ($element_state);

      my $base = $element_state->{phase} eq 'none'
          ? \%HTMLChecker : \%HTMLPhrasingContentChecker;
      $base->{check_end}->(@_);
    },
  };
}
4282    
## |command| [HTML5]: checked as an empty element.
{
  ## Values accepted for the |type| attribute.
  my %command_type = (command => 1, checkbox => 1, radio => 1);

  ## Attribute name => value checker.
  my %attr_checker = (
    checked    => $GetHTMLBooleanAttrChecker->('checked'),
    default    => $GetHTMLBooleanAttrChecker->('default'),
    disabled   => $GetHTMLBooleanAttrChecker->('disabled'),
    hidden     => $GetHTMLBooleanAttrChecker->('hidden'),
    icon       => $HTMLURIAttrChecker,
    label      => sub { }, ## NOTE: No conformance criteria
    radiogroup => sub { }, ## NOTE: No conformance criteria
    type       => sub {
      my ($self, $attr) = @_;
      my $keyword = $attr->value;
      unless ($command_type{$keyword}) {
        $self->{onerror}->(node => $attr,
                           type => 'attribute value not allowed');
      }
    },
  );

  ## Attribute name => feature status flags.
  my %attr_status = (
    %HTMLAttrStatus,
    checked    => FEATURE_HTML5_WD,
    default    => FEATURE_HTML5_WD,
    disabled   => FEATURE_HTML5_WD,
    hidden     => FEATURE_HTML5_WD,
    icon       => FEATURE_HTML5_WD,
    label      => FEATURE_HTML5_WD,
    radiogroup => FEATURE_HTML5_WD,
    type       => FEATURE_HTML5_WD,
  );

  $Element->{$HTML_NS}{command} = {
    %HTMLEmptyChecker,
    status      => FEATURE_HTML5_WD,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
  };
}
4313    
## |menu| [HTML5] [HTML4]: either a list of |li| children or phrasing
## content, but not a mixture.
##
## |$element_state->{phase}| is one of 'li or phrasing' (undecided),
## 'li' and 'phrasing'.  |$self->{flag}->{in_menu}| is set while the
## element is being checked.
{
  ## Tries to put the content model into the phase |$wanted| ('li' or
  ## 'phrasing').  Returns true if the phase already is |$wanted| or
  ## was still undecided (in which case it is now fixed to |$wanted|),
  ## false if the other kind of content has already been seen.
  my $enter_phase = sub {
    my ($element_state, $wanted) = @_;
    return 1 if $element_state->{phase} eq $wanted;
    return 0 unless $element_state->{phase} eq 'li or phrasing';
    $element_state->{phase} = $wanted;
    return 1;
  };

  ## Attribute name => value checker.
  my %attr_checker = (
    autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
    id => sub {
      ## NOTE: same as global |id=""|, with |$self->{menu}| registration
      my ($self, $attr) = @_;
      my $id = $attr->value;
      if (not length $id) {
        ## NOTE: MUST contain at least one character
        $self->{onerror}->(node => $attr, type => 'empty attribute value');
      } elsif ($self->{id}->{$id}) {
        $self->{onerror}->(node => $attr, type => 'duplicate ID');
        push @{$self->{id}->{$id}}, $attr;
      } else {
        $self->{id}->{$id} = [$attr];
      }
      $self->{onerror}->(node => $attr, type => 'space in ID')
          if $id =~ /[\x09-\x0D\x20]/;
      ## The first |menu| with a given ID is the one registered.
      $self->{menu}->{$id} ||= $attr;
      ## ISSUE: <menu id=""><p contextmenu=""> match?
    },
    label => sub { }, ## NOTE: No conformance criteria
    type  => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
  );

  ## Attribute name => feature status flags.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    autosubmit => FEATURE_HTML5_WD,
    compat     => FEATURE_M12N10_REC_DEPRECATED,
    label      => FEATURE_HTML5_WD,
    lang       => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    type       => FEATURE_HTML5_WD,
  );

  $Element->{$HTML_NS}{menu} = {
    %HTMLPhrasingContentChecker,
    ## NOTE: We don't want any |menu| element warned as deprecated, so
    ## FEATURE_M12N10_REC is used instead of the following:
    #status => FEATURE_M12N10_REC_DEPRECATED | FEATURE_HTML5_WD,
    status      => FEATURE_M12N10_REC | FEATURE_HTML5_WD,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),

    check_start => sub {
      my ($self, $item, $element_state) = @_;
      $element_state->{phase} = 'li or phrasing';
      ## Remember the outer flag value so |check_end| can restore it.
      $element_state->{in_menu_original} = $self->{flag}->{in_menu};
      $self->{flag}->{in_menu} = 1;
    },

    check_child_element => sub {
      my ($self, $item, $child_el, $child_nsuri, $child_ln,
          $child_is_transparent, $element_state) = @_;
      if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:minus',
                           level => $self->{must_level});
      } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
        #
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'li') {
        $enter_phase->($element_state, 'li') or
            $self->{onerror}->(node => $child_el,
                               type => 'element not allowed');
      } elsif ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
        $enter_phase->($element_state, 'phrasing') or
            $self->{onerror}->(node => $child_el,
                               type => 'element not allowed');
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed');
      }
    },

    check_child_text => sub {
      my ($self, $item, $child_node, $has_significant, $element_state) = @_;
      return unless $has_significant;

      ## Significant text counts as phrasing content.
      $enter_phase->($element_state, 'phrasing') or
          $self->{onerror}->(node => $child_node,
                             type => 'character not allowed');
    },

    check_end => sub {
      my ($self, $item, $element_state) = @_;
      delete $self->{flag}->{in_menu}
          unless $element_state->{in_menu_original};

      ## 'phrasing' and 'li or phrasing' both end as phrasing content.
      my $base = $element_state->{phase} eq 'li'
          ? \%HTMLChecker : \%HTMLPhrasingContentChecker;
      $base->{check_end}->(@_);
    },
  };
}
4412    
## |datatemplate| [HTML5, at risk]: only |rule| children (and "plus"
## elements) are allowed; significant text is an error.  The element
## is flagged with |is_xml_root|.
$Element->{$HTML_NS}{datatemplate} = {
  %HTMLChecker,
  status      => FEATURE_HTML5_AT_RISK,
  is_xml_root => 1,

  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri ne $HTML_NS or $child_ln ne 'rule') {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:datatemplate');
    }
  },

  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node, type => 'character not allowed')
        if $has_significant;
  },
};
4440    
## |rule| [HTML5, at risk]: any children and text are accepted, and
## |nest| is added as a "plus" element for the element's content.
##
## NOTE: "MAY be anything that, when the parent |datatemplate|
## is applied to some conforming data, results in a conforming DOM tree.":
## We don't check against this.
{
  my %attr_checker = (
    condition => $HTMLSelectorsAttrChecker,
    mode      => $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker,
  );
  my %attr_status = (
    %HTMLAttrStatus,
    condition => FEATURE_HTML5_AT_RISK,
    mode      => FEATURE_HTML5_AT_RISK,
  );

  $Element->{$HTML_NS}{rule} = {
    %HTMLChecker,
    status      => FEATURE_HTML5_AT_RISK,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),

    check_start => sub {
      my ($self, $item, $element_state) = @_;
      $self->_add_plus_elements ($element_state, {$HTML_NS => {nest => 1}});
    },

    ## Content is deliberately left unchecked.
    check_child_element => sub { },
    check_child_text    => sub { },

    check_end => sub {
      my ($self, $item, $element_state) = @_;
      $self->_remove_plus_elements ($element_state);
      $HTMLChecker{check_end}->(@_);
    },
  };
}
4467    
## |nest| [HTML5, at risk]: checked as an empty element.
{
  my %attr_checker = (
    filter => $HTMLSelectorsAttrChecker,
    ## |mode| must be one or more characters, none of which is
    ## U+0009..U+000D or U+0020.
    mode   => sub {
      my ($self, $attr) = @_;
      my $mode = $attr->value;
      $self->{onerror}->(node => $attr, type => 'mode:syntax error')
          unless $mode =~ /\A[^\x09-\x0D\x20]+\z/;
    },
  );
  my %attr_status = (
    %HTMLAttrStatus,
    filter => FEATURE_HTML5_AT_RISK,
    mode   => FEATURE_HTML5_AT_RISK,
  );

  $Element->{$HTML_NS}{nest} = {
    %HTMLEmptyChecker,
    status      => FEATURE_HTML5_AT_RISK,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
  };
}
4486    
## |legend| [HTML5] [HTML4]: checked as phrasing content.
{
  ## No attribute-specific value checkers are enabled.  The following
  ## are kept disabled:
  # accesskey => $AttrCheckerNotImplemented, ## TODO: Character ## TODO: This attribute is not part of HTML5
  # align => $GetHTMLEnumeratedAttrChecker->({
  #   top => 1, bottom => 1, left => 1, right => 1,
  # }),
  my %attr_checker = ();

  ## Attribute name => feature status flags.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accesskey => FEATURE_M12N10_REC,
    align     => FEATURE_M12N10_REC_DEPRECATED,
    lang      => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  );

  $Element->{$HTML_NS}{legend} = {
    %HTMLPhrasingContentChecker,
    status      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
  };
}
4503    
## |div| [HTML5] [HTML4]: checked as prose content; no
## attribute-specific value checkers.
{
  ## Attribute name => feature status flags.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align        => FEATURE_M12N10_REC_DEPRECATED,
    datafld      => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datasrc      => FEATURE_HTML4_REC_RESERVED,
    lang         => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  );

  $Element->{$HTML_NS}{div} = {
    %HTMLProseContentChecker,
    status      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
}
4517    
## |font| [HTML5, at risk] [HTML4, deprecated]: checked as a
## transparent element.
{
  ## TODO: attribute-specific value checkers
  my %attr_checker = ();

  ## Attribute name => feature status flags.
  my %attr_status = (
    %HTMLAttrStatus,
    class => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    color => FEATURE_M12N10_REC_DEPRECATED,
    dir   => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    face  => FEATURE_M12N10_REC_DEPRECATED,
    id    => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    lang  => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    size  => FEATURE_M12N10_REC_DEPRECATED,
    style => FEATURE_HTML5_AT_RISK | FEATURE_XHTML10_REC,
    title => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  );

  $Element->{$HTML_NS}{font} = {
    %HTMLTransparentChecker,
    status      => FEATURE_HTML5_AT_RISK | FEATURE_M12N10_REC_DEPRECATED,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
  };
}
4535 wakaba 1.49
4536     ## TODO: frameset FEATURE_M12N10_REC
4537     ## class title id cols rows onload onunload style(x10)
4538     ## frame frameborder longdesc marginheight marginwidth noresize scrolling src name(deprecated) class,id,title,style(x10)
4539     ## noframes Common, lang(xhtml10)
4540    
4541     ## TODO: deprecated:
4542     ## basefont color face id size
4543     ## center Common lang(xhtml10)
4544     ## dir Common compat lang(xhtml10)
4545    
4546     ## TODO: CR: ruby rb rt rp rbc rtc @rbspan
4547 wakaba 1.56
4548     =pod
4549    
4550     WF2: Documents MUST comply to [CHARMOD].
4551     WF2: Vendor extensions MUST NOT be used.
4552    
4553     =cut
4554 wakaba 1.1
## Flag the HTML namespace entry as loaded.
$Whatpm::ContentChecker::Namespace->{$HTML_NS}{loaded} = 1;

1; ## A module file must end with a true value.

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24