/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.58 - (hide annotations) (download)
Sat Mar 1 00:42:53 2008 UTC (17 years, 4 months ago) by wakaba
Branch: MAIN
Changes since 1.57: +34 -11 lines
++ whatpm/t/ChangeLog	1 Mar 2008 00:26:59 -0000
2008-03-01  Wakaba  <wakaba@suika.fam.cx>

	* tokenizer-test-1.test: Updated (HTML5 revision 1286).

	* content-model-2.dat: Updated (HTML5 revision 1275).

++ whatpm/Whatpm/ChangeLog	1 Mar 2008 00:19:36 -0000
2008-03-01  Wakaba  <wakaba@suika.fam.cx>

	* _NamedEntityList.pm: Updated (HTML5 revision 1286).

	* HTML.pm.src: |charset| in |content| attribute is
	case-insensitive (HTML5 revision 1270).

++ whatpm/Whatpm/HTML/ChangeLog	1 Mar 2008 00:07:44 -0000
2008-03-01  Wakaba  <wakaba@suika.fam.cx>

	* Serializer.pm (get_inner_html): Escape NBSP (HTML5 revision
	1277).

++ whatpm/Whatpm/ContentChecker/ChangeLog	29 Feb 2008 23:29:54 -0000
2008-03-01  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm: Sectioning root category added.  |blockquote|
	is no longer a sectioning content.

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
6    
7 wakaba 1.58 sub FEATURE_HTML5_ROLE () {
8     Whatpm::ContentChecker::FEATURE_STATUS_WD
9     ## TODO: svg:*/@role
10     }
11    
12 wakaba 1.54 sub FEATURE_HTML5_LC () {
13     Whatpm::ContentChecker::FEATURE_STATUS_LC |
14     Whatpm::ContentChecker::FEATURE_ALLOWED
15     }
16     sub FEATURE_HTML5_AT_RISK () {
17     Whatpm::ContentChecker::FEATURE_STATUS_WD |
18     Whatpm::ContentChecker::FEATURE_ALLOWED
19     }
20     sub FEATURE_HTML5_WD () {
21     Whatpm::ContentChecker::FEATURE_STATUS_WD |
22     Whatpm::ContentChecker::FEATURE_ALLOWED
23     }
24     sub FEATURE_HTML5_FD () {
25     Whatpm::ContentChecker::FEATURE_STATUS_WD |
26     Whatpm::ContentChecker::FEATURE_ALLOWED
27     }
28     sub FEATURE_HTML5_DEFAULT () {
29     Whatpm::ContentChecker::FEATURE_STATUS_WD |
30     Whatpm::ContentChecker::FEATURE_ALLOWED
31 wakaba 1.49 }
32 wakaba 1.54 sub FEATURE_HTML5_DROPPED () {
33     ## NOTE: Was part of HTML5, but was dropped.
34 wakaba 1.49 Whatpm::ContentChecker::FEATURE_STATUS_WD
35     }
36 wakaba 1.54 sub FEATURE_WF2 () {
37     Whatpm::ContentChecker::FEATURE_STATUS_LC |
38     Whatpm::ContentChecker::FEATURE_ALLOWED
39     }
40 wakaba 1.56 sub FEATURE_WF2_DEPRECATED () {
41     Whatpm::ContentChecker::FEATURE_STATUS_LC
42     ## NOTE: MUST NOT be used.
43     }
44 wakaba 1.49
45 wakaba 1.58 ## TODO: RDFa LC
46    
47     ## NOTE: XHTML Role LCWD has almost no information on how the |role|
48     ## attribute can be used - the only requirement on that matter is:
49     ## "the attribute MUST be referenced using its namespace-qualified form" (and
50     ## this is a host language conformance!).
51    
52 wakaba 1.55 sub FEATURE_XHTMLBASIC11_CR () {
53     ## NOTE: Only additions to M12N10_REC are marked.
54     Whatpm::ContentChecker::FEATURE_STATUS_CR
55     }
56     sub FEATURE_XHTMLBASIC11_CR_DEPRECATED () {
57     Whatpm::ContentChecker::FEATURE_STATUS_CR |
58     Whatpm::ContentChecker::FEATURE_DEPRECATED_INFO
59     }
60    
61 wakaba 1.49 ## NOTE: M12N10 status is based on its abstract module definition.
62     ## It contains a number of problems. (However, again, it's a REC!)
63 wakaba 1.54 sub FEATURE_M12N10_REC () {
64     ## NOTE: Oh, XHTML m12n 1.0 passed the CR phase! W3C Process suck!
65     Whatpm::ContentChecker::FEATURE_STATUS_REC
66     }
67     sub FEATURE_M12N10_REC_DEPRECATED () {
68     Whatpm::ContentChecker::FEATURE_STATUS_REC |
69     Whatpm::ContentChecker::FEATURE_DEPRECATED_INFO
70     }
71 wakaba 1.58 ## NOTE: XHTML M12N 1.1 is a LC at the time of writing and no
72     ## addition from 1.0.
73 wakaba 1.49
74     ## NOTE: XHTML10 status is based on its transitional and frameset DTDs
75     ## (second edition). Only missing attributes from M12N10 abstract
76     ## definition are added.
77 wakaba 1.54 sub FEATURE_XHTML10_REC () {
        ## Status flag for features taken from the XHTML 1.0 DTDs.
        ## NOTE(review): the name says REC but the value is FEATURE_STATUS_CR,
        ## whereas FEATURE_M12N10_REC uses FEATURE_STATUS_REC - confirm that
        ## this is intended.
78     Whatpm::ContentChecker::FEATURE_STATUS_CR
79     }
80    
81 wakaba 1.58 ## TODO: ISO-HTML
82    
83 wakaba 1.49 ## NOTE: HTML4 status is based on its transitional and frameset DTDs (HTML
84     ## 4.01). Only missing attributes from XHTML10 are added.
85 wakaba 1.54 sub FEATURE_HTML4_REC_RESERVED () {
86     Whatpm::ContentChecker::FEATURE_STATUS_WD
87     }
88    
89     ## TODO: According to HTML4 definition, authors SHOULD use style sheets
90     ## rather than presentational attributes (deprecated or not deprecated).
91 wakaba 1.48
92 wakaba 1.58 ## TODO: HTML 3.2 REC
93     ## TODO: HTML 2.x RFC
94     ## TODO: HTML 2.0 RFC
95     ## TODO: Other HTML RFCs
96    
97 wakaba 1.29 ## December 2007 HTML5 Classification
98    
99     my $HTMLMetadataContent = {
100     $HTML_NS => {
101     title => 1, base => 1, link => 1, style => 1, script => 1, noscript => 1,
102     'event-source' => 1, command => 1, datatemplate => 1,
103     ## NOTE: A |meta| with no |name| element is not allowed as
104     ## a metadata content other than |head| element.
105     meta => 1,
106 wakaba 1.56 ## NOTE: Only when empty [WF2]
107     form => 1,
108 wakaba 1.29 },
109     ## NOTE: RDF is mentioned in the HTML5 spec.
110     ## TODO: Other RDF elements?
111     q<http://www.w3.org/1999/02/22-rdf-syntax-ns#> => {RDF => 1},
112     };
113    
114     my $HTMLProseContent = {
115     $HTML_NS => {
116     section => 1, nav => 1, article => 1, blockquote => 1, aside => 1,
117     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1, header => 1,
118     footer => 1, address => 1, p => 1, hr => 1, dialog => 1, pre => 1,
119     ol => 1, ul => 1, dl => 1, figure => 1, map => 1, table => 1,
120     details => 1, ## ISSUE: "Prose element" in spec.
121     datagrid => 1, ## ISSUE: "Prose element" in spec.
122     datatemplate => 1,
123     div => 1, ## ISSUE: No category in spec.
124     ## NOTE: |style| is only allowed if |scoped| attribute is specified.
125     ## Additionally, it must be before any other element or
126     ## non-inter-element-whitespace text node.
127     style => 1,
128    
129 wakaba 1.38 br => 1, q => 1, cite => 1, em => 1, strong => 1, small => 1, mark => 1,
130 wakaba 1.29 dfn => 1, abbr => 1, time => 1, progress => 1, meter => 1, code => 1,
131     var => 1, samp => 1, kbd => 1, sub => 1, sup => 1, span => 1, i => 1,
132     b => 1, bdo => 1, script => 1, noscript => 1, 'event-source' => 1,
133     command => 1, font => 1,
134     a => 1,
135     datagrid => 1, ## ISSUE: "Interactive element" in the spec.
136     ## NOTE: |area| is allowed only as a descendant of |map|.
137     area => 1,
138    
139     ins => 1, del => 1,
140    
141     ## NOTE: If there is a |menu| ancestor, phrasing. Otherwise, prose.
142     menu => 1,
143    
144     img => 1, iframe => 1, embed => 1, object => 1, video => 1, audio => 1,
145     canvas => 1,
146     },
147    
148     ## NOTE: Embedded
149     q<http://www.w3.org/1998/Math/MathML> => {math => 1},
150     q<http://www.w3.org/2000/svg> => {svg => 1},
151     };
152    
153 wakaba 1.58 my $HTMLSectioningContent = {
154 wakaba 1.57 $HTML_NS => {
155     section => 1, nav => 1, article => 1, aside => 1,
156     ## NOTE: |body| is only allowed in |html| element.
157     body => 1,
158     },
159     };
160    
161 wakaba 1.58 my $HTMLSectioningRoot = {
162 wakaba 1.29 $HTML_NS => {
163 wakaba 1.58 blockquote => 1, datagrid => 1, figure => 1, td => 1,
164 wakaba 1.29 },
165     };
166    
167     my $HTMLHeadingContent = {
168     $HTML_NS => {
169     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1, header => 1,
170     },
171     };
172    
173     my $HTMLPhrasingContent = {
174     ## NOTE: All phrasing content is also prose content.
175     $HTML_NS => {
176 wakaba 1.38 br => 1, q => 1, cite => 1, em => 1, strong => 1, small => 1, mark => 1,
177 wakaba 1.29 dfn => 1, abbr => 1, time => 1, progress => 1, meter => 1, code => 1,
178     var => 1, samp => 1, kbd => 1, sub => 1, sup => 1, span => 1, i => 1,
179     b => 1, bdo => 1, script => 1, noscript => 1, 'event-source' => 1,
180     command => 1, font => 1,
181     a => 1,
182     datagrid => 1, ## ISSUE: "Interactive element" in the spec.
183     ## NOTE: |area| is allowed only as a descendant of |map|.
184     area => 1,
185    
186     ## NOTE: Transparent.
187     ins => 1, del => 1,
188    
189     ## NOTE: If there is a |menu| ancestor, phrasing. Otherwise, prose.
190     menu => 1,
191    
192     img => 1, iframe => 1, embed => 1, object => 1, video => 1, audio => 1,
193     canvas => 1,
194 wakaba 1.56
195     ## NOTE: WF2
196     input => 1, ## NOTE: type=hidden
197     datalist => 1, ## NOTE: block | where |select| allowed
198 wakaba 1.29 },
199    
200     ## NOTE: Embedded
201     q<http://www.w3.org/1998/Math/MathML> => {math => 1},
202     q<http://www.w3.org/2000/svg> => {svg => 1},
203    
204     ## NOTE: And non-inter-element-whitespace text nodes.
205     };
206    
207 wakaba 1.40 ## $HTMLEmbeddedContent: See Whatpm::ContentChecker.
208 wakaba 1.29
209     my $HTMLInteractiveContent = {
        ## Interactive content category: namespace URI => {local name => 1}.
210     $HTML_NS => {
211     a => 1,
212 wakaba 1.36 datagrid => 1, ## ISSUE: Categorized as "Interactive element"
213 wakaba 1.29 },
214     };
215    
216 wakaba 1.36 ## NOTE: $HTMLTransparentElements: See Whatpm::ContentChecker.
217     ## NOTE: Semi-transparent elements: See Whatpm::ContentChecker.
218    
219     ## -- Common attribute syntax checkers
220    
221 wakaba 1.1 our $AttrChecker;
222    
223     my $GetHTMLEnumeratedAttrChecker = sub {
        ## Returns a checker for an enumerated attribute.  |$states| maps
        ## each known keyword to a positive value (conforming) or to another
        ## true value such as -1 (known but non-conforming).  The attribute
        ## value is lowercased before the lookup.
224     my $states = shift; # {value => conforming ? 1 : -1}
225     return sub {
226     my ($self, $attr) = @_;
227     my $value = lc $attr->value; ## TODO: ASCII case insensitivity?
228     if ($states->{$value} > 0) {
229     #
230     } elsif ($states->{$value}) {
        ## True but not positive: a known, non-conforming keyword.
231     $self->{onerror}->(node => $attr, type => 'enumerated:non-conforming');
232     } else {
        ## Keyword is absent from |$states| (or mapped to a false value).
233     $self->{onerror}->(node => $attr, type => 'enumerated:invalid');
234     }
235     };
236     }; # $GetHTMLEnumeratedAttrChecker
237    
238     my $GetHTMLBooleanAttrChecker = sub {
        ## Returns a checker for the boolean attribute |$local_name|.  The
        ## value must be the empty string or exactly the attribute name (the
        ## comparison uses |eq|, so it is case-sensitive); anything else is
        ## reported as 'boolean:invalid'.
239     my $local_name = shift;
240     return sub {
241     my ($self, $attr) = @_;
242     my $value = $attr->value;
243     unless ($value eq $local_name or $value eq '') {
244     $self->{onerror}->(node => $attr, type => 'boolean:invalid');
245     }
246     };
247     }; # $GetHTMLBooleanAttrChecker
248    
249 wakaba 1.8 ## Unordered set of space-separated tokens
250 wakaba 1.18 my $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker = sub {
        ## Splits the attribute value on single space characters (U+0009 to
        ## U+000D and U+0020), ignores empty pieces, and reports every token
        ## that occurs more than once as 'duplicate token:<token>'.
251 wakaba 1.8 my ($self, $attr) = @_;
252     my %word;
253     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
254     unless ($word{$word}) {
255     $word{$word} = 1;
256     } else {
257     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
258     }
259     }
260 wakaba 1.18 }; # $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker
261 wakaba 1.8
262 wakaba 1.1 ## |rel| attribute (unordered set of space separated tokens,
263     ## whose allowed values are defined by the section on link types)
264     my $HTMLLinkTypesAttrChecker = sub {
        ## Arguments:
        ##   $a_or_area - index into each link type's |effect| array
        ##                (presumably selecting the |link| versus |a|/|area|
        ##                context - confirm against Whatpm::_LinkTypeList)
        ##   $todo      - hash in which |has_hyperlink_link_type| is set
        ##   $self      - the checker object (provides |onerror|)
        ##   $attr      - the attribute node being checked
265 wakaba 1.4 my ($a_or_area, $todo, $self, $attr) = @_;
266 wakaba 1.1 my %word;
        ## Pass 1: collect the distinct tokens.  A repeated token is an
        ## error, except for |up|, which may be repeated.
267     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
268     unless ($word{$word}) {
269     $word{$word} = 1;
270 wakaba 1.18 } elsif ($word eq 'up') {
271     #
272 wakaba 1.1 } else {
273     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
274     }
275     }
276     ## NOTE: Case sensitive match (since HTML5 spec does not say link
277     ## types are case-insensitive and it says "The value should not
278     ## be confusingly similar to any other defined value (e.g.
279     ## differing only in case).").
280     ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
281     ## "MAY"s and "only ... MAY" restrict non-standard non-registered
282     ## values to be used conformingly.
283     require Whatpm::_LinkTypeList;
284     our $LinkType;
        ## Pass 2: look each distinct token up in the link type registry.
285     for my $word (keys %word) {
286     my $def = $LinkType->{$word};
287     if (defined $def) {
288     if ($def->{status} eq 'accepted') {
289     if (defined $def->{effect}->[$a_or_area]) {
290     #
291     } else {
292     $self->{onerror}->(node => $attr,
293     type => 'link type:bad context:'.$word);
294     }
295     } elsif ($def->{status} eq 'proposal') {
296     $self->{onerror}->(node => $attr, level => 's',
297     type => 'link type:proposed:'.$word);
298 wakaba 1.20 if (defined $def->{effect}->[$a_or_area]) {
299     #
300     } else {
301     $self->{onerror}->(node => $attr,
302     type => 'link type:bad context:'.$word);
303     }
304 wakaba 1.1 } else { # rejected or synonym
305     $self->{onerror}->(node => $attr,
306     type => 'link type:non-conforming:'.$word);
307     }
        ## Record whether any link type other than |alternate| creates a
        ## hyperlink in this context (|alternate| is handled after the loop).
308 wakaba 1.4 if (defined $def->{effect}->[$a_or_area]) {
309     if ($word eq 'alternate') {
310     #
311     } elsif ($def->{effect}->[$a_or_area] eq 'hyperlink') {
312     $todo->{has_hyperlink_link_type} = 1;
313     }
314     }
        ## Link types flagged |unique| are tracked in |$self->{has_link_type}|
        ## and reported when seen a second time.
315 wakaba 1.1 if ($def->{unique}) {
316     unless ($self->{has_link_type}->{$word}) {
317     $self->{has_link_type}->{$word} = 1;
318     } else {
319     $self->{onerror}->(node => $attr,
320     type => 'link type:duplicate:'.$word);
321     }
322     }
323     } else {
324     $self->{onerror}->(node => $attr, level => 'unsupported',
325     type => 'link type:'.$word);
326     }
327     }
        ## |alternate| counts as a hyperlink only when not combined with
        ## |stylesheet|.
328 wakaba 1.4 $todo->{has_hyperlink_link_type} = 1
329     if $word{alternate} and not $word{stylesheet};
330 wakaba 1.1 ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
331     ## says that using both X-Pingback: header field and HTML
332     ## <link rel=pingback> is deprecated and if both appears they
333     ## SHOULD contain exactly the same value.
334     ## ISSUE: Pingback 1.0 specification defines the exact representation
335     ## of its link element, which cannot be tested by the current arch.
336     ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
337     ## include any string that matches to the pattern for the rel=pingback link,
338     ## which again is impossible to test.
339     ## ISSUE: rel=pingback href MUST NOT include entities other than predefined 4.
340 wakaba 1.12
341     ## NOTE: <link rel="up index"><link rel="up up index"> is not an error.
342 wakaba 1.17 ## NOTE: We can't check "If the page is part of multiple hierarchies,
343     ## then they SHOULD be described in different paragraphs.".
344 wakaba 1.1 }; # $HTMLLinkTypesAttrChecker
345 wakaba 1.20
346     ## TODO: "When an author uses a new type not defined by either this specification or the Wiki page, conformance checkers should offer to add the value to the Wiki, with the details described above, with the "proposal" status."
347 wakaba 1.1
348     ## URI (or IRI)
349     my $HTMLURIAttrChecker = sub {
        ## Passes the attribute value to
        ## |Whatpm::URIChecker->check_iri_reference| and forwards each reported
        ## problem to |onerror| with the type prefixed by 'URI::' (followed by
        ## ':<position>' when a position is given).  Also sets
        ## |$self->{has_uri_attr}|.
350     my ($self, $attr) = @_;
351     ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
352     my $value = $attr->value;
353     Whatpm::URIChecker->check_iri_reference ($value, sub {
354     my %opt = @_;
355     $self->{onerror}->(node => $attr, level => $opt{level},
356     type => 'URI::'.$opt{type}.
357     (defined $opt{position} ? ':'.$opt{position} : ''));
358     });
359 wakaba 1.17 $self->{has_uri_attr} = 1; ## TODO: <html manifest>
360 wakaba 1.1 }; # $HTMLURIAttrChecker
361    
362     ## A space separated list of one or more URIs (or IRIs)
363     my $HTMLSpaceURIsAttrChecker = sub {
        ## Splits the attribute value on runs of space characters and checks
        ## each piece with |Whatpm::URIChecker->check_iri_reference|.  Errors
        ## are reported as 'URIs::<type>:<index>[:<position>]', where <index>
        ## is the zero-based position of the piece in the split result.
        ## NOTE: Because |split| keeps a leading empty field, a value that
        ## starts with white space yields an empty first piece (see the
        ## ISSUEs below).
364     my ($self, $attr) = @_;
365     my $i = 0;
366     for my $value (split /[\x09-\x0D\x20]+/, $attr->value) {
367     Whatpm::URIChecker->check_iri_reference ($value, sub {
368     my %opt = @_;
369     $self->{onerror}->(node => $attr, level => $opt{level},
370 wakaba 1.2 type => 'URIs:'.':'.
371     $opt{type}.':'.$i.
372 wakaba 1.1 (defined $opt{position} ? ':'.$opt{position} : ''));
373     });
374     $i++;
375     }
376     ## ISSUE: Relative references?
377     ## ISSUE: Leading or trailing white spaces are conformant?
378     ## ISSUE: A sequence of white space characters are conformant?
379     ## ISSUE: A zero-length string is conformant? (It does contain a relative reference, i.e. same as base URI.)
380     ## NOTE: Duplication seems not an error.
381 wakaba 1.4 $self->{has_uri_attr} = 1;
382 wakaba 1.1 }; # $HTMLSpaceURIsAttrChecker
383    
384     my $HTMLDatetimeAttrChecker = sub {
        ## Checks a date-and-time attribute value of the form
        ## YYYY-MM-DD, a separator (space characters and/or "T"), hh:mm with
        ## optional :ss and optional fraction, then "Z" or a +hh:mm / -hh:mm
        ## time zone offset.  A value that does not match is reported as
        ## 'datetime:syntax error'; otherwise each field is range-checked.
385     my ($self, $attr) = @_;
386     my $value = $attr->value;
387     ## ISSUE: "space", not "space character" (in parsing algorithm, "space character")
388     if ($value =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})(?>[\x09-\x0D\x20]+(?>T[\x09-\x0D\x20]*)?|T[\x09-\x0D\x20]*)([0-9]{2}):([0-9]{2})(?>:([0-9]{2}))?(?>\.([0-9]+))?[\x09-\x0D\x20]*(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
        ## $s and $f are undefined when the optional parts are absent;
        ## $zh and $zm are undefined when the time zone is "Z".
389     my ($y, $M, $d, $h, $m, $s, $f, $zh, $zm)
390     = ($1, $2, $3, $4, $5, $6, $7, $8, $9);
391     if (0 < $M and $M < 13) { ## ISSUE: This is not explicitly specified (though in parsing algorithm)
        ## Table of the maximum day per month, indexed by month number
        ## (February is 29 here; the leap-year test follows).
392     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
393     if $d < 1 or
394     $d > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$M];
395     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
396     if $M == 2 and $d == 29 and
397     not ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
398     } else {
399     $self->{onerror}->(node => $attr, type => 'datetime:bad month');
400     }
401     $self->{onerror}->(node => $attr, type => 'datetime:bad hour') if $h > 23;
402     $self->{onerror}->(node => $attr, type => 'datetime:bad minute') if $m > 59;
403     $self->{onerror}->(node => $attr, type => 'datetime:bad second')
404     if defined $s and $s > 59;
405     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone hour')
406     if $zh > 23;
407     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone minute')
408     if $zm > 59;
409     ## ISSUE: Maybe timezone -00:00 should have same semantics as in RFC 3339.
410     } else {
411     $self->{onerror}->(node => $attr, type => 'datetime:syntax error');
412     }
413     }; # $HTMLDatetimeAttrChecker
414    
415     my $HTMLIntegerAttrChecker = sub {
        ## Reports 'integer:syntax error' unless the attribute value is an
        ## optional "-" followed by one or more ASCII digits.
416     my ($self, $attr) = @_;
417     my $value = $attr->value;
418     unless ($value =~ /\A-?[0-9]+\z/) {
419     $self->{onerror}->(node => $attr, type => 'integer:syntax error');
420     }
421     }; # $HTMLIntegerAttrChecker
422    
423     my $GetHTMLNonNegativeIntegerAttrChecker = sub {
        ## Returns a checker for a non-negative integer attribute.
        ## |$range_check| is a code reference that receives the numeric value
        ## and returns true when the value is within the allowed range.
424     my $range_check = shift;
425     return sub {
426     my ($self, $attr) = @_;
427     my $value = $attr->value;
428     if ($value =~ /\A[0-9]+\z/) {
429     unless ($range_check->($value + 0)) {
430     $self->{onerror}->(node => $attr, type => 'nninteger:out of range');
431     }
432     } else {
433     $self->{onerror}->(node => $attr,
434     type => 'nninteger:syntax error');
435     }
436     };
437     }; # $GetHTMLNonNegativeIntegerAttrChecker
438    
439     my $GetHTMLFloatingPointNumberAttrChecker = sub {
        ## Returns a checker for a floating point number attribute.
        ## |$range_check| is a code reference that receives the numeric value
        ## and returns true when the value is within the allowed range.
440     my $range_check = shift;
441     return sub {
442     my ($self, $attr) = @_;
443     my $value = $attr->value;
        ## Optional "-", then digits with at most one "." anywhere among
        ## them, and at least one digit overall.  A value with several dots
        ## (e.g. "1.2.3") is a syntax error; it must not reach the range
        ## check, where |$value + 0| would silently truncate it to 1.2.
444     if ($value =~ /\A-?[0-9]*\.?[0-9]*\z/ and $value =~ /[0-9]/) {
445     unless ($range_check->($value + 0)) {
446     $self->{onerror}->(node => $attr, type => 'float:out of range');
447     }
448     } else {
449     $self->{onerror}->(node => $attr,
450     type => 'float:syntax error');
451     }
452     };
453     }; # $GetHTMLFloatingPointNumberAttrChecker
454    
455     ## "A valid MIME type, optionally with parameters. [RFC 2046]"
456     ## ISSUE: RFC 2046 does not define syntax of media types.
457     ## ISSUE: The definition of "a valid MIME type" is unknown.
458     ## Syntactical correctness?
459     my $HTMLIMTAttrChecker = sub {
        ## Checks that the attribute value is a syntactically valid Internet
        ## media type with optional parameters.  On success the type, the
        ## subtype and the parameter name/value pairs are passed to
        ## |Whatpm::IMTChecker->check_imt|, whose reports are forwarded to
        ## |onerror| with the type prefixed by 'IMT:'.  Otherwise
        ## 'IMT:syntax error' is reported.
460     my ($self, $attr) = @_;
461     my $value = $attr->value;
462     ## ISSUE: RFC 2045 Content-Type header field allows insertion
463     ## of LWS/comments between tokens. Is it allowed in HTML? Maybe no.
464     ## ISSUE: RFC 2231 extension? Maybe no.
465     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
466     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
467     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
468     if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
469     my @type = ($1, $2);
470     my $param = $3;
        ## Consume the parameters one by one from the front of |$param|.
471     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
472     if (defined $2) {
473     push @type, $1 => $2;
474     } else {
        ## The value matched |$qs|, which is capture group 3 (group 2 is
        ## undefined here): drop the enclosing quotation marks, then undo
        ## the backslash escapes.
475     my $n = $1;
476     my $v = substr $3, 1, -1;
477     $v =~ s/\\(.)/$1/gs;
478     push @type, $n => $v;
479     }
480     }
481     require Whatpm::IMTChecker;
482     Whatpm::IMTChecker->check_imt (sub {
483     my %opt = @_;
484     $self->{onerror}->(node => $attr, level => $opt{level},
485     type => 'IMT:'.$opt{type});
486     }, @type);
487     } else {
488     $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
489     }
490     }; # $HTMLIMTAttrChecker
491    
492     my $HTMLLanguageTagAttrChecker = sub {
493 wakaba 1.7 ## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm.
494    
        ## Passes the attribute value to
        ## |Whatpm::LangTag->check_rfc3066_language_tag| and forwards each
        ## report to |onerror| as 'LangTag:<type>[:<subtag>]'.
495 wakaba 1.1 my ($self, $attr) = @_;
496 wakaba 1.6 my $value = $attr->value;
497     require Whatpm::LangTag;
498     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
499     my %opt = @_;
500     my $type = 'LangTag:'.$opt{type};
501     $type .= ':' . $opt{subtag} if defined $opt{subtag};
502     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
503     level => $opt{level});
504     });
505 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
506 wakaba 1.6
507     ## TODO: testdata
508 wakaba 1.1 }; # $HTMLLanguageTagAttrChecker
509    
510     ## "A valid media query [MQ]"
511     my $HTMLMQAttrChecker = sub {
        ## Placeholder: the value is not examined; every use is reported at
        ## level 'unsupported' with type 'media query'.
512     my ($self, $attr) = @_;
513     $self->{onerror}->(node => $attr, level => 'unsupported',
514     type => 'media query');
515     ## ISSUE: What is "a valid media query"?
516     }; # $HTMLMQAttrChecker
517    
518     my $HTMLEventHandlerAttrChecker = sub {
        ## Placeholder: the script in the value is not examined; every use is
        ## reported at level 'unsupported' with type 'event handler'.
519     my ($self, $attr) = @_;
520     $self->{onerror}->(node => $attr, level => 'unsupported',
521     type => 'event handler');
522     ## TODO: MUST contain valid ECMAScript code matching the
523     ## ECMAScript |FunctionBody| production. [ECMA262]
524     ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
525     ## ISSUE: Automatic semicolon insertion does not apply?
526     ## ISSUE: Other script languages?
527     }; # $HTMLEventHandlerAttrChecker
528    
529     my $HTMLUsemapAttrChecker = sub {
        ## Requires the value to begin with "#".  The rest of the value is
        ## queued on |$self->{usemap}| together with the attribute node
        ## (presumably resolved against |map| elements later - confirm
        ## against the consumer of |usemap|).
530     my ($self, $attr) = @_;
531     ## MUST be a valid hashed ID reference to a |map| element
532     my $value = $attr->value;
533     if ($value =~ s/^#//) {
534     ## ISSUE: Is |usemap="#"| conformant? (c.f. |id=""| is non-conformant.)
535     push @{$self->{usemap}}, [$value => $attr];
536     } else {
537     $self->{onerror}->(node => $attr, type => '#idref:syntax error');
538     }
539     ## NOTE: Space characters in hashed ID references are conforming.
540     ## ISSUE: UA algorithm for matching is case-insensitive; IDs only different in cases should be reported
541     }; # $HTMLUsemapAttrChecker
542    
543     my $HTMLTargetAttrChecker = sub {
        ## A value that starts with "_" must be one of |_self|, |_parent| or
        ## |_top| (compared after lowercasing); any other such value is
        ## reported as 'reserved browsing context name'.  Values that do not
        ## start with "_" are accepted without further checks.
544     my ($self, $attr) = @_;
545     my $value = $attr->value;
546     if ($value =~ /^_/) {
547     $value = lc $value; ## ISSUE: ASCII case-insensitive?
548     unless ({
549     _self => 1, _parent => 1, _top => 1,
550     }->{$value}) {
551     $self->{onerror}->(node => $attr,
552     type => 'reserved browsing context name');
553     }
554     } else {
555 wakaba 1.29 ## NOTE: An empty string is a valid browsing context name (same as _self).
556 wakaba 1.1 }
557     }; # $HTMLTargetAttrChecker
558    
559 wakaba 1.23 my $HTMLSelectorsAttrChecker = sub {
        ## Parses the attribute value with |Whatpm::CSS::SelectorsParser|.
        ## Parser reports are forwarded to |onerror| with the type prefixed by
        ## 'selectors:' and the attribute as the node.
560     my ($self, $attr) = @_;
561    
562     ## ISSUE: Namespace resolution?
563    
564     my $value = $attr->value;
565    
566     require Whatpm::CSS::SelectorsParser;
567     my $p = Whatpm::CSS::SelectorsParser->new;
        ## Pseudo-classes registered with the parser.
568     $p->{pseudo_class}->{$_} = 1 for qw/
569     active checked disabled empty enabled first-child first-of-type
570     focus hover indeterminate last-child last-of-type link only-child
571     only-of-type root target visited
572     lang nth-child nth-last-child nth-of-type nth-last-of-type not
573     -manakai-contains -manakai-current
574     /;
575    
        ## Pseudo-elements registered with the parser.
576     $p->{pseudo_element}->{$_} = 1 for qw/
577     after before first-letter first-line
578     /;
579    
580     $p->{must_level} = $self->{must_level};
581     $p->{onerror} = sub {
582     my %opt = @_;
583     $opt{type} = 'selectors:'.$opt{type};
584     $self->{onerror}->(%opt, node => $attr);
585     };
586     $p->parse_string ($value);
587     }; # $HTMLSelectorsAttrChecker
588    
589 wakaba 1.1 my $HTMLAttrChecker = {
        ## Checkers for attributes that are looked up by local name for any
        ## element: attribute local name => code reference that is called as
        ## |$checker->($self, $attr, $item)|.
590 wakaba 1.58 ## TODO: aria-* ## TODO: svg:*/@aria-* [HTML5ROLE] -> [STATES]
591 wakaba 1.1 id => sub {
592     ## NOTE: |map| has its own variant of |id=""| checker
593     my ($self, $attr) = @_;
594     my $value = $attr->value;
595     if (length $value > 0) {
        ## |$self->{id}| maps each ID to the list of attributes using it.
596     if ($self->{id}->{$value}) {
597     $self->{onerror}->(node => $attr, type => 'duplicate ID');
598     push @{$self->{id}->{$value}}, $attr;
599     } else {
600     $self->{id}->{$value} = [$attr];
601     }
602     if ($value =~ /[\x09-\x0D\x20]/) {
603     $self->{onerror}->(node => $attr, type => 'space in ID');
604     }
605     } else {
606     ## NOTE: MUST contain at least one character
607     $self->{onerror}->(node => $attr, type => 'empty attribute value');
608     }
609     },
610     title => sub {}, ## NOTE: No conformance criteria
611     lang => sub {
        ## An empty value is accepted; any other value is checked as an
        ## RFC 3066 language tag.  The attribute itself is reported when the
        ## owner document is not an HTML document.
612     my ($self, $attr) = @_;
613 wakaba 1.6 my $value = $attr->value;
614     if ($value eq '') {
615     #
616     } else {
617     require Whatpm::LangTag;
618     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
619     my %opt = @_;
620     my $type = 'LangTag:'.$opt{type};
621     $type .= ':' . $opt{subtag} if defined $opt{subtag};
622     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
623     level => $opt{level});
624     });
625     }
626 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
627     unless ($attr->owner_document->manakai_is_html) {
628     $self->{onerror}->(node => $attr, type => 'in XML:lang');
629     }
630 wakaba 1.6
631     ## TODO: test data
632 wakaba 1.1 },
633     dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
634     class => sub {
        ## Reports duplicate class names and records each distinct name,
        ## with its attribute node, under |$self->{return}->{class}|.
635     my ($self, $attr) = @_;
636     my %word;
637     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
638     unless ($word{$word}) {
639     $word{$word} = 1;
640     push @{$self->{return}->{class}->{$word}||=[]}, $attr;
641     } else {
642     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
643     }
644     }
645     },
646     contextmenu => sub {
        ## Only queues the value and attribute node on |$self->{contextmenu}|;
        ## nothing is reported here.
647     my ($self, $attr) = @_;
648     my $value = $attr->value;
649     push @{$self->{contextmenu}}, [$value => $attr];
650     ## ISSUE: "The value must be the ID of a menu element in the DOM."
651     ## What is "in the DOM"? A menu Element node that is not part
652     ## of the Document tree is in the DOM? A menu Element node that
653     ## belong to another Document tree is in the DOM?
654     },
655 wakaba 1.48 irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'), ## TODO: status: Working Draft
656 wakaba 1.56 ## TODO: repeat, repeat-start, repeat-min, repeat-max, repeat-template ## TODO: global
657 wakaba 1.58 ## TODO: role [HTML5ROLE] ## TODO: global @role [XHTML1ROLE]
658 wakaba 1.8 tabindex => $HTMLIntegerAttrChecker
659     ## TODO: ref, template, registrationmark
660 wakaba 1.1 };
661    
662 wakaba 1.49 my %HTMLAttrStatus = (
663 wakaba 1.50 class => FEATURE_HTML5_DEFAULT,
664     contenteditable => FEATURE_HTML5_DEFAULT,
665     contextmenu => FEATURE_HTML5_WD,
666     dir => FEATURE_HTML5_DEFAULT,
667     draggable => FEATURE_HTML5_LC,
668     id => FEATURE_HTML5_DEFAULT,
669     irrelevant => FEATURE_HTML5_WD,
670     lang => FEATURE_HTML5_DEFAULT,
671     ref => FEATURE_HTML5_AT_RISK,
672     registrationmark => FEATURE_HTML5_AT_RISK,
673 wakaba 1.58 role => FEATURE_HTML5_ROLE,
674 wakaba 1.50 tabindex => FEATURE_HTML5_DEFAULT,
675     template => FEATURE_HTML5_AT_RISK,
676     title => FEATURE_HTML5_DEFAULT,
677 wakaba 1.49 );
678    
679     my %HTMLM12NCommonAttrStatus = (
680 wakaba 1.50 class => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
681     dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
682     id => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
683     onclick => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
684     ondblclick => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
685     onmousedown => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
686     onmouseup => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
687     onmouseover => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
688     onmousemove => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
689     onmouseout => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
690     onkeypress => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
691     onkeydown => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
692     onkeyup => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
693 wakaba 1.55 style => FEATURE_HTML5_DEFAULT | FEATURE_XHTMLBASIC11_CR_DEPRECATED |
694     FEATURE_M12N10_REC,
695 wakaba 1.50 title => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
696 wakaba 1.49 );
697    
698 wakaba 1.1 for (qw/
        ## Registers each listed event handler attribute in both
        ## |$HTMLAttrChecker| (with the event handler checker) and
        ## |%HTMLAttrStatus| (with the FEATURE_HTML5_DEFAULT status).
699     onabort onbeforeunload onblur onchange onclick oncontextmenu
700     ondblclick ondrag ondragend ondragenter ondragleave ondragover
701     ondragstart ondrop onerror onfocus onkeydown onkeypress
702     onkeyup onload onmessage onmousedown onmousemove onmouseout
703     onmouseover onmouseup onmousewheel onresize onscroll onselect
704     onsubmit onunload
705     /) {
706     $HTMLAttrChecker->{$_} = $HTMLEventHandlerAttrChecker;
707 wakaba 1.50 $HTMLAttrStatus{$_} = FEATURE_HTML5_DEFAULT;
708 wakaba 1.1 }
709    
710     my $GetHTMLAttrsChecker = sub {
        ## Returns a |check_attrs| handler for one element type.
        ##   $element_specific_checker - {attribute local name => checker}
        ##   $element_specific_status  - {attribute local name => status flags}
711     my $element_specific_checker = shift;
712 wakaba 1.49 my $element_specific_status = shift;
713 wakaba 1.1 return sub {
714 wakaba 1.40 my ($self, $item, $element_state) = @_;
715     for my $attr (@{$item->{node}->attributes}) {
716 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
717     $attr_ns = '' unless defined $attr_ns;
718     my $attr_ln = $attr->manakai_local_name;
719     my $checker;
        ## Attributes in no namespace: the element-specific checker wins over
        ## the common one in |$HTMLAttrChecker|.
720     if ($attr_ns eq '') {
721     $checker = $element_specific_checker->{$attr_ln}
722 wakaba 1.40 || $HTMLAttrChecker->{$attr_ln};
723 wakaba 1.1 }
        ## Fall back to |$AttrChecker|: an exact (namespace, local name)
        ## entry first, then that namespace's '' entry.
724     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
725 wakaba 1.40 || $AttrChecker->{$attr_ns}->{''};
726 wakaba 1.1 if ($checker) {
727 wakaba 1.40 $checker->($self, $attr, $item);
728 wakaba 1.49 } elsif ($attr_ns eq '') {
        ## No checker for an attribute in no namespace: nothing is reported
        ## here; only the status call below runs.
729 wakaba 1.54 #
730 wakaba 1.1 } else {
731     $self->{onerror}->(node => $attr, level => 'unsupported',
732     type => 'attribute');
733 wakaba 1.49 ## ISSUE: No conformance criteria for unknown attributes in the spec
734     }
735     if ($attr_ns eq '') {
736     $self->_attr_status_info ($attr, $element_specific_status->{$attr_ln});
737 wakaba 1.1 }
738 wakaba 1.49 ## TODO: global attribute
739 wakaba 1.1 }
740     };
741     }; # $GetHTMLAttrsChecker
742    
743 wakaba 1.40 my %HTMLChecker = (
744     %Whatpm::ContentChecker::AnyChecker,
745 wakaba 1.49 check_attrs => $GetHTMLAttrsChecker->({}, \%HTMLAttrStatus),
746 wakaba 1.40 );
747    
748     my %HTMLEmptyChecker = (
        ## Checker set for elements with an empty content model: starts from
        ## |%HTMLChecker| and overrides the child element and text handlers.
749     %HTMLChecker,
750     check_child_element => sub {
        ## Elements listed in |minus_elements| are reported as excluded,
        ## those in |plus_elements| are accepted, and all others are
        ## reported as not allowed in an empty element.
751     my ($self, $item, $child_el, $child_nsuri, $child_ln,
752     $child_is_transparent, $element_state) = @_;
753     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
754     $self->{onerror}->(node => $child_el,
755     type => 'element not allowed:minus',
756     level => $self->{must_level});
757     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
758     #
759     } else {
760     $self->{onerror}->(node => $child_el,
761     type => 'element not allowed:empty',
762     level => $self->{must_level});
763     }
764     },
765     check_child_text => sub {
        ## Text is reported only when |$has_significant| is true.
766     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
767     if ($has_significant) {
768     $self->{onerror}->(node => $child_node,
769     type => 'character not allowed:empty',
770     level => $self->{must_level});
771     }
772     },
773     );
774    
## Checker for elements that take text only: child elements are rejected
## unless they are listed in |plus_elements|.  Text itself is not checked
## here (|check_child_text| is inherited from |%HTMLChecker|).
my %HTMLTextChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif (not $self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## NOTE: Reported without an explicit |level|, as in the original.
      $self->{onerror}->(node => $child_el, type => 'element not allowed');
    }
  },
);
791    
## TODO: Rename as "FlowContent" (HTML5 revision 1261)
##
## Checker for elements whose content model is prose content.  It tracks
## |has_non_style| in the element state so that a |style| child is accepted
## only when it is |scoped| and precedes any other content.
my %HTMLProseContentChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## Report a MUST-level error of the given type on the child element.
    my $must_error = sub {
      my $type = shift;
      $self->{onerror}->(node => $child_el,
                         type => $type,
                         level => $self->{must_level});
    };

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $must_error->('element not allowed:minus');
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Listed in |plus_elements|; nothing to check.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
      ## Acceptable only if nothing but |style| has been seen so far and
      ## the element carries |scoped|.
      unless ((not $element_state->{has_non_style}) and
              $child_el->has_attribute_ns (undef, 'scoped')) {
        $must_error->('element not allowed:prose style');
      }
    } elsif ($HTMLProseContent->{$child_nsuri}->{$child_ln}) {
      $element_state->{has_non_style} = 1 unless $child_is_transparent;
    } else {
      $element_state->{has_non_style} = 1;
      $must_error->('element not allowed:prose');
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $element_state->{has_non_style} = 1 if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if ($element_state->{has_significant}) {
      ## Propagate upwards so the parent also counts as having content.
      $item->{real_parent_state}->{has_significant} = 1;
    } elsif (not $item->{transparent}) {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{should_level},
                         type => 'no significant content');
    }
  },
);
839    
## Checker for elements whose content model is phrasing content.
my %HTMLPhrasingContentChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif (not ($self->{plus_elements}->{$child_nsuri}->{$child_ln} or
                  $HTMLPhrasingContent->{$child_nsuri}->{$child_ln})) {
      ## Neither listed in |plus_elements| nor phrasing content.
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:phrasing',
                         level => $self->{must_level});
    }
  },
  ## The significant-content rule is shared with prose content.
  check_end => $HTMLProseContentChecker{check_end},
  ## NOTE: The definition for |li| assumes that the only differences
  ## between prose and phrasing content checkers are |check_child_element|
  ## and |check_child_text|.
);
864    
## Transparent elements currently reuse the prose content checker as is.
## ISSUE: Significant content rule should be applied to transparent element
## with parent?
my %HTMLTransparentChecker = (%HTMLProseContentChecker);

## Element definition tables (package variables).
our ($Element, $ElementDefault);

## Entry under the empty local name: a plain copy of |%HTMLChecker|
## (presumably the fallback for HTML-namespace elements with no definition
## of their own -- confirm against the lookup code).
$Element->{$HTML_NS}->{''} = {%HTMLChecker};
875    
## |html|: the root element.  Children must be exactly one |head| followed
## by one |body|; progress is tracked in |$element_state->{phase}|, which
## moves through 'before head' -> 'after head' -> 'after body'.
$Element->{$HTML_NS}->{html} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  is_root => 1,
  check_attrs => $GetHTMLAttrsChecker->({
    manifest => $HTMLURIAttrChecker,
    xmlns => sub {
      my ($self, $attr) = @_;
      ## The value must be the XHTML namespace URI ...
      $self->{onerror}->(node => $attr, type => 'invalid attribute value')
          unless $attr->value eq $HTML_NS;
      ## ... and the attribute is reported when the document is not an
      ## HTML document.
      $self->{onerror}->(node => $attr, type => 'in XML:xmlns')
          unless $attr->owner_document->manakai_is_html;
      ## TODO: Test
    },
  }, {
    %HTMLAttrStatus,
    dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC) } qw/id lang/),
    (map { ($_ => FEATURE_HTML5_DEFAULT) } qw/manifest xmlns/),
    version => FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'before head';
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    my $phase = $element_state->{phase};

    ## True if the child is the HTML element with the given local name.
    my $child_is = sub { $child_nsuri eq $HTML_NS and $child_ln eq $_[0] };
    my $not_allowed = sub {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed');
    };

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Listed in |plus_elements|; does not affect the phase.
    } elsif ($phase eq 'before head') {
      if ($child_is->('head')) {
        $element_state->{phase} = 'after head';
      } elsif ($child_is->('body')) {
        ## |body| without a preceding |head|.
        $self->{onerror}->(node => $child_el,
                           type => 'ps element missing:head');
        $element_state->{phase} = 'after body';
      } else {
        $not_allowed->();
      }
    } elsif ($phase eq 'after head') {
      if ($child_is->('body')) {
        $element_state->{phase} = 'after body';
      } else {
        $not_allowed->();
      }
    } elsif ($phase eq 'after body') {
      $not_allowed->();
    } else {
      die "check_child_element: Bad |html| phase: $element_state->{phase}";
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed') if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Children still missing when the element ends, keyed by phase.
    my %missing_in = (
      'before head' => [qw/head body/],
      'after head' => [qw/body/],
      'after body' => [],
    );
    my $missing = $missing_in{$element_state->{phase}}
        or die "check_end: Bad |html| phase: $element_state->{phase}";
    for my $ln (@$missing) {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing:' . $ln);
    }

    $HTMLChecker{check_end}->(@_);
  },
};
965 wakaba 1.25
## |head|: metadata content with exactly one |title|.
##
## While the children of |head| are being checked, |$self->{flag}->{in_head}|
## is set to true; the value it had before is kept in
## |$element_state->{in_head_original}| and put back in |check_end|.
$Element->{$HTML_NS}->{head} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    profile => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'title') {
      ## Only the first |title| is allowed.
      unless ($element_state->{has_title}) {
        $element_state->{has_title} = 1;
      } else {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:head title',
                           level => $self->{must_level});
      }
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
      ## A |scoped| style sheet is not allowed in |head|.
      if ($child_el->has_attribute_ns (undef, 'scoped')) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:head style',
                           level => $self->{must_level});
      }
    } elsif ($HTMLMetadataContent->{$child_nsuri}->{$child_ln}) {
      #

      ## NOTE: |meta| is a metadata content.  However, strictly speaking,
      ## a |meta| element with none of |charset|, |name|,
      ## or |http-equiv| attribute is not allowed.  It is non-conforming
      ## anyway.

      ## TODO: |form| MUST be empty and in XML [WF2].
    } else {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:metadata',
                         level => $self->{must_level});
    }

    ## Remember the outer |in_head| flag only on the first child element.
    ## Saving it again on later children would store the value 1 that this
    ## handler itself set, and |check_end| could then never restore the
    ## real outer value.
    unless (exists $element_state->{in_head_original}) {
      $element_state->{in_head_original} = $self->{flag}->{in_head};
    }
    $self->{flag}->{in_head} = 1;
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $self->{onerror}->(node => $child_node, type => 'character not allowed');
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    unless ($element_state->{has_title}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing:title');
    }

    ## Restore the flag only if this element changed it (i.e. it had at
    ## least one child element); otherwise leave the outer value alone.
    if (exists $element_state->{in_head_original}) {
      $self->{flag}->{in_head} = $element_state->{in_head_original};
    }

    $HTMLChecker{check_end}->(@_);
  },
};
1032    
## |title|: text-only content; no element-specific attribute checkers.
$Element->{$HTML_NS}->{title} = {
  %HTMLTextChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC) } qw/id lang/),
  }),
};
1043 wakaba 1.1
## |base|: empty element.  Besides the per-attribute checks, |check_attrs|
## enforces the document-level rules: at most one |base|, |href| before any
## URI attribute, |target| before any hyperlink element, and at least one
## of |href| and |target| present.
$Element->{$HTML_NS}->{base} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  %HTMLEmptyChecker,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $base_el = $item->{node};

    if ($self->{has_base}) {
      $self->{onerror}->(node => $base_el,
                         type => 'element not allowed:base');
    } else {
      $self->{has_base} = 1;
    }

    my $has_href = $base_el->has_attribute_ns (undef, 'href');
    my $has_target = $base_el->has_attribute_ns (undef, 'target');

    ## ISSUE: Are these examples conforming?
    ## <head profile="a b c"><base href> (except for |profile|'s
    ## non-conformance)
    ## <title xml:base="relative"/><base href/> (maybe it should be)
    ## <unknown xmlns="relative"/><base href/> (assuming that
    ## |{relative}:unknown| is allowed before XHTML |base| (unlikely, though))
    ## <style>@import 'relative';</style><base href>
    ## <script>location.href = 'relative';</script><base href>
    ## NOTE: <html manifest=".."><head><base href=""/> is conforming as
    ## an exception.
    $self->{onerror}->(node => $base_el,
                       type => 'basehref after URI attribute')
        if $self->{has_uri_attr} and $has_href;

    ## ISSUE: Are these examples conforming?
    ## <head><title xlink:href=""/><base target="name"/></head>
    ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
    ## (assuming that |xbl:xbl| is allowed before |base|)
    ## NOTE: These are non-conformant anyway because of |head|'s content model:
    ## <link href=""/><base target="name"/>
    ## <link rel=unknown href=""><base target=name>
    $self->{onerror}->(node => $base_el,
                       type => 'basetarget after hyperlink')
        if $self->{has_hyperlink_element} and $has_target;

    $self->{onerror}->(node => $base_el,
                       type => 'attribute missing:href|target')
        if not $has_href and not $has_target;

    ## The per-attribute checker is built on each call, as before.
    my $attrs_checker = $GetHTMLAttrsChecker->({
      href => $HTMLURIAttrChecker,
      target => $HTMLTargetAttrChecker,
    }, {
      %HTMLAttrStatus,
      (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) }
           qw/href target/),
      id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    });
    return $attrs_checker->($self, $item, $element_state);
  },
};
1102    
## |link|: empty element that requires both |href| and |rel|.  The
## attribute checks run first because the |rel| checker is handed |$item|
## and the code below then reads |$item->{has_hyperlink_link_type}|.
$Element->{$HTML_NS}->{link} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  %HTMLEmptyChecker,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $link_el = $item->{node};

    my $attrs_checker = $GetHTMLAttrsChecker->({
      href => $HTMLURIAttrChecker,
      rel => sub { $HTMLLinkTypesAttrChecker->(0, $item, @_) },
      media => $HTMLMQAttrChecker,
      hreflang => $HTMLLanguageTagAttrChecker,
      type => $HTMLIMTAttrChecker,
      ## NOTE: Though |title| has special semantics,
      ## syntactically same as the |title| as global attribute.
    }, {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      (map { ($_ => FEATURE_M12N10_REC) } qw/charset rev target/),
      (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) }
           qw/href hreflang media rel type/),
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    });
    $attrs_checker->($self, $item, $element_state);

    if (not $link_el->has_attribute_ns (undef, 'href')) {
      $self->{onerror}->(node => $link_el,
                         type => 'attribute missing:href');
    } elsif ($item->{has_hyperlink_link_type}) {
      ## Recorded so that a later |base target| can be rejected.
      $self->{has_hyperlink_element} = 1;
    }

    $self->{onerror}->(node => $link_el,
                       type => 'attribute missing:rel')
        unless $link_el->has_attribute_ns (undef, 'rel');
  },
};
1141    
## |meta|: empty element.  |check_attrs| does three things:
##   1. checks every attribute (the four |meta|-specific attributes
##      |content|, |name|, |http-equiv| and |charset| are only recorded
##      here and validated in combination below);
##   2. checks which combinations of those four attributes are present;
##   3. validates character encoding declarations (|charset| attribute or
##      |http-equiv=content-type|).
$Element->{$HTML_NS}->{meta} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  %HTMLEmptyChecker,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $name_attr;
    my $http_equiv_attr;
    my $charset_attr;
    my $content_attr;

    ## Status table for null-namespace attributes.  Built once per element
    ## instead of once per attribute.
    my $attr_status = {
      %HTMLAttrStatus,
      charset => FEATURE_HTML5_DEFAULT,
      content => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      'http-equiv' => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      scheme => FEATURE_M12N10_REC,
    };

    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;

      ## |$checker| is either a code ref (a shared checker to call), the
      ## plain value 1 (a |meta|-specific attribute handled later), or
      ## false (no checker known).
      my $checker;
      if ($attr_ns eq '') {
        if ($attr_ln eq 'content') {
          $content_attr = $attr;
          $checker = 1;
        } elsif ($attr_ln eq 'name') {
          $name_attr = $attr;
          $checker = 1;
        } elsif ($attr_ln eq 'http-equiv') {
          $http_equiv_attr = $attr;
          $checker = 1;
        } elsif ($attr_ln eq 'charset') {
          $charset_attr = $attr;
          $checker = 1;
        } else {
          $checker = $HTMLAttrChecker->{$attr_ln}
              || $AttrChecker->{$attr_ns}->{$attr_ln}
              || $AttrChecker->{$attr_ns}->{''};
        }
      } else {
        $checker = $AttrChecker->{$attr_ns}->{$attr_ln}
            || $AttrChecker->{$attr_ns}->{''};
      }

      if ($checker) {
        ## |$item| is passed as the third argument, the same calling
        ## convention |$GetHTMLAttrsChecker| uses for these checkers.
        $checker->($self, $attr, $item) if ref $checker;
      } elsif ($attr_ns eq '') {
        #
      } else {
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute');
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }

      if ($attr_ns eq '') {
        $self->_attr_status_info ($attr, $attr_status->{$attr_ln});
      }
    }

    ## Allowed combinations: |name| + |content|, |http-equiv| + |content|,
    ## or |charset| alone.
    if (defined $name_attr) {
      if (defined $http_equiv_attr) {
        $self->{onerror}->(node => $http_equiv_attr,
                           type => 'attribute not allowed');
      } elsif (defined $charset_attr) {
        $self->{onerror}->(node => $charset_attr,
                           type => 'attribute not allowed');
      }
      unless (defined $content_attr) {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:content');
      }
    } elsif (defined $http_equiv_attr) {
      if (defined $charset_attr) {
        $self->{onerror}->(node => $charset_attr,
                           type => 'attribute not allowed');
      }
      unless (defined $content_attr) {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:content');
      }
    } elsif (defined $charset_attr) {
      if (defined $content_attr) {
        $self->{onerror}->(node => $content_attr,
                           type => 'attribute not allowed');
      }
    } else {
      if (defined $content_attr) {
        $self->{onerror}->(node => $content_attr,
                           type => 'attribute not allowed');
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:name|http-equiv');
      } else {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:name|http-equiv|charset');
      }
    }

    ## Check the placement of a character encoding declaration: it has to
    ## be the first element child of the document's |head|, and it is
    ## reported when the document is not an HTML document.
    my $check_charset_decl = sub {
      my $parent = $item->{node}->manakai_parent_element;
      if ($parent and $parent eq $parent->owner_document->manakai_head) {
        for my $el (@{$parent->child_nodes}) {
          next unless $el->node_type == 1; # ELEMENT_NODE
          unless ($el eq $item->{node}) {
            ## NOTE: Not the first child element.
            $self->{onerror}->(node => $item->{node},
                               type => 'element not allowed:meta charset',
                               level => $self->{must_level});
          }
          last;
          ## NOTE: Entity references are not supported.
        }
      } else {
        $self->{onerror}->(node => $item->{node},
                           type => 'element not allowed:meta charset',
                           level => $self->{must_level});
      }

      unless ($item->{node}->owner_document->manakai_is_html) {
        $self->{onerror}->(node => $item->{node},
                           type => 'in XML:charset',
                           level => $self->{must_level});
      }
    }; # $check_charset_decl

    ## Check a declared charset name.
    ##   $attr          - attribute node the errors are reported on
    ##   $charset_value - charset name extracted from that attribute
    my $check_charset = sub {
      my ($attr, $charset_value) = @_;
      ## NOTE: Though the case-sensitivity of |charset| attribute value
      ## is not explicitly spelled in the HTML5 spec, the Character Set
      ## registry of IANA, which is referenced from HTML5 spec, says that
      ## charset name is case-insensitive.
      $charset_value =~ tr/A-Z/a-z/; ## NOTE: ASCII Case-insensitive.

      require Message::Charset::Info;
      my $charset = $Message::Charset::Info::IANACharset->{$charset_value};
      my $ic = $item->{node}->owner_document->input_encoding;
      if (defined $ic) {
        ## TODO: Test for this case
        ## NOTE(review): |$ic| is looked up as is (not lowercased), and two
        ## names that are both missing from the table compare as equal
        ## here -- confirm that this is intended.
        my $ic_charset = $Message::Charset::Info::IANACharset->{$ic};
        if ($charset ne $ic_charset) {
          $self->{onerror}->(node => $attr,
                             type => 'mismatched charset name:'.$ic.
                                 ':'.$charset_value, ## TODO: This should be a |value| value.
                             level => $self->{must_level});
        }
      } else {
        ## NOTE: MUST, but not checkable, since the document is not originally
        ## in serialized form (or the parser does not preserve the input
        ## encoding information).
        $self->{onerror}->(node => $attr,
                           type => 'mismatched charset name::'.$charset_value, ## TODO: |value|
                           level => 'unsupported');
      }

      ## ISSUE: What is "valid character encoding name"?  Syntactically valid?
      ## Syntactically valid and registered?  What about x-charset names?
      unless (Message::Charset::Info::is_syntactically_valid_iana_charset_name
                  ($charset_value)) {
        $self->{onerror}->(node => $attr,
                           type => 'charset:syntax error:'.$charset_value, ## TODO
                           level => $self->{must_level});
      }

      if ($charset) {
        ## ISSUE: What is "the preferred name for that encoding" (for a charset
        ## with no "preferred MIME name" label)?
        my $charset_status = $charset->{iana_names}->{$charset_value} || 0;
        if (($charset_status &
             Message::Charset::Info::PREFERRED_CHARSET_NAME ())
                != Message::Charset::Info::PREFERRED_CHARSET_NAME ()) {
          $self->{onerror}->(node => $attr,
                             type => 'charset:not preferred:'.
                                 $charset_value, ## TODO
                             level => $self->{must_level});
        }
        if (($charset_status &
             Message::Charset::Info::REGISTERED_CHARSET_NAME ())
                != Message::Charset::Info::REGISTERED_CHARSET_NAME ()) {
          if ($charset_value =~ /^x-/) {
            $self->{onerror}->(node => $attr,
                               type => 'charset:private:'.$charset_value, ## TODO
                               level => $self->{good_level});
          } else {
            $self->{onerror}->(node => $attr,
                               type => 'charset:not registered:'.
                                   $charset_value, ## TODO
                               level => $self->{good_level});
          }
        }
      } elsif ($charset_value =~ /^x-/) {
        $self->{onerror}->(node => $attr,
                           type => 'charset:private:'.$charset_value, ## TODO
                           level => $self->{good_level});
      } else {
        $self->{onerror}->(node => $attr,
                           type => 'charset:not registered:'.$charset_value, ## TODO
                           level => $self->{good_level});
      }

      if ($attr->get_user_data ('manakai_has_reference')) {
        $self->{onerror}->(node => $attr,
                           type => 'character reference in charset',
                           level => $self->{must_level});
      }
    }; # $check_charset

    ## TODO: metadata conformance

    ## TODO: pragma conformance
    if (defined $http_equiv_attr) { ## An enumerated attribute
      my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
      if ({
        'refresh' => 1,
        'default-style' => 1,
      }->{$keyword}) {
        #

        ## TODO: More than one occurrence is a MUST-level error (revision 1180).
      } elsif ($keyword eq 'content-type') {
        ## TODO: refs in "text/html; charset=" are not disallowed since rev.1275.

        $check_charset_decl->();
        if ($content_attr) {
          my $content = $content_attr->value;
          if ($content =~ m!^[Tt][Ee][Xx][Tt]/[Hh][Tt][Mm][Ll];
                            [\x09-\x0D\x20]*[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
                            =(.+)\z!sx) {
            $check_charset->($content_attr, $1);
          } else {
            $self->{onerror}->(node => $content_attr,
                               type => 'meta content-type syntax error',
                               level => $self->{must_level});
          }
        }
      } else {
        $self->{onerror}->(node => $http_equiv_attr,
                           type => 'enumerated:invalid');
      }
    }

    if (defined $charset_attr) {
      $check_charset_decl->();
      $check_charset->($charset_attr, $charset_attr->value);
    }
  },
};
1394    
## |style|: the text content is collected in |$element_state->{text}| and,
## when the style sheet language is text/css (the default when |type| is
## missing), handed to |$self->{onsubdoc}| at the end of the element.
## Other languages are reported as unsupported.
$Element->{$HTML_NS}->{style} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  %HTMLChecker,
  check_attrs => $GetHTMLAttrsChecker->({
    type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
    media => $HTMLMQAttrChecker,
    scoped => $GetHTMLBooleanAttrChecker->('scoped'),
    ## NOTE: |title| has special semantics for |style|s, but is syntactically
    ## not different
  }, {
    %HTMLAttrStatus,
    dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    media => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    scoped => FEATURE_HTML5_DEFAULT,
    title => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    type => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Start from an empty string so that an element with no text children
    ## still yields a defined character string in |check_end|.
    $element_state->{text} = '';

    ## NOTE: |html:style| itself has no conformance criteria on content model.
    my $type = $item->{node}->get_attribute_ns (undef, 'type');
    if (not defined $type or
        $type =~ m[\A(?>(?>\x0D\x0A)?[\x09\x20])*[Tt][Ee][Xx][Tt](?>(?>\x0D\x0A)?[\x09\x20])*/(?>(?>\x0D\x0A)?[\x09\x20])*[Cc][Ss][Ss](?>(?>\x0D\x0A)?[\x09\x20])*\z]) {
      $element_state->{allow_element} = 0;
      $element_state->{style_type} = 'text/css';
    } else {
      $element_state->{allow_element} = 1; # unknown
      $element_state->{style_type} = $type; ## TODO: $type normalization
    }
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($element_state->{allow_element}) {
      ## Unknown style sheet language: child elements are not judged.
    } else {
      $self->{onerror}->(node => $child_el, type => 'element not allowed');
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $element_state->{text} .= $child_node->text_content;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if ($element_state->{style_type} eq 'text/css') {
      $self->{onsubdoc}->({s => $element_state->{text},
                           container_node => $item->{node},
                           media_type => 'text/css', is_char_string => 1});
    } else {
      $self->{onerror}->(node => $item->{node}, level => 'unsupported',
                         type => 'style:'.$element_state->{style_type});
    }

    $HTMLChecker{check_end}->(@_);
  },
};
1461 wakaba 1.25 ## ISSUE: Relationship to significant content check?
1462 wakaba 1.1
## |body|: prose content; no element-specific attribute checkers, only
## attribute status information.
$Element->{$HTML_NS}->{body} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    ## Attributes carrying the FEATURE_M12N10_REC_DEPRECATED status.
    (map { ($_ => FEATURE_M12N10_REC_DEPRECATED) }
         qw/alink background bgcolor link text vlink/),
    (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) }
         qw/onload onunload/),
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  }),
};
1480    
## |section|, |nav|, |article| and |aside| share the same definition:
## FEATURE_HTML5_DEFAULT status with the prose content checker.  Each
## element gets its own hash.
for my $ln (qw/section nav article aside/) {
  $Element->{$HTML_NS}->{$ln} = {
    status => FEATURE_HTML5_DEFAULT,
    %HTMLProseContentChecker,
  };
}

## |blockquote|: prose content plus a |cite| attribute checked as a URI.
$Element->{$HTML_NS}->{blockquote} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  %HTMLProseContentChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    {cite => $HTMLURIAttrChecker},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      cite => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
};
1513    
## |h1|: phrasing content.  Starting a heading sets
## |$self->{flag}->{has_hn}|, which the |header| checker inspects.
$Element->{$HTML_NS}->{h1} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_M12N10_REC_DEPRECATED,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->{flag}->{has_hn} = 1;
  },
};

## |h2|..|h6| are shallow copies of the |h1| definition (one hash each).
for my $hn (qw/h2 h3 h4 h5 h6/) {
  $Element->{$HTML_NS}->{$hn} = {%{$Element->{$HTML_NS}->{h1}}};
}

## TODO: Explicit sectioning is "encouraged".
1540    
1541 wakaba 1.1 $Element->{$HTML_NS}->{header} = {
1542 wakaba 1.48 status => FEATURE_HTML5_DEFAULT,
1543 wakaba 1.40 %HTMLProseContentChecker,
1544     check_start => sub {
1545     my ($self, $item, $element_state) = @_;
1546     $self->_add_minus_elements ($element_state,
1547     {$HTML_NS => {qw/header 1 footer 1/}},
1548 wakaba 1.58 $HTMLSectioningContent);
1549 wakaba 1.40 $element_state->{has_hn_original} = $self->{flag}->{has_hn};
1550     $self->{flag}->{has_hn} = 0;
1551     },
1552     check_end => sub {
1553     my ($self, $item, $element_state) = @_;
1554     $self->_remove_minus_elements ($element_state);
1555     unless ($self->{flag}->{has_hn}) {
1556     $self->{onerror}->(node => $item->{node},
1557     type => 'element missing:hn');
1558     }
1559     $self->{flag}->{has_hn} ||= $element_state->{has_hn_original};
1560 wakaba 1.1
1561 wakaba 1.40 $HTMLProseContentChecker{check_end}->(@_);
1562 wakaba 1.1 },
1563 wakaba 1.40 ## ISSUE: Is <header><del><h1>...</h1></del></header> conforming?
1564 wakaba 1.1 };
1565    
1566     $Element->{$HTML_NS}->{footer} = {
1567 wakaba 1.48 status => FEATURE_HTML5_DEFAULT,
1568 wakaba 1.40 %HTMLProseContentChecker,
1569     check_start => sub {
1570     my ($self, $item, $element_state) = @_;
1571     $self->_add_minus_elements ($element_state,
1572     {$HTML_NS => {footer => 1}},
1573 wakaba 1.58 $HTMLSectioningContent,
1574 wakaba 1.57 $HTMLHeadingContent);
1575 wakaba 1.40 },
1576     check_end => sub {
1577     my ($self, $item, $element_state) = @_;
1578     $self->_remove_minus_elements ($element_state);
1579 wakaba 1.1
1580 wakaba 1.40 $HTMLProseContentChecker{check_end}->(@_);
1581 wakaba 1.1 },
1582     };
1583    
1584     $Element->{$HTML_NS}->{address} = {
1585 wakaba 1.40 %HTMLProseContentChecker,
1586 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1587     check_attrs => $GetHTMLAttrsChecker->({}, {
1588     %HTMLAttrStatus,
1589     %HTMLM12NCommonAttrStatus,
1590 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
1591 wakaba 1.49 }),
1592 wakaba 1.40 check_start => sub {
1593     my ($self, $item, $element_state) = @_;
1594     $self->_add_minus_elements ($element_state,
1595     {$HTML_NS => {footer => 1, address => 1}},
1596     $HTMLSectioningContent, $HTMLHeadingContent);
1597     },
1598     check_end => sub {
1599     my ($self, $item, $element_state) = @_;
1600     $self->_remove_minus_elements ($element_state);
1601 wakaba 1.29
1602 wakaba 1.40 $HTMLProseContentChecker{check_end}->(@_);
1603 wakaba 1.29 },
1604 wakaba 1.1 };
1605    
1606     $Element->{$HTML_NS}->{p} = {
1607 wakaba 1.40 %HTMLPhrasingContentChecker,
1608 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1609     check_attrs => $GetHTMLAttrsChecker->({}, {
1610     %HTMLAttrStatus,
1611     %HTMLM12NCommonAttrStatus,
1612     align => FEATURE_M12N10_REC_DEPRECATED,
1613 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
1614 wakaba 1.49 }),
1615 wakaba 1.1 };
1616    
1617     $Element->{$HTML_NS}->{hr} = {
1618 wakaba 1.40 %HTMLEmptyChecker,
1619 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1620     check_attrs => $GetHTMLAttrsChecker->({}, {
1621     %HTMLAttrStatus,
1622     %HTMLM12NCommonAttrStatus,
1623     align => FEATURE_M12N10_REC_DEPRECATED,
1624 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
1625 wakaba 1.49 noshade => FEATURE_M12N10_REC_DEPRECATED,
1626     size => FEATURE_M12N10_REC_DEPRECATED,
1627     width => FEATURE_M12N10_REC_DEPRECATED,
1628     }),
1629 wakaba 1.1 };
1630    
1631     $Element->{$HTML_NS}->{br} = {
1632 wakaba 1.40 %HTMLEmptyChecker,
1633 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1634     check_attrs => $GetHTMLAttrsChecker->({}, {
1635     %HTMLAttrStatus,
1636 wakaba 1.50 class => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1637 wakaba 1.49 clear => FEATURE_M12N10_REC_DEPRECATED,
1638 wakaba 1.50 id => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1639 wakaba 1.49 style => FEATURE_XHTML10_REC,
1640 wakaba 1.50 title => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1641 wakaba 1.49 }),
1642 wakaba 1.29 ## NOTE: Blank lines MUST NOT be used for presentation purposes.
1643     ## (This requirement is semantic, so we cannot check it.)
1644 wakaba 1.1 };
1645    
1646     $Element->{$HTML_NS}->{dialog} = {
1647 wakaba 1.48 status => FEATURE_HTML5_DEFAULT,
1648 wakaba 1.40 %HTMLChecker,
1649     check_start => sub {
1650     my ($self, $item, $element_state) = @_;
1651     $element_state->{phase} = 'before dt';
1652     },
1653     check_child_element => sub {
1654     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1655     $child_is_transparent, $element_state) = @_;
1656     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
1657     $self->{onerror}->(node => $child_el,
1658     type => 'element not allowed:minus',
1659     level => $self->{must_level});
1660     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1661     #
1662     } elsif ($element_state->{phase} eq 'before dt') {
1663     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
1664     $element_state->{phase} = 'before dd';
1665     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
1666     $self->{onerror}
1667     ->(node => $child_el, type => 'ps element missing:dt');
1668     $element_state->{phase} = 'before dt';
1669     } else {
1670     $self->{onerror}->(node => $child_el, type => 'element not allowed');
1671     }
1672     } elsif ($element_state->{phase} eq 'before dd') {
1673     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
1674     $element_state->{phase} = 'before dt';
1675     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
1676     $self->{onerror}
1677     ->(node => $child_el, type => 'ps element missing:dd');
1678     $element_state->{phase} = 'before dd';
1679     } else {
1680     $self->{onerror}->(node => $child_el, type => 'element not allowed');
1681 wakaba 1.1 }
1682 wakaba 1.40 } else {
1683     die "check_child_element: Bad |dialog| phase: $element_state->{phase}";
1684     }
1685     },
1686     check_child_text => sub {
1687     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1688     if ($has_significant) {
1689     $self->{onerror}->(node => $child_node, type => 'character not allowed');
1690 wakaba 1.1 }
1691 wakaba 1.40 },
1692     check_end => sub {
1693     my ($self, $item, $element_state) = @_;
1694     if ($element_state->{phase} eq 'before dd') {
1695     $self->{onerror}->(node => $item->{node},
1696     type => 'child element missing:dd');
1697 wakaba 1.1 }
1698 wakaba 1.40
1699     $HTMLChecker{check_end}->(@_);
1700 wakaba 1.1 },
1701     };
1702    
1703     $Element->{$HTML_NS}->{pre} = {
1704 wakaba 1.40 %HTMLPhrasingContentChecker,
1705 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1706     check_attrs => $GetHTMLAttrsChecker->({}, {
1707     %HTMLAttrStatus,
1708     %HTMLM12NCommonAttrStatus,
1709 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
1710 wakaba 1.49 width => FEATURE_M12N10_REC_DEPRECATED,
1711     }),
1712 wakaba 1.1 };
1713    
1714     $Element->{$HTML_NS}->{ol} = { ## Checker entry for the HTML |ol| element; the |ul| entry is built from a copy of this hash.
1715 wakaba 1.40 %HTMLChecker,
1716 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1717 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({ ## First hash: element-specific attribute value checkers.
1718 wakaba 1.1 start => $HTMLIntegerAttrChecker,
1719 wakaba 1.53 reversed => $GetHTMLBooleanAttrChecker->('reversed'),
1720 wakaba 1.49 }, { ## Second hash: status flag for each attribute name.
1721     %HTMLAttrStatus,
1722     %HTMLM12NCommonAttrStatus,
1723     compact => FEATURE_M12N10_REC_DEPRECATED,
1724 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
1725 wakaba 1.53 reversed => FEATURE_HTML5_DEFAULT,
1726 wakaba 1.54 #start => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC_DEPRECATED,
1727     start => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1728 wakaba 1.49 type => FEATURE_M12N10_REC_DEPRECATED,
1729 wakaba 1.1 }),
1730 wakaba 1.40 check_child_element => sub { ## Classifies one child element; the tests below are tried in order and the first match wins.
1731     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1732     $child_is_transparent, $element_state) = @_;
1733     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) { ## Excluded in the current context: always an error, reported at must_level.
1734     $self->{onerror}->(node => $child_el,
1735     type => 'element not allowed:minus',
1736     level => $self->{must_level});
1737     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) { ## Listed in plus_elements: accepted without further checks.
1738     #
1739     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'li') { ## An HTML |li| is the only element accepted by name.
1740     #
1741     } else {
1742     $self->{onerror}->(node => $child_el, type => 'element not allowed');
1743 wakaba 1.1 }
1744 wakaba 1.40 },
1745     check_child_text => sub { ## Reports text children only when $has_significant is true.
1746     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1747     if ($has_significant) {
1748     $self->{onerror}->(node => $child_node, type => 'character not allowed');
1749 wakaba 1.1 }
1750     },
1751     };
1752    
1753     $Element->{$HTML_NS}->{ul} = {
1754 wakaba 1.40 %{$Element->{$HTML_NS}->{ol}},
1755 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1756     check_attrs => $GetHTMLAttrsChecker->({}, {
1757     %HTMLAttrStatus,
1758     %HTMLM12NCommonAttrStatus,
1759     compact => FEATURE_M12N10_REC_DEPRECATED,
1760 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
1761 wakaba 1.49 type => FEATURE_M12N10_REC_DEPRECATED,
1762     }),
1763 wakaba 1.1 };
1764    
1765     $Element->{$HTML_NS}->{li} = { ## Checker entry for the HTML |li| element; its content model depends on the in_menu flag.
1766 wakaba 1.40 %HTMLProseContentChecker,
1767 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1768 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
1769 wakaba 1.49 value => sub { ## Reports |value| as unsupported when the parent element is not an HTML |ol|, then always validates it as an integer.
1770 wakaba 1.1 my ($self, $attr) = @_;
1771     my $parent = $attr->owner_element->manakai_parent_element;
1772     if (defined $parent) { ## With no parent element the parent test is skipped.
1773     my $parent_ns = $parent->namespace_uri;
1774     $parent_ns = '' unless defined $parent_ns; ## Map an undefined namespace to '' so the |eq| below never compares undef.
1775     my $parent_ln = $parent->manakai_local_name;
1776     unless ($parent_ns eq $HTML_NS and $parent_ln eq 'ol') {
1777     $self->{onerror}->(node => $attr, level => 'unsupported',
1778     type => 'attribute');
1779     }
1780     }
1781     $HTMLIntegerAttrChecker->($self, $attr); ## Runs regardless of the outcome of the parent test.
1782 wakaba 1.49 }, ## TODO: test
1783     }, {
1784     %HTMLAttrStatus,
1785     %HTMLM12NCommonAttrStatus,
1786 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
1787 wakaba 1.49 type => FEATURE_M12N10_REC_DEPRECATED,
1788 wakaba 1.55 #value => FEATURE_HTML5_DEFAULT | FEATURE_XHTMLBASIC11_CR |
1789     # FEATURE_M12N10_REC_DEPRECATED,
1790     value => FEATURE_HTML5_DEFAULT | FEATURE_XHTMLBASIC11_CR |
1791     FEATURE_M12N10_REC,
1792 wakaba 1.1 }),
1793 wakaba 1.40 check_child_element => sub { ## Delegates to the phrasing-content checker while the in_menu flag is set, otherwise to the prose-content checker.
1794     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1795     $child_is_transparent, $element_state) = @_;
1796     if ($self->{flag}->{in_menu}) {
1797     $HTMLPhrasingContentChecker{check_child_element}->(@_); ## Passes the original argument list through unchanged.
1798     } else {
1799     $HTMLProseContentChecker{check_child_element}->(@_);
1800     }
1801     },
1802     check_child_text => sub { ## Same in_menu dispatch as check_child_element, applied to text children.
1803     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1804     if ($self->{flag}->{in_menu}) {
1805     $HTMLPhrasingContentChecker{check_child_text}->(@_);
1806 wakaba 1.1 } else {
1807 wakaba 1.40 $HTMLProseContentChecker{check_child_text}->(@_);
1808 wakaba 1.1 }
1809     },
1810     };
1811    
1812     $Element->{$HTML_NS}->{dl} = {
1813 wakaba 1.40 %HTMLChecker,
1814 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1815     check_attrs => $GetHTMLAttrsChecker->({}, {
1816     %HTMLAttrStatus,
1817     %HTMLM12NCommonAttrStatus,
1818     compact => FEATURE_M12N10_REC_DEPRECATED,
1819 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
1820 wakaba 1.49 type => FEATURE_M12N10_REC_DEPRECATED,
1821     }),
1822 wakaba 1.40 check_start => sub {
1823     my ($self, $item, $element_state) = @_;
1824     $element_state->{phase} = 'before dt';
1825     },
1826     check_child_element => sub {
1827     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1828     $child_is_transparent, $element_state) = @_;
1829     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
1830     $self->{onerror}->(node => $child_el,
1831     type => 'element not allowed:minus',
1832     level => $self->{must_level});
1833     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1834     #
1835     } elsif ($element_state->{phase} eq 'in dds') {
1836     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
1837     #$element_state->{phase} = 'in dds';
1838     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
1839     $element_state->{phase} = 'in dts';
1840     } else {
1841     $self->{onerror}->(node => $child_el, type => 'element not allowed');
1842     }
1843     } elsif ($element_state->{phase} eq 'in dts') {
1844     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
1845     #$element_state->{phase} = 'in dts';
1846     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
1847     $element_state->{phase} = 'in dds';
1848     } else {
1849     $self->{onerror}->(node => $child_el, type => 'element not allowed');
1850     }
1851     } elsif ($element_state->{phase} eq 'before dt') {
1852     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
1853     $element_state->{phase} = 'in dts';
1854     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
1855     $self->{onerror}
1856     ->(node => $child_el, type => 'ps element missing:dt');
1857     $element_state->{phase} = 'in dds';
1858     } else {
1859     $self->{onerror}->(node => $child_el, type => 'element not allowed');
1860 wakaba 1.1 }
1861 wakaba 1.40 } else {
1862     die "check_child_element: Bad |dl| phase: $element_state->{phase}";
1863 wakaba 1.1 }
1864 wakaba 1.40 },
1865     check_child_text => sub {
1866     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1867     if ($has_significant) {
1868     $self->{onerror}->(node => $child_node, type => 'character not allowed');
1869     }
1870     },
1871     check_end => sub {
1872     my ($self, $item, $element_state) = @_;
1873     if ($element_state->{phase} eq 'in dts') {
1874     $self->{onerror}->(node => $item->{node},
1875     type => 'child element missing:dd');
1876 wakaba 1.1 }
1877    
1878 wakaba 1.40 $HTMLChecker{check_end}->(@_);
1879 wakaba 1.1 },
1880     };
1881    
1882     $Element->{$HTML_NS}->{dt} = {
1883 wakaba 1.40 %HTMLPhrasingContentChecker,
1884 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1885     check_attrs => $GetHTMLAttrsChecker->({}, {
1886     %HTMLAttrStatus,
1887     %HTMLM12NCommonAttrStatus,
1888 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
1889 wakaba 1.49 }),
1890 wakaba 1.1 };
1891    
1892     $Element->{$HTML_NS}->{dd} = {
1893 wakaba 1.40 %HTMLProseContentChecker,
1894 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1895     check_attrs => $GetHTMLAttrsChecker->({}, {
1896     %HTMLAttrStatus,
1897     %HTMLM12NCommonAttrStatus,
1898 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
1899 wakaba 1.49 }),
1900 wakaba 1.1 };
1901    
1902     $Element->{$HTML_NS}->{a} = {
1903 wakaba 1.40 %HTMLPhrasingContentChecker,
1904 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1905 wakaba 1.40 check_attrs => sub {
1906     my ($self, $item, $element_state) = @_;
1907 wakaba 1.1 my %attr;
1908 wakaba 1.40 for my $attr (@{$item->{node}->attributes}) {
1909 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
1910     $attr_ns = '' unless defined $attr_ns;
1911     my $attr_ln = $attr->manakai_local_name;
1912     my $checker;
1913     if ($attr_ns eq '') {
1914     $checker = {
1915     target => $HTMLTargetAttrChecker,
1916     href => $HTMLURIAttrChecker,
1917     ping => $HTMLSpaceURIsAttrChecker,
1918 wakaba 1.40 rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
1919 wakaba 1.1 media => $HTMLMQAttrChecker,
1920     hreflang => $HTMLLanguageTagAttrChecker,
1921     type => $HTMLIMTAttrChecker,
1922     }->{$attr_ln};
1923     if ($checker) {
1924     $attr{$attr_ln} = $attr;
1925     } else {
1926     $checker = $HTMLAttrChecker->{$attr_ln};
1927     }
1928     }
1929     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1930     || $AttrChecker->{$attr_ns}->{''};
1931     if ($checker) {
1932     $checker->($self, $attr) if ref $checker;
1933 wakaba 1.49 } elsif ($attr_ns eq '') {
1934 wakaba 1.54 #
1935 wakaba 1.1 } else {
1936     $self->{onerror}->(node => $attr, level => 'unsupported',
1937     type => 'attribute');
1938 wakaba 1.50 ## ISSUE: No conformance criteria for unknown attributes in the spec
1939 wakaba 1.1 }
1940 wakaba 1.49
1941     if ($attr_ns eq '') {
1942     $self->_attr_status_info ($attr, {
1943     %HTMLAttrStatus,
1944     %HTMLM12NCommonAttrStatus,
1945     accesskey => FEATURE_M12N10_REC,
1946     charset => FEATURE_M12N10_REC,
1947     coords => FEATURE_M12N10_REC,
1948 wakaba 1.50 href => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1949     hreflang => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1950     lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
1951     media => FEATURE_HTML5_DEFAULT,
1952 wakaba 1.49 name => FEATURE_M12N10_REC_DEPRECATED,
1953 wakaba 1.50 onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1954     onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1955     ping => FEATURE_HTML5_DEFAULT,
1956     rel => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1957 wakaba 1.49 rev => FEATURE_M12N10_REC,
1958     shape => FEATURE_M12N10_REC,
1959 wakaba 1.50 tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1960     target => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1961     type => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1962 wakaba 1.49 }->{$attr_ln});
1963     }
1964 wakaba 1.1 }
1965    
1966 wakaba 1.40 $element_state->{in_a_href_original} = $self->{flag}->{in_a_href};
1967 wakaba 1.4 if (defined $attr{href}) {
1968     $self->{has_hyperlink_element} = 1;
1969 wakaba 1.40 $self->{flag}->{in_a_href} = 1;
1970 wakaba 1.4 } else {
1971 wakaba 1.1 for (qw/target ping rel media hreflang type/) {
1972     if (defined $attr{$_}) {
1973     $self->{onerror}->(node => $attr{$_},
1974     type => 'attribute not allowed');
1975     }
1976     }
1977     }
1978     },
1979 wakaba 1.40 check_start => sub {
1980     my ($self, $item, $element_state) = @_;
1981     $self->_add_minus_elements ($element_state, $HTMLInteractiveContent);
1982     },
1983     check_end => sub {
1984     my ($self, $item, $element_state) = @_;
1985     $self->_remove_minus_elements ($element_state);
1986 wakaba 1.1
1987 wakaba 1.40 $HTMLPhrasingContentChecker{check_end}->(@_);
1988 wakaba 1.1 },
1989     };
1990    
1991     $Element->{$HTML_NS}->{q} = {
1992 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1993 wakaba 1.40 %HTMLPhrasingContentChecker,
1994     check_attrs => $GetHTMLAttrsChecker->({
1995 wakaba 1.50 cite => $HTMLURIAttrChecker,
1996     }, {
1997 wakaba 1.49 %HTMLAttrStatus,
1998     %HTMLM12NCommonAttrStatus,
1999 wakaba 1.50 cite => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2000     lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
2001 wakaba 1.1 }),
2002     };
2003    
2004     $Element->{$HTML_NS}->{cite} = {
2005 wakaba 1.40 %HTMLPhrasingContentChecker,
2006 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2007     check_attrs => $GetHTMLAttrsChecker->({}, {
2008     %HTMLAttrStatus,
2009     %HTMLM12NCommonAttrStatus,
2010 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
2011 wakaba 1.49 }),
2012 wakaba 1.1 };
2013    
2014     $Element->{$HTML_NS}->{em} = {
2015 wakaba 1.40 %HTMLPhrasingContentChecker,
2016 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2017     check_attrs => $GetHTMLAttrsChecker->({}, {
2018     %HTMLAttrStatus,
2019     %HTMLM12NCommonAttrStatus,
2020 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
2021 wakaba 1.49 }),
2022 wakaba 1.1 };
2023    
2024     $Element->{$HTML_NS}->{strong} = {
2025 wakaba 1.40 %HTMLPhrasingContentChecker,
2026 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2027     check_attrs => $GetHTMLAttrsChecker->({}, {
2028     %HTMLAttrStatus,
2029     %HTMLM12NCommonAttrStatus,
2030 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
2031 wakaba 1.49 }),
2032 wakaba 1.1 };
2033    
2034     $Element->{$HTML_NS}->{small} = {
2035 wakaba 1.40 %HTMLPhrasingContentChecker,
2036 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2037     check_attrs => $GetHTMLAttrsChecker->({}, {
2038     %HTMLAttrStatus,
2039     %HTMLM12NCommonAttrStatus,
2040 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
2041 wakaba 1.49 }),
2042 wakaba 1.1 };
2043    
2044 wakaba 1.51 $Element->{$HTML_NS}->{big} = {
2045     %HTMLPhrasingContentChecker,
2046     status => FEATURE_M12N10_REC,
2047     check_attrs => $GetHTMLAttrsChecker->({}, {
2048     %HTMLAttrStatus,
2049     %HTMLM12NCommonAttrStatus,
2050     lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
2051     }),
2052     };
2053    
2054 wakaba 1.38 $Element->{$HTML_NS}->{mark} = {
2055 wakaba 1.48 status => FEATURE_HTML5_DEFAULT,
2056 wakaba 1.40 %HTMLPhrasingContentChecker,
2057 wakaba 1.1 };
2058    
2059     $Element->{$HTML_NS}->{dfn} = {
2060 wakaba 1.40 %HTMLPhrasingContentChecker,
2061 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2062     check_attrs => $GetHTMLAttrsChecker->({}, {
2063     %HTMLAttrStatus,
2064     %HTMLM12NCommonAttrStatus,
2065 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
2066 wakaba 1.49 }),
2067 wakaba 1.40 check_start => sub {
2068     my ($self, $item, $element_state) = @_;
2069     $self->_add_minus_elements ($element_state, {$HTML_NS => {dfn => 1}});
2070 wakaba 1.1
2071 wakaba 1.40 my $node = $item->{node};
2072 wakaba 1.1 my $term = $node->get_attribute_ns (undef, 'title');
2073     unless (defined $term) {
2074     for my $child (@{$node->child_nodes}) {
2075     if ($child->node_type == 1) { # ELEMENT_NODE
2076     if (defined $term) {
2077     undef $term;
2078     last;
2079     } elsif ($child->manakai_local_name eq 'abbr') {
2080     my $nsuri = $child->namespace_uri;
2081     if (defined $nsuri and $nsuri eq $HTML_NS) {
2082     my $attr = $child->get_attribute_node_ns (undef, 'title');
2083     if ($attr) {
2084     $term = $attr->value;
2085     }
2086     }
2087     }
2088     } elsif ($child->node_type == 3 or $child->node_type == 4) {
2089     ## TEXT_NODE or CDATA_SECTION_NODE
2090     if ($child->data =~ /\A[\x09-\x0D\x20]+\z/) { # Inter-element whitespace
2091     next;
2092     }
2093     undef $term;
2094     last;
2095     }
2096     }
2097     unless (defined $term) {
2098     $term = $node->text_content;
2099     }
2100     }
2101     if ($self->{term}->{$term}) {
2102     $self->{onerror}->(node => $node, type => 'duplicate term');
2103     push @{$self->{term}->{$term}}, $node;
2104     } else {
2105     $self->{term}->{$term} = [$node];
2106     }
2107     ## ISSUE: The HTML5 algorithm does not work with |ruby| unless |dfn|
2108     ## has |title|.
2109 wakaba 1.40 },
2110     check_end => sub {
2111     my ($self, $item, $element_state) = @_;
2112     $self->_remove_minus_elements ($element_state);
2113 wakaba 1.1
2114 wakaba 1.40 $HTMLPhrasingContentChecker{check_end}->(@_);
2115 wakaba 1.1 },
2116     };
2117    
2118     $Element->{$HTML_NS}->{abbr} = {
2119 wakaba 1.40 %HTMLPhrasingContentChecker,
2120 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2121     check_attrs => $GetHTMLAttrsChecker->({}, {
2122     %HTMLAttrStatus,
2123     %HTMLM12NCommonAttrStatus,
2124 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
2125 wakaba 1.49 }),
2126     };
2127    
2128     $Element->{$HTML_NS}->{acronym} = {
2129     %HTMLPhrasingContentChecker,
2130     status => FEATURE_M12N10_REC,
2131     check_attrs => $GetHTMLAttrsChecker->({}, {
2132     %HTMLAttrStatus,
2133     %HTMLM12NCommonAttrStatus,
2134 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
2135 wakaba 1.49 }),
2136 wakaba 1.1 };
2137    
2138     $Element->{$HTML_NS}->{time} = {
2139 wakaba 1.48 status => FEATURE_HTML5_DEFAULT,
2140 wakaba 1.40 %HTMLPhrasingContentChecker,
2141     check_attrs => $GetHTMLAttrsChecker->({
2142 wakaba 1.1 datetime => sub { 1 }, # no-op here; the value is validated in |check_end| below
2143 wakaba 1.49 }, {
2144     %HTMLAttrStatus,
2145     %HTMLM12NCommonAttrStatus,
2146 wakaba 1.50 datetime => FEATURE_HTML5_DEFAULT,
2147 wakaba 1.1 }),
2148     ## TODO: Write tests
2149 wakaba 1.40 check_end => sub {
2150     my ($self, $item, $element_state) = @_;
2151 wakaba 1.1
2152 wakaba 1.40 my $attr = $item->{node}->get_attribute_node_ns (undef, 'datetime');
2153 wakaba 1.1 my $input;
2154     my $reg_sp;
2155     my $input_node;
2156     if ($attr) {
2157     $input = $attr->value;
2158     $reg_sp = qr/[\x09-\x0D\x20]*/;
2159     $input_node = $attr;
2160     } else {
2161 wakaba 1.40 $input = $item->{node}->text_content;
2162 wakaba 1.1 $reg_sp = qr/\p{Zs}*/;
2163 wakaba 1.40 $input_node = $item->{node};
2164 wakaba 1.1
2165     ## ISSUE: What is the definition of "successfully extracts a date
2166     ## or time"? If the algorithm says the string is invalid but still
2167     ## returns some date or time, does that count as "successfully"?
2168     }
2169    
2170     my $hour;
2171     my $minute;
2172     my $second;
2173     if ($input =~ /
2174     \A
2175     [\x09-\x0D\x20]*
2176     ([0-9]+) # 1
2177     (?>
2178     -([0-9]+) # 2
2179     -([0-9]+) # 3
2180     [\x09-\x0D\x20]*
2181     (?>
2182     T
2183     [\x09-\x0D\x20]*
2184     )?
2185     ([0-9]+) # 4
2186     :([0-9]+) # 5
2187     (?>
2188     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6
2189     )?
2190     [\x09-\x0D\x20]*
2191     (?>
2192     Z
2193     [\x09-\x0D\x20]*
2194     |
2195     [+-]([0-9]+):([0-9]+) # 7, 8
2196     [\x09-\x0D\x20]*
2197     )?
2198     \z
2199     |
2200     :([0-9]+) # 9
2201     (?>
2202     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10
2203     )?
2204     [\x09-\x0D\x20]*\z
2205     )
2206     /x) {
2207     if (defined $2) { ## YYYY-MM-DD T? hh:mm
2208     if (length $1 != 4 or length $2 != 2 or length $3 != 2 or
2209     length $4 != 2 or length $5 != 2) {
2210     $self->{onerror}->(node => $input_node,
2211     type => 'dateortime:syntax error');
2212     }
2213    
2214     if (1 <= $2 and $2 <= 12) {
2215     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
2216     if $3 < 1 or
2217     $3 > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$2];
2218     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
2219     if $2 == 2 and $3 == 29 and
2220     not ($1 % 400 == 0 or ($1 % 4 == 0 and $1 % 100 != 0));
2221     } else {
2222     $self->{onerror}->(node => $input_node,
2223     type => 'datetime:bad month');
2224     }
2225    
2226     ($hour, $minute, $second) = ($4, $5, $6);
2227    
2228     if (defined $7) { ## [+-]hh:mm
2229     if (length $7 != 2 or length $8 != 2) {
2230     $self->{onerror}->(node => $input_node,
2231     type => 'dateortime:syntax error');
2232     }
2233    
2234     $self->{onerror}->(node => $input_node,
2235     type => 'datetime:bad timezone hour')
2236     if $7 > 23;
2237     $self->{onerror}->(node => $input_node,
2238     type => 'datetime:bad timezone minute')
2239     if $8 > 59;
2240     }
2241     } else { ## hh:mm
2242     if (length $1 != 2 or length $9 != 2) {
2243     $self->{onerror}->(node => $input_node,
2244     type => qq'dateortime:syntax error');
2245     }
2246    
2247     ($hour, $minute, $second) = ($1, $9, $10);
2248     }
2249    
2250     $self->{onerror}->(node => $input_node, type => 'datetime:bad hour')
2251     if $hour > 23;
2252     $self->{onerror}->(node => $input_node, type => 'datetime:bad minute')
2253     if $minute > 59;
2254    
2255     if (defined $second) { ## s
2256     ## NOTE: The integer part of the seconds does not have to be two digits long.
2257    
2258     if (substr ($second, 0, 1) eq '.') {
2259     $self->{onerror}->(node => $input_node,
2260     type => 'dateortime:syntax error');
2261     }
2262    
2263     $self->{onerror}->(node => $input_node, type => 'datetime:bad second')
2264     if $second >= 60;
2265     }
2266     } else {
2267     $self->{onerror}->(node => $input_node,
2268     type => 'dateortime:syntax error');
2269     }
2270    
2271 wakaba 1.40 $HTMLPhrasingContentChecker{check_end}->(@_);
2272 wakaba 1.1 },
2273     };
2274    
2275     $Element->{$HTML_NS}->{meter} = { ## TODO: "The recommended way of giving the value is to include it as contents of the element"
2276 wakaba 1.48 status => FEATURE_HTML5_DEFAULT,
2277 wakaba 1.40 %HTMLPhrasingContentChecker,
2278     check_attrs => $GetHTMLAttrsChecker->({
2279 wakaba 1.1 value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2280     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2281     low => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2282     high => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2283     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2284     optimum => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2285 wakaba 1.50 }, {
2286     %HTMLAttrStatus,
2287     high => FEATURE_HTML5_DEFAULT,
2288     low => FEATURE_HTML5_DEFAULT,
2289     max => FEATURE_HTML5_DEFAULT,
2290     min => FEATURE_HTML5_DEFAULT,
2291     optimum => FEATURE_HTML5_DEFAULT,
2292     value => FEATURE_HTML5_DEFAULT,
2293 wakaba 1.1 }),
2294     };
2295    
2296     $Element->{$HTML_NS}->{progress} = { ## TODO: recommended to use content
2297 wakaba 1.48 status => FEATURE_HTML5_DEFAULT,
2298 wakaba 1.40 %HTMLPhrasingContentChecker,
2299     check_attrs => $GetHTMLAttrsChecker->({
2300 wakaba 1.1 value => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift >= 0 }),
2301     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift > 0 }),
2302 wakaba 1.50 }, {
2303     %HTMLAttrStatus,
2304     max => FEATURE_HTML5_DEFAULT,
2305     value => FEATURE_HTML5_DEFAULT,
2306 wakaba 1.1 }),
2307     };
2308    
## Phrasing elements sharing one definition shape: |code|, |var|, |samp|,
## |kbd| and |sub|.  Every element still receives its own definition hash
## and its own attribute checker instance.
for my $local_name (qw/code var samp kbd sub/) {
  $Element->{$HTML_NS}->{$local_name} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    }),
  };
}

## |sup| is an alias: it points at the very same definition hash as |sub|.
$Element->{$HTML_NS}->{sup} = $Element->{$HTML_NS}->{sub};
2360 wakaba 1.1
## |span|: phrasing content.  No element-specific attribute checkers; the
## status table additionally lists the HTML4 reserved data-binding
## attributes.
$Element->{$HTML_NS}->{span} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    datafld => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datasrc => FEATURE_HTML4_REC_RESERVED,
  }),
};
2373    
## |i|: phrasing content with the common attribute set only.
$Element->{$HTML_NS}->{i} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  }),
};

## |b| shares the definition hash of |i|.
$Element->{$HTML_NS}->{b} = $Element->{$HTML_NS}->{i};

## |tt| shares the definition hash of |big|.
$Element->{$HTML_NS}->{tt} = $Element->{$HTML_NS}->{big};
2387    
## |s|: phrasing content whose element status is M12N-deprecated only.
$Element->{$HTML_NS}->{s} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  }),
};

## |strike| and |u| both share the definition hash of |s|.
$Element->{$HTML_NS}->{strike} = $Element->{$HTML_NS}->{s};

$Element->{$HTML_NS}->{u} = $Element->{$HTML_NS}->{s};
2401    
## |bdo|: phrasing content; the |dir| attribute is mandatory.
$Element->{$HTML_NS}->{bdo} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Run the generic attribute checks first.
    my $check_listed_attrs = $GetHTMLAttrsChecker->({}, {
      %HTMLAttrStatus,
      class => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      id => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      style => FEATURE_XHTML10_REC,
      title => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    });
    $check_listed_attrs->($self, $item, $element_state);

    ## Then report a missing |dir| attribute.
    if (not $item->{node}->has_attribute_ns (undef, 'dir')) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing:dir');
    }
  },
  ## ISSUE: The spec does not directly say that |dir| is a enumerated attr.
};
2423    
2424 wakaba 1.29 =pod
2425    
2426     ## TODO:
2427    
2428     +
2429     + <p>Partly because of the confusion described above, authors are
2430     + strongly recommended to always mark up all paragraphs with the
2431     + <code>p</code> element, and to not have any <code>ins</code> or
2432     + <code>del</code> elements that cross across any <span
2433     + title="paragraph">implied paragraphs</span>.</p>
2434     +
2435     (An informative note)
2436    
2437     <p><code>ins</code> elements should not cross <span
2438     + title="paragraph">implied paragraph</span> boundaries.</p>
2439     (normative)
2440    
2441     + <p><code>del</code> elements should not cross <span
2442     + title="paragraph">implied paragraph</span> boundaries.</p>
2443     (normative)
2444    
2445     =cut
2446    
## |ins|: transparent content model; |cite| is checked as a URI and
## |datetime| with the datetime checker.
$Element->{$HTML_NS}->{ins} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    datetime => $HTMLDatetimeAttrChecker,
    cite => $HTMLURIAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    cite => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    datetime => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  }),
};
2461    
## |del|: transparent content model with the same attributes as |ins|, plus
## an end-of-element check for significant content.
$Element->{$HTML_NS}->{del} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    datetime => $HTMLDatetimeAttrChecker,
    cite => $HTMLURIAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    cite => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    datetime => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  }),
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## NOTE: Significantness flag does not propagate.
    ## Nothing is reported when the element has significant content or
    ## when the item is flagged transparent.
    unless ($element_state->{has_significant} or $item->{transparent}) {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{should_level},
                         type => 'no significant content');
    }
  },
};
2488    
## |figure|: prose content with at most one |legend|, which has to be
## either the first or the last piece of content.
##
## State kept in |$element_state|:
##   has_legend_at_first - a |legend| appeared before any other content
##   has_legend          - the |legend| node seen after other content
##   has_non_legend      - non-|legend| content seen since the last |legend|
$Element->{$HTML_NS}->{figure} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_FD,
  ## NOTE: legend, Prose | Prose, legend?
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    my $is_legend = ($child_nsuri eq $HTML_NS and $child_ln eq 'legend');

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly excluded element; it still counts as non-legend content.
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      $element_state->{has_non_legend} = 1;
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly included element; nothing to check.
    } elsif ($is_legend) {
      if ($element_state->{has_legend_at_first}) {
        ## A leading |legend| exists already; this one is in error.
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:figure legend',
                           level => $self->{must_level});
      } elsif ($element_state->{has_legend}) {
        ## The earlier trailing candidate is in error; this |legend|
        ## becomes the new candidate.
        $self->{onerror}->(node => $element_state->{has_legend},
                           type => 'element not allowed:figure legend',
                           level => $self->{must_level});
        $element_state->{has_legend} = $child_el;
      } elsif ($element_state->{has_non_legend}) {
        ## First |legend|, after other content: trailing candidate.
        $element_state->{has_legend} = $child_el;
      } else {
        ## First |legend|, before any other content.
        $element_state->{has_legend_at_first} = 1;
      }
      delete $element_state->{has_non_legend};
    } else {
      $HTMLProseContentChecker{check_child_element}->(@_);
      $element_state->{has_non_legend} = 1 unless $child_is_transparent;
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $element_state->{has_non_legend} = 1 if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## A trailing |legend| candidate followed by more content is an error,
    ## unless a leading |legend| was accepted.
    if (not $element_state->{has_legend_at_first}
        and $element_state->{has_legend}
        and $element_state->{has_non_legend}) {
      $self->{onerror}->(node => $element_state->{has_legend},
                         type => 'element not allowed:figure legend',
                         level => $self->{must_level});
    }

    $HTMLProseContentChecker{check_end}->(@_);
    ## ISSUE: |<figure><legend>aa</legend></figure>| should be an error?
  },
};
2547 wakaba 1.8 ## TODO: Test for <nest/> in <figure/>
2548 wakaba 1.1
## |img|: empty element.  |src| is required; a missing |alt| is reported at
## the "should" level.  |ismap| is only allowed while the |in_a_href| flag
## is set.
$Element->{$HTML_NS}->{img} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    ## |$element_state| is forwarded to the generated checker, matching the
    ## invocation used by |bdo|, |source| and |map|.
    $GetHTMLAttrsChecker->({
      alt => sub { }, ## NOTE: No syntactical requirement
      src => $HTMLURIAttrChecker,
      usemap => $HTMLUsemapAttrChecker,
      ismap => sub {
        my ($self, $attr, $parent_item) = @_;
        if (not $self->{flag}->{in_a_href}) {
          $self->{onerror}->(node => $attr,
                             type => 'attribute not allowed:ismap');
        }
        $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_item);
      },
      ## TODO: height
      ## TODO: width
    }, {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      alt => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      border => FEATURE_M12N10_REC_DEPRECATED,
      height => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      hspace => FEATURE_M12N10_REC_DEPRECATED,
      ismap => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      longdesc => FEATURE_M12N10_REC,
      name => FEATURE_M12N10_REC_DEPRECATED,
      src => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      usemap => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      vspace => FEATURE_M12N10_REC_DEPRECATED,
      width => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    })->($self, $item, $element_state);

    ## Missing |alt| is reported at the "should" level only.
    unless ($item->{node}->has_attribute_ns (undef, 'alt')) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing:alt',
                         level => $self->{should_level});
    }
    unless ($item->{node}->has_attribute_ns (undef, 'src')) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing:src');
    }
  },
};
2596    
## |iframe|: text content model; only |src| has an element-specific
## checker (URI).
$Element->{$HTML_NS}->{iframe} = {
  %HTMLTextChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  ## NOTE: Not part of M12N10 Strict
  check_attrs => $GetHTMLAttrsChecker->({
    src => $HTMLURIAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    ## Attributes also listed for HTML5.
    class => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    src => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    title => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    ## Attributes listed for the older specifications only.
    align => FEATURE_XHTML10_REC,
    frameborder => FEATURE_M12N10_REC,
    height => FEATURE_M12N10_REC,
    longdesc => FEATURE_M12N10_REC,
    marginheight => FEATURE_M12N10_REC,
    marginwidth => FEATURE_M12N10_REC,
    name => FEATURE_M12N10_REC_DEPRECATED,
    scrolling => FEATURE_M12N10_REC,
    width => FEATURE_M12N10_REC,
  }),
};
2621    
## |embed|: empty element.  |src| is required, and any other
## null-namespace attribute is accepted.
$Element->{$HTML_NS}->{embed} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Status table for null-namespace attributes.  It does not depend on
    ## the attribute being examined, so it is built once per element
    ## instead of once per attribute.
    my $attr_status = {
      %HTMLAttrStatus,
      height => FEATURE_HTML5_DEFAULT,
      src => FEATURE_HTML5_DEFAULT,
      type => FEATURE_HTML5_DEFAULT,
      width => FEATURE_HTML5_DEFAULT,
    };

    my $has_src;
    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;

      my $checker;
      if ($attr_ns eq '') {
        if ($attr_ln eq 'src') {
          $checker = $HTMLURIAttrChecker;
          $has_src = 1;
        } elsif ($attr_ln eq 'type') {
          $checker = $HTMLIMTAttrChecker;
        } else {
          ## TODO: height
          ## TODO: width
          $checker = $HTMLAttrChecker->{$attr_ln}
              || sub { }; ## NOTE: Any local attribute is ok.
        }
      }
      $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
          || $AttrChecker->{$attr_ns}->{''};

      if ($checker) {
        $checker->($self, $attr);
      } elsif ($attr_ns eq '') {
        ## Nothing to report for a null-namespace attribute.
      } else {
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute');
        ## ISSUE: No conformance criteria for global attributes in the spec
      }

      ## Status is reported only for attributes present in the table;
      ## unlisted local attributes are silently accepted.
      if ($attr_ns eq '') {
        my $status = $attr_status->{$attr_ln};
        $self->_attr_status_info ($attr, $status) if $status;
      }
    }

    unless ($has_src) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing:src');
    }
  },
};
2676    
2677 wakaba 1.49 ## TODO:
2678     ## {applet} FEATURE_M12N10_REC_DEPRECATED
2679     ## class, id, title, alt, archive, code, codebase, height, object, width name style,hspace,vspace(xhtml10)
2680    
## |object|: zero or more |param| children followed by transparent (prose)
## content.  At least one of |data| and |type| has to be specified.
##
## State kept in |$element_state|:
##   has_non_param - content other than |param| has been seen, so any
##                   later |param| child is misplaced
$Element->{$HTML_NS}->{object} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    ## |$element_state| is forwarded to the generated checker, matching the
    ## invocation used by |bdo|, |source| and |map|.
    $GetHTMLAttrsChecker->({
      data => $HTMLURIAttrChecker,
      type => $HTMLIMTAttrChecker,
      usemap => $HTMLUsemapAttrChecker,
      ## TODO: width
      ## TODO: height
    }, {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align => FEATURE_XHTML10_REC,
      archive => FEATURE_M12N10_REC,
      border => FEATURE_XHTML10_REC,
      classid => FEATURE_M12N10_REC,
      codebase => FEATURE_M12N10_REC,
      codetype => FEATURE_M12N10_REC,
      data => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      datafld => FEATURE_HTML4_REC_RESERVED,
      dataformatas => FEATURE_HTML4_REC_RESERVED,
      datasrc => FEATURE_HTML4_REC_RESERVED,
      declare => FEATURE_M12N10_REC,
      height => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      hspace => FEATURE_XHTML10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      name => FEATURE_M12N10_REC,
      standby => FEATURE_M12N10_REC,
      tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      type => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      usemap => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      vspace => FEATURE_XHTML10_REC,
      width => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    })->($self, $item, $element_state);

    ## Error only when both |data| and |type| are absent.
    unless ($item->{node}->has_attribute_ns (undef, 'data')
            or $item->{node}->has_attribute_ns (undef, 'type')) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing:data|type');
    }
  },
  ## NOTE: param*, transparent (Prose)
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      ## An excluded element still counts as non-|param| content.  This
      ## used to set |has_non_legend|, a key that belongs to the |figure|
      ## checker and is never read for |object|.
      $element_state->{has_non_param} = 1;
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly included element; nothing to check.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'param') {
      ## |param| is only allowed before any other content.
      if ($element_state->{has_non_param}) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:prose',
                           level => $self->{must_level});
      }
    } else {
      $HTMLProseContentChecker{check_child_element}->(@_);
      $element_state->{has_non_param} = 1;
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $element_state->{has_non_param} = 1;
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if ($element_state->{has_significant}) {
      ## Propagate significance to the real parent's state.
      $item->{real_parent_state}->{has_significant} = 1;
    } elsif ($item->{node}->manakai_parent_element) {
      ## NOTE: Transparent.
    } else {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{should_level},
                         type => 'no significant content');
    }
  },
  ## TODO: Tests for <nest/> in <object/>
};
2766 wakaba 1.41 ## ISSUE: Is |<menu><object data><li>aa</li></object></menu>| conforming?
2767     ## What about |<section><object data><style scoped></style>x</object></section>|?
2768     ## |<section><ins></ins><object data><style scoped></style>x</object></section>|?
2769 wakaba 1.1
## |param|: empty element; both |name| and |value| are required.  Their
## values are not checked syntactically (no-op checkers).
$Element->{$HTML_NS}->{param} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    ## |$element_state| is forwarded to the generated checker, matching the
    ## invocation used by |bdo|, |source| and |map|.
    $GetHTMLAttrsChecker->({
      name => sub { },
      value => sub { },
    }, {
      %HTMLAttrStatus,
      id => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      type => FEATURE_M12N10_REC,
      value => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      valuetype => FEATURE_M12N10_REC,
    })->($self, $item, $element_state);

    ## Report each missing required attribute separately.
    unless ($item->{node}->has_attribute_ns (undef, 'name')) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing:name');
    }
    unless ($item->{node}->has_attribute_ns (undef, 'value')) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing:value');
    }
  },
};
2796    
## |video|: optional leading |source| children followed by transparent
## (prose) content.  |source| children are allowed only when the element
## has no |src| attribute; in that case at least one is required.
##
## State kept in |$element_state|:
##   allow_source - true while a |source| child is still acceptable
##   has_source   - 1 once a |source| child was seen, -1 when one is
##                  required but has not been seen yet
$Element->{$HTML_NS}->{video} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->({
    src => $HTMLURIAttrChecker,
    ## TODO: start, loopstart, loopend, end
    ## ISSUE: they MUST be "value time offset"s.  Value?
    ## ISSUE: playcount has no conformance criteria
    autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
    controls => $GetHTMLBooleanAttrChecker->('controls'),
    poster => $HTMLURIAttrChecker, ## TODO: not for audio!
    ## TODO: width, height
  }, {
    %HTMLAttrStatus,
    autoplay => FEATURE_HTML5_LC,
    controls => FEATURE_HTML5_LC,
    end => FEATURE_HTML5_LC,
    height => FEATURE_HTML5_LC,
    loopend => FEATURE_HTML5_LC,
    loopstart => FEATURE_HTML5_LC,
    playcount => FEATURE_HTML5_LC,
    poster => FEATURE_HTML5_LC,
    src => FEATURE_HTML5_LC,
    start => FEATURE_HTML5_LC,
    width => FEATURE_HTML5_LC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    ## |source| children are acceptable only without a |src| attribute.
    $element_state->{allow_source}
        = not $item->{node}->has_attribute_ns (undef, 'src');
    ## Unless already true, becomes -1 when |source| is allowed.
    $element_state->{has_source} ||= $element_state->{allow_source} * -1;
      ## NOTE: It might be set true by |check_element|.
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    my $is_source = ($child_nsuri eq $HTML_NS and $child_ln eq 'source');

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      delete $element_state->{allow_source};
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly included element; nothing to check.
    } elsif ($is_source) {
      if (not $element_state->{allow_source}) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:prose',
                           level => $self->{must_level});
      }
      $element_state->{has_source} = 1;
    } else {
      ## Any other child ends the run of acceptable |source| children.
      delete $element_state->{allow_source};
      $HTMLProseContentChecker{check_child_element}->(@_);
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    delete $element_state->{allow_source} if $has_significant;
    $HTMLProseContentChecker{check_child_text}->(@_);
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## -1 means a |source| child was required but never seen.
    if ($element_state->{has_source} == -1) {
      $self->{onerror}->(node => $item->{node},
                         type => 'element missing:source',
                         level => $self->{must_level});
    }

    ## The significant-content handling is delegated to |object|.
    $Element->{$HTML_NS}->{object}->{check_end}->(@_);
  },
};
2870    
## |audio|: starts from a copy of the |video| definition and replaces its
## attribute checker with one that has no |poster|, |width| or |height|.
$Element->{$HTML_NS}->{audio} = {
  %{$Element->{$HTML_NS}->{video}},
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->({
    src => $HTMLURIAttrChecker,
    ## TODO: start, loopstart, loopend, end
    ## ISSUE: they MUST be "value time offset"s.  Value?
    ## ISSUE: playcount has no conformance criteria
    autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
    controls => $GetHTMLBooleanAttrChecker->('controls'),
  }, {
    %HTMLAttrStatus,
    autoplay => FEATURE_HTML5_LC,
    controls => FEATURE_HTML5_LC,
    end => FEATURE_HTML5_LC,
    loopend => FEATURE_HTML5_LC,
    loopstart => FEATURE_HTML5_LC,
    playcount => FEATURE_HTML5_LC,
    src => FEATURE_HTML5_LC,
    start => FEATURE_HTML5_LC,
  }),
};
2893    
## |source|: empty element; |src| is required.
$Element->{$HTML_NS}->{source} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    my $check_listed_attrs = $GetHTMLAttrsChecker->({
      src => $HTMLURIAttrChecker,
      type => $HTMLIMTAttrChecker,
      media => $HTMLMQAttrChecker,
    }, {
      %HTMLAttrStatus,
      media => FEATURE_HTML5_DEFAULT,
      src => FEATURE_HTML5_DEFAULT,
      type => FEATURE_HTML5_DEFAULT,
    });
    $check_listed_attrs->($self, $item, $element_state);

    if (not $item->{node}->has_attribute_ns (undef, 'src')) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing:src');
    }
  },
};
2915    
## |canvas|: transparent content model; |width| and |height| go through
## the non-negative integer checker with an always-true range predicate.
$Element->{$HTML_NS}->{canvas} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->({
    width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
    height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
  }, {
    %HTMLAttrStatus,
    width => FEATURE_HTML5_LC,
    height => FEATURE_HTML5_LC,
  }),
};
2928    
## |map|: prose content; the |id| attribute is required and is also
## registered in |$self->{map}|.
$Element->{$HTML_NS}->{map} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Set by the |id| checker below when an |id| attribute is seen.
    my $has_id;

    my $check_id = sub {
      ## NOTE: same as global |id=""|, with |$self->{map}| registration
      my ($self, $attr) = @_;
      my $value = $attr->value;
      if (length $value > 0) {
        if ($self->{id}->{$value}) {
          $self->{onerror}->(node => $attr, type => 'duplicate ID');
          push @{$self->{id}->{$value}}, $attr;
        } else {
          $self->{id}->{$value} = [$attr];
        }
      } else {
        ## NOTE: MUST contain at least one character
        $self->{onerror}->(node => $attr, type => 'empty attribute value');
      }
      if ($value =~ /[\x09-\x0D\x20]/) {
        $self->{onerror}->(node => $attr, type => 'space in ID');
      }
      ## The first |map| registered for a given ID wins.
      $self->{map}->{$value} ||= $attr;
      $has_id = 1;
    };

    $GetHTMLAttrsChecker->({
      id => $check_id,
    }, {
      %HTMLAttrStatus,
      class => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      id => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      name => FEATURE_M12N10_REC_DEPRECATED,
      onclick => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      ondblclick => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onmousedown => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onmouseup => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onmouseover => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onmousemove => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onmouseout => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onkeypress => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onkeydown => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onkeyup => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      title => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    })->($self, $item, $element_state);

    unless ($has_id) {
      $self->{onerror}->(node => $item->{node}, type => 'attribute missing:id');
    }
  },
};
2980    
## |area|: empty element.
##
## Checks performed by |check_attrs|:
##   1. each attribute individually (element-specific checker, then the
##      global HTML checker, then namespace-level checkers);
##   2. |alt| is required with |href|; without |href| the hyperlink-related
##      attributes and |alt| are not allowed;
##   3. the number and range of |coords| values against |shape|.
$Element->{$HTML_NS}->{area} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my %attr;   ## Element-specific attributes found, keyed by local name.
    my $coords; ## Parsed |coords| values; undef on a syntax error.

    ## Element-specific checkers.  Built once per element, before the
    ## attribute loop, rather than once per attribute.
    my $local_checker = {
      alt => sub { },
          ## NOTE: |alt| value has no conformance criteria.
      shape => $GetHTMLEnumeratedAttrChecker->({
        circ => -1, circle => 1,
        default => 1,
        poly => 1, polygon => -1,
        rect => 1, rectangle => -1,
      }),
      coords => sub {
        my ($self, $attr) = @_;
        my $value = $attr->value;
        if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
          $coords = [split /,/, $value];
        } else {
          $self->{onerror}->(node => $attr,
                             type => 'coords:syntax error');
        }
      },
      target => $HTMLTargetAttrChecker,
      href => $HTMLURIAttrChecker,
      ping => $HTMLSpaceURIsAttrChecker,
      rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
      media => $HTMLMQAttrChecker,
      hreflang => $HTMLLanguageTagAttrChecker,
      type => $HTMLIMTAttrChecker,
    };

    ## Status table for null-namespace attributes, also built only once.
    my $attr_status = {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      accesskey => FEATURE_M12N10_REC,
      alt => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      coords => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      href => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      hreflang => FEATURE_HTML5_DEFAULT,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      media => FEATURE_HTML5_DEFAULT,
      nohref => FEATURE_M12N10_REC,
      onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      ping => FEATURE_HTML5_DEFAULT,
      rel => FEATURE_HTML5_DEFAULT,
      shape => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      target => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      type => FEATURE_HTML5_DEFAULT,
    };

    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;

      my $checker;
      if ($attr_ns eq '') {
        $checker = $local_checker->{$attr_ln};
        if ($checker) {
          ## Remember element-specific attributes for the cross-attribute
          ## checks after the loop.
          $attr{$attr_ln} = $attr;
        } else {
          $checker = $HTMLAttrChecker->{$attr_ln};
        }
      }
      $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
          || $AttrChecker->{$attr_ns}->{''};

      if ($checker) {
        $checker->($self, $attr) if ref $checker;
      } elsif ($attr_ns eq '') {
        ## Nothing to report here for a null-namespace attribute.
      } else {
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute');
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }

      ## NOTE: Unlike |embed|, the status is reported even when the
      ## attribute has no entry in the table (the value is then undef).
      if ($attr_ns eq '') {
        $self->_attr_status_info ($attr, $attr_status->{$attr_ln});
      }
    }

    if (defined $attr{href}) {
      $self->{has_hyperlink_element} = 1;
      unless (defined $attr{alt}) {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:alt');
      }
    } else {
      for (qw/target ping rel media hreflang type alt/) {
        if (defined $attr{$_}) {
          $self->{onerror}->(node => $attr{$_},
                             type => 'attribute not allowed');
        }
      }
    }

    ## Normalize |shape| to its canonical keyword; a missing or unknown
    ## value is treated as a rectangle.
    my $shape = 'rectangle';
    if (defined $attr{shape}) {
      $shape = {
        circ => 'circle', circle => 'circle',
        default => 'default',
        poly => 'polygon', polygon => 'polygon',
        rect => 'rectangle', rectangle => 'rectangle',
      }->{lc $attr{shape}->value} || 'rectangle';
      ## TODO: ASCII lowercase?
    }

    if ($shape eq 'default') {
      ## The default shape takes no coordinates at all.
      if (defined $attr{coords}) {
        $self->{onerror}->(node => $attr{coords},
                           type => 'attribute not allowed');
      }
    } elsif (not defined $attr{coords}) {
      ## Every other shape requires |coords|.
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing:coords');
    } elsif (not defined $coords) {
      ## NOTE: A syntax error has been reported.
    } elsif ($shape eq 'circle') {
      ## x, y, radius; the radius must not be negative.
      if (@$coords == 3) {
        if ($coords->[2] < 0) {
          $self->{onerror}->(node => $attr{coords},
                             type => 'coords:out of range:2');
        }
      } else {
        $self->{onerror}->(node => $attr{coords},
                           type => 'coords:number:3:'.@$coords);
      }
    } elsif ($shape eq 'polygon') {
      ## At least three points, i.e. an even count of six or more values.
      if (@$coords >= 6) {
        unless (@$coords % 2 == 0) {
          $self->{onerror}->(node => $attr{coords},
                             type => 'coords:number:even:'.@$coords);
        }
      } else {
        $self->{onerror}->(node => $attr{coords},
                           type => 'coords:number:>=6:'.@$coords);
      }
    } elsif ($shape eq 'rectangle') {
      ## x1, y1, x2, y2 with x1 < x2 and y1 < y2.
      if (@$coords == 4) {
        unless ($coords->[0] < $coords->[2]) {
          $self->{onerror}->(node => $attr{coords},
                             type => 'coords:out of range:0');
        }
        unless ($coords->[1] < $coords->[3]) {
          $self->{onerror}->(node => $attr{coords},
                             type => 'coords:out of range:1');
        }
      } else {
        $self->{onerror}->(node => $attr{coords},
                           type => 'coords:number:4:'.@$coords);
      }
    }
  },
};
3158     ## TODO: only in map
3159    
3160     $Element->{$HTML_NS}->{table} = {
         ## Checker definition for the HTML |table| element.
         ##
         ## The entry starts from |%HTMLChecker| and overrides |status|,
         ## |check_attrs| and the four content callbacks.  Child elements
         ## are validated in document order by a small state machine whose
         ## current state is kept in |$element_state->{phase}|.
3161 wakaba 1.40 %HTMLChecker,
3162 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
         ## NOTE(review): |$GetHTMLAttrsChecker| is defined outside this
         ## section.  Judging from the call sites, the first hash maps
         ## attribute names to checker subs (none for |table|) and the
         ## second maps attribute names to feature status flags -- confirm.
3163     check_attrs => $GetHTMLAttrsChecker->({}, {
3164     %HTMLAttrStatus,
3165     %HTMLM12NCommonAttrStatus,
3166     align => FEATURE_M12N10_REC_DEPRECATED,
3167     bgcolor => FEATURE_M12N10_REC_DEPRECATED,
3168     border => FEATURE_M12N10_REC,
3169     cellpadding => FEATURE_M12N10_REC,
3170     cellspacing => FEATURE_M12N10_REC,
3171     datafld => FEATURE_HTML4_REC_RESERVED,
3172     dataformatas => FEATURE_HTML4_REC_RESERVED,
3173     datapagesize => FEATURE_M12N10_REC,
3174     datasrc => FEATURE_HTML4_REC_RESERVED,
3175     frame => FEATURE_M12N10_REC,
3176 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3177 wakaba 1.49 rules => FEATURE_M12N10_REC,
3178     summary => FEATURE_M12N10_REC,
3179     width => FEATURE_M12N10_REC,
3180     }),
         ## Initializes the state machine: nothing has been seen yet, so a
         ## |caption| is still acceptable.
3181 wakaba 1.40 check_start => sub {
3182     my ($self, $item, $element_state) = @_;
3183     $element_state->{phase} = 'before caption';
3184     },
         ## Validates one child element and advances the phase.
         ##
         ## Elements listed in |$self->{minus_elements}| are reported with
         ## 'element not allowed:minus'; elements listed in
         ## |$self->{plus_elements}| are accepted without changing the
         ## phase.  Otherwise the transitions are:
         ##
         ##   before caption: caption, colgroup -> in colgroup;
         ##                   thead -> after thead; tbody -> in tbodys;
         ##                   tr -> in trs; tfoot -> in tbodys (has_tfoot)
         ##   in colgroup:    same as above, except |caption| is an error
         ##   after thead:    tbody -> in tbodys; tr -> in trs;
         ##                   tfoot -> in tbodys (has_tfoot)
         ##   in tbodys:      tbody stays; tfoot -> after tfoot, but only
         ##                   if no |tfoot| has been seen yet
         ##   in trs:         tr stays; tfoot -> after tfoot, but only if
         ##                   no |tfoot| has been seen yet
         ##   after tfoot:    every child is an error
         ##
         ## Anything else is reported as 'element not allowed'.  An unknown
         ## phase value is a programming error and makes the sub |die|.
3185     check_child_element => sub {
3186     my ($self, $item, $child_el, $child_nsuri, $child_ln,
3187     $child_is_transparent, $element_state) = @_;
3188     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
3189     $self->{onerror}->(node => $child_el,
3190     type => 'element not allowed:minus',
3191     level => $self->{must_level});
3192     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
3193     #
3194     } elsif ($element_state->{phase} eq 'in tbodys') {
3195     if ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
3196     #$element_state->{phase} = 'in tbodys';
3197     } elsif (not $element_state->{has_tfoot} and
3198     $child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
3199     $element_state->{phase} = 'after tfoot';
3200     $element_state->{has_tfoot} = 1;
3201     } else {
3202     $self->{onerror}->(node => $child_el, type => 'element not allowed');
3203     }
3204     } elsif ($element_state->{phase} eq 'in trs') {
3205     if ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
3206     #$element_state->{phase} = 'in trs';
3207     } elsif (not $element_state->{has_tfoot} and
3208     $child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
3209     $element_state->{phase} = 'after tfoot';
3210     $element_state->{has_tfoot} = 1;
3211     } else {
3212     $self->{onerror}->(node => $child_el, type => 'element not allowed');
3213     }
3214     } elsif ($element_state->{phase} eq 'after thead') {
3215     if ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
3216     $element_state->{phase} = 'in tbodys';
3217     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
3218     $element_state->{phase} = 'in trs';
3219     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
         ## A |tfoot| placed before the row groups: continue in the
         ## 'in tbodys' phase and remember that a |tfoot| was seen.
3220     $element_state->{phase} = 'in tbodys';
3221     $element_state->{has_tfoot} = 1;
3222     } else {
3223     $self->{onerror}->(node => $child_el, type => 'element not allowed');
3224     }
3225     } elsif ($element_state->{phase} eq 'in colgroup') {
3226     if ($child_nsuri eq $HTML_NS and $child_ln eq 'colgroup') {
3227     $element_state->{phase} = 'in colgroup';
3228     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'thead') {
3229     $element_state->{phase} = 'after thead';
3230     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
3231     $element_state->{phase} = 'in tbodys';
3232     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
3233     $element_state->{phase} = 'in trs';
3234     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
3235     $element_state->{phase} = 'in tbodys';
3236     $element_state->{has_tfoot} = 1;
3237     } else {
3238     $self->{onerror}->(node => $child_el, type => 'element not allowed');
3239     }
3240     } elsif ($element_state->{phase} eq 'before caption') {
3241     if ($child_nsuri eq $HTML_NS and $child_ln eq 'caption') {
         ## After the |caption| the same children are allowed as after
         ## a |colgroup|, so the 'in colgroup' phase is reused.
3242     $element_state->{phase} = 'in colgroup';
3243     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'colgroup') {
3244     $element_state->{phase} = 'in colgroup';
3245     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'thead') {
3246     $element_state->{phase} = 'after thead';
3247     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
3248     $element_state->{phase} = 'in tbodys';
3249     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
3250     $element_state->{phase} = 'in trs';
3251     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
3252     $element_state->{phase} = 'in tbodys';
3253     $element_state->{has_tfoot} = 1;
3254     } else {
3255     $self->{onerror}->(node => $child_el, type => 'element not allowed');
3256     }
3257     } elsif ($element_state->{phase} eq 'after tfoot') {
3258     $self->{onerror}->(node => $child_el, type => 'element not allowed');
3259     } else {
3260     die "check_child_element: Bad |table| phase: $element_state->{phase}";
3261     }
3262     },
         ## Reports 'character not allowed' for a text child flagged as
         ## significant by the caller (|$has_significant| is true).
3263     check_child_text => sub {
3264     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
3265     if ($has_significant) {
3266     $self->{onerror}->(node => $child_node, type => 'character not allowed');
3267 wakaba 1.1 }
3268 wakaba 1.40 },
         ## Runs the table model check on the finished element, records the
         ## element in |$self->{return}->{table}| and finally delegates to
         ## the generic |%HTMLChecker| end handler.
3269     check_end => sub {
3270     my ($self, $item, $element_state) = @_;
3271 wakaba 1.1
3272     ## Table model errors
3273     require Whatpm::HTMLTable;
         ## Errors passed to the callback are forwarded to |onerror| with
         ## a 'table:' prefix added to their type.
3274 wakaba 1.40 Whatpm::HTMLTable->form_table ($item->{node}, sub {
3275 wakaba 1.1 my %opt = @_;
3276     $self->{onerror}->(type => 'table:'.$opt{type}, node => $opt{node});
3277     });
3278 wakaba 1.40 push @{$self->{return}->{table}}, $item->{node};
3279 wakaba 1.1
3280 wakaba 1.40 $HTMLChecker{check_end}->(@_);
3281 wakaba 1.1 },
3282     };
3283    
3284     $Element->{$HTML_NS}->{caption} = {
         ## Checker definition for the HTML |caption| element.  It starts
         ## from the entries of |%HTMLPhrasingContentChecker| (defined
         ## outside this section) and overrides |status| and |check_attrs|.
         ## No attribute-specific checkers are registered (first hash is
         ## empty); the second hash lists feature status flags per
         ## attribute name.
3285 wakaba 1.40 %HTMLPhrasingContentChecker,
3286 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3287     check_attrs => $GetHTMLAttrsChecker->({}, {
3288     %HTMLAttrStatus,
3289     %HTMLM12NCommonAttrStatus,
3290     align => FEATURE_M12N10_REC_DEPRECATED,
3291 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3292 wakaba 1.49 }),
3293 wakaba 1.1 };
3294    
3295     $Element->{$HTML_NS}->{colgroup} = {
         ## Checker definition for the HTML |colgroup| element.  It starts
         ## from |%HTMLEmptyChecker| but replaces the child callbacks so
         ## that |col| children are accepted.
3296 wakaba 1.40 %HTMLEmptyChecker,
3297 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3298 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
         ## The callback passed to the integer checker is true only for
         ## values greater than zero.
         ## NOTE(review): presumably this is the extra range constraint
         ## applied after the integer syntax check -- confirm against
         ## |$GetHTMLNonNegativeIntegerAttrChecker|.
3299 wakaba 1.1 span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
3300     ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
3301     ## TODO: "attribute not supported" if |col|.
3302     ## ISSUE: MUST NOT if any |col|?
3303     ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
3304 wakaba 1.49 }, {
3305     %HTMLAttrStatus,
3306     %HTMLM12NCommonAttrStatus,
3307     align => FEATURE_M12N10_REC,
3308     char => FEATURE_M12N10_REC,
3309     charoff => FEATURE_M12N10_REC,
3310 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3311     span => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3312 wakaba 1.49 valign => FEATURE_M12N10_REC,
3313     width => FEATURE_M12N10_REC,
3314 wakaba 1.1 }),
         ## Accepts HTML |col| children and anything listed in
         ## |$self->{plus_elements}|; elements in |$self->{minus_elements}|
         ## get 'element not allowed:minus', everything else
         ## 'element not allowed'.
3315 wakaba 1.40 check_child_element => sub {
3316     my ($self, $item, $child_el, $child_nsuri, $child_ln,
3317     $child_is_transparent, $element_state) = @_;
3318     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
3319     $self->{onerror}->(node => $child_el,
3320     type => 'element not allowed:minus',
3321     level => $self->{must_level});
3322     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
3323     #
3324     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'col') {
3325     #
3326     } else {
3327     $self->{onerror}->(node => $child_el, type => 'element not allowed');
3328     }
3329     },
         ## Reports 'character not allowed' for a text child flagged as
         ## significant by the caller.
3330     check_child_text => sub {
3331     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
3332     if ($has_significant) {
3333     $self->{onerror}->(node => $child_node, type => 'character not allowed');
3334 wakaba 1.1 }
3335     },
3336     };
3337    
3338     $Element->{$HTML_NS}->{col} = {
         ## Checker definition for the HTML |col| element.  All content
         ## callbacks come unchanged from |%HTMLEmptyChecker|; only
         ## |status| and |check_attrs| are specific to |col|.
3339 wakaba 1.40 %HTMLEmptyChecker,
3340 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3341 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
         ## The callback is true only for values greater than zero.
3342 wakaba 1.1 span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
3343 wakaba 1.49 }, {
3344     %HTMLAttrStatus,
3345     %HTMLM12NCommonAttrStatus,
3346     align => FEATURE_M12N10_REC,
3347     char => FEATURE_M12N10_REC,
3348     charoff => FEATURE_M12N10_REC,
3349 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3350     span => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3351 wakaba 1.49 valign => FEATURE_M12N10_REC,
3352     width => FEATURE_M12N10_REC,
3353 wakaba 1.1 }),
3354     };
3355    
3356     $Element->{$HTML_NS}->{tbody} = {
         ## Checker definition for the HTML |tbody| element.  Only |tr|
         ## children are accepted, and at least one |tr| is required.
         ## The |thead| and |tfoot| definitions copy this hash, so changes
         ## here affect all three row group elements.
3357 wakaba 1.40 %HTMLChecker,
3358 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3359     check_attrs => $GetHTMLAttrsChecker->({}, {
3360     %HTMLAttrStatus,
3361     %HTMLM12NCommonAttrStatus,
3362     align => FEATURE_M12N10_REC,
3363     char => FEATURE_M12N10_REC,
3364     charoff => FEATURE_M12N10_REC,
3365 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3366 wakaba 1.49 valign => FEATURE_M12N10_REC,
3367     }),
         ## Accepts HTML |tr| children (recording that one was seen) and
         ## anything in |$self->{plus_elements}|; elements in
         ## |$self->{minus_elements}| get 'element not allowed:minus',
         ## everything else 'element not allowed'.
3368 wakaba 1.40 check_child_element => sub {
3369     my ($self, $item, $child_el, $child_nsuri, $child_ln,
3370     $child_is_transparent, $element_state) = @_;
3371     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
3372     $self->{onerror}->(node => $child_el,
3373     type => 'element not allowed:minus',
3374     level => $self->{must_level});
3375     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
3376     #
3377     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
3378     $element_state->{has_tr} = 1;
3379     } else {
3380     $self->{onerror}->(node => $child_el, type => 'element not allowed');
3381     }
3382     },
         ## Reports 'character not allowed' for a text child flagged as
         ## significant by the caller.
3383     check_child_text => sub {
3384     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
3385     if ($has_significant) {
3386     $self->{onerror}->(node => $child_node, type => 'character not allowed');
3387 wakaba 1.1 }
3388 wakaba 1.40 },
         ## Reports 'child element missing:tr' when no |tr| child was
         ## seen, then delegates to the generic |%HTMLChecker| end handler.
3389     check_end => sub {
3390     my ($self, $item, $element_state) = @_;
3391     unless ($element_state->{has_tr}) {
3392     $self->{onerror}->(node => $item->{node},
3393     type => 'child element missing:tr');
3394 wakaba 1.1 }
3395 wakaba 1.40
3396     $HTMLChecker{check_end}->(@_);
3397 wakaba 1.1 },
3398     };
3399    
3400     $Element->{$HTML_NS}->{thead} = {
         ## |thead| reuses the |tbody| definition: the hash is a shallow
         ## copy of its key/value pairs, so the callbacks and attribute
         ## checker are shared with |tbody|.
3401 wakaba 1.40 %{$Element->{$HTML_NS}->{tbody}},
3402 wakaba 1.1 };
3403    
3404     $Element->{$HTML_NS}->{tfoot} = {
         ## |tfoot| reuses the |tbody| definition: the hash is a shallow
         ## copy of its key/value pairs, so the callbacks and attribute
         ## checker are shared with |tbody|.
3405 wakaba 1.40 %{$Element->{$HTML_NS}->{tbody}},
3406 wakaba 1.1 };
3407    
3408     $Element->{$HTML_NS}->{tr} = {
         ## Checker definition for the HTML |tr| element.  Only |td| and
         ## |th| children are accepted, and at least one cell is required.
3409 wakaba 1.40 %HTMLChecker,
3410 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3411     check_attrs => $GetHTMLAttrsChecker->({}, {
3412     %HTMLAttrStatus,
3413     %HTMLM12NCommonAttrStatus,
3414     align => FEATURE_M12N10_REC,
3415     bgcolor => FEATURE_M12N10_REC_DEPRECATED,
3416     char => FEATURE_M12N10_REC,
3417     charoff => FEATURE_M12N10_REC,
3418 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3419 wakaba 1.49 valign => FEATURE_M12N10_REC,
3420     }),
         ## Accepts HTML |td| / |th| children (recording that a cell was
         ## seen) and anything in |$self->{plus_elements}|; elements in
         ## |$self->{minus_elements}| get 'element not allowed:minus',
         ## everything else 'element not allowed'.
3421 wakaba 1.40 check_child_element => sub {
3422     my ($self, $item, $child_el, $child_nsuri, $child_ln,
3423     $child_is_transparent, $element_state) = @_;
3424     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
3425     $self->{onerror}->(node => $child_el,
3426     type => 'element not allowed:minus',
3427     level => $self->{must_level});
3428     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
3429     #
3430     } elsif ($child_nsuri eq $HTML_NS and
3431     ($child_ln eq 'td' or $child_ln eq 'th')) {
3432     $element_state->{has_cell} = 1;
3433     } else {
3434     $self->{onerror}->(node => $child_el, type => 'element not allowed');
3435     }
3436     },
         ## Reports 'character not allowed' for a text child flagged as
         ## significant by the caller.
3437     check_child_text => sub {
3438     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
3439     if ($has_significant) {
3440     $self->{onerror}->(node => $child_node, type => 'character not allowed');
3441 wakaba 1.1 }
3442 wakaba 1.40 },
         ## Reports 'child element missing:td|th' when no cell was seen,
         ## then delegates to the generic |%HTMLChecker| end handler.
3443     check_end => sub {
3444     my ($self, $item, $element_state) = @_;
3445     unless ($element_state->{has_cell}) {
3446     $self->{onerror}->(node => $item->{node},
3447     type => 'child element missing:td|th');
3448 wakaba 1.1 }
3449 wakaba 1.40
3450     $HTMLChecker{check_end}->(@_);
3451 wakaba 1.1 },
3452     };
3453    
3454     $Element->{$HTML_NS}->{td} = {
         ## Checker definition for the HTML |td| element.  Content
         ## callbacks come from |%HTMLProseContentChecker| (defined outside
         ## this section); only |status| and |check_attrs| are overridden.
3455 wakaba 1.40 %HTMLProseContentChecker,
3456 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3457 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
         ## Both span callbacks are true only for values greater than zero.
3458 wakaba 1.1 colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
3459     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
3460 wakaba 1.49 }, {
3461     %HTMLAttrStatus,
3462     %HTMLM12NCommonAttrStatus,
3463     abbr => FEATURE_M12N10_REC,
3464     align => FEATURE_M12N10_REC,
3465     axis => FEATURE_M12N10_REC,
3466     bgcolor => FEATURE_M12N10_REC_DEPRECATED,
3467     char => FEATURE_M12N10_REC,
3468     charoff => FEATURE_M12N10_REC,
3469 wakaba 1.50 colspan => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3470 wakaba 1.49 headers => FEATURE_M12N10_REC,
3471     height => FEATURE_M12N10_REC_DEPRECATED,
3472 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3473 wakaba 1.49 nowrap => FEATURE_M12N10_REC_DEPRECATED,
3474 wakaba 1.50 rowspan => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
         ## NOTE: Unlike |th|, |scope| has no checker registered here and
         ## carries no HTML5 status flag.
3475 wakaba 1.49 scope => FEATURE_M12N10_REC,
3476     valign => FEATURE_M12N10_REC,
3477     width => FEATURE_M12N10_REC_DEPRECATED,
3478 wakaba 1.1 }),
3479     };
3480    
3481     $Element->{$HTML_NS}->{th} = {
         ## Checker definition for the HTML |th| element.  Content
         ## callbacks come from |%HTMLPhrasingContentChecker| (defined
         ## outside this section); only |status| and |check_attrs| are
         ## overridden.
3482 wakaba 1.40 %HTMLPhrasingContentChecker,
3483 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3484 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
         ## Both span callbacks are true only for values greater than zero.
3485 wakaba 1.1 colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
3486     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
         ## |scope| is restricted to the four listed keywords.
3487     scope => $GetHTMLEnumeratedAttrChecker
3488     ->({row => 1, col => 1, rowgroup => 1, colgroup => 1}),
3489 wakaba 1.49 }, {
3490     %HTMLAttrStatus,
3491     %HTMLM12NCommonAttrStatus,
3492     abbr => FEATURE_M12N10_REC,
3493     align => FEATURE_M12N10_REC,
3494     axis => FEATURE_M12N10_REC,
3495     bgcolor => FEATURE_M12N10_REC_DEPRECATED,
3496     char => FEATURE_M12N10_REC,
3497     charoff => FEATURE_M12N10_REC,
3498 wakaba 1.50 colspan => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3499 wakaba 1.49 headers => FEATURE_M12N10_REC,
3500     height => FEATURE_M12N10_REC_DEPRECATED,
3501 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3502 wakaba 1.49 nowrap => FEATURE_M12N10_REC_DEPRECATED,
3503 wakaba 1.50 rowspan => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3504     scope => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3505 wakaba 1.49 valign => FEATURE_M12N10_REC,
3506     width => FEATURE_M12N10_REC_DEPRECATED,
3507 wakaba 1.1 }),
3508     };
3509    
3510 wakaba 1.52 my $AttrCheckerNotImplemented = sub {
         ## Placeholder attribute checker for attributes whose validation
         ## has not been written yet.  It never inspects the value; it
         ## only reports the attribute node |$attr| through |onerror| with
         ## level 'unsupported' and type 'attribute'.
3511     my ($self, $attr) = @_;
3512     $self->{onerror}->(node => $attr, level => 'unsupported',
3513     type => 'attribute');
3514     };
3515    
3516     $Element->{$HTML_NS}->{form} = {
         ## Checker definition for the HTML |form| element.  Content
         ## callbacks come from |%HTMLProseContentChecker|; the first hash
         ## passed to |$GetHTMLAttrsChecker| registers attribute checkers,
         ## the second lists feature status flags per attribute name.
3517 wakaba 1.56 %HTMLProseContentChecker, ## NOTE: Flow* [WF2]
3518     ## TODO: form in form is allowed in XML [WF2]
3519 wakaba 1.52 status => FEATURE_WF2 | FEATURE_M12N10_REC,
3520     check_attrs => $GetHTMLAttrsChecker->({
3521 wakaba 1.56 accept => $AttrCheckerNotImplemented, ## TODO: ContentTypes [WF2]
3522 wakaba 1.52 'accept-charset' => $AttrCheckerNotImplemented, ## TODO: Charsets
3523     action => $HTMLURIAttrChecker, ## TODO: "User agent behavior for a value other than HTTP URI is undefined" [HTML4]
3524 wakaba 1.56 data => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
3525     enctype => $HTMLIMTAttrChecker, ## TODO: "multipart/form-data" should be used when type=file is used [HTML4] ## TODO: MUST NOT parameter [WF2]
3526     method => $GetHTMLEnumeratedAttrChecker->({
3527     get => 1, post => 1, put => 1, delete => 1,
3528     }),
3529 wakaba 1.52 ## NOTE: "get" SHOULD be used for idempotent submission,
3530     ## "post" SHOULD be used otherwise [HTML4]. This cannot be tested.
         ## An empty sub accepts any value for |name| without reporting
         ## anything.
3531     name => sub { }, # CDATA in HTML4 ## TODO: must be same as |id| (informative!) [XHTML10]
3532 wakaba 1.56 onreceived => $HTMLEventHandlerAttrChecker,
3533     replace => $GetHTMLEnumeratedAttrChecker->({document => 1, values => 1}),
3534 wakaba 1.52 target => $HTMLTargetAttrChecker,
3535     ## TODO: Warn for combination whose behavior is not defined.
3536     }, {
3537     %HTMLAttrStatus,
3538     %HTMLM12NCommonAttrStatus,
3539 wakaba 1.56 accept => FEATURE_WF2 | FEATURE_M12N10_REC,
3540 wakaba 1.52 'accept-charset' => FEATURE_M12N10_REC,
3541 wakaba 1.56 action => FEATURE_WF2 | FEATURE_M12N10_REC,
3542     data => FEATURE_WF2,
3543     enctype => FEATURE_WF2 | FEATURE_M12N10_REC,
3544 wakaba 1.52 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3545 wakaba 1.56 method => FEATURE_WF2 | FEATURE_M12N10_REC,
3546 wakaba 1.52 name => FEATURE_M12N10_REC_DEPRECATED,
3547 wakaba 1.56 onreceived => FEATURE_WF2,
3548 wakaba 1.52 onreset => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3549     onsubmit => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3550 wakaba 1.56 replace => FEATURE_WF2,
3551 wakaba 1.52 target => FEATURE_M12N10_REC,
3552     }),
3553     ## TODO: Tests
3554     ## TODO: Tests for <nest/> in <form>
3555     };
3556    
3557     $Element->{$HTML_NS}->{fieldset} = {
         ## Checker definition for the HTML |fieldset| element.  Content
         ## callbacks come from |%HTMLProseContentChecker|; |legend|
         ## handling is still a TODO (see the note on the next line).
3558     %HTMLProseContentChecker, ## NOTE: legend, %Flow; ## TODO: legend
3559     status => FEATURE_WF2 | FEATURE_M12N10_REC,
3560 wakaba 1.56 check_attrs => $GetHTMLAttrsChecker->({
3561     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
3562     ## TODO: form [WF2]
3563     }, {
3564 wakaba 1.52 %HTMLAttrStatus,
3565     %HTMLM12NCommonAttrStatus,
3566 wakaba 1.56 disabled => FEATURE_WF2,
         ## NOTE: |form| has a status entry but no checker yet (see the
         ## TODO in the checker hash above).
3567     form => FEATURE_WF2,
3568 wakaba 1.52 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3569     }),
3570     ## TODO: Tests
3571     ## TODO: Tests for <nest/> in <fieldset>
3572     };
3573    
3574     $Element->{$HTML_NS}->{input} = {
         ## Checker definition for the HTML |input| element.  Content
         ## callbacks come from |%HTMLEmptyChecker|.  The first hash passed
         ## to |$GetHTMLAttrsChecker| registers attribute checkers, the
         ## second lists feature status flags per attribute name.
3575 wakaba 1.56 %HTMLEmptyChecker, ## MUST [WF2]
3576 wakaba 1.52 status => FEATURE_WF2 | FEATURE_M12N10_REC,
3577     check_attrs => $GetHTMLAttrsChecker->({
3578 wakaba 1.56 accept => $AttrCheckerNotImplemented, ## TODO: ContentTypes [WF2]
3579 wakaba 1.52 accesskey => $AttrCheckerNotImplemented, ## TODO: Character
3580     ## TODO: "Note. Authors should consider the input method of the expected reader when specifying an accesskey." [HTML4]
3581     ## "We recommend that authors include the access key in label text or wherever the access key is to apply." [HTML4]
3582 wakaba 1.56 action => $HTMLURIAttrChecker,
3583 wakaba 1.52 align => $GetHTMLEnumeratedAttrChecker->({
3584     top => 1, middle => 1, bottom => 1, left => 1, right => 1,
3585     }),
3586     alt => sub {}, ## NOTE: Text [M12N] ## TODO: |alt| should be provided for |type=image| [HTML4]
3587     ## NOTE: HTML4 has a "should" for accessibility, which cannot be tested
3588     ## here.
3589 wakaba 1.56 autocomplete => $GetHTMLEnumeratedAttrChecker->({on => 1, off => 1}),
3590     autofocus => $GetHTMLBooleanAttrChecker->('autofocus'),
3591 wakaba 1.52 checked => $GetHTMLBooleanAttrChecker->('checked'),
3592     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
3593 wakaba 1.56 enctype => $HTMLIMTAttrChecker,
3594     ## TODO: form [WF2]
3595     ## TODO: inputmode [WF2]
3596 wakaba 1.52 ismap => $GetHTMLBooleanAttrChecker->('ismap'),
3597 wakaba 1.56 ## TODO: list [WF2]
3598     ## TODO: max [WF2]
         ## The callback always returns true, i.e. no constraint is added
         ## beyond the integer syntax.
3599 wakaba 1.52 maxlength => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
3600 wakaba 1.56 method => $GetHTMLEnumeratedAttrChecker->({
3601     get => 1, post => 1, put => 1, delete => 1,
3602     }),
3603     ## TODO: min [WF2]
3604 wakaba 1.52 name => sub {}, ## NOTE: CDATA [M12N]
3605     readonly => $GetHTMLBooleanAttrChecker->('readonly'),
3606 wakaba 1.56 replace => $GetHTMLEnumeratedAttrChecker->({document => 1, values => 1}),
3607     required => $GetHTMLBooleanAttrChecker->('required'),
3608 wakaba 1.52 size => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
3609     src => $HTMLURIAttrChecker,
3610 wakaba 1.56 ## TODO: step [WF2]
3611     target => $HTMLTargetAttrChecker,
3612     ## TODO: template
3613 wakaba 1.52 type => $GetHTMLEnumeratedAttrChecker->({
3614     text => 1, password => 1, checkbox => 1, radio => 1, submit => 1,
3615     reset => 1, file => 1, hidden => 1, image => 1, button => 1,
3616 wakaba 1.56 ## [WF2]
         ## NOTE: The WF2 keyword is |datetime|; it was previously
         ## misspelled here, so the valid value was rejected.
3617     datetime => 1, 'datetime-local' => 1, date => 1, month => 1, week => 1,
3618     time => 1, number => 1, range => 1, email => 1, url => 1,
3619     add => 1, remove => 1, 'move-up' => 1, 'move-down' => 1,
3620 wakaba 1.52 }),
3621     usemap => $HTMLUsemapAttrChecker,
3622 wakaba 1.56 value => sub {}, ## NOTE: CDATA [M12N] ## TODO: "optional except when the type attribute has the value "radio" or "checkbox"" [HTML4] ## TODO: constraints [WF2]
3623     ## TODO: "authors should ensure that in each set of radio buttons that one is initially "on"." [HTML4] [WF2]
3624 wakaba 1.52 }, {
3625     %HTMLAttrStatus,
3626     %HTMLM12NCommonAttrStatus,
3627 wakaba 1.56 accept => FEATURE_WF2 | FEATURE_M12N10_REC,
3628 wakaba 1.52 accesskey => FEATURE_M12N10_REC,
3629 wakaba 1.56 action => FEATURE_WF2,
3630 wakaba 1.52 align => FEATURE_M12N10_REC_DEPRECATED,
3631     alt => FEATURE_M12N10_REC,
3632 wakaba 1.56 autocomplete => FEATURE_WF2,
3633     autofocus => FEATURE_WF2,
3634 wakaba 1.52 checked => FEATURE_M12N10_REC,
3635     datafld => FEATURE_HTML4_REC_RESERVED,
3636     dataformatas => FEATURE_HTML4_REC_RESERVED,
3637     datasrc => FEATURE_HTML4_REC_RESERVED,
3638 wakaba 1.56 disabled => FEATURE_WF2 | FEATURE_M12N10_REC,
         ## NOTE: |enctype|, |replace| and |target| have checkers above
         ## but had no status entry; they are WF2 form submission
         ## attributes like |action| and |method|.
         enctype => FEATURE_WF2,
3639     form => FEATURE_WF2,
3640     inputmode => FEATURE_WF2 | FEATURE_XHTMLBASIC11_CR,
3641 wakaba 1.52 ismap => FEATURE_M12N10_REC,
3642     lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3643 wakaba 1.56 list => FEATURE_WF2,
3644     max => FEATURE_WF2,
3645     maxlength => FEATURE_WF2 | FEATURE_M12N10_REC,
3646     method => FEATURE_WF2,
3647     min => FEATURE_WF2,
3648 wakaba 1.52 name => FEATURE_M12N10_REC,
3649     onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3650     onchange => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3651     onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3652     onselect => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3653 wakaba 1.56 readonly => FEATURE_WF2 | FEATURE_M12N10_REC,
         replace => FEATURE_WF2,
3654     required => FEATURE_WF2,
3655     size => FEATURE_WF2_DEPRECATED | FEATURE_M12N10_REC,
3656 wakaba 1.52 src => FEATURE_M12N10_REC,
3657 wakaba 1.56 step => FEATURE_WF2,
3658 wakaba 1.52 tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
         target => FEATURE_WF2,
3659 wakaba 1.56 template => FEATURE_WF2,
3660 wakaba 1.52 type => FEATURE_M12N10_REC,
3661     usemap => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
3662     value => FEATURE_M12N10_REC,
3663     }),
3664     ## TODO: Tests
3665     ## TODO: Tests for <nest/> in <input>
3666     };
3667    
3668 wakaba 1.56 ## TODO: Form |name| attributes: MUST NOT conflict with RFC 3106 [WF2]
3669    
3670 wakaba 1.52 $Element->{$HTML_NS}->{button} = {
         ## Checker definition for the HTML |button| element.  Content
         ## callbacks come from |%HTMLProseContentChecker|.  The first hash
         ## passed to |$GetHTMLAttrsChecker| registers attribute checkers,
         ## the second lists feature status flags per attribute name.
3671     %HTMLProseContentChecker, ## NOTE: %Flow; - something [XHTML10]
3672     ## TODO: -A|%formctrl;|form|fieldset [HTML4]
3673     ## TODO: image map (img) in |button| is "illegal" [HTML4].
3674     status => FEATURE_WF2 | FEATURE_M12N10_REC,
3675     check_attrs => $GetHTMLAttrsChecker->({
3676     accesskey => $AttrCheckerNotImplemented, ## TODO: Character
3677 wakaba 1.56 action => $HTMLURIAttrChecker,
3678     autofocus => $GetHTMLBooleanAttrChecker->('autofocus'),
3679 wakaba 1.52 disabled => $GetHTMLBooleanAttrChecker->('disabled'),
         ## NOTE: |enctype| is listed as a WF2 attribute in the status
         ## hash below but had no checker registered here; it is checked
         ## as an Internet media type, as on |form|.
         enctype => $HTMLIMTAttrChecker,
3680 wakaba 1.56 ## TODO: form [WF2]
3681     method => $GetHTMLEnumeratedAttrChecker->({
3682     get => 1, post => 1, put => 1, delete => 1,
3683     }),
3684 wakaba 1.52 name => sub {}, ## NOTE: CDATA [M12N]
3685 wakaba 1.56 oninvalid => $HTMLEventHandlerAttrChecker,
3686     replace => $GetHTMLEnumeratedAttrChecker->({document => 1, values => 1}),
3687     target => $HTMLTargetAttrChecker,
3688     ## TODO: template [WF2]
3689 wakaba 1.52 type => $GetHTMLEnumeratedAttrChecker->({
3690     button => 1, submit => 1, reset => 1,
3691     }),
3692     value => sub {}, ## NOTE: CDATA [M12N]
3693     }, {
3694     %HTMLAttrStatus,
3695     %HTMLM12NCommonAttrStatus,
3696     accesskey => FEATURE_M12N10_REC,
3697 wakaba 1.56 action => FEATURE_WF2,
3698     autofocus => FEATURE_WF2,
3699 wakaba 1.52 datafld => FEATURE_HTML4_REC_RESERVED,
3700     dataformatas => FEATURE_HTML4_REC_RESERVED,
3701     datasrc => FEATURE_HTML4_REC_RESERVED,
3702 wakaba 1.56 disabled => FEATURE_WF2 | FEATURE_M12N10_REC,
3703     enctype => FEATURE_WF2,
3704     form => FEATURE_WF2,
3705 wakaba 1.52 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3706 wakaba 1.56 method => FEATURE_WF2,
3707 wakaba 1.52 name => FEATURE_M12N10_REC,
3708     onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3709     onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3710 wakaba 1.56 oninvalid => FEATURE_WF2,
3711     replace => FEATURE_WF2,
3712 wakaba 1.52 tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3713 wakaba 1.56 target => FEATURE_WF2,
3714     template => FEATURE_WF2,
3715 wakaba 1.52 type => FEATURE_M12N10_REC,
3716     value => FEATURE_M12N10_REC,
3717     }),
3718     ## TODO: Tests
3719     ## TODO: Tests for <nest/> in <button>
3720     };
3721    
3722     $Element->{$HTML_NS}->{label} = {
         ## Checker definition for the HTML |label| element.  Content
         ## callbacks come from |%HTMLPhrasingContentChecker|.  Both
         ## element-specific attributes use |$AttrCheckerNotImplemented|,
         ## so their values are not validated yet.
3723     %HTMLPhrasingContentChecker, ## NOTE: %Inline - label [XHTML10] ## TODO: -label
3724 wakaba 1.56 ## TODO: At most one form control [WF2]
3725 wakaba 1.52 status => FEATURE_WF2 | FEATURE_M12N10_REC,
3726     check_attrs => $GetHTMLAttrsChecker->({
3727     accesskey => $AttrCheckerNotImplemented, ## TODO: Character
3728     for => $AttrCheckerNotImplemented, ## TODO: IDREF ## TODO: Must be |id| of control [HTML4] ## TODO: Or, "may only contain one control element"
3729     }, {
3730     %HTMLAttrStatus,
3731     %HTMLM12NCommonAttrStatus,
3732 wakaba 1.56 accesskey => FEATURE_WF2 | FEATURE_M12N10_REC,
3733 wakaba 1.52 for => FEATURE_M12N10_REC,
3734     lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3735     onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3736     onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3737     }),
3738     ## TODO: Tests
3739     ## TODO: Tests for <nest/> in <label>
3740     };
3741    
3742     $Element->{$HTML_NS}->{select} = {
         ## Checker definition for the HTML |select| element.  Content
         ## callbacks currently come from |%HTMLProseContentChecker|; the
         ## stricter (optgroup|option)* model is still a TODO.
3743 wakaba 1.56 %HTMLProseContentChecker, ## TODO: (optgroup|option)* [HTML4] + [WF2] ## TODO: SHOULD avoid empty and visible [WF2]
3744 wakaba 1.52 ## TODO: author should SELECTED at least one OPTION in non-MULTIPLE case [HTML4].
3745     ## TODO: more than one OPTION with SELECTED in non-MULTIPLE case is "error" [HTML4]
3746     status => FEATURE_WF2 | FEATURE_M12N10_REC,
3747 wakaba 1.56 is_root => 1, ## TODO: SHOULD NOT in application/xhtml+xml [WF2]
3748 wakaba 1.52 check_attrs => $GetHTMLAttrsChecker->({
3749 wakaba 1.56 ## TODO: accesskey [WF2]
3750     autofocus => $GetHTMLBooleanAttrChecker->('autofocus'),
3751 wakaba 1.52 disabled => $GetHTMLBooleanAttrChecker->('disabled'),
3752 wakaba 1.56 data => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
3753     ## TODO: form [WF2]
3754 wakaba 1.52 multiple => $GetHTMLBooleanAttrChecker->('multiple'),
3755     name => sub {}, ## NOTE: CDATA [M12N]
3756 wakaba 1.56 oninvalid => $HTMLEventHandlerAttrChecker,
3757     ## TODO: pattern [WF2] ## TODO: |title| semantics
         ## The callback always returns true, i.e. no constraint is added
         ## beyond the integer syntax.
3758 wakaba 1.52 size => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
3759     }, {
3760     %HTMLAttrStatus,
3761     %HTMLM12NCommonAttrStatus,
         ## NOTE: |accesskey|, |form| and |pattern| have status entries
         ## but no checker yet (see the TODOs in the checker hash above).
3762 wakaba 1.56 accesskey => FEATURE_WF2,
3763     autofocus => FEATURE_WF2,
3764     data => FEATURE_WF2,
3765 wakaba 1.52 datafld => FEATURE_HTML4_REC_RESERVED,
3766     dataformatas => FEATURE_HTML4_REC_RESERVED,
3767     datasrc => FEATURE_HTML4_REC_RESERVED,
3768 wakaba 1.56 disabled => FEATURE_WF2 | FEATURE_M12N10_REC,
3769     form => FEATURE_WF2,
3770 wakaba 1.52 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3771     multiple => FEATURE_M12N10_REC,
3772     name => FEATURE_M12N10_REC,
3773     onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3774     onchange => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3775     onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3776 wakaba 1.56 oninvalid => FEATURE_WF2,
3777     pattern => FEATURE_WF2,
3778 wakaba 1.52 size => FEATURE_M12N10_REC,
3779     tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3780     }),
3781     ## TODO: Tests
3782     ## TODO: Tests for <nest/> in <select>
3783     };
3784 wakaba 1.1
3785 wakaba 1.52 $Element->{$HTML_NS}->{datalist} = {
         ## Checker definition for the WF2 |datalist| element.  Content
         ## callbacks currently come from |%HTMLProseContentChecker|.  Only
         ## the |data| attribute has an element-specific checker (a URI
         ## check); the status hash does not include
         ## |%HTMLM12NCommonAttrStatus|, unlike most definitions nearby.
3786 wakaba 1.56 %HTMLProseContentChecker, ## TODO: (transparent | option)*
3787     ## TODO: |option| child MUST be empty [WF2]
3788 wakaba 1.52 status => FEATURE_WF2,
3789 wakaba 1.56 check_attrs => $GetHTMLAttrsChecker->({
3790     data => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
3791     }, {
3792 wakaba 1.52 %HTMLAttrStatus,
3793 wakaba 1.56 data => FEATURE_WF2,
3794 wakaba 1.52 }),
3795     ## TODO: Tests
3796     ## TODO: Tests for <nest/> in <datalist>
3797     };
3798 wakaba 1.49
3799 wakaba 1.52 $Element->{$HTML_NS}->{optgroup} = {
         ## Checker definition for the HTML |optgroup| element.  Content
         ## callbacks currently come from |%HTMLProseContentChecker|; the
         ## stricter (option|optgroup)* model is still a TODO.
3800 wakaba 1.56 %HTMLProseContentChecker, ## TODO: (option|optgroup)* [HTML4] + [WF2] SHOULD avoid empty and visible [WF2]
3801 wakaba 1.52 status => FEATURE_WF2 | FEATURE_M12N10_REC,
3802     check_attrs => $GetHTMLAttrsChecker->({
3803     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
         ## An empty sub accepts any |label| value; its presence is not
         ## enforced yet.
3804     label => sub {}, ## NOTE: Text [M12N] ## TODO: required
3805     }, {
3806     %HTMLAttrStatus,
3807     %HTMLM12NCommonAttrStatus,
3808 wakaba 1.56 disabled => FEATURE_WF2 | FEATURE_M12N10_REC,
3809 wakaba 1.52 label => FEATURE_M12N10_REC,
3810     lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3811     }),
3812     ## TODO: Tests
3813     ## TODO: Tests for <nest/> in <optgroup>
3814     };
3815    
## |option| element [M12N] [WF2].  Content is checked with the text
## checker.
$Element->{$HTML_NS}->{option} = {
  %HTMLTextChecker,
  status => FEATURE_WF2 | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    ## Per-attribute value checkers.
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    label => sub {}, ## NOTE: Text [M12N]
    selected => $GetHTMLBooleanAttrChecker->('selected'),
    value => sub {}, ## NOTE: CDATA [M12N]
  }, {
    ## Per-attribute feature status flags.
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    ## NOTE: Status flags are combined with |.  A comma here would
    ## insert a stray list element and shift every following
    ## key/value pair of this hash.
    disabled => FEATURE_WF2 | FEATURE_M12N10_REC,
    label => FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    selected => FEATURE_M12N10_REC,
    value => FEATURE_M12N10_REC,
  }),
  ## TODO: Tests
  ## TODO: Tests for <nest/> in <option>
};
3836 wakaba 1.49
## |textarea| element [M12N] [WF2].  Content is checked with the text
## checker.
$Element->{$HTML_NS}{textarea} = {
  %HTMLTextChecker,
  status => FEATURE_WF2 | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Per-attribute value checkers.
    {
      accept    => $HTMLIMTAttrChecker, ## TODO: MUST be a text-based type
      accesskey => $AttrCheckerNotImplemented, ## TODO: Character
      autofocus => $GetHTMLBooleanAttrChecker->('autofocus'),
      ## TODO: SHOULD if wrap=hard [WF2]
      cols      => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      disabled  => $GetHTMLBooleanAttrChecker->('disabled'),
      ## TODO: form [WF2]
      ## TODO: inputmode [WF2]
      maxlength => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      name      => sub { }, ## NOTE: CDATA [M12N]
      oninvalid => $HTMLEventHandlerAttrChecker,
      ## TODO: pattern [WF2] ## TODO: |title| special semantics
      readonly  => $GetHTMLBooleanAttrChecker->('readonly'),
      required  => $GetHTMLBooleanAttrChecker->('required'),
      rows      => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      wrap      => $GetHTMLEnumeratedAttrChecker->({soft => 1, hard => 1}),
    },
    ## Per-attribute feature status flags; explicit entries follow the
    ## shared tables so that they take precedence over them.
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      accept       => FEATURE_WF2,
      accesskey    => FEATURE_M12N10_REC,
      autofocus    => FEATURE_WF2,
      cols         => FEATURE_M12N10_REC,
      datafld      => FEATURE_HTML4_REC_RESERVED,
      dataformatas => FEATURE_HTML4_REC_RESERVED,
      datasrc      => FEATURE_HTML4_REC_RESERVED,
      disabled     => FEATURE_WF2 | FEATURE_M12N10_REC,
      form         => FEATURE_WF2,
      inputmode    => FEATURE_WF2 | FEATURE_XHTMLBASIC11_CR,
      lang         => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      maxlength    => FEATURE_WF2,
      name         => FEATURE_M12N10_REC,
      onblur       => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onchange     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onfocus      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      oninvalid    => FEATURE_WF2,
      onselect     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      pattern      => FEATURE_WF2,
      readonly     => FEATURE_WF2 | FEATURE_M12N10_REC,
      required     => FEATURE_WF2,
      rows         => FEATURE_M12N10_REC,
      tabindex     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      wrap         => FEATURE_WF2,
    },
  ),
  ## TODO: Tests
  ## TODO: Tests for <nest/> in <textarea>
};
3887 wakaba 1.49
## |output| element [WF2].  Content is checked with the phrasing content
## checker (Inline [WF2]).
## NOTE: "The output element should be used when ..." [WF2]
$Element->{$HTML_NS}{output} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_WF2,
  check_attrs => $GetHTMLAttrsChecker->(
    ## No attribute-specific value checkers are registered yet.
    {
      ## TODO: for [WF2]
      ## TODO: form [WF2]
      ## TODO: name [WF2]
      ## onformchange[WF2]
      ## onforminput[WF2]
    },
    ## Per-attribute feature status flags.
    {
      %HTMLAttrStatus,
      for          => FEATURE_WF2,
      form         => FEATURE_WF2,
      name         => FEATURE_WF2,
      onchange     => FEATURE_HTML5_DEFAULT | FEATURE_WF2,
      onformchange => FEATURE_WF2,
      onforminput  => FEATURE_WF2,
    },
  ),
  ## TODO: Tests
  ## TODO: Tests for <nest/> in <output>
};
3910    
3911     ## TODO: repetition template
3912    
## |isindex| element: deprecated in [HTML4]; checked as an empty element.
$Element->{$HTML_NS}{isindex} = {
  %HTMLEmptyChecker,
  status => (FEATURE_M12N10_REC_DEPRECATED |
             Whatpm::ContentChecker::FEATURE_DEPRECATED_SHOULD), ## [HTML4]
  check_attrs => $GetHTMLAttrsChecker->(
    ## Per-attribute value checkers.
    {
      prompt => sub { }, ## NOTE: Text [M12N]
    },
    ## Per-attribute feature status flags.
    {
      %HTMLAttrStatus,
      class  => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      dir    => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      id     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang   => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      prompt => FEATURE_M12N10_REC_DEPRECATED,
      style  => FEATURE_XHTML10_REC,
      title  => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    },
  ),
  ## TODO: Tests
  ## TODO: Tests for <nest/> in <isindex>
};
3932 wakaba 1.49
## |script| element.
##
## With a |src| attribute the element must be empty; otherwise the script
## type is derived from |type| / |language| and reported as unsupported
## at the end of the element.
$Element->{$HTML_NS}{script} = {
  %HTMLChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Per-attribute value checkers.
    {
      src   => $HTMLURIAttrChecker,
      defer => $GetHTMLBooleanAttrChecker->('defer'),
      async => $GetHTMLBooleanAttrChecker->('async'),
      type  => $HTMLIMTAttrChecker,
    },
    ## Per-attribute feature status flags.
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      async    => FEATURE_HTML5_DEFAULT,
      charset  => FEATURE_M12N10_REC,
      defer    => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      event    => FEATURE_HTML4_REC_RESERVED,
      for      => FEATURE_HTML4_REC_RESERVED,
      id       => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      language => FEATURE_M12N10_REC_DEPRECATED,
      src      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      type     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    },
  ),

  ## Records either |must_be_empty| (external script) or |script_type|
  ## (inline script) in the element state.
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my $el = $item->{node};

    if ($el->has_attribute_ns (undef, 'src')) {
      $element_state->{must_be_empty} = 1;
    } else {
      ## NOTE: No content model conformance in HTML5 spec.
      my $type = $el->get_attribute_ns (undef, 'type');
      my $language = $el->get_attribute_ns (undef, 'language');

      ## An empty |type| or |language| selects the default type; then
      ## |type| wins over |language|; the default applies when neither
      ## attribute is specified.
      my $script_type
          = ((defined $type and $type eq '') or
             (defined $language and $language eq ''))
                                ? 'text/javascript'
          : (defined $type)     ? $type
          : (defined $language) ? 'text/' . $language
          :                       'text/javascript';
      $element_state->{script_type} = $script_type; ## TODO: $type normalization
    }
  },

  ## Minus elements are always errors, plus elements are always allowed,
  ## and any other element is an error only when the script must be empty.
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($element_state->{must_be_empty}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed');
    }
  },

  ## Significant text is an error only when the script must be empty.
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed')
        if $has_significant and $element_state->{must_be_empty};
  },

  ## For inline scripts, reports the script type as unsupported and then
  ## runs the generic end-of-element check.
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if (not $element_state->{must_be_empty}) {
      $self->{onerror}->(node => $item->{node}, level => 'unsupported',
                         type => 'script:'.$element_state->{script_type});
      ## TODO: text/javascript support

      $HTMLChecker{check_end}->(@_);
    }
  },
};
4011 wakaba 1.25 ## ISSUE: Significant check and text child node
4012 wakaba 1.1
4013     ## NOTE: When script is disabled.
## |noscript| element.
##
## Inside |head| (|$self->{flag}->{in_head}|) only a restricted set of
## children is accepted; elsewhere the element is checked as transparent
## and nested |noscript| elements are excluded.
$Element->{$HTML_NS}{noscript} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  }),

  ## Reports |noscript| in non-HTML (XML) documents and, outside |head|,
  ## adds |noscript| to the excluded (minus) elements.
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my $el = $item->{node};

    if (not $el->owner_document->manakai_is_html) {
      $self->{onerror}->(node => $el, type => 'in XML:noscript');
    }

    if (not $self->{flag}->{in_head}) {
      $self->_add_minus_elements ($element_state,
                                  {$HTML_NS => {noscript => 1}});
    }
  },

  ## In |head|: |link| is allowed, |style| is allowed unless |scoped|,
  ## |meta| is allowed only with an |http-equiv| other than
  ## |content-type|; anything else is an error.  Outside |head| the
  ## transparent checker decides.
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## Shared error report for children not allowed in a |head|
    ## |noscript|.
    my $report_not_allowed = sub {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:head noscript',
                         level => $self->{must_level});
    };

    if (not $self->{flag}->{in_head}) {
      $HTMLTransparentChecker{check_child_element}->(@_);
    } elsif ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'link') {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
      $report_not_allowed->()
          if $child_el->has_attribute_ns (undef, 'scoped');
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'meta') {
      my $http_equiv_attr
          = $child_el->get_attribute_node_ns (undef, 'http-equiv');
      ## TODO: case
      if (not $http_equiv_attr or
          lc ($http_equiv_attr->value) eq 'content-type') {
        $report_not_allowed->();
      }
    } else {
      $report_not_allowed->();
    }
  },

  ## Significant text is an error in |head|; elsewhere the transparent
  ## checker decides.
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if (not $self->{flag}->{in_head}) {
      $HTMLTransparentChecker{check_child_text}->(@_);
    } elsif ($has_significant) {
      $self->{onerror}->(node => $child_node,
                         type => 'character not allowed');
    }
  },

  ## Restores the excluded elements, then runs the generic end check in
  ## |head| or the phrasing content end check elsewhere.
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);
    my $end_checker = $self->{flag}->{in_head}
        ? $HTMLChecker{check_end}
        : $HTMLPhrasingContentChecker{check_end};
    $end_checker->(@_);
  },
};
4099 wakaba 1.3 ## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
4100 wakaba 1.1
## |event-source| element (HTML5 LC status); checked as an empty element.
$Element->{$HTML_NS}{'event-source'} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Per-attribute value checkers.
    {
      src => $HTMLURIAttrChecker,
    },
    ## Per-attribute feature status flags.
    {
      %HTMLAttrStatus,
      src => FEATURE_HTML5_LC,
    },
  ),
};
4111    
## |details| element: a |legend| followed by prose content.
##
## Element state keys used here:
##   has_legend     - a |legend| child has been seen.
##   has_non_legend - some content has been seen after which a |legend|
##                    is no longer allowed.
$Element->{$HTML_NS}{details} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Per-attribute value checkers.
    {
      open => $GetHTMLBooleanAttrChecker->('open'),
    },
    ## Per-attribute feature status flags.
    {
      %HTMLAttrStatus,
      open => FEATURE_HTML5_WD,
    },
  ),
  ## NOTE: legend, Prose
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      $element_state->{has_non_legend} = 1;
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'legend') {
      ## A |legend| is only allowed before any other content.
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:details legend',
                         level => $self->{must_level})
          if $element_state->{has_non_legend};
      @$element_state{qw(has_legend has_non_legend)} = (1, 1);
    } else {
      $HTMLProseContentChecker{check_child_element}->(@_);
      if (not $child_is_transparent) {
        $element_state->{has_non_legend} = 1;
      }
      ## ISSUE: |<details><object><legend>xx</legend></object>..</details>|
      ## is conforming?
    }
  },

  ## Significant text counts as non-|legend| content.
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $element_state->{has_non_legend} = 1 if $has_significant;
  },

  ## Reports a missing |legend|, then runs the prose content end check.
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    if (not $element_state->{has_legend}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'element missing:legend',
                         level => $self->{must_level});
    }

    $HTMLProseContentChecker{check_end}->(@_);
    ## ISSUE: |<details><legend>aa</legend></details>| error?
  },
};
4166    
## |datagrid| element.
##
## Content model:
##   Prose -(text* table Prose*) | table | select | datalist | Empty
##
## The element state |phase| tracks which alternative is in effect:
##   'any'   - nothing significant seen yet (set by |check_start|).
##   'prose' - prose content; a |table| as the first element is an error.
##   'none'  - a |table|, |select| or |datalist| has been seen; no more
##             content is allowed.
$Element->{$HTML_NS}{datagrid} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Per-attribute value checkers.
    {
      disabled => $GetHTMLBooleanAttrChecker->('disabled'),
      multiple => $GetHTMLBooleanAttrChecker->('multiple'),
    },
    ## Per-attribute feature status flags.
    {
      %HTMLAttrStatus,
      disabled => FEATURE_HTML5_WD,
      multiple => FEATURE_HTML5_WD,
    },
  ),

  ## Excludes |a| and |datagrid| descendants and starts in phase 'any'.
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    $self->_add_minus_elements ($element_state,
                                {$HTML_NS => {a => 1, datagrid => 1}});
    $element_state->{phase} = 'any';
  },

  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## Shared error report for children that the current phase rejects.
    my $reject = sub {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed');
    };

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($element_state->{phase} eq 'prose') {
      ## Rejected: non-prose elements, and a |table| that would be the
      ## first element child.
      $reject->()
          if not $HTMLProseContent->{$child_nsuri}->{$child_ln}
              or (not $element_state->{has_element} and
                  $child_nsuri eq $HTML_NS and
                  $child_ln eq 'table');
      $element_state->{has_element} = 1;
    } elsif ($element_state->{phase} eq 'any') {
      if ($child_nsuri eq $HTML_NS and
          {table => 1, select => 1, datalist => 1}->{$child_ln}) {
        $element_state->{phase} = 'none';
      } elsif ($HTMLProseContent->{$child_nsuri}->{$child_ln}) {
        $element_state->{has_element} = 1;
        $element_state->{phase} = 'prose';
        ## TODO: transparent?
      } else {
        $reject->();
      }
    } elsif ($element_state->{phase} eq 'none') {
      $reject->();
    } else {
      die "check_child_element: Bad |datagrid| phase: $element_state->{phase}";
    }
  },

  ## Significant text moves phase 'any' to 'prose', is accepted in phase
  ## 'prose' and is an error otherwise.
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      my $phase = $element_state->{phase};
      if ($phase eq 'any') {
        $element_state->{phase} = 'prose';
      } elsif ($phase ne 'prose') {
        $self->{onerror}->(node => $child_node,
                           type => 'character not allowed');
      }
    }
  },

  ## Restores the excluded elements, then runs the generic end check for
  ## phase 'none' or the phrasing content end check otherwise.
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    my $end_checker = $element_state->{phase} eq 'none'
        ? $HTMLChecker{check_end}
        : $HTMLPhrasingContentChecker{check_end};
    $end_checker->(@_);
  },
  ## ISSUE: "xxx<table/>" is disallowed; "<select/>aaa" and "<datalist/>aa"
  ## are not disallowed (assuming that form control contents are also
  ## prose content).
};
4256    
## |command| element (HTML5 WD status); checked as an empty element.
$Element->{$HTML_NS}{command} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Per-attribute value checkers.
    {
      checked    => $GetHTMLBooleanAttrChecker->('checked'),
      default    => $GetHTMLBooleanAttrChecker->('default'),
      disabled   => $GetHTMLBooleanAttrChecker->('disabled'),
      hidden     => $GetHTMLBooleanAttrChecker->('hidden'),
      icon       => $HTMLURIAttrChecker,
      label      => sub { }, ## NOTE: No conformance criteria
      radiogroup => sub { }, ## NOTE: No conformance criteria
      ## |type| must be exactly one of the three keywords below.
      type       => sub {
        my ($self, $attr) = @_;
        my %is_allowed_type = (command => 1, checkbox => 1, radio => 1);
        if (not $is_allowed_type{$attr->value}) {
          $self->{onerror}->(node => $attr,
                             type => 'attribute value not allowed');
        }
      },
    },
    ## Per-attribute feature status flags.
    {
      %HTMLAttrStatus,
      checked    => FEATURE_HTML5_WD,
      default    => FEATURE_HTML5_WD,
      disabled   => FEATURE_HTML5_WD,
      hidden     => FEATURE_HTML5_WD,
      icon       => FEATURE_HTML5_WD,
      label      => FEATURE_HTML5_WD,
      radiogroup => FEATURE_HTML5_WD,
      type       => FEATURE_HTML5_WD,
    },
  ),
};
4287    
## |menu| element.
##
## The children are either all |li| elements or all phrasing content.
## The element state |phase| starts as 'li or phrasing' and is fixed to
## 'li' or 'phrasing' by the first significant child.
$Element->{$HTML_NS}{menu} = {
  %HTMLPhrasingContentChecker,
  #status => FEATURE_M12N10_REC_DEPRECATED | FEATURE_HTML5_WD,
  status => FEATURE_M12N10_REC | FEATURE_HTML5_WD,
  ## NOTE: We don't want any |menu| element warned as deprecated.
  check_attrs => $GetHTMLAttrsChecker->(
    ## Per-attribute value checkers.
    {
      autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
      id => sub {
        ## NOTE: same as global |id=""|, with |$self->{menu}| registration
        my ($self, $attr) = @_;
        my $value = $attr->value;
        if (not length $value) {
          ## NOTE: MUST contain at least one character
          $self->{onerror}->(node => $attr, type => 'empty attribute value');
        } elsif (my $same_id_attrs = $self->{id}->{$value}) {
          $self->{onerror}->(node => $attr, type => 'duplicate ID');
          push @$same_id_attrs, $attr;
        } else {
          $self->{id}->{$value} = [$attr];
        }
        $self->{onerror}->(node => $attr, type => 'space in ID')
            if $value =~ /[\x09-\x0D\x20]/;
        ## The first |menu| with a given ID value is the one registered.
        $self->{menu}->{$value} ||= $attr;
        ## ISSUE: <menu id=""><p contextmenu=""> match?
      },
      label => sub { }, ## NOTE: No conformance criteria
      type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
    },
    ## Per-attribute feature status flags.
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      autosubmit => FEATURE_HTML5_WD,
      compat     => FEATURE_M12N10_REC_DEPRECATED,
      label      => FEATURE_HTML5_WD,
      lang       => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      type       => FEATURE_HTML5_WD,
    },
  ),

  ## Initializes the phase and sets the |in_menu| flag, remembering its
  ## previous value for |check_end|.
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'li or phrasing';
    $element_state->{in_menu_original} = $self->{flag}->{in_menu};
    $self->{flag}->{in_menu} = 1;
  },

  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } else {
      ## The phase this child belongs to, or |undef| if the child is
      ## neither an |li| nor phrasing content.
      my $child_phase
          = ($child_nsuri eq $HTML_NS and $child_ln eq 'li') ? 'li'
          : $HTMLPhrasingContent->{$child_nsuri}->{$child_ln} ? 'phrasing'
          : undef;
      if (defined $child_phase and
          $element_state->{phase} eq 'li or phrasing') {
        $element_state->{phase} = $child_phase;
      } elsif (not defined $child_phase or
               $element_state->{phase} ne $child_phase) {
        $self->{onerror}->(node => $child_el, type => 'element not allowed');
      }
    }
  },

  ## Significant text fixes an undecided phase to 'phrasing' and is an
  ## error in phase 'li'.
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      my $phase = $element_state->{phase};
      if ($phase eq 'li or phrasing') {
        $element_state->{phase} = 'phrasing';
      } elsif ($phase ne 'phrasing') {
        $self->{onerror}->(node => $child_node,
                           type => 'character not allowed');
      }
    }
  },

  ## Clears the |in_menu| flag unless it was already set before this
  ## element, then runs the end check that matches the final phase.
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if (not $element_state->{in_menu_original}) {
      delete $self->{flag}->{in_menu};
    }

    my $end_checker = $element_state->{phase} eq 'li'
        ? $HTMLChecker{check_end}
        : $HTMLPhrasingContentChecker{check_end}; # 'phrasing' or 'li or phrasing'
    $end_checker->(@_);
  },
};
4386    
## |datatemplate| element (at risk): only |rule| children are allowed,
## and no significant text.  It may be the root element of an XML
## document (|is_xml_root|).
$Element->{$HTML_NS}{datatemplate} = {
  %HTMLChecker,
  status => FEATURE_HTML5_AT_RISK,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif (not ($child_nsuri eq $HTML_NS and $child_ln eq 'rule')) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:datatemplate');
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node, type => 'character not allowed')
        if $has_significant;
  },
  is_xml_root => 1,
};
4414    
## |rule| element (at risk).
##
## NOTE: "MAY be anything that, when the parent |datatemplate|
## is applied to some conforming data, results in a conforming DOM tree.":
## We don't check against this, so the child element and child text
## callbacks accept everything.
$Element->{$HTML_NS}{rule} = {
  %HTMLChecker,
  status => FEATURE_HTML5_AT_RISK,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Per-attribute value checkers.
    {
      condition => $HTMLSelectorsAttrChecker,
      mode      => $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker,
    },
    ## Per-attribute feature status flags.
    {
      %HTMLAttrStatus,
      condition => FEATURE_HTML5_AT_RISK,
      mode      => FEATURE_HTML5_AT_RISK,
    },
  ),

  ## |nest| is additionally allowed (plus element) inside |rule|.
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_plus_elements ($element_state, {$HTML_NS => {nest => 1}});
  },
  check_child_element => sub { },
  check_child_text => sub { },

  ## Withdraws the |nest| allowance, then runs the generic end check.
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_plus_elements ($element_state);
    $HTMLChecker{check_end}->(@_);
  },
};
4441    
## |nest| element (at risk); checked as an empty element.
$Element->{$HTML_NS}{nest} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_AT_RISK,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Per-attribute value checkers.
    {
      filter => $HTMLSelectorsAttrChecker,
      ## |mode| must be a non-empty string without space characters.
      mode => sub {
        my ($self, $attr) = @_;
        unless ($attr->value =~ /\A[^\x09-\x0D\x20]+\z/) {
          $self->{onerror}->(node => $attr, type => 'mode:syntax error');
        }
      },
    },
    ## Per-attribute feature status flags.
    {
      %HTMLAttrStatus,
      filter => FEATURE_HTML5_AT_RISK,
      mode   => FEATURE_HTML5_AT_RISK,
    },
  ),
};
4460    
## |legend| element; content is checked with the phrasing content
## checker.
$Element->{$HTML_NS}{legend} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## No attribute-specific value checkers are enabled; the entries
    ## below are kept commented out.
    {
#      accesskey => $AttrCheckerNotImplemented, ## TODO: Character ## TODO: This attribute is not part of HTML5
#      align => $GetHTMLEnumeratedAttrChecker->({
#        top => 1, bottom => 1, left => 1, right => 1,
#      }),
    },
    ## Per-attribute feature status flags.
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      accesskey => FEATURE_M12N10_REC,
      align     => FEATURE_M12N10_REC_DEPRECATED,
      lang      => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
};
4477    
## |div| element; content is checked with the prose content checker.
$Element->{$HTML_NS}{div} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## No attribute-specific value checkers.
    {},
    ## Per-attribute feature status flags.
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align        => FEATURE_M12N10_REC_DEPRECATED,
      datafld      => FEATURE_HTML4_REC_RESERVED,
      dataformatas => FEATURE_HTML4_REC_RESERVED,
      datasrc      => FEATURE_HTML4_REC_RESERVED,
      lang         => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
};
4491    
## |font| element (at risk in HTML5, deprecated in [M12N]); checked as a
## transparent element.
$Element->{$HTML_NS}{font} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_AT_RISK | FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->(
    ## TODO: attribute value checkers
    {
    },
    ## Per-attribute feature status flags.
    {
      %HTMLAttrStatus,
      class => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      color => FEATURE_M12N10_REC_DEPRECATED,
      dir   => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      face  => FEATURE_M12N10_REC_DEPRECATED,
      id    => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang  => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      size  => FEATURE_M12N10_REC_DEPRECATED,
      style => FEATURE_HTML5_AT_RISK | FEATURE_XHTML10_REC,
      title => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    },
  ),
};
4509 wakaba 1.49
4510     ## TODO: frameset FEATURE_M12N10_REC
4511     ## class title id cols rows onload onunload style(x10)
4512     ## frame frameborder longdesc marginheight marginwidth noresize scrolling src name(deprecated) class,id,title,style(x10)
4513     ## noframes Common, lang(xhtml10)
4514    
4515     ## TODO: deprecated:
4516     ## basefont color face id size
4517     ## center Common lang(xhtml10)
4518     ## dir Common compat lang(xhtml10)
4519    
4520     ## TODO: CR: ruby rb rt rp rbc rtc @rbspan
4521 wakaba 1.56
4522     =pod
4523    
4524     WF2: Documents MUST comply to [CHARMOD].
4525     WF2: Vendor extensions MUST NOT be used.
4526    
4527     =cut
4528 wakaba 1.1
## Flag the HTML namespace entry of the namespace table as loaded.
$Whatpm::ContentChecker::Namespace->{$HTML_NS}{loaded} = 1;

## A module file must end with a true value.
1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24