/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.59 - (hide annotations) (download)
Sun Mar 2 03:39:41 2008 UTC (16 years, 8 months ago) by wakaba
Branch: MAIN
Changes since 1.58: +25 -4 lines
++ whatpm/t/ChangeLog	2 Mar 2008 03:39:34 -0000
2008-03-02  Wakaba  <wakaba@suika.fam.cx>

	* content-model-1.dat: Test data for |<area>| are added.

	* content-model-2.dat: Test data for |<img ismap>| are added.

++ whatpm/Whatpm/ChangeLog	2 Mar 2008 03:36:45 -0000
2008-03-02  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm.src: s/local_name/manakai_local_name/g.

++ whatpm/Whatpm/HTML/ChangeLog	2 Mar 2008 03:28:10 -0000
2008-03-02  Wakaba  <wakaba@suika.fam.cx>

	* Serializer.pm (get_inner_html): Typo fixed.

++ whatpm/Whatpm/ContentChecker/ChangeLog	2 Mar 2008 03:37:30 -0000
2008-03-02  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm: |in_a_href| flag is not reset after the </a>.
	Raise an error if |area| is not a descendant of |map|.

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
6    
## -- Feature status constants
##
## Each constant is built from the |Whatpm::ContentChecker::FEATURE_*|
## flags, combined with bitwise OR where more than one applies.  The
## empty prototype lets the constants be used as bare terms.

sub FEATURE_HTML5_ROLE () {
  ## TODO: svg:*/@role
  Whatpm::ContentChecker::FEATURE_STATUS_WD()
}

sub FEATURE_HTML5_LC () {
  Whatpm::ContentChecker::FEATURE_STATUS_LC()
      | Whatpm::ContentChecker::FEATURE_ALLOWED()
}

sub FEATURE_HTML5_AT_RISK () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD()
      | Whatpm::ContentChecker::FEATURE_ALLOWED()
}

sub FEATURE_HTML5_WD () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD()
      | Whatpm::ContentChecker::FEATURE_ALLOWED()
}

sub FEATURE_HTML5_FD () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD()
      | Whatpm::ContentChecker::FEATURE_ALLOWED()
}

sub FEATURE_HTML5_DEFAULT () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD()
      | Whatpm::ContentChecker::FEATURE_ALLOWED()
}

sub FEATURE_HTML5_DROPPED () {
  ## NOTE: Was part of HTML5, but was dropped.
  Whatpm::ContentChecker::FEATURE_STATUS_WD()
}

sub FEATURE_WF2 () {
  Whatpm::ContentChecker::FEATURE_STATUS_LC()
      | Whatpm::ContentChecker::FEATURE_ALLOWED()
}

sub FEATURE_WF2_DEPRECATED () {
  ## NOTE: MUST NOT be used.
  Whatpm::ContentChecker::FEATURE_STATUS_LC()
}

## TODO: RDFa LC

## NOTE: XHTML Role LCWD has almost no information on how the |role|
## attribute can be used - the only requirement for that matter is:
## "the attribute MUST be referenced using its namespace-qualified form"
## (and this is a host language conformance!).

sub FEATURE_XHTMLBASIC11_CR () {
  ## NOTE: Only additions to M12N10_REC are marked.
  Whatpm::ContentChecker::FEATURE_STATUS_CR()
}

sub FEATURE_XHTMLBASIC11_CR_DEPRECATED () {
  Whatpm::ContentChecker::FEATURE_STATUS_CR()
      | Whatpm::ContentChecker::FEATURE_DEPRECATED_INFO()
}

## NOTE: M12N10 status is based on its abstract module definition.
## It contains a number of problems.  (However, again, it's a REC!)
sub FEATURE_M12N10_REC () {
  ## NOTE: Oh, XHTML m12n 1.0 passed the CR phase!  W3C Process suck!
  Whatpm::ContentChecker::FEATURE_STATUS_REC()
}

sub FEATURE_M12N10_REC_DEPRECATED () {
  Whatpm::ContentChecker::FEATURE_STATUS_REC()
      | Whatpm::ContentChecker::FEATURE_DEPRECATED_INFO()
}

## NOTE: XHTML M12N 1.1 is a LC at the time of writing and has no
## addition from 1.0.

## NOTE: XHTML10 status is based on its transitional and frameset DTDs
## (second edition).  Only missing attributes from M12N10 abstract
## definition are added.
sub FEATURE_XHTML10_REC () {
  Whatpm::ContentChecker::FEATURE_STATUS_CR()
}

## TODO: ISO-HTML

## NOTE: HTML4 status is based on its transitional and frameset DTDs
## (HTML 4.01).  Only missing attributes from XHTML10 are added.
sub FEATURE_HTML4_REC_RESERVED () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD()
}
88    
89     ## TODO: According to HTML4 definition, authors SHOULD use style sheets
90     ## rather than presentational attributes (deprecated or not deprecated).
91 wakaba 1.48
92 wakaba 1.58 ## TODO: HTML 3.2 REC
93     ## TODO: HTML 2.x RFC
94     ## TODO: HTML 2.0 RFC
95     ## TODO: Other HTML RFCs
96    
97 wakaba 1.29 ## December 2007 HTML5 Classification
98    
## Elements categorized as metadata content, keyed by namespace URI and
## then by local name.
my $HTMLMetadataContent = {
  $HTML_NS => {
    (map { ($_ => 1) } qw(
      title base link style script noscript
      event-source command datatemplate
    )),
    ## NOTE: A |meta| with no |name| element is not allowed as
    ## a metadata content other than |head| element.
    meta => 1,
    ## NOTE: Only when empty [WF2]
    form => 1,
  },
  ## NOTE: RDF is mentioned in the HTML5 spec.
  ## TODO: Other RDF elements?
  q<http://www.w3.org/1999/02/22-rdf-syntax-ns#> => {RDF => 1},
};
113    
## Elements categorized as prose content, keyed by namespace URI and then
## by local name.
my $HTMLProseContent = {
  $HTML_NS => {
    (map { ($_ => 1) } qw(
      section nav article blockquote aside
      h1 h2 h3 h4 h5 h6 header
      footer address p hr dialog pre
      ol ul dl figure map table
    )),
    details => 1, ## ISSUE: "Prose element" in spec.
    datagrid => 1, ## ISSUE: "Prose element" in spec.
    datatemplate => 1,
    div => 1, ## ISSUE: No category in spec.
    ## NOTE: |style| is only allowed if |scoped| attribute is specified.
    ## Additionally, it must be before any other element or
    ## non-inter-element-whitespace text node.
    style => 1,

    (map { ($_ => 1) } qw(
      br q cite em strong small mark
      dfn abbr time progress meter code
      var samp kbd sub sup span i
      b bdo script noscript event-source
      command font
      a
    )),
    datagrid => 1, ## ISSUE: "Interactive element" in the spec.
    ## NOTE: |area| is allowed only as a descendant of |map|.
    area => 1,

    (map { ($_ => 1) } qw(ins del)),

    ## NOTE: If there is a |menu| ancestor, phrasing.  Otherwise, prose.
    menu => 1,

    (map { ($_ => 1) } qw(img iframe embed object video audio canvas)),
  },

  ## NOTE: Embedded
  q<http://www.w3.org/1998/Math/MathML> => {math => 1},
  q<http://www.w3.org/2000/svg> => {svg => 1},
};
152    
## Elements categorized as sectioning content.
my $HTMLSectioningContent = {
  $HTML_NS => {
    (map { ($_ => 1) } qw(section nav article aside)),
    ## NOTE: |body| is only allowed in |html| element.
    body => 1,
  },
};

## Elements categorized as sectioning roots.
my $HTMLSectioningRoot = {
  $HTML_NS => {
    (map { ($_ => 1) } qw(blockquote datagrid figure td)),
  },
};

## Elements categorized as heading content.
my $HTMLHeadingContent = {
  $HTML_NS => {
    (map { ($_ => 1) } qw(h1 h2 h3 h4 h5 h6 header)),
  },
};
172    
## Elements categorized as phrasing content, keyed by namespace URI and
## then by local name.
my $HTMLPhrasingContent = {
  ## NOTE: All phrasing content is also prose content.
  $HTML_NS => {
    (map { ($_ => 1) } qw(
      br q cite em strong small mark
      dfn abbr time progress meter code
      var samp kbd sub sup span i
      b bdo script noscript event-source
      command font
      a
    )),
    datagrid => 1, ## ISSUE: "Interactive element" in the spec.
    ## NOTE: |area| is allowed only as a descendant of |map|.
    area => 1,

    ## NOTE: Transparent.
    (map { ($_ => 1) } qw(ins del)),

    ## NOTE: If there is a |menu| ancestor, phrasing.  Otherwise, prose.
    menu => 1,

    (map { ($_ => 1) } qw(img iframe embed object video audio canvas)),

    ## NOTE: WF2
    input => 1, ## NOTE: type=hidden
    datalist => 1, ## NOTE: block | where |select| allowed
  },

  ## NOTE: Embedded
  q<http://www.w3.org/1998/Math/MathML> => {math => 1},
  q<http://www.w3.org/2000/svg> => {svg => 1},

  ## NOTE: And non-inter-element-whitespace text nodes.
};
206    
207 wakaba 1.40 ## $HTMLEmbeddedContent: See Whatpm::ContentChecker.
208 wakaba 1.29
## Elements categorized as interactive content.
my $HTMLInteractiveContent = {
  $HTML_NS => {
    ## ISSUE: |datagrid| is categorized as "Interactive element"
    (map { ($_ => 1) } qw(a datagrid)),
  },
};
215    
216 wakaba 1.36 ## NOTE: $HTMLTransparentElements: See Whatpm::ContentChecker.
217     ## NOTE: Semi-transparent elements: See Whatpm::ContentChecker.
218    
219     ## -- Common attribute syntax checkers
220    
221 wakaba 1.1 our $AttrChecker;
222    
## Builds a checker for an enumerated attribute.
##
## Argument: a hash reference mapping each lowercase keyword to a
## positive number (conforming keyword) or a negative number (known but
## non-conforming keyword).
## Returns: a code reference taking ($self, $attr).  It reports
## |enumerated:non-conforming| or |enumerated:invalid| via
## |$self->{onerror}| and says nothing for conforming keywords.
my $GetHTMLEnumeratedAttrChecker = sub {
  my $states = shift; # {value => conforming ? 1 : -1}
  return sub {
    my ($self, $attr) = @_;
    ## TODO: ASCII case insensitivity?
    my $keyword = lc $attr->value;
    my $state = $states->{$keyword};
    unless ($state > 0) {
      ## A true (negative) state is a known non-conforming keyword; a
      ## false one is an unknown keyword.
      $self->{onerror}->(node => $attr,
                         type => $state ? 'enumerated:non-conforming'
                                        : 'enumerated:invalid');
    }
  };
}; # $GetHTMLEnumeratedAttrChecker
237    
## Builds a checker for a boolean attribute.
##
## Argument: the attribute's local name.
## Returns: a code reference taking ($self, $attr) that reports
## |boolean:invalid| unless the value is empty or exactly equal to the
## local name (the comparison is case-sensitive).
my $GetHTMLBooleanAttrChecker = sub {
  my $local_name = shift;
  return sub {
    my ($self, $attr) = @_;
    my $given = $attr->value;
    if ($given ne $local_name and $given ne '') {
      $self->{onerror}->(node => $attr, type => 'boolean:invalid');
    }
  };
}; # $GetHTMLBooleanAttrChecker
248    
249 wakaba 1.8 ## Unordered set of space-separated tokens
## Checks an unordered set of unique space-separated tokens.  Each
## repeated occurrence of a token is reported as |duplicate token:TOKEN|.
my $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker = sub {
  my ($self, $attr) = @_;
  my %seen;
  for my $token (split /[\x09-\x0D\x20]/, $attr->value) {
    ## Runs of space characters produce empty fields; ignore them.
    next unless length $token;
    ## The first occurrence is fine; only later ones are errors.
    next unless $seen{$token}++;
    $self->{onerror}->(node => $attr, type => 'duplicate token:'.$token);
  }
}; # $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker
261 wakaba 1.8
262 wakaba 1.1 ## |rel| attribute (unordered set of space separated tokens,
263     ## whose allowed values are defined by the section on link types)
## Checks a |rel| attribute value against the link type registry loaded
## from |Whatpm::_LinkTypeList|.
##
## Arguments: ($a_or_area, $todo, $self, $attr).  |$a_or_area| is the
## index used to look up |$def->{effect}| for each link type; |$todo|
## receives |has_hyperlink_link_type| when a hyperlink type is found.
my $HTMLLinkTypesAttrChecker = sub {
  my ($a_or_area, $todo, $self, $attr) = @_;

  ## Collect the distinct tokens.  |up| may be repeated; any other
  ## repeated token is an error.
  my %word;
  for my $word (split /[\x09-\x0D\x20]/, $attr->value) {
    next unless length $word;
    if (not $word{$word}) {
      $word{$word} = 1;
    } elsif ($word ne 'up') {
      $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
    }
  }

  ## NOTE: Case sensitive match (since HTML5 spec does not say link
  ## types are case-insensitive and it says "The value should not
  ## be confusingly similar to any other defined value (e.g.
  ## differing only in case).").
  ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
  ## "MAY"s and "only ... MAY" restrict non-standard non-registered
  ## values to be used conformingly.
  require Whatpm::_LinkTypeList;
  our $LinkType;
  for my $word (keys %word) {
    my $def = $LinkType->{$word};
    unless (defined $def) {
      $self->{onerror}->(node => $attr, level => 'unsupported',
                         type => 'link type:'.$word);
      next;
    }

    my $effect = $def->{effect}->[$a_or_area];
    my $status = $def->{status};
    if ($status eq 'accepted' or $status eq 'proposal') {
      if ($status eq 'proposal') {
        $self->{onerror}->(node => $attr, level => 's',
                           type => 'link type:proposed:'.$word);
      }
      unless (defined $effect) {
        $self->{onerror}->(node => $attr,
                           type => 'link type:bad context:'.$word);
      }
    } else { # rejected or synonym
      $self->{onerror}->(node => $attr,
                         type => 'link type:non-conforming:'.$word);
    }

    ## |alternate| is handled after the loop, since its effect depends
    ## on whether |stylesheet| is also present.
    if (defined $effect and $word ne 'alternate' and
        $effect eq 'hyperlink') {
      $todo->{has_hyperlink_link_type} = 1;
    }

    if ($def->{unique}) {
      if ($self->{has_link_type}->{$word}) {
        $self->{onerror}->(node => $attr,
                           type => 'link type:duplicate:'.$word);
      } else {
        $self->{has_link_type}->{$word} = 1;
      }
    }
  }

  if ($word{alternate} and not $word{stylesheet}) {
    $todo->{has_hyperlink_link_type} = 1;
  }

  ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
  ## says that using both X-Pingback: header field and HTML
  ## <link rel=pingback> is deprecated and if both appears they
  ## SHOULD contain exactly the same value.
  ## ISSUE: Pingback 1.0 specification defines the exact representation
  ## of its link element, which cannot be tested by the current arch.
  ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
  ## include any string that matches to the pattern for the rel=pingback
  ## link, which again is impossible to test.
  ## ISSUE: rel=pingback href MUST NOT include entities other than
  ## predefined 4.

  ## NOTE: <link rel="up index"><link rel="up up index"> is not an error.
  ## NOTE: We can't check "If the page is part of multiple hierarchies,
  ## then they SHOULD be described in different paragraphs.".
}; # $HTMLLinkTypesAttrChecker
345 wakaba 1.20
346     ## TODO: "When an author uses a new type not defined by either this specification or the Wiki page, conformance checkers should offer to add the value to the Wiki, with the details described above, with the "proposal" status."
347 wakaba 1.1
348     ## URI (or IRI)
## Checks a URI (or IRI) reference attribute by delegating to
## |Whatpm::URIChecker->check_iri_reference|.  Each problem it reports is
## forwarded to |$self->{onerror}| as |URI::TYPE| with |:POSITION|
## appended when a position is given.  Sets |$self->{has_uri_attr}|.
my $HTMLURIAttrChecker = sub {
  my ($self, $attr) = @_;
  ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
  my $forward = sub {
    my %opt = @_;
    my $type = 'URI::'.$opt{type};
    $type .= ':'.$opt{position} if defined $opt{position};
    $self->{onerror}->(node => $attr, level => $opt{level}, type => $type);
  };
  Whatpm::URIChecker->check_iri_reference ($attr->value, $forward);
  $self->{has_uri_attr} = 1; ## TODO: <html manifest>
}; # $HTMLURIAttrChecker
361    
362     ## A space separated list of one or more URIs (or IRIs)
## Checks a space-separated list of URI (or IRI) references.  Every item
## is passed to |Whatpm::URIChecker->check_iri_reference|; problems are
## forwarded as |URIs::TYPE:INDEX| (plus |:POSITION| when given), where
## INDEX is the zero-based item number.  Sets |$self->{has_uri_attr}|.
my $HTMLSpaceURIsAttrChecker = sub {
  my ($self, $attr) = @_;
  my $index = 0;
  my $forward = sub {
    my %opt = @_;
    my $type = 'URIs::'.$opt{type}.':'.$index;
    $type .= ':'.$opt{position} if defined $opt{position};
    $self->{onerror}->(node => $attr, level => $opt{level}, type => $type);
  };
  for my $uri (split /[\x09-\x0D\x20]+/, $attr->value) {
    Whatpm::URIChecker->check_iri_reference ($uri, $forward);
    $index++;
  }
  ## ISSUE: Relative references?
  ## ISSUE: Leading or trailing white spaces are conformant?
  ## ISSUE: A sequence of white space characters are conformant?
  ## ISSUE: A zero-length string is conformant? (It does contain a relative reference, i.e. same as base URI.)
  ## NOTE: Duplication seems not an error.
  $self->{has_uri_attr} = 1;
}; # $HTMLSpaceURIsAttrChecker
383    
## Checks a date-and-time attribute value (date, time, and time zone).
## A value that does not match the overall syntax yields
## |datetime:syntax error|; otherwise each out-of-range field is reported
## separately (|datetime:bad day|, |datetime:bad month|, ...).
my $HTMLDatetimeAttrChecker = sub {
  my ($self, $attr) = @_;
  my $report = sub {
    $self->{onerror}->(node => $attr, type => $_[0]);
  };
  ## ISSUE: "space", not "space character" (in parsing algorithm, "space character")
  if (my ($year, $month, $day, $hour, $minute, $second, $fraction,
          $tz_hour, $tz_minute)
          = $attr->value =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})(?>[\x09-\x0D\x20]+(?>T[\x09-\x0D\x20]*)?|T[\x09-\x0D\x20]*)([0-9]{2}):([0-9]{2})(?>:([0-9]{2}))?(?>\.([0-9]+))?[\x09-\x0D\x20]*(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
    ## Index 0 is a placeholder so that months can index directly.
    ## February is listed with 29 days; leap years are checked below.
    my @last_day = (0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31);
    if ($month >= 1 and $month <= 12) { ## ISSUE: This is not explicitly specified (though in parsing algorithm)
      if ($day < 1 or $day > $last_day[$month]) {
        $report->('datetime:bad day');
      }
      my $is_leap_year = ($year % 400 == 0 or
                          ($year % 4 == 0 and $year % 100 != 0));
      if ($month == 2 and $day == 29 and not $is_leap_year) {
        $report->('datetime:bad day');
      }
    } else {
      $report->('datetime:bad month');
    }
    $report->('datetime:bad hour') if $hour > 23;
    $report->('datetime:bad minute') if $minute > 59;
    ## The seconds field is optional.
    $report->('datetime:bad second') if defined $second and $second > 59;
    ## The zone fields are undefined (and so never out of range) for |Z|.
    $report->('datetime:bad timezone hour') if $tz_hour > 23;
    $report->('datetime:bad timezone minute') if $tz_minute > 59;
    ## ISSUE: Maybe timezone -00:00 should have same semantics as in RFC 3339.
  } else {
    $report->('datetime:syntax error');
  }
}; # $HTMLDatetimeAttrChecker
414    
## Checks that the attribute value is an integer: an optional |-|
## followed by one or more ASCII digits.  Anything else is reported as
## |integer:syntax error|.
my $HTMLIntegerAttrChecker = sub {
  my ($self, $attr) = @_;
  if ($attr->value !~ /\A-?[0-9]+\z/) {
    $self->{onerror}->(node => $attr, type => 'integer:syntax error');
  }
}; # $HTMLIntegerAttrChecker
422    
## Builds a checker for a non-negative integer attribute.
##
## Argument: a code reference that receives the numeric value and returns
## true if it is within the allowed range.
## Returns: a code reference taking ($self, $attr) that reports
## |nninteger:syntax error| for anything but ASCII digits and
## |nninteger:out of range| when the range check fails.
my $GetHTMLNonNegativeIntegerAttrChecker = sub {
  my $range_check = shift;
  return sub {
    my ($self, $attr) = @_;
    my $digits = $attr->value;
    if ($digits !~ /\A[0-9]+\z/) {
      $self->{onerror}->(node => $attr,
                         type => 'nninteger:syntax error');
    } elsif (not $range_check->($digits + 0)) {
      $self->{onerror}->(node => $attr, type => 'nninteger:out of range');
    }
  };
}; # $GetHTMLNonNegativeIntegerAttrChecker
438    
## Builds a checker for a floating point number attribute.
##
## Argument: a code reference that receives the numeric value and returns
## true if it is within the allowed range.
## Returns: a code reference taking ($self, $attr) that reports
## |float:syntax error| for a malformed value and |float:out of range|
## when the range check fails.
##
## A well-formed value is an optional |-| followed by ASCII digits with
## at most one |.| placed before, between, or after the digits; at least
## one digit is required.  Values with several full stops, such as
## |1.2.3|, are rejected instead of being silently truncated by the
## numeric conversion.
my $GetHTMLFloatingPointNumberAttrChecker = sub {
  my $range_check = shift;
  return sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    if ($value =~ /\A-?(?:[0-9]+\.?[0-9]*|\.[0-9]+)\z/) {
      unless ($range_check->($value + 0)) {
        $self->{onerror}->(node => $attr, type => 'float:out of range');
      }
    } else {
      $self->{onerror}->(node => $attr,
                         type => 'float:syntax error');
    }
  };
}; # $GetHTMLFloatingPointNumberAttrChecker
454    
455     ## "A valid MIME type, optionally with parameters. [RFC 2046]"
456     ## ISSUE: RFC 2046 does not define syntax of media types.
457     ## ISSUE: The definition of "a valid MIME type" is unknown.
458     ## Syntactical correctness?
## Checks an Internet media type attribute value ("type/subtype" with
## optional |;name=value| parameters).
##
## A value that does not match the syntax is reported as
## |IMT:syntax error|.  Otherwise the type, subtype, and parameter
## name/value pairs are handed to |Whatpm::IMTChecker->check_imt|, and
## whatever it reports is forwarded as |IMT:TYPE|.
##
## Quoted parameter values are passed on without their surrounding
## quotation marks and with backslash escapes resolved.
my $HTMLIMTAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;
  ## ISSUE: RFC 2045 Content-Type header field allows insertion
  ## of LWS/comments between tokens.  Is it allowed in HTML?  Maybe no.
  ## ISSUE: RFC 2231 extension?  Maybe no.
  my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
  my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
  my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
  if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
    my @type = ($1, $2);
    my $param = $3;
    ## Peel the parameters off one at a time.
    while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
      ## Copy the captures before running any other pattern match.
      my ($name, $plain, $quoted) = ($1, $2, $3);
      if (defined $plain) {
        push @type, $name => $plain;
      } else {
        ## |$quoted| always begins and ends with |"| (see |$qs|).
        my $v = substr $quoted, 1, length ($quoted) - 2;
        $v =~ s/\\(.)/$1/gs;
        push @type, $name => $v;
      }
    }
    require Whatpm::IMTChecker;
    Whatpm::IMTChecker->check_imt (sub {
      my %opt = @_;
      $self->{onerror}->(node => $attr, level => $opt{level},
                         type => 'IMT:'.$opt{type});
    }, @type);
  } else {
    $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
  }
}; # $HTMLIMTAttrChecker
491    
## Checks a language tag attribute value with
## |Whatpm::LangTag->check_rfc3066_language_tag|.  Reported problems are
## forwarded as |LangTag:TYPE|, with |:SUBTAG| appended when a subtag is
## given.
my $HTMLLanguageTagAttrChecker = sub {
  ## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm.

  my ($self, $attr) = @_;
  my $forward = sub {
    my %opt = @_;
    my @type = ('LangTag:'.$opt{type});
    push @type, $opt{subtag} if defined $opt{subtag};
    $self->{onerror}->(node => $attr, type => join (':', @type),
                       value => $opt{value}, level => $opt{level});
  };
  require Whatpm::LangTag;
  Whatpm::LangTag->check_rfc3066_language_tag ($attr->value, $forward);
  ## ISSUE: RFC 4646 (3066bis)?

  ## TODO: testdata
}; # $HTMLLanguageTagAttrChecker
509    
510     ## "A valid media query [MQ]"
## Media queries are not validated; every such attribute is reported as
## |media query| at the |unsupported| level.
my $HTMLMQAttrChecker = sub {
  my $self = shift;
  my $attr = shift;
  my @report = (node => $attr, level => 'unsupported',
                type => 'media query');
  $self->{onerror}->(@report);
  ## ISSUE: What is "a valid media query"?
}; # $HTMLMQAttrChecker
517    
## Event handler content is not validated; every such attribute is
## reported as |event handler| at the |unsupported| level.
my $HTMLEventHandlerAttrChecker = sub {
  my $self = shift;
  my $attr = shift;
  my @report = (node => $attr, level => 'unsupported',
                type => 'event handler');
  $self->{onerror}->(@report);
  ## TODO: MUST contain valid ECMAScript code matching the
  ## ECMAScript |FunctionBody| production. [ECMA262]
  ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
  ## ISSUE: Automatic semicolon insertion does not apply?
  ## ISSUE: Other script languages?
}; # $HTMLEventHandlerAttrChecker
528    
## Checks a |usemap| attribute.  A value starting with |#| is recorded
## (without the |#|) in |@{$self->{usemap}}| together with the attribute
## node; any other value is reported as |#idref:syntax error|.
my $HTMLUsemapAttrChecker = sub {
  my ($self, $attr) = @_;
  ## MUST be a valid hashed ID reference to a |map| element
  my $reference = $attr->value;
  if ($reference =~ /^#/) {
    ## ISSUE: Is |usemap="#"| conformant? (c.f. |id=""| is non-conformant.)
    my $map_id = substr $reference, 1;
    push @{$self->{usemap}}, [$map_id => $attr];
  } else {
    $self->{onerror}->(node => $attr, type => '#idref:syntax error');
  }
  ## NOTE: Space characters in hashed ID references are conforming.
  ## ISSUE: UA algorithm for matching is case-insensitive; IDs only different in cases should be reported
}; # $HTMLUsemapAttrChecker
542    
## Checks a browsing context name or keyword.  Names starting with |_|
## are reserved: only |_self|, |_parent| and |_top| (compared after
## lowercasing) are accepted; any other is reported as
## |reserved browsing context name|.
my $HTMLTargetAttrChecker = sub {
  my ($self, $attr) = @_;
  my $name = $attr->value;
  ## NOTE: An empty string is a valid browsing context name (same as _self).
  if ($name =~ /^_/) {
    my %keyword = (_self => 1, _parent => 1, _top => 1);
    ## ISSUE: ASCII case-insensitive?
    if (not $keyword{lc $name}) {
      $self->{onerror}->(node => $attr,
                         type => 'reserved browsing context name');
    }
  }
}; # $HTMLTargetAttrChecker
558    
## Checks an attribute whose value is a group of selectors by parsing it
## with |Whatpm::CSS::SelectorsParser|.  The parser is told which
## pseudo-classes and pseudo-elements are known; its errors are forwarded
## with the type prefixed by |selectors:| and the attribute as |node|.
my $HTMLSelectorsAttrChecker = sub {
  my ($self, $attr) = @_;

  ## ISSUE: Namespace resolution?

  require Whatpm::CSS::SelectorsParser;
  my $parser = Whatpm::CSS::SelectorsParser->new;

  my @pseudo_class = qw(
    active checked disabled empty enabled first-child first-of-type
    focus hover indeterminate last-child last-of-type link only-child
    only-of-type root target visited
    lang nth-child nth-last-child nth-of-type nth-last-of-type not
    -manakai-contains -manakai-current
  );
  for my $name (@pseudo_class) {
    $parser->{pseudo_class}->{$name} = 1;
  }

  my @pseudo_element = qw(after before first-letter first-line);
  for my $name (@pseudo_element) {
    $parser->{pseudo_element}->{$name} = 1;
  }

  $parser->{must_level} = $self->{must_level};
  $parser->{onerror} = sub {
    my %info = @_;
    $info{type} = 'selectors:'.$info{type};
    $self->{onerror}->(%info, node => $attr);
  };

  my $selectors = $attr->value;
  $parser->parse_string ($selectors);
}; # $HTMLSelectorsAttrChecker
588    
## Checkers for the attributes that are allowed on every HTML element,
## keyed by attribute local name.  Each checker is called with
## ($self, $attr) and reports problems through |$self->{onerror}|.
my $HTMLAttrChecker = {
  ## TODO: aria-* ## TODO: svg:*/@aria-* [HTML5ROLE] -> [STATES]

  ## Records every ID in |$self->{id}| (value => list of attribute
  ## nodes) and reports duplicates, space characters, and empty values.
  id => sub {
    ## NOTE: |map| has its own variant of |id=""| checker
    my ($self, $attr) = @_;
    my $id = $attr->value;
    if ($id eq '') {
      ## NOTE: MUST contain at least one character
      $self->{onerror}->(node => $attr, type => 'empty attribute value');
    } else {
      if (my $known = $self->{id}->{$id}) {
        $self->{onerror}->(node => $attr, type => 'duplicate ID');
        push @$known, $attr;
      } else {
        $self->{id}->{$id} = [$attr];
      }
      $self->{onerror}->(node => $attr, type => 'space in ID')
          if $id =~ /[\x09-\x0D\x20]/;
    }
  },

  title => sub {}, ## NOTE: No conformance criteria

  ## An empty value is allowed; anything else is checked as a language
  ## tag.  The attribute itself is an error outside HTML documents.
  lang => sub {
    my ($self, $attr) = @_;
    my $tag = $attr->value;
    unless ($tag eq '') {
      require Whatpm::LangTag;
      Whatpm::LangTag->check_rfc3066_language_tag ($tag, sub {
        my %opt = @_;
        my @type = ('LangTag:'.$opt{type});
        push @type, $opt{subtag} if defined $opt{subtag};
        $self->{onerror}->(node => $attr, type => join (':', @type),
                           value => $opt{value}, level => $opt{level});
      });
    }
    ## ISSUE: RFC 4646 (3066bis)?
    $self->{onerror}->(node => $attr, type => 'in XML:lang')
        unless $attr->owner_document->manakai_is_html;

    ## TODO: test data
  },

  dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),

  ## Records the first occurrence of each class name in
  ## |$self->{return}->{class}| and reports repeated names.
  class => sub {
    my ($self, $attr) = @_;
    my %seen;
    for my $name (split /[\x09-\x0D\x20]/, $attr->value) {
      next unless length $name;
      if ($seen{$name}) {
        $self->{onerror}->(node => $attr, type => 'duplicate token:'.$name);
      } else {
        $seen{$name} = 1;
        push @{$self->{return}->{class}->{$name}||=[]}, $attr;
      }
    }
  },

  ## Only records the reference in |@{$self->{contextmenu}}|; no error
  ## is reported here.
  contextmenu => sub {
    my ($self, $attr) = @_;
    push @{$self->{contextmenu}}, [$attr->value => $attr];
    ## ISSUE: "The value must be the ID of a menu element in the DOM."
    ## What is "in the DOM"?  A menu Element node that is not part
    ## of the Document tree is in the DOM?  A menu Element node that
    ## belong to another Document tree is in the DOM?
  },

  irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'), ## TODO: status: Working Draft
  ## TODO: repeat, repeat-start, repeat-min, repeat-max, repeat-template ## TODO: global
  ## TODO: role [HTML5ROLE] ## TODO: global @role [XHTML1ROLE]
  tabindex => $HTMLIntegerAttrChecker,
  ## TODO: ref, template, registrationmark
};
661    
## Feature status of the attributes allowed on every HTML element, keyed
## by attribute local name and grouped here by status.
my %HTMLAttrStatus = (
  (map { ($_ => FEATURE_HTML5_DEFAULT) } qw(
    class contenteditable dir id lang tabindex title
  )),
  (map { ($_ => FEATURE_HTML5_WD) } qw(contextmenu irrelevant)),
  (map { ($_ => FEATURE_HTML5_AT_RISK) } qw(
    ref registrationmark template
  )),
  draggable => FEATURE_HTML5_LC,
  role => FEATURE_HTML5_ROLE,
);
678    
## Feature status of the XHTML m12n "Common" attribute collection, keyed
## by attribute local name.  All but |style| share the same status.
my %HTMLM12NCommonAttrStatus = (
  (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) } qw(
    class dir id title
    onclick ondblclick
    onmousedown onmouseup onmouseover onmousemove onmouseout
    onkeypress onkeydown onkeyup
  )),
  style => FEATURE_HTML5_DEFAULT | FEATURE_XHTMLBASIC11_CR_DEPRECATED |
      FEATURE_M12N10_REC,
);
697    
## Register every event handler content attribute as a global attribute:
## all of them share the same checker and the default HTML5 status.
for my $event_attr (qw(
  onabort onbeforeunload onblur onchange onclick oncontextmenu
  ondblclick ondrag ondragend ondragenter ondragleave ondragover
  ondragstart ondrop onerror onfocus onkeydown onkeypress
  onkeyup onload onmessage onmousedown onmousemove onmouseout
  onmouseover onmouseup onmousewheel onresize onscroll onselect
  onsubmit onunload
)) {
  $HTMLAttrStatus{$event_attr} = FEATURE_HTML5_DEFAULT;
  $HTMLAttrChecker->{$event_attr} = $HTMLEventHandlerAttrChecker;
}
709    
## Builds the |check_attrs| handler for an element type.
##
## Arguments: a hash reference of element-specific attribute checkers
## and a hash reference of element-specific attribute statuses, both
## keyed by attribute local name.
## Returns: a code reference taking ($self, $item, $element_state) that
## checks every attribute of |$item->{node}|.
my $GetHTMLAttrsChecker = sub {
  my ($element_specific_checker, $element_specific_status) = @_;
  return sub {
    my ($self, $item, $element_state) = @_;
    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;
      my $in_null_ns = ($attr_ns eq '');

      ## For attributes in no namespace, an element-specific checker wins
      ## over the global HTML one; after that come the checker registered
      ## for the namespace and local name, then the namespace-wide one.
      my $checker;
      if ($in_null_ns) {
        $checker = $element_specific_checker->{$attr_ln}
            || $HTMLAttrChecker->{$attr_ln};
      }
      $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
          || $AttrChecker->{$attr_ns}->{''};

      if ($checker) {
        $checker->($self, $attr, $item);
      } elsif (not $in_null_ns) {
        ## Unknown null-namespace attributes are not reported here.
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute');
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }

      $self->_attr_status_info ($attr, $element_specific_status->{$attr_ln})
          if $in_null_ns;
      ## TODO: global attribute
    }
  };
}; # $GetHTMLAttrsChecker
742    
## Default element checker for HTML elements: the generic handlers from
## |%Whatpm::ContentChecker::AnyChecker| with |check_attrs| replaced by
## the global attribute checker.
my %HTMLChecker = %Whatpm::ContentChecker::AnyChecker;
$HTMLChecker{check_attrs} = $GetHTMLAttrsChecker->({}, \%HTMLAttrStatus);
747    
## Checker for elements whose content model is empty: child elements are
## rejected unless they are listed in |$self->{plus_elements}|, and
## significant text is rejected.
my %HTMLEmptyChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    ## Explicitly excluded elements take precedence over included ones.
    my $type
        = $self->{minus_elements}->{$child_nsuri}->{$child_ln}
            ? 'element not allowed:minus'
        : $self->{plus_elements}->{$child_nsuri}->{$child_ln}
            ? undef
        : 'element not allowed:empty';
    return unless defined $type;
    $self->{onerror}->(node => $child_el,
                       type => $type,
                       level => $self->{must_level});
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    return unless $has_significant;
    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed:empty',
                       level => $self->{must_level});
  },
);
774    
## Checker for elements that may contain text only: child elements are
## reported unless they are listed in |plus_elements|.
my %HTMLTextChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif (not $self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## NOTE: Unlike the other reports in this checker, no |level| is
      ## given here.
      $self->{onerror}->(node => $child_el, type => 'element not allowed');
    }
  },
);
791    
## TODO: Rename as "FlowContent" (HTML5 revision 1261)

## Checker for elements whose content model is prose content.
##
## |$element_state->{has_non_style}| records that something other than a
## |style| element has been seen; an HTML |style| child is accepted only
## while that flag is unset and only if it has a |scoped| attribute.
my %HTMLProseContentChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed; nothing to check.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
      my $style_allowed = (not $element_state->{has_non_style} and
                           $child_el->has_attribute_ns (undef, 'scoped'));
      unless ($style_allowed) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:prose style',
                           level => $self->{must_level});
      }
    } elsif ($HTMLProseContent->{$child_nsuri}->{$child_ln}) {
      ## A transparent child does not, by itself, end the run of leading
      ## |style| elements.
      $element_state->{has_non_style} = 1 if not $child_is_transparent;
    } else {
      $element_state->{has_non_style} = 1;
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:prose',
                         level => $self->{must_level});
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $element_state->{has_non_style} = 1 if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if ($element_state->{has_significant}) {
      ## Propagate the "has significant content" mark upwards.
      $item->{real_parent_state}->{has_significant} = 1;
    } elsif (not $item->{transparent}) {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{should_level},
                         type => 'no significant content');
    }
  },
);
839    
## Checker for elements whose content model is phrasing content.
my %HTMLPhrasingContentChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif (not ($self->{plus_elements}->{$child_nsuri}->{$child_ln} or
                  $HTMLPhrasingContent->{$child_nsuri}->{$child_ln})) {
      ## Neither explicitly allowed nor phrasing content.
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:phrasing',
                         level => $self->{must_level});
    }
  },
  ## The significant-content check is shared with the prose checker.
  check_end => $HTMLProseContentChecker{check_end},
  ## NOTE: The definition for |li| assumes that the only differences
  ## between prose and phrasing content checkers are |check_child_element|
  ## and |check_child_text|.
);
864    
## Checker for transparent elements; currently a shallow copy of the
## prose content checker.
## ISSUE: Should the significant content rule be applied to a transparent
## element with a parent?
my %HTMLTransparentChecker = (%HTMLProseContentChecker);
868 wakaba 1.40
## Package-level element definition tables.
our $Element;
our $ElementDefault;

## Entry registered under the empty local name in the HTML namespace;
## it is a plain copy of the base HTML checker.
$Element->{$HTML_NS}->{''} = {%HTMLChecker};
875    
## |html| element.  Its element children are checked with a small state
## machine kept in |$element_state->{phase}|:
## 'before head' -> 'after head' -> 'after body'.
$Element->{$HTML_NS}->{html} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  is_root => 1,
  check_attrs => $GetHTMLAttrsChecker->({
    manifest => $HTMLURIAttrChecker,
    xmlns => sub {
      my ($self, $attr) = @_;
      ## The only accepted value is the XHTML namespace URI.
      if ($attr->value ne $HTML_NS) {
        $self->{onerror}->(node => $attr, type => 'invalid attribute value');
      }
      ## Reported when the owner document is not an HTML document.
      if (not $attr->owner_document->manakai_is_html) {
        $self->{onerror}->(node => $attr, type => 'in XML:xmlns');
        ## TODO: Test
      }
    },
  }, {
    %HTMLAttrStatus,
    dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    manifest => FEATURE_HTML5_DEFAULT,
    version => FEATURE_M12N10_REC,
    xmlns => FEATURE_HTML5_DEFAULT,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'before head';
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed; the phase is left untouched.
    } else {
      my $phase = $element_state->{phase};
      my $is_html = ($child_nsuri eq $HTML_NS);
      my $is_head = ($is_html and $child_ln eq 'head');
      my $is_body = ($is_html and $child_ln eq 'body');
      if ($phase eq 'before head') {
        if ($is_head) {
          $element_state->{phase} = 'after head';
        } elsif ($is_body) {
          ## |body| without a preceding |head|.
          $self->{onerror}->(node => $child_el,
                             type => 'ps element missing:head');
          $element_state->{phase} = 'after body';
        } else {
          $self->{onerror}->(node => $child_el,
                             type => 'element not allowed');
        }
      } elsif ($phase eq 'after head') {
        if ($is_body) {
          $element_state->{phase} = 'after body';
        } else {
          $self->{onerror}->(node => $child_el,
                             type => 'element not allowed');
        }
      } elsif ($phase eq 'after body') {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed');
      } else {
        die "check_child_element: Bad |html| phase: $phase";
      }
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed')
        if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## Error types to report for each phase the element can end in.
    my %missing_for = (
      'after body' => [],
      'before head' => ['child element missing:head',
                        'child element missing:body'],
      'after head' => ['child element missing:body'],
    );
    my $phase = $element_state->{phase};
    my $missing = $missing_for{$phase}
        or die "check_end: Bad |html| phase: $phase";
    for my $error_type (@$missing) {
      $self->{onerror}->(node => $item->{node},
                         type => $error_type);
    }

    $HTMLChecker{check_end}->(@_);
  },
};
965 wakaba 1.25
## |head| element: metadata content with exactly one |title|.
##
## While children of |head| are being processed, |$self->{flag}->{in_head}|
## is set to true; the value it had before is kept in
## |$element_state->{in_head_original}| and put back in |check_end|.
$Element->{$HTML_NS}->{head} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    profile => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'title') {
      ## Only the first |title| is accepted.
      unless ($element_state->{has_title}) {
        $element_state->{has_title} = 1;
      } else {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:head title',
                           level => $self->{must_level});
      }
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
      ## A |style| with the |scoped| attribute is not allowed in |head|.
      if ($child_el->has_attribute_ns (undef, 'scoped')) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:head style',
                           level => $self->{must_level});
      }
    } elsif ($HTMLMetadataContent->{$child_nsuri}->{$child_ln}) {
      #

      ## NOTE: |meta| is a metadata content.  However, strictly speaking,
      ## a |meta| element with none of |charset|, |name|,
      ## or |http-equiv| attribute is not allowed.  It is non-conforming
      ## anyway.

      ## TODO: |form| MUST be empty and in XML [WF2].
    } else {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:metadata',
                         level => $self->{must_level});
    }

    ## Remember the previous flag value only once.  This callback runs
    ## for every child element; saving unconditionally would, from the
    ## second child on, record the value 1 set below and make |check_end|
    ## leave the flag set after the end of the |head| element.
    unless (exists $element_state->{in_head_original}) {
      $element_state->{in_head_original} = $self->{flag}->{in_head};
    }
    $self->{flag}->{in_head} = 1;
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $self->{onerror}->(node => $child_node, type => 'character not allowed');
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    unless ($element_state->{has_title}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing:title');
    }
    ## Restore the flag only if this element actually changed it (i.e.
    ## it had at least one child element).
    if (exists $element_state->{in_head_original}) {
      $self->{flag}->{in_head} = $element_state->{in_head_original};
    }

    $HTMLChecker{check_end}->(@_);
  },
};
1032    
## |title| element: text-only content.  The keys listed after the
## |%HTMLTextChecker| spread replace any same-named entries from it.
$Element->{$HTML_NS}->{title} = {
  %HTMLTextChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
};
1043 wakaba 1.1
## |base| element: empty content.  At most one |base| per checker
## (tracked by |$self->{has_base}|), it needs |href| or |target|, and it
## is reported when it follows a URI attribute (|$self->{has_uri_attr}|)
## or a hyperlink element (|$self->{has_hyperlink_element}|).
$Element->{$HTML_NS}->{base} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  %HTMLEmptyChecker,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $node = $item->{node};

    if (not $self->{has_base}) {
      $self->{has_base} = 1;
    } else {
      $self->{onerror}->(node => $node,
                         type => 'element not allowed:base');
    }

    my $has_href = $node->has_attribute_ns (undef, 'href');
    my $has_target = $node->has_attribute_ns (undef, 'target');

    if ($self->{has_uri_attr} and $has_href) {
      ## ISSUE: Are these examples conforming?
      ## <head profile="a b c"><base href> (except for |profile|'s
      ## non-conformance)
      ## <title xml:base="relative"/><base href/> (maybe it should be)
      ## <unknown xmlns="relative"/><base href/> (assuming that
      ## |{relative}:unknown| is allowed before XHTML |base| (unlikely, though))
      ## <style>@import 'relative';</style><base href>
      ## <script>location.href = 'relative';</script><base href>
      ## NOTE: <html manifest=".."><head><base href=""/> is conforming as
      ## an exception.
      $self->{onerror}->(node => $node,
                         type => 'basehref after URI attribute');
    }
    if ($self->{has_hyperlink_element} and $has_target) {
      ## ISSUE: Are these examples conforming?
      ## <head><title xlink:href=""/><base target="name"/></head>
      ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
      ## (assuming that |xbl:xbl| is allowed before |base|)
      ## NOTE: These are non-conformant anyway because of |head|'s content model:
      ## <link href=""/><base target="name"/>
      ## <link rel=unknown href=""><base target=name>
      $self->{onerror}->(node => $node,
                         type => 'basetarget after hyperlink');
    }

    unless ($has_href or $has_target) {
      $self->{onerror}->(node => $node,
                         type => 'attribute missing:href|target');
    }

    ## Per-attribute checks are delegated to the generic attribute checker.
    my $attrs_checker = $GetHTMLAttrsChecker->({
      href => $HTMLURIAttrChecker,
      target => $HTMLTargetAttrChecker,
    }, {
      %HTMLAttrStatus,
      href => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      target => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    });
    return $attrs_checker->($self, $item, $element_state);
  },
};
1102    
## |link| element: empty content; both |href| and |rel| are required.
$Element->{$HTML_NS}->{link} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  %HTMLEmptyChecker,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $attrs_checker = $GetHTMLAttrsChecker->({
      href => $HTMLURIAttrChecker,
      ## The link type checker receives |0| and |$item| ahead of the
      ## usual checker arguments.
      rel => sub { $HTMLLinkTypesAttrChecker->(0, $item, @_) },
      media => $HTMLMQAttrChecker,
      hreflang => $HTMLLanguageTagAttrChecker,
      type => $HTMLIMTAttrChecker,
      ## NOTE: Though |title| has special semantics,
      ## syntactically same as the |title| as global attribute.
    }, {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      charset => FEATURE_M12N10_REC,
      href => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      hreflang => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      media => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      rel => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      rev => FEATURE_M12N10_REC,
      target => FEATURE_M12N10_REC,
      type => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    });
    $attrs_checker->($self, $item, $element_state);

    my $node = $item->{node};
    if (not $node->has_attribute_ns (undef, 'href')) {
      $self->{onerror}->(node => $node,
                         type => 'attribute missing:href');
    } elsif ($item->{has_hyperlink_link_type}) {
      ## NOTE(review): |has_hyperlink_link_type| is not set in this
      ## block; presumably the |rel| checker sets it -- confirm.
      $self->{has_hyperlink_element} = 1;
    }
    if (not $node->has_attribute_ns (undef, 'rel')) {
      $self->{onerror}->(node => $node,
                         type => 'attribute missing:rel');
    }
  },
};
1141    
## |meta| element: empty content.
##
## |check_attrs| does three things:
##   1. checks each attribute (|content|, |name|, |http-equiv| and
##      |charset| are only collected here and validated below);
##   2. checks the allowed combinations of those four attributes;
##   3. checks character encoding declarations
##      (|charset| and |http-equiv="content-type"|).
$Element->{$HTML_NS}->{meta} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  %HTMLEmptyChecker,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $name_attr;
    my $http_equiv_attr;
    my $charset_attr;
    my $content_attr;

    ## Status of the null-namespace attributes.  Built once per element
    ## rather than once per attribute.
    my $attr_status = {
      %HTMLAttrStatus,
      charset => FEATURE_HTML5_DEFAULT,
      content => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      'http-equiv' => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      scheme => FEATURE_M12N10_REC,
    };

    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;
      ## |$checker| is a code reference, or the plain true value 1 for
      ## the attributes that are validated after the loop.
      my $checker;
      if ($attr_ns eq '') {
        if ($attr_ln eq 'content') {
          $content_attr = $attr;
          $checker = 1;
        } elsif ($attr_ln eq 'name') {
          $name_attr = $attr;
          $checker = 1;
        } elsif ($attr_ln eq 'http-equiv') {
          $http_equiv_attr = $attr;
          $checker = 1;
        } elsif ($attr_ln eq 'charset') {
          $charset_attr = $attr;
          $checker = 1;
        } else {
          $checker = $HTMLAttrChecker->{$attr_ln}
              || $AttrChecker->{$attr_ns}->{$attr_ln}
              || $AttrChecker->{$attr_ns}->{''};
        }
      } else {
        $checker = $AttrChecker->{$attr_ns}->{$attr_ln}
            || $AttrChecker->{$attr_ns}->{''};
      }
      if ($checker) {
        ## |$item| is passed as the third argument, as the generic
        ## attribute checker (|$GetHTMLAttrsChecker|) does.
        $checker->($self, $attr, $item) if ref $checker;
      } elsif ($attr_ns eq '') {
        #
      } else {
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute');
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }

      if ($attr_ns eq '') {
        $self->_attr_status_info ($attr, $attr_status->{$attr_ln});
      }
    }

    ## Allowed combinations: |name|+|content|, |http-equiv|+|content|,
    ## or |charset| alone.
    if (defined $name_attr) {
      if (defined $http_equiv_attr) {
        $self->{onerror}->(node => $http_equiv_attr,
                           type => 'attribute not allowed');
      } elsif (defined $charset_attr) {
        $self->{onerror}->(node => $charset_attr,
                           type => 'attribute not allowed');
      }
      unless (defined $content_attr) {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:content');
      }
    } elsif (defined $http_equiv_attr) {
      if (defined $charset_attr) {
        $self->{onerror}->(node => $charset_attr,
                           type => 'attribute not allowed');
      }
      unless (defined $content_attr) {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:content');
      }
    } elsif (defined $charset_attr) {
      if (defined $content_attr) {
        $self->{onerror}->(node => $content_attr,
                           type => 'attribute not allowed');
      }
    } else {
      if (defined $content_attr) {
        $self->{onerror}->(node => $content_attr,
                           type => 'attribute not allowed');
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:name|http-equiv');
      } else {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:name|http-equiv|charset');
      }
    }

    ## Checks where a character encoding declaration appears: it has to
    ## be the first element child of the document's |head| element, and
    ## the document has to be an HTML document.
    my $check_charset_decl = sub {
      my $parent = $item->{node}->manakai_parent_element;
      if ($parent and $parent eq $parent->owner_document->manakai_head) {
        for my $el (@{$parent->child_nodes}) {
          next unless $el->node_type == 1; # ELEMENT_NODE
          unless ($el eq $item->{node}) {
            ## NOTE: Not the first child element.
            $self->{onerror}->(node => $item->{node},
                               type => 'element not allowed:meta charset',
                               level => $self->{must_level});
          }
          last; ## Only the first element child is examined.
          ## NOTE: Entity references are not supported.
        }
      } else {
        $self->{onerror}->(node => $item->{node},
                           type => 'element not allowed:meta charset',
                           level => $self->{must_level});
      }

      unless ($item->{node}->owner_document->manakai_is_html) {
        $self->{onerror}->(node => $item->{node},
                           type => 'in XML:charset',
                           level => $self->{must_level});
      }
    }; # $check_charset_decl

    ## Checks a declared charset name.  Arguments: the attribute node
    ## that errors are reported against, and the charset name.
    my $check_charset = sub {
      my ($attr, $charset_value) = @_;
      ## NOTE: Though the case-sensitivity of |charset| attribute value
      ## is not explicitly spelled in the HTML5 spec, the Character Set
      ## registry of IANA, which is referenced from HTML5 spec, says that
      ## charset name is case-insensitive.
      $charset_value =~ tr/A-Z/a-z/; ## NOTE: ASCII Case-insensitive.

      require Message::Charset::Info;
      my $charset = $Message::Charset::Info::IANACharset->{$charset_value};
      my $ic = $item->{node}->owner_document->input_encoding;
      if (defined $ic) {
        ## TODO: Test for this case
        ## The input encoding name is case-folded in the same way as the
        ## declared name before the registry lookup.
        my $ic_name = $ic;
        $ic_name =~ tr/A-Z/a-z/;
        my $ic_charset = $Message::Charset::Info::IANACharset->{$ic_name};
        ## Compare registry entries when both names are known; otherwise
        ## fall back to comparing the (case-folded) names, so that two
        ## different unknown names are not treated as the same charset.
        my $mismatched = (defined $charset and defined $ic_charset)
            ? $charset ne $ic_charset
            : $charset_value ne $ic_name;
        if ($mismatched) {
          $self->{onerror}->(node => $attr,
                             type => 'mismatched charset name:'.$ic.
                                 ':'.$charset_value, ## TODO: This should be a |value| value.
                             level => $self->{must_level});
        }
      } else {
        ## NOTE: MUST, but not checkable, since the document is not originally
        ## in serialized form (or the parser does not preserve the input
        ## encoding information).
        $self->{onerror}->(node => $attr,
                           type => 'mismatched charset name::'.$charset_value, ## TODO: |value|
                           level => 'unsupported');
      }

      ## ISSUE: What is "valid character encoding name"?  Syntactically valid?
      ## Syntactically valid and registered?  What about x-charset names?
      unless (Message::Charset::Info::is_syntactically_valid_iana_charset_name
                  ($charset_value)) {
        $self->{onerror}->(node => $attr,
                           type => 'charset:syntax error:'.$charset_value, ## TODO
                           level => $self->{must_level});
      }

      if ($charset) {
        ## ISSUE: What is "the preferred name for that encoding" (for a charset
        ## with no "preferred MIME name" label)?
        my $charset_status = $charset->{iana_names}->{$charset_value} || 0;
        if (($charset_status &
             Message::Charset::Info::PREFERRED_CHARSET_NAME ())
                != Message::Charset::Info::PREFERRED_CHARSET_NAME ()) {
          $self->{onerror}->(node => $attr,
                             type => 'charset:not preferred:'.
                                 $charset_value, ## TODO
                             level => $self->{must_level});
        }
        if (($charset_status &
             Message::Charset::Info::REGISTERED_CHARSET_NAME ())
                != Message::Charset::Info::REGISTERED_CHARSET_NAME ()) {
          if ($charset_value =~ /^x-/) {
            $self->{onerror}->(node => $attr,
                               type => 'charset:private:'.$charset_value, ## TODO
                               level => $self->{good_level});
          } else {
            $self->{onerror}->(node => $attr,
                               type => 'charset:not registered:'.
                                   $charset_value, ## TODO
                               level => $self->{good_level});
          }
        }
      } elsif ($charset_value =~ /^x-/) {
        $self->{onerror}->(node => $attr,
                           type => 'charset:private:'.$charset_value, ## TODO
                           level => $self->{good_level});
      } else {
        $self->{onerror}->(node => $attr,
                           type => 'charset:not registered:'.$charset_value, ## TODO
                           level => $self->{good_level});
      }

      if ($attr->get_user_data ('manakai_has_reference')) {
        $self->{onerror}->(node => $attr,
                           type => 'character reference in charset',
                           level => $self->{must_level});
      }
    }; # $check_charset

    ## TODO: metadata conformance (the |name| and |content| values are
    ## not examined yet)

    ## TODO: pragma conformance
    if (defined $http_equiv_attr) { ## An enumerated attribute
      my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
      if ({
        'refresh' => 1,
        'default-style' => 1,
      }->{$keyword}) {
        #

        ## TODO: More than one occurrence is a MUST-level error (revision 1180).
      } elsif ($keyword eq 'content-type') {
        ## TODO: refs in "text/html; charset=" are not disallowed since rev.1275.

        $check_charset_decl->();
        if ($content_attr) {
          my $content = $content_attr->value;
          if ($content =~ m!^[Tt][Ee][Xx][Tt]/[Hh][Tt][Mm][Ll];
                            [\x09-\x0D\x20]*[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
                            =(.+)\z!sx) {
            $check_charset->($content_attr, $1);
          } else {
            $self->{onerror}->(node => $content_attr,
                               type => 'meta content-type syntax error',
                               level => $self->{must_level});
          }
        }
      } else {
        $self->{onerror}->(node => $http_equiv_attr,
                           type => 'enumerated:invalid');
      }
    }

    if (defined $charset_attr) {
      $check_charset_decl->();
      $check_charset->($charset_attr, $charset_attr->value);
    }
  },
};
1394    
## |style| element.
##
## The text content is accumulated in |$element_state->{text}| and, for
## |text/css| (the default when |type| is absent), handed to the
## |onsubdoc| callback at the end of the element.  Any other type is
## reported as unsupported.
$Element->{$HTML_NS}->{style} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  %HTMLChecker,
  check_attrs => $GetHTMLAttrsChecker->({
    type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
    media => $HTMLMQAttrChecker,
    scoped => $GetHTMLBooleanAttrChecker->('scoped'),
    ## NOTE: |title| has special semantics for |style|s, but is syntactically
    ## not different
  }, {
    %HTMLAttrStatus,
    dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    media => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    scoped => FEATURE_HTML5_DEFAULT,
    title => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    type => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## NOTE: |html:style| itself has no conformance criteria on content model.
    my $type = $item->{node}->get_attribute_ns (undef, 'type');
    if (not defined $type or
        $type =~ m[\A(?>(?>\x0D\x0A)?[\x09\x20])*[Tt][Ee][Xx][Tt](?>(?>\x0D\x0A)?[\x09\x20])*/(?>(?>\x0D\x0A)?[\x09\x20])*[Cc][Ss][Ss](?>(?>\x0D\x0A)?[\x09\x20])*\z]) {
      ## CSS: child elements are not allowed.
      $element_state->{allow_element} = 0;
      $element_state->{style_type} = 'text/css';
    } else {
      $element_state->{allow_element} = 1; # unknown
      $element_state->{style_type} = $type; ## TODO: $type normalization
    }
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($element_state->{allow_element}) {
      #
    } else {
      $self->{onerror}->(node => $child_el, type => 'element not allowed');
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $element_state->{text} .= $child_node->text_content;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if ($element_state->{style_type} eq 'text/css') {
      ## An element without any text child never touched |text|; pass an
      ## empty style sheet rather than |undef| to the callback.
      my $text = $element_state->{text};
      $text = '' unless defined $text;
      $self->{onsubdoc}->({s => $text,
                           container_node => $item->{node},
                           media_type => 'text/css', is_char_string => 1});
    } else {
      $self->{onerror}->(node => $item->{node}, level => 'unsupported',
                         type => 'style:'.$element_state->{style_type});
    }

    $HTMLChecker{check_end}->(@_);
  },
};
1461 wakaba 1.25 ## ISSUE: Relationship to significant content check?
1462 wakaba 1.1
## |body| element: prose content.  The keys listed after the
## |%HTMLProseContentChecker| spread replace same-named entries from it.
$Element->{$HTML_NS}->{body} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      alink => FEATURE_M12N10_REC_DEPRECATED,
      background => FEATURE_M12N10_REC_DEPRECATED,
      bgcolor => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      link => FEATURE_M12N10_REC_DEPRECATED,
      onload => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onunload => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      text => FEATURE_M12N10_REC_DEPRECATED,
      vlink => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),
};
1480    
## |section| element: prose content, HTML5 only.
## NOTE: |status| precedes the spread, so a |status| key inside
## |%HTMLProseContentChecker| (if any) would take precedence.
$Element->{$HTML_NS}->{section} = {
  status => FEATURE_HTML5_DEFAULT,
  (%HTMLProseContentChecker),
};
1485    
## |nav| element: prose content, HTML5 only.
## NOTE: |status| precedes the spread, so a |status| key inside
## |%HTMLProseContentChecker| (if any) would take precedence.
$Element->{$HTML_NS}->{nav} = {
  status => FEATURE_HTML5_DEFAULT,
  (%HTMLProseContentChecker),
};
1490    
## |article| element: prose content, HTML5 only.
## NOTE: |status| precedes the spread, so a |status| key inside
## |%HTMLProseContentChecker| (if any) would take precedence.
$Element->{$HTML_NS}->{article} = {
  status => FEATURE_HTML5_DEFAULT,
  (%HTMLProseContentChecker),
};
1495    
## |blockquote| element: prose content; |cite| is checked as a URI.
$Element->{$HTML_NS}->{blockquote} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  %HTMLProseContentChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      cite => $HTMLURIAttrChecker,
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      cite => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
};
1508    
## |aside| element: prose content, HTML5 only.
## NOTE: |status| precedes the spread, so a |status| key inside
## |%HTMLProseContentChecker| (if any) would take precedence.
$Element->{$HTML_NS}->{aside} = {
  status => FEATURE_HTML5_DEFAULT,
  (%HTMLProseContentChecker),
};
1513    
## |h1| element: phrasing content.  Starting the element raises
## |$self->{flag}->{has_hn}|, which the |header| definition inspects.
$Element->{$HTML_NS}->{h1} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->{flag}->{has_hn} = 1;
  },
};
1528    
## |h2| .. |h6| each receive their own shallow copy of the |h1|
## definition.
for my $heading (qw/h2 h3 h4 h5 h6/) {
  $Element->{$HTML_NS}{$heading} = {%{$Element->{$HTML_NS}{h1}}};
}
1538 wakaba 1.1
1539 wakaba 1.29 ## TODO: Explicit sectioning is "encouraged".
1540    
## |header|: prose content, minus |header|, |footer| and the sectioning
## content set.  It must contain a heading: the |has_hn| flag is cleared
## on entry (the outer value is saved in the element state) and checked
## on exit.
$Element->{$HTML_NS}{header} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLProseContentChecker,
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my $excluded = {$HTML_NS => {header => 1, footer => 1}};
    $self->_add_minus_elements
        ($element_state, $excluded, $HTMLSectioningContent);

    ## Remember the enclosing scope's flag, then start counting afresh.
    $element_state->{has_hn_original} = $self->{flag}->{has_hn};
    $self->{flag}->{has_hn} = 0;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    $self->{onerror}->(node => $item->{node},
                       type => 'element missing:hn')
        if not $self->{flag}->{has_hn};

    ## A heading seen before this |header| still counts for the
    ## enclosing scope.
    $self->{flag}->{has_hn} ||= $element_state->{has_hn_original};

    $HTMLProseContentChecker{check_end}->(@_);
  },
  ## ISSUE: <header><del><h1>...</h1></del></header> is conforming?
};
1565    
## |footer|: prose content, minus |footer|, sectioning content and
## heading content.
$Element->{$HTML_NS}{footer} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLProseContentChecker,
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my $excluded = {$HTML_NS => {footer => 1}};
    $self->_add_minus_elements ($element_state, $excluded,
                                $HTMLSectioningContent,
                                $HTMLHeadingContent);
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    ## Delegate the remaining end-of-element work to the shared checker.
    $HTMLProseContentChecker{check_end}->(@_);
  },
};
1583    
## |address|: prose content, minus |footer|, |address|, sectioning
## content and heading content.
$Element->{$HTML_NS}{address} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my $excluded = {$HTML_NS => {footer => 1, address => 1}};
    $self->_add_minus_elements ($element_state, $excluded,
                                $HTMLSectioningContent,
                                $HTMLHeadingContent);
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    ## Delegate the remaining end-of-element work to the shared checker.
    $HTMLProseContentChecker{check_end}->(@_);
  },
};
1605    
## |p|: phrasing content; |align| is recorded as deprecated.
$Element->{$HTML_NS}{p} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
};
1616    
## |hr|: empty element; its presentational attributes are recorded as
## deprecated.
$Element->{$HTML_NS}{hr} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      noshade => FEATURE_M12N10_REC_DEPRECATED,
      size => FEATURE_M12N10_REC_DEPRECATED,
      width => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),
};
1630    
## |br|: empty element.  Unlike most elements here it does not pull in
## |%HTMLM12NCommonAttrStatus|; the individual attributes are listed.
$Element->{$HTML_NS}{br} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      class => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      clear => FEATURE_M12N10_REC_DEPRECATED,
      id => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      style => FEATURE_XHTML10_REC,
      title => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    },
  ),
  ## NOTE: Blank lines MUST NOT be used for presentational purposes.
  ## (That requirement is semantic, so it cannot be checked here.)
};
1645    
## |dialog|: children must strictly alternate |dt|, |dd|, |dt|, |dd|, ...
## The element state's |phase| records which of the two is expected next
## ('before dt' or 'before dd').
$Element->{$HTML_NS}{dialog} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLChecker,
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'before dt';
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## phase => [expected child, the other member of the pair]
    my $pair = {
      'before dt' => [qw/dt dd/],
      'before dd' => [qw/dd dt/],
    }->{$element_state->{phase}};

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed here; nothing to check.
    } elsif ($pair) {
      my ($expected, $other) = @$pair;
      my $in_html = $child_nsuri eq $HTML_NS;
      if ($in_html and $child_ln eq $expected) {
        ## The pair advances: now wait for the other member.
        $element_state->{phase} = 'before ' . $other;
      } elsif ($in_html and $child_ln eq $other) {
        ## The expected member was skipped; the phase stays as it is.
        $self->{onerror}->(node => $child_el,
                           type => 'ps element missing:' . $expected);
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed');
      }
    } else {
      die "check_child_element: Bad |dialog| phase: $element_state->{phase}";
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node, type => 'character not allowed')
        if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## A trailing |dt| without its |dd|.
    $self->{onerror}->(node => $item->{node},
                       type => 'child element missing:dd')
        if $element_state->{phase} eq 'before dd';

    $HTMLChecker{check_end}->(@_);
  },
};
1702    
## |pre|: phrasing content; |width| is recorded as deprecated.
$Element->{$HTML_NS}{pre} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      width => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),
};
1713    
## |ol|: only |li| element children (plus anything in the current
## |plus_elements| set) and no significant text.
$Element->{$HTML_NS}{ol} = {
  %HTMLChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      start => $HTMLIntegerAttrChecker,
      reversed => $GetHTMLBooleanAttrChecker->('reversed'),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      compact => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      reversed => FEATURE_HTML5_DEFAULT,
      #start => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC_DEPRECATED,
      start => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      type => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    my $is_li = ($child_nsuri eq $HTML_NS and $child_ln eq 'li');
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}
             or $is_li) {
      ## Allowed child; nothing to report.
    } else {
      $self->{onerror}->(node => $child_el, type => 'element not allowed');
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node, type => 'character not allowed')
        if $has_significant;
  },
};
1752    
## |ul|: starts from a copy of the |ol| definition, then replaces the
## status and the attribute checker (no |start| / |reversed| here).
$Element->{$HTML_NS}{ul} = {
  %{$Element->{$HTML_NS}{ol}},
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      compact => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      type => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),
};
1764    
## |li|: checked as phrasing content while the |in_menu| flag is set,
## and as prose content otherwise.
$Element->{$HTML_NS}{li} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    ## |value| is reported when the parent element exists and is not an
    ## HTML |ol|; the value itself is always checked as an integer.
    value => sub {
      my ($self, $attr) = @_;
      my $parent = $attr->owner_element->manakai_parent_element;
      if (defined $parent) {
        my $ns = $parent->namespace_uri;
        $ns = '' if not defined $ns;
        my $ln = $parent->manakai_local_name;
        my $parent_is_ol = ($ns eq $HTML_NS and $ln eq 'ol');
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute')
            if not $parent_is_ol;
      }
      $HTMLIntegerAttrChecker->($self, $attr);
    }, ## TODO: test
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    type => FEATURE_M12N10_REC_DEPRECATED,
    #value => FEATURE_HTML5_DEFAULT | FEATURE_XHTMLBASIC11_CR |
    #    FEATURE_M12N10_REC_DEPRECATED,
    value => FEATURE_HTML5_DEFAULT | FEATURE_XHTMLBASIC11_CR |
        FEATURE_M12N10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    my $delegate = $self->{flag}->{in_menu}
        ? \%HTMLPhrasingContentChecker : \%HTMLProseContentChecker;
    $delegate->{check_child_element}->(@_);
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    my $delegate = $self->{flag}->{in_menu}
        ? \%HTMLPhrasingContentChecker : \%HTMLProseContentChecker;
    $delegate->{check_child_text}->(@_);
  },
};
1811    
## |dl|: one or more |dt| followed by one or more |dd|, repeated.
## The element state's |phase| is 'before dt' initially, then 'in dts'
## or 'in dds' according to the last |dt|/|dd| child seen.
$Element->{$HTML_NS}{dl} = {
  %HTMLChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    compact => FEATURE_M12N10_REC_DEPRECATED,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    type => FEATURE_M12N10_REC_DEPRECATED,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'before dt';
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    my $phase = $element_state->{phase};
    my $in_html = $child_nsuri eq $HTML_NS;
    my $is_dt = ($in_html and $child_ln eq 'dt');
    my $is_dd = ($in_html and $child_ln eq 'dd');

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed here; nothing to check.
    } elsif ($phase eq 'in dds' or $phase eq 'in dts') {
      ## Once a group has started, |dt| and |dd| are both acceptable;
      ## the phase simply follows the kind of child just seen.
      if ($is_dt) {
        $element_state->{phase} = 'in dts';
      } elsif ($is_dd) {
        $element_state->{phase} = 'in dds';
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed');
      }
    } elsif ($phase eq 'before dt') {
      if ($is_dt) {
        $element_state->{phase} = 'in dts';
      } elsif ($is_dd) {
        ## A |dd| with no preceding |dt|.
        $self->{onerror}
            ->(node => $child_el, type => 'ps element missing:dt');
        $element_state->{phase} = 'in dds';
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed');
      }
    } else {
      die "check_child_element: Bad |dl| phase: $element_state->{phase}";
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node, type => 'character not allowed')
        if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## The list ended on a |dt| that never got its |dd|.
    $self->{onerror}->(node => $item->{node},
                       type => 'child element missing:dd')
        if $element_state->{phase} eq 'in dts';

    $HTMLChecker{check_end}->(@_);
  },
};
1881    
## |dt|: phrasing content.
$Element->{$HTML_NS}{dt} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
};
1891    
## |dd|: prose content.
$Element->{$HTML_NS}{dd} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
};
1901    
## |a|: phrasing content minus interactive content.
##
## |check_attrs| is hand-written (rather than produced by
## |$GetHTMLAttrsChecker|) because the link-related attributes are only
## allowed when |href| is present, and because the presence of |href|
## sets the |in_a_href| flag and |has_hyperlink_element|.
$Element->{$HTML_NS}{a} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Checkers for the null-namespace attributes specific to |a|.
    my $link_attr_checker = {
      target => $HTMLTargetAttrChecker,
      href => $HTMLURIAttrChecker,
      ping => $HTMLSpaceURIsAttrChecker,
      rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
      media => $HTMLMQAttrChecker,
      hreflang => $HTMLLanguageTagAttrChecker,
      type => $HTMLIMTAttrChecker,
    };

    ## Status of every null-namespace attribute known for |a|.
    my $attr_status = {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      accesskey => FEATURE_M12N10_REC,
      charset => FEATURE_M12N10_REC,
      coords => FEATURE_M12N10_REC,
      href => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      hreflang => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      media => FEATURE_HTML5_DEFAULT,
      name => FEATURE_M12N10_REC_DEPRECATED,
      onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      ping => FEATURE_HTML5_DEFAULT,
      rel => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      rev => FEATURE_M12N10_REC,
      shape => FEATURE_M12N10_REC,
      tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      target => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      type => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    };

    ## Link-related attribute nodes actually present, keyed by name.
    my %attr;
    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' if not defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;
      my $in_null_ns = $attr_ns eq '';

      my $checker;
      if ($in_null_ns) {
        $checker = $link_attr_checker->{$attr_ln};
        if ($checker) {
          $attr{$attr_ln} = $attr;
        } else {
          $checker = $HTMLAttrChecker->{$attr_ln};
        }
      }
      $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
          || $AttrChecker->{$attr_ns}->{''};

      if ($checker) {
        $checker->($self, $attr) if ref $checker;
      } elsif ($in_null_ns) {
        ## Unknown null-namespace attribute: only its status is
        ## reported, below.
      } else {
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute');
        ## ISSUE: No conformance criteria for unknown attributes in the
        ## spec
      }

      $self->_attr_status_info ($attr, $attr_status->{$attr_ln})
          if $in_null_ns;
    }

    ## Saved so that |check_end| can tell whether an enclosing |a| had
    ## already set the flag.
    $element_state->{in_a_href_original} = $self->{flag}->{in_a_href};
    if (defined $attr{href}) {
      $self->{has_hyperlink_element} = 1;
      $self->{flag}->{in_a_href} = 1;
    } else {
      ## Without |href| the other link attributes are not allowed.
      for my $name (qw/target ping rel media hreflang type/) {
        next unless defined $attr{$name};
        $self->{onerror}->(node => $attr{$name},
                           type => 'attribute not allowed');
      }
    }
  },
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements ($element_state, $HTMLInteractiveContent);
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    ## Drop the flag unless it was already set when this |a| started.
    if (not $element_state->{in_a_href_original}) {
      delete $self->{flag}->{in_a_href};
    }

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
1992    
## |q|: phrasing content; |cite| is validated as a URI.
$Element->{$HTML_NS}{q} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    {cite => $HTMLURIAttrChecker},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      cite => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
};
2005    
## |cite|: phrasing content.
$Element->{$HTML_NS}{cite} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
};
2015    
## |em|: phrasing content.
$Element->{$HTML_NS}{em} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
};
2025    
## |strong|: phrasing content.
$Element->{$HTML_NS}{strong} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
};
2035    
## |small|: phrasing content.
$Element->{$HTML_NS}{small} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
};
2045    
## |big|: phrasing content.  Its status carries only
## |FEATURE_M12N10_REC| (no HTML5 flag).
$Element->{$HTML_NS}{big} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
};
2055    
## |mark|: HTML5-only element checked with the shared phrasing-content
## checker.
$Element->{$HTML_NS}{mark} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLPhrasingContentChecker,
};
2060    
## |dfn|: phrasing content that must not contain another |dfn|.
##
## |check_start| also computes the term being defined and reports
## 'duplicate term' when the same term was already defined by an earlier
## |dfn| (all defining nodes are collected in |$self->{term}|).
##
## The term is chosen as follows:
##   1. the |title| attribute of the |dfn|, if present;
##   2. otherwise, if the |dfn| has exactly one element child, no
##      non-whitespace text children, and that child is an HTML |abbr|
##      with a |title| attribute, the value of that attribute;
##   3. otherwise the |text_content| of the |dfn|.
$Element->{$HTML_NS}{dfn} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements ($element_state, {$HTML_NS => {dfn => 1}});

    my $node = $item->{node};
    my $term = $node->get_attribute_ns (undef, 'title');
    unless (defined $term) {
      ## Rule 2.  |$seen_element| makes sure that only the *sole*
      ## element child can supply the term: previously an |abbr title|
      ## that followed another element (or an |abbr| without |title|)
      ## was wrongly accepted.
      my $seen_element = 0;
      CHILD: for my $child (@{$node->child_nodes}) {
        my $node_type = $child->node_type;
        if ($node_type == 1) { # ELEMENT_NODE
          if ($seen_element) {
            ## More than one element child: rule 2 does not apply.
            undef $term;
            last CHILD;
          }
          $seen_element = 1;

          my $nsuri = $child->namespace_uri;
          if ($child->manakai_local_name eq 'abbr' and
              defined $nsuri and $nsuri eq $HTML_NS) {
            my $attr = $child->get_attribute_node_ns (undef, 'title');
            $term = $attr->value if $attr;
          }

          ## The only candidate is not an |abbr| with |title|.
          last CHILD unless defined $term;
        } elsif ($node_type == 3 or $node_type == 4) {
          ## TEXT_NODE or CDATA_SECTION_NODE
          next CHILD
              if $child->data =~ /\A[\x09-\x0D\x20]+\z/; # Inter-element whitespace
          undef $term;
          last CHILD;
        }
      }

      ## Rule 3.
      $term = $node->text_content unless defined $term;
    }

    if ($self->{term}->{$term}) {
      $self->{onerror}->(node => $node, type => 'duplicate term');
      push @{$self->{term}->{$term}}, $node;
    } else {
      $self->{term}->{$term} = [$node];
    }
    ## ISSUE: The HTML5 algorithm does not work with |ruby| unless |dfn|
    ## has |title|.
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
2119    
## |abbr|: phrasing content.
$Element->{$HTML_NS}{abbr} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
};
2129    
## |acronym|: phrasing content.  Its status carries only
## |FEATURE_M12N10_REC| (no HTML5 flag).
$Element->{$HTML_NS}{acronym} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
};
2139    
## |time|: phrasing content.  The date/time string is validated in
## |check_end|: it is taken from the |datetime| attribute when present
## and from the element's |text_content| otherwise.
##
## Accepted shapes (whitespace may appear where the pattern below
## allows it):
##   YYYY-MM-DD [T] hh:mm[:ss[.fff]] [Z | (+|-)hh:mm]
##   hh:mm[:ss[.fff]]
##
## The set of characters skipped as whitespace depends on the source of
## the string and is held in |$reg_sp|.  Earlier, |$reg_sp| was computed
## but the pattern hard-coded the attribute-mode class, so element
## content was checked with the wrong whitespace set.
$Element->{$HTML_NS}{time} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->({
    datetime => sub { 1 }, # checked in |check_end|
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    datetime => FEATURE_HTML5_DEFAULT,
  }),
  ## TODO: Write tests
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    my $attr = $item->{node}->get_attribute_node_ns (undef, 'datetime');
    my $input;      # string to validate
    my $reg_sp;     # whitespace allowed for this source
    my $input_node; # node that errors are reported against
    if ($attr) {
      $input = $attr->value;
      $reg_sp = qr/[\x09-\x0D\x20]*/;
      $input_node = $attr;
    } else {
      $input = $item->{node}->text_content;
      $reg_sp = qr/\p{Zs}*/;
      $input_node = $item->{node};

      ## ISSUE: What is the definition for "successfully extracts a date
      ## or time"?  If the algorithm says the string is invalid but
      ## return some date or time, is it "successfully"?
    }

    my $report = sub {
      $self->{onerror}->(node => $input_node, type => $_[0]);
    };

    ## NOTE: |$reg_sp| contains no capturing group, so the group numbers
    ## in the comments below are the real ones.
    if ($input =~ /
      \A
      $reg_sp
      ([0-9]+) # 1
      (?>
        -([0-9]+) # 2
        -([0-9]+) # 3
        $reg_sp
        (?>
          T
          $reg_sp
        )?
        ([0-9]+) # 4
        :([0-9]+) # 5
        (?>
          :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6
        )?
        $reg_sp
        (?>
          Z
          $reg_sp
        |
          [+-]([0-9]+):([0-9]+) # 7, 8
          $reg_sp
        )?
        \z
      |
        :([0-9]+) # 9
        (?>
          :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10
        )?
        $reg_sp
        \z
      )
    /x) {
      ## Copy the captures at once so that no later match can clobber
      ## them.  |$first| is the year in the date-and-time form and the
      ## hour in the time-only form.
      my ($first, $month, $day, $dt_hour, $dt_minute, $dt_second,
          $tz_hour, $tz_minute, $t_minute, $t_second)
          = ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10);

      my ($hour, $minute, $second);
      if (defined $month) { ## YYYY-MM-DD T? hh:mm
        my $year = $first;
        if (length $year != 4 or length $month != 2 or length $day != 2 or
            length $dt_hour != 2 or length $dt_minute != 2) {
          $report->('dateortime:syntax error');
        }

        if (1 <= $month and $month <= 12) {
          ## Index 0 is a placeholder so that the month number can be
          ## used directly; February is 29 here and the leap-year rule
          ## is applied separately.
          my @last_day = (0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31);
          $report->('datetime:bad day')
              if $day < 1 or $day > $last_day[$month];

          my $is_leap_year
              = ($year % 400 == 0 or ($year % 4 == 0 and $year % 100 != 0));
          $report->('datetime:bad day')
              if $month == 2 and $day == 29 and not $is_leap_year;
        } else {
          $report->('datetime:bad month');
        }

        ($hour, $minute, $second) = ($dt_hour, $dt_minute, $dt_second);

        if (defined $tz_hour) { ## [+-]hh:mm
          if (length $tz_hour != 2 or length $tz_minute != 2) {
            $report->('dateortime:syntax error');
          }

          $report->('datetime:bad timezone hour') if $tz_hour > 23;
          $report->('datetime:bad timezone minute') if $tz_minute > 59;
        }
      } else { ## hh:mm
        if (length $first != 2 or length $t_minute != 2) {
          $report->('dateortime:syntax error');
        }

        ($hour, $minute, $second) = ($first, $t_minute, $t_second);
      }

      $report->('datetime:bad hour') if $hour > 23;
      $report->('datetime:bad minute') if $minute > 59;

      if (defined $second) { ## s
        ## NOTE: Integer part of second don't have to have length of two.

        if (substr ($second, 0, 1) eq '.') {
          ## No integer part.  Such a value is always below 60, so the
          ## range check is only needed in the other branch.
          $report->('dateortime:syntax error');
        } elsif ($second >= 60) {
          $report->('datetime:bad second');
        }
      }
    } else {
      $report->('dateortime:syntax error');
    }

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
2276    
## |meter|: phrasing content.  All six numeric attributes are checked as
## floating-point numbers with a range predicate that accepts anything.
## TODO: "The recommended way of giving the value is to include it as
## contents of the element"
$Element->{$HTML_NS}{meter} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## One checker instance per attribute.
      map { ($_ => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 })) }
          qw/value min low high max optimum/
    },
    {
      %HTMLAttrStatus,
      (map { ($_ => FEATURE_HTML5_DEFAULT) }
           qw/high low max min optimum value/),
    },
  ),
};
2297    
## |progress|: phrasing content.  |value| must be a floating-point
## number >= 0 and |max| a floating-point number > 0.
## TODO: recommended to use content
$Element->{$HTML_NS}{progress} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      value => $GetHTMLFloatingPointNumberAttrChecker->(sub { $_[0] >= 0 }),
      max => $GetHTMLFloatingPointNumberAttrChecker->(sub { $_[0] > 0 }),
    },
    {
      %HTMLAttrStatus,
      max => FEATURE_HTML5_DEFAULT,
      value => FEATURE_HTML5_DEFAULT,
    },
  ),
};
2310    
## |code|: phrasing content.
$Element->{$HTML_NS}{code} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
};
2320    
## |var|: phrasing content.
$Element->{$HTML_NS}{var} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
};
2330    
2331     $Element->{$HTML_NS}->{samp} = {
2332 wakaba 1.40 %HTMLPhrasingContentChecker,
2333 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2334     check_attrs => $GetHTMLAttrsChecker->({}, {
2335     %HTMLAttrStatus,
2336     %HTMLM12NCommonAttrStatus,
2337 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
2338 wakaba 1.49 }),
2339 wakaba 1.1 };
2340    
2341     $Element->{$HTML_NS}->{kbd} = {
2342 wakaba 1.40 %HTMLPhrasingContentChecker,
2343 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2344     check_attrs => $GetHTMLAttrsChecker->({}, {
2345     %HTMLAttrStatus,
2346     %HTMLM12NCommonAttrStatus,
2347 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
2348 wakaba 1.49 }),
2349 wakaba 1.1 };
2350    
2351     $Element->{$HTML_NS}->{sub} = {
2352 wakaba 1.40 %HTMLPhrasingContentChecker,
2353 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2354     check_attrs => $GetHTMLAttrsChecker->({}, {
2355     %HTMLAttrStatus,
2356     %HTMLM12NCommonAttrStatus,
2357 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
2358 wakaba 1.49 }),
2359 wakaba 1.1 };
2360    
2361 wakaba 1.51 $Element->{$HTML_NS}->{sup} = $Element->{$HTML_NS}->{sub};
2362 wakaba 1.1
## |span|: phrasing content.  No element-specific attribute checkers;
## the status table additionally lists the three data-binding
## attributes with the HTML4 "reserved" status.
$Element->{$HTML_NS}{span} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      (map { ($_ => FEATURE_HTML4_REC_RESERVED) }
           qw/datafld dataformatas datasrc/),
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
};
2375    
{
  ## |i|: phrasing content with the common attribute status table.
  my $i_def = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    }),
  };

  ## |b| is an alias sharing the same definition hash as |i|.
  $Element->{$HTML_NS}->{$_} = $i_def for qw/i b/;
}

## |tt| reuses whatever is registered for |big| at this point.
$Element->{$HTML_NS}->{tt} = $Element->{$HTML_NS}->{big};
2389    
{
  ## |s|: phrasing content, flagged with the deprecated M12N status.
  my $s_def = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_M12N10_REC_DEPRECATED,
    check_attrs => $GetHTMLAttrsChecker->({}, {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    }),
  };

  ## |strike| and |u| are aliases sharing the same definition hash as |s|.
  $Element->{$HTML_NS}->{$_} = $s_def for qw/s strike u/;
}
2403    
## |bdo|: phrasing content.  After the generic attribute check, the
## element is additionally required to carry a |dir| attribute.
$Element->{$HTML_NS}{bdo} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## No element-specific attribute checkers; status table only.
    my $check_attrs = $GetHTMLAttrsChecker->({}, {
      %HTMLAttrStatus,
      class => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      id => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      style => FEATURE_XHTML10_REC,
      title => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    });
    $check_attrs->($self, $item, $element_state);

    my $el = $item->{node};
    if (not $el->has_attribute_ns (undef, 'dir')) {
      $self->{onerror}->(node => $el, type => 'attribute missing:dir');
    }
  },
  ## ISSUE: The spec does not directly say that |dir| is a enumerated attr.
};
2425    
2426 wakaba 1.29 =pod
2427    
2428     ## TODO:
2429    
2430     +
2431     + <p>Partly because of the confusion described above, authors are
2432     + strongly recommended to always mark up all paragraphs with the
2433     + <code>p</code> element, and to not have any <code>ins</code> or
2434     + <code>del</code> elements that cross across any <span
2435     + title="paragraph">implied paragraphs</span>.</p>
2436     +
2437     (An informative note)
2438    
2439     <p><code>ins</code> elements should not cross <span
2440     + title="paragraph">implied paragraph</span> boundaries.</p>
2441     (normative)
2442    
2443     + <p><code>del</code> elements should not cross <span
2444     + title="paragraph">implied paragraph</span> boundaries.</p>
2445     (normative)
2446    
2447     =cut
2448    
## |ins|: transparent content model; |cite| is checked as a URI and
## |datetime| with the datetime attribute checker.
$Element->{$HTML_NS}{ins} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      cite => $HTMLURIAttrChecker,
      datetime => $HTMLDatetimeAttrChecker,
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      cite => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      datetime => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
};
2463    
## |del|: same attributes as |ins|, but with its own |check_end| that
## does not hand the "significant content" flag on to any other state.
$Element->{$HTML_NS}{del} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      cite => $HTMLURIAttrChecker,
      datetime => $HTMLDatetimeAttrChecker,
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      cite => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      datetime => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## NOTE: Significantness flag does not propagate.
    ## Nothing to report when the element has significant content or
    ## when the item is marked as transparent.
    my $is_ok = $element_state->{has_significant} || $item->{transparent};
    unless ($is_ok) {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{should_level},
                         type => 'no significant content');
    }
  },
};
2490    
## |figure|: prose content plus a |legend| child.
## NOTE: legend, Prose | Prose, legend?
##
## Flags kept in the element state:
##   has_legend_at_first - set when a |legend| is seen before any
##                         other (non-transparent) content
##   has_legend          - the latest |legend| node that was not first
##   has_non_legend      - set by non-|legend| content, cleared again
##                         whenever a |legend| is seen
$Element->{$HTML_NS}->{figure} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_FD,
  check_child_element => sub {
    my ($self, $item, $child, $ns, $ln, $is_transparent, $state) = @_;

    if ($self->{minus_elements}->{$ns}->{$ln}) {
      $self->{onerror}->(node => $child,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      $state->{has_non_legend} = 1;
      return;
    }
    return if $self->{plus_elements}->{$ns}->{$ln};

    unless ($ns eq $HTML_NS and $ln eq 'legend') {
      ## Ordinary prose content: delegate with the untouched argument list.
      $HTMLProseContentChecker{check_child_element}->(@_);
      $state->{has_non_legend} = 1 unless $is_transparent;
      return;
    }

    ## From here on, the child is a |legend|.
    my $report_legend = sub {
      $self->{onerror}->(node => $_[0],
                         type => 'element not allowed:figure legend',
                         level => $self->{must_level});
    };
    if ($state->{has_legend_at_first}) {
      ## A leading |legend| already exists; this one is redundant.
      $report_legend->($child);
    } elsif ($state->{has_legend}) {
      ## The earlier non-leading |legend| is reported; remember the new one.
      $report_legend->($state->{has_legend});
      $state->{has_legend} = $child;
    } elsif ($state->{has_non_legend}) {
      $state->{has_legend} = $child;
    } else {
      $state->{has_legend_at_first} = 1;
    }
    delete $state->{has_non_legend};
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $element_state->{has_non_legend} = 1 if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## A non-leading |legend| followed by more content is an error.
    my $late_legend = $element_state->{has_legend_at_first}
        ? undef : $element_state->{has_legend};
    if ($late_legend and $element_state->{has_non_legend}) {
      $self->{onerror}->(node => $late_legend,
                         type => 'element not allowed:figure legend',
                         level => $self->{must_level});
    }

    $HTMLProseContentChecker{check_end}->(@_);
    ## ISSUE: |<figure><legend>aa</legend></figure>| should be an error?
  },
};
2549 wakaba 1.8 ## TODO: Test for <nest/> in <figure/>
2550 wakaba 1.1
## |img|: an empty element.
##
## check_attrs runs the generic attribute checker and then reports a
## missing |alt| (at should_level) and a missing |src|.  The element
## state is forwarded to the generic checker, as the other
## closure-style |check_attrs| definitions (|bdo|, |source|, |map|) do.
$Element->{$HTML_NS}->{img} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    $GetHTMLAttrsChecker->({
      alt => sub { }, ## NOTE: No syntactical requirement
      src => $HTMLURIAttrChecker,
      usemap => $HTMLUsemapAttrChecker,
      ismap => sub {
        my ($self, $attr, $parent_item) = @_;
        ## |ismap| is rejected unless the |in_a_href| flag is set.
        if (not $self->{flag}->{in_a_href}) {
          $self->{onerror}->(node => $attr,
                             type => 'attribute not allowed:ismap',
                             level => $self->{must_level});
        }
        ## The value itself is still checked as a boolean attribute.
        $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_item);
      },
      ## TODO: height
      ## TODO: width
    }, {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      alt => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      border => FEATURE_M12N10_REC_DEPRECATED,
      height => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      hspace => FEATURE_M12N10_REC_DEPRECATED,
      ismap => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      longdesc => FEATURE_M12N10_REC,
      name => FEATURE_M12N10_REC_DEPRECATED,
      src => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      usemap => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      vspace => FEATURE_M12N10_REC_DEPRECATED,
      width => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    })->($self, $item, $element_state);
    unless ($item->{node}->has_attribute_ns (undef, 'alt')) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing:alt',
                         level => $self->{should_level});
    }
    unless ($item->{node}->has_attribute_ns (undef, 'src')) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing:src');
    }
  },
};
2599    
## |iframe|: uses the text checker for its content; only |src| has an
## element-specific checker (URI).
## NOTE: Not part of M12N10 Strict
$Element->{$HTML_NS}{iframe} = {
  %HTMLTextChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      src => $HTMLURIAttrChecker,
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      ## Attributes known to both HTML5 and M12N.
      class => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      id => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      src => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      title => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      ## Attributes with an M12N / XHTML 1.0 status only.
      align => FEATURE_XHTML10_REC,
      frameborder => FEATURE_M12N10_REC,
      height => FEATURE_M12N10_REC,
      longdesc => FEATURE_M12N10_REC,
      marginheight => FEATURE_M12N10_REC,
      marginwidth => FEATURE_M12N10_REC,
      name => FEATURE_M12N10_REC_DEPRECATED,
      scrolling => FEATURE_M12N10_REC,
      width => FEATURE_M12N10_REC,
    },
  ),
};
2624    
## |embed|: an empty element that accepts any attribute in the null
## namespace.
##
## check_attrs walks the attributes itself (instead of using
## |$GetHTMLAttrsChecker|) so that unknown local attributes can be
## accepted silently, and reports a missing |src| at the end.
$Element->{$HTML_NS}->{embed} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Status table for null-namespace attributes.  It does not depend
    ## on the attribute being examined, so it is built once per element
    ## rather than once per attribute.
    my $attr_status = {
      %HTMLAttrStatus,
      height => FEATURE_HTML5_DEFAULT,
      src => FEATURE_HTML5_DEFAULT,
      type => FEATURE_HTML5_DEFAULT,
      width => FEATURE_HTML5_DEFAULT,
    };

    my $has_src;
    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;
      my $checker;
      if ($attr_ns eq '') {
        if ($attr_ln eq 'src') {
          $checker = $HTMLURIAttrChecker;
          $has_src = 1;
        } elsif ($attr_ln eq 'type') {
          $checker = $HTMLIMTAttrChecker;
        } else {
          ## TODO: height
          ## TODO: width
          $checker = $HTMLAttrChecker->{$attr_ln}
              || sub { }; ## NOTE: Any local attribute is ok.
        }
      }
      ## Namespaced attributes: specific checker first, then the
      ## per-namespace fallback registered under the empty local name.
      $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
          || $AttrChecker->{$attr_ns}->{''};
      if ($checker) {
        $checker->($self, $attr);
      } elsif ($attr_ns eq '') {
        #
      } else {
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute');
        ## ISSUE: No conformance criteria for global attributes in the spec
      }

      if ($attr_ns eq '') {
        my $status = $attr_status->{$attr_ln};
        $self->_attr_status_info ($attr, $status) if $status;
      }
    }

    unless ($has_src) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing:src');
    }
  },
};
2679    
2680 wakaba 1.49 ## TODO:
2681     ## {applet} FEATURE_M12N10_REC_DEPRECATED
2682     ## class, id, title, alt, archive, code, codebase, height, object, width name style,hspace,vspace(xhtml10)
2683    
## |object|: |param| children followed by transparent (prose) content.
##
## Flag kept in the element state:
##   has_non_param - set once anything other than a |param| (or a
##                   "plus" element) has been seen; a |param| seen
##                   after that is reported as an error.
$Element->{$HTML_NS}->{object} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    ## The element state is forwarded to the generic checker, as the
    ## other closure-style |check_attrs| definitions do.
    $GetHTMLAttrsChecker->({
      data => $HTMLURIAttrChecker,
      type => $HTMLIMTAttrChecker,
      usemap => $HTMLUsemapAttrChecker,
      ## TODO: width
      ## TODO: height
    }, {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align => FEATURE_XHTML10_REC,
      archive => FEATURE_M12N10_REC,
      border => FEATURE_XHTML10_REC,
      classid => FEATURE_M12N10_REC,
      codebase => FEATURE_M12N10_REC,
      codetype => FEATURE_M12N10_REC,
      data => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      datafld => FEATURE_HTML4_REC_RESERVED,
      dataformatas => FEATURE_HTML4_REC_RESERVED,
      datasrc => FEATURE_HTML4_REC_RESERVED,
      declare => FEATURE_M12N10_REC,
      height => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      hspace => FEATURE_XHTML10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      name => FEATURE_M12N10_REC,
      standby => FEATURE_M12N10_REC,
      tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      type => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      usemap => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      vspace => FEATURE_XHTML10_REC,
      width => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    })->($self, $item, $element_state);
    ## At least one of |data| and |type| has to be present.
    unless ($item->{node}->has_attribute_ns (undef, 'data')) {
      unless ($item->{node}->has_attribute_ns (undef, 'type')) {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:data|type');
      }
    }
  },
  ## NOTE: param*, transparent (Prose)
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      ## A disallowed element still counts as non-|param| content.
      ## (This used to set |has_non_legend|, a flag that only |figure|
      ## reads, so a following |param| went unreported.)
      $element_state->{has_non_param} = 1;
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'param') {
      if ($element_state->{has_non_param}) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:prose',
                           level => $self->{must_level});
      }
    } else {
      $HTMLProseContentChecker{check_child_element}->(@_);
      $element_state->{has_non_param} = 1;
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $element_state->{has_non_param} = 1;
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if ($element_state->{has_significant}) {
      ## Hand the "significant content" flag on to the real parent's state.
      $item->{real_parent_state}->{has_significant} = 1;
    } elsif ($item->{node}->manakai_parent_element) {
      ## NOTE: Transparent.
    } else {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{should_level},
                         type => 'no significant content');
    }
  },
  ## TODO: Tests for <nest/> in <object/>
};
2769 wakaba 1.41 ## ISSUE: Is |<menu><object data><li>aa</li></object></menu>| conforming?
2770     ## What about |<section><object data><style scoped></style>x</object></section>|?
2771     ## |<section><ins></ins><object data><style scoped></style>x</object></section>|?
2772 wakaba 1.1
## |param|: an empty element on which both |name| and |value| are
## required; neither attribute has a syntactic constraint (their
## checkers are no-ops).  The element state is forwarded to the
## generic attribute checker, as the other closure-style |check_attrs|
## definitions (|bdo|, |source|, |map|) do.
$Element->{$HTML_NS}->{param} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    $GetHTMLAttrsChecker->({
      name => sub { },
      value => sub { },
    }, {
      %HTMLAttrStatus,
      id => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      type => FEATURE_M12N10_REC,
      value => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      valuetype => FEATURE_M12N10_REC,
    })->($self, $item, $element_state);
    unless ($item->{node}->has_attribute_ns (undef, 'name')) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing:name');
    }
    unless ($item->{node}->has_attribute_ns (undef, 'value')) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing:value');
    }
  },
};
2799    
## |video|: |source| children (only when there is no |src| attribute)
## followed by transparent content.
##
## Flags kept in the element state:
##   allow_source - true while a |source| child is still acceptable;
##                  initialised from the absence of |src| and removed
##                  by any other content
##   has_source   - 1 once a |source| child was seen; -1 when a
##                  |source| is required but has not been seen yet
$Element->{$HTML_NS}->{video} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      src => $HTMLURIAttrChecker,
      ## TODO: start, loopstart, loopend, end
      ## ISSUE: they MUST be "value time offset"s. Value?
      ## ISSUE: playcount has no conformance criteria
      autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
      controls => $GetHTMLBooleanAttrChecker->('controls'),
      poster => $HTMLURIAttrChecker,
      ## TODO: width, height
    },
    {
      %HTMLAttrStatus,
      (map { ($_ => FEATURE_HTML5_LC) }
           qw/autoplay controls end height loopend loopstart
              playcount poster src start width/),
    },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{allow_source}
        = ! $item->{node}->has_attribute_ns (undef, 'src');
    $element_state->{has_source} ||= $element_state->{allow_source} * -1;
    ## NOTE: It might be set true by |check_element|.
  },
  check_child_element => sub {
    my ($self, $item, $child, $ns, $ln, $is_transparent, $state) = @_;

    if ($self->{minus_elements}->{$ns}->{$ln}) {
      $self->{onerror}->(node => $child,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      delete $state->{allow_source};
      return;
    }
    return if $self->{plus_elements}->{$ns}->{$ln};

    if ($ns eq $HTML_NS and $ln eq 'source') {
      ## A |source| is an error once it is no longer allowed, but it is
      ## recorded as seen either way.
      if (not $state->{allow_source}) {
        $self->{onerror}->(node => $child,
                           type => 'element not allowed:prose',
                           level => $self->{must_level});
      }
      $state->{has_source} = 1;
      return;
    }

    ## Any other element ends the run of |source| children.
    delete $state->{allow_source};
    $HTMLProseContentChecker{check_child_element}->(@_);
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    delete $element_state->{allow_source} if $has_significant;
    $HTMLProseContentChecker{check_child_text}->(@_);
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    my $source_missing = $element_state->{has_source} == -1;
    if ($source_missing) {
      $self->{onerror}->(node => $item->{node},
                         type => 'element missing:source',
                         level => $self->{must_level});
    }

    ## The remaining end-of-element checks are shared with |object|.
    $Element->{$HTML_NS}->{object}->{check_end}->(@_);
  },
};
2873    
## |audio|: starts as a copy of the |video| definition (so all of its
## check_* callbacks are inherited), then replaces |status| and
## |check_attrs|.  The attribute tables are those of |video| minus
## |poster|, |height| and |width|.
$Element->{$HTML_NS}->{audio} = {
  %{$Element->{$HTML_NS}->{video}},
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      src => $HTMLURIAttrChecker,
      ## TODO: start, loopstart, loopend, end
      ## ISSUE: they MUST be "value time offset"s. Value?
      ## ISSUE: playcount has no conformance criteria
      autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
      controls => $GetHTMLBooleanAttrChecker->('controls'),
    },
    {
      %HTMLAttrStatus,
      (map { ($_ => FEATURE_HTML5_LC) }
           qw/autoplay controls end loopend loopstart playcount src start/),
    },
  ),
};
2896    
## |source|: an empty element; |src| (URI) is required, |type| and
## |media| are optional and checked with their own checkers.
$Element->{$HTML_NS}{source} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    my $check_attrs = $GetHTMLAttrsChecker->(
      {
        src => $HTMLURIAttrChecker,
        type => $HTMLIMTAttrChecker,
        media => $HTMLMQAttrChecker,
      },
      {
        %HTMLAttrStatus,
        (map { ($_ => FEATURE_HTML5_DEFAULT) } qw/media src type/),
      },
    );
    $check_attrs->($self, $item, $element_state);

    my $el = $item->{node};
    if (not $el->has_attribute_ns (undef, 'src')) {
      $self->{onerror}->(node => $el,
                         type => 'attribute missing:src');
    }
  },
};
2918    
## |canvas|: transparent content; |height| and |width| go through the
## non-negative integer checker with a predicate that accepts anything.
$Element->{$HTML_NS}{canvas} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
    },
    {
      %HTMLAttrStatus,
      (map { ($_ => FEATURE_HTML5_LC) } qw/height width/),
    },
  ),
};
2931    
## |map|: prose content.  |id| is required and, besides the usual ID
## bookkeeping in |$self->{id}|, registers the attribute node in
## |$self->{map}|.  While the element is open, |$self->{flag}->{in_map}|
## is set; the value found at start is kept in the element state so
## that a nested |map| does not clear the flag of an outer one.
$Element->{$HTML_NS}->{map} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $has_id;

    my $id_checker = sub {
      ## NOTE: same as global |id=""|, with |$self->{map}| registration
      my ($self, $attr) = @_;
      my $value = $attr->value;
      if (not length $value) {
        ## NOTE: MUST contain at least one character
        $self->{onerror}->(node => $attr, type => 'empty attribute value');
      } elsif ($self->{id}->{$value}) {
        $self->{onerror}->(node => $attr, type => 'duplicate ID');
        push @{$self->{id}->{$value}}, $attr;
      } else {
        $self->{id}->{$value} = [$attr];
      }
      $self->{onerror}->(node => $attr, type => 'space in ID')
          if $value =~ /[\x09-\x0D\x20]/;
      ## The first |map| registered under a given name is kept.
      $self->{map}->{$value} ||= $attr;
      $has_id = 1;
    };

    $GetHTMLAttrsChecker->({
      id => $id_checker,
    }, {
      %HTMLAttrStatus,
      class => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      id => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      name => FEATURE_M12N10_REC_DEPRECATED,
      (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) }
           qw/onclick ondblclick onmousedown onmouseup onmouseover
              onmousemove onmouseout onkeypress onkeydown onkeyup/),
      title => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    })->($self, $item, $element_state);

    unless ($has_id) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing:id');
    }
  },
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my $flags = $self->{flag};
    $element_state->{in_map_original} = $flags->{in_map};
    $flags->{in_map} = 1;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## Clear the flag only if it was not already set when we started.
    if (not $element_state->{in_map_original}) {
      delete $self->{flag}->{in_map};
    }
    $HTMLProseContentChecker{check_end}->(@_);
  },
};
2993    
2994     $Element->{$HTML_NS}->{area} = {
2995 wakaba 1.40 %HTMLEmptyChecker,
2996 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2997 wakaba 1.40 check_attrs => sub {
2998     my ($self, $item, $element_state) = @_;
2999 wakaba 1.1 my %attr;
3000     my $coords;
3001 wakaba 1.40 for my $attr (@{$item->{node}->attributes}) {
3002 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
3003     $attr_ns = '' unless defined $attr_ns;
3004     my $attr_ln = $attr->manakai_local_name;
3005     my $checker;
3006     if ($attr_ns eq '') {
3007     $checker = {
3008     alt => sub { },
3009     ## NOTE: |alt| value has no conformance creteria.
3010     shape => $GetHTMLEnumeratedAttrChecker->({
3011     circ => -1, circle => 1,
3012     default => 1,
3013     poly => 1, polygon => -1,
3014     rect => 1, rectangle => -1,
3015     }),
3016     coords => sub {
3017     my ($self, $attr) = @_;
3018     my $value = $attr->value;
3019     if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
3020     $coords = [split /,/, $value];
3021     } else {
3022     $self->{onerror}->(node => $attr,
3023     type => 'coords:syntax error');
3024     }
3025     },
3026     target => $HTMLTargetAttrChecker,
3027     href => $HTMLURIAttrChecker,
3028     ping => $HTMLSpaceURIsAttrChecker,
3029 wakaba 1.40 rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
3030 wakaba 1.1 media => $HTMLMQAttrChecker,
3031     hreflang => $HTMLLanguageTagAttrChecker,
3032     type => $HTMLIMTAttrChecker,
3033     }->{$attr_ln};
3034     if ($checker) {
3035     $attr{$attr_ln} = $attr;
3036     } else {
3037     $checker = $HTMLAttrChecker->{$attr_ln};
3038     }
3039     }
3040     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
3041     || $AttrChecker->{$attr_ns}->{''};
3042     if ($checker) {
3043     $checker->($self, $attr) if ref $checker;
3044 wakaba 1.49 } elsif ($attr_ns eq '') {
3045 wakaba 1.54 #
3046 wakaba 1.1 } else {
3047     $self->{onerror}->(node => $attr, level => 'unsupported',
3048     type => 'attribute');
3049     ## ISSUE: No comformance createria for unknown attributes in the spec
3050     }
3051 wakaba 1.49
3052     if ($attr_ns eq '') {
3053     $self->_attr_status_info ($attr, {
3054     %HTMLAttrStatus,
3055     %HTMLM12NCommonAttrStatus,
3056     accesskey => FEATURE_M12N10_REC,
3057 wakaba 1.50 alt => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3058     coords => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3059     href => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3060 wakaba 1.54 hreflang => FEATURE_HTML5_DEFAULT,
3061 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
3062     media => FEATURE_HTML5_DEFAULT,
3063 wakaba 1.49 nohref => FEATURE_M12N10_REC,
3064 wakaba 1.50 onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3065     onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3066     ping => FEATURE_HTML5_DEFAULT,
3067     rel => FEATURE_HTML5_DEFAULT,
3068     shape => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3069     tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3070     target => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3071     type => FEATURE_HTML5_DEFAULT,
3072 wakaba 1.49 }->{$attr_ln});
3073     }
3074 wakaba 1.1 }
3075    
3076     if (defined $attr{href}) {
3077 wakaba 1.4 $self->{has_hyperlink_element} = 1;
3078 wakaba 1.1 unless (defined $attr{alt}) {
3079 wakaba 1.40 $self->{onerror}->(node => $item->{node},
3080 wakaba 1.1 type => 'attribute missing:alt');
3081     }
3082     } else {
3083     for (qw/target ping rel media hreflang type alt/) {
3084     if (defined $attr{$_}) {
3085     $self->{onerror}->(node => $attr{$_},
3086     type => 'attribute not allowed');
3087     }
3088     }
3089     }
3090    
3091     my $shape = 'rectangle';
3092     if (defined $attr{shape}) {
3093     $shape = {
3094     circ => 'circle', circle => 'circle',
3095     default => 'default',
3096     poly => 'polygon', polygon => 'polygon',
3097     rect => 'rectangle', rectangle => 'rectangle',
3098     }->{lc $attr{shape}->value} || 'rectangle';
3099     ## TODO: ASCII lowercase?
3100     }
3101    
3102     if ($shape eq 'circle') {
3103     if (defined $attr{coords}) {
3104     if (defined $coords) {
3105     if (@$coords == 3) {
3106     if ($coords->[2] < 0) {
3107     $self->{onerror}->(node => $attr{coords},
3108     type => 'coords:out of range:2');
3109     }
3110     } else {
3111     $self->{onerror}->(node => $attr{coords},
3112     type => 'coords:number:3:'.@$coords);
3113     }
3114     } else {
3115     ## NOTE: A syntax error has been reported.
3116     }
3117     } else {
3118 wakaba 1.40 $self->{onerror}->(node => $item->{node},
3119 wakaba 1.1 type => 'attribute missing:coords');
3120     }
3121     } elsif ($shape eq 'default') {
3122     if (defined $attr{coords}) {
3123     $self->{onerror}->(node => $attr{coords},
3124     type => 'attribute not allowed');
3125     }
3126     } elsif ($shape eq 'polygon') {
3127     if (defined $attr{coords}) {
3128     if (defined $coords) {
3129     if (@$coords >= 6) {
3130     unless (@$coords % 2 == 0) {
3131     $self->{onerror}->(node => $attr{coords},
3132     type => 'coords:number:even:'.@$coords);
3133     }
3134     } else {
3135     $self->{onerror}->(node => $attr{coords},
3136     type => 'coords:number:>=6:'.@$coords);
3137     }
3138     } else {
3139     ## NOTE: A syntax error has been reported.
3140     }
3141     } else {
3142 wakaba 1.40 $self->{onerror}->(node => $item->{node},
3143 wakaba 1.1 type => 'attribute missing:coords');
3144     }
3145     } elsif ($shape eq 'rectangle') {
3146     if (defined $attr{coords}) {
3147     if (defined $coords) {
3148     if (@$coords == 4) {
3149     unless ($coords->[0] < $coords->[2]) {
3150     $self->{onerror}->(node => $attr{coords},
3151     type => 'coords:out of range:0');
3152     }
3153     unless ($coords->[1] < $coords->[3]) {
3154     $self->{onerror}->(node => $attr{coords},
3155     type => 'coords:out of range:1');
3156     }
3157     } else {
3158     $self->{onerror}->(node => $attr{coords},
3159     type => 'coords:number:4:'.@$coords);
3160     }
3161     } else {
3162     ## NOTE: A syntax error has been reported.
3163     }
3164     } else {
3165 wakaba 1.40 $self->{onerror}->(node => $item->{node},
3166 wakaba 1.1 type => 'attribute missing:coords');
3167     }
3168     }
3169     },
3170 wakaba 1.59 check_start => sub {
3171     my ($self, $item, $element_state) = @_;
3172     unless ($self->{flag}->{in_map} or
3173     not $item->{node}->manakai_parent_element) {
3174     $self->{onerror}->(node => $item->{node},
3175     type => 'element not allowed:area',
3176     level => $self->{must_level});
3177     }
3178     },
3179 wakaba 1.1 };
3180    
## |table|: enforces the order of the child elements and, when the
## element ends, hands it to |Whatpm::HTMLTable| so that table model
## errors are reported.
##
## Child order accepted by the state machine below:
##   caption?, colgroup*, thead?, tfoot?, (tbody* | tr+), tfoot?
## with at most one |tfoot| in total.
$Element->{$HTML_NS}->{table} = {
  %HTMLChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_M12N10_REC_DEPRECATED,
    bgcolor => FEATURE_M12N10_REC_DEPRECATED,
    border => FEATURE_M12N10_REC,
    cellpadding => FEATURE_M12N10_REC,
    cellspacing => FEATURE_M12N10_REC,
    datafld => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datapagesize => FEATURE_M12N10_REC,
    datasrc => FEATURE_HTML4_REC_RESERVED,
    frame => FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    rules => FEATURE_M12N10_REC,
    summary => FEATURE_M12N10_REC,
    width => FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    ## Initial state of the child-order state machine.
    $element_state->{phase} = 'before caption';
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## current phase => { HTML child local name => next phase }.
    ## A child that is not listed for the current phase is an error.
    ##
    ## NOTE: A |tfoot| that precedes the row groups leads to
    ## 'after leading tfoot' so that both |tbody| and |tr| children may
    ## still follow it.  (Mapping it directly to 'in tbodys' would
    ## reject |<table><tfoot/><tr/></table>|.)
    my %next_phase = (
      'before caption' => {
        caption => 'in colgroup', colgroup => 'in colgroup',
        thead => 'after thead', tbody => 'in tbodys', tr => 'in trs',
        tfoot => 'after leading tfoot',
      },
      'in colgroup' => {
        colgroup => 'in colgroup',
        thead => 'after thead', tbody => 'in tbodys', tr => 'in trs',
        tfoot => 'after leading tfoot',
      },
      'after thead' => {
        tbody => 'in tbodys', tr => 'in trs',
        tfoot => 'after leading tfoot',
      },
      'after leading tfoot' => {
        tbody => 'in tbodys', tr => 'in trs',
      },
      'in tbodys' => {
        tbody => 'in tbodys', tfoot => 'after tfoot',
      },
      'in trs' => {
        tr => 'in trs', tfoot => 'after tfoot',
      },
      'after tfoot' => {},
    );

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } else {
      my $allowed = $next_phase{$element_state->{phase}}
          or die "check_child_element: Bad |table| phase: $element_state->{phase}";
      my $next = $child_nsuri eq $HTML_NS ? $allowed->{$child_ln} : undef;
      if (defined $next and
          not ($child_ln eq 'tfoot' and $element_state->{has_tfoot})) {
        $element_state->{phase} = $next;
        ## Only one |tfoot| child is allowed in total.
        $element_state->{has_tfoot} = 1 if $child_ln eq 'tfoot';
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed');
      }
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    ## Significant text directly inside |table| is reported.
    if ($has_significant) {
      $self->{onerror}->(node => $child_node, type => 'character not allowed');
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Table model errors
    require Whatpm::HTMLTable;
    Whatpm::HTMLTable->form_table ($item->{node}, sub {
      my %opt = @_;
      $self->{onerror}->(type => 'table:'.$opt{type}, node => $opt{node});
    });
    ## Record the checked |table| node in the checker's result.
    push @{$self->{return}->{table}}, $item->{node};

    $HTMLChecker{check_end}->(@_);
  },
};
3304    
## |caption|: phrasing content.  The only element-specific attribute
## status entries are the deprecated |align| and |lang|.
$Element->{$HTML_NS}->{caption} = {
  %HTMLPhrasingContentChecker,
  status      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},   # no element-specific attribute value checkers
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      lang  => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
};
3315    
## |colgroup|: starts from the empty-element checker but overrides the
## child handlers so that |col| element children are accepted; any other
## child element and any significant text is reported.
$Element->{$HTML_NS}->{colgroup} = {
  %HTMLEmptyChecker,
  status      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## The integer must be greater than zero.
      span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
        ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
        ## TODO: "attribute not supported" if |col|.
        ## ISSUE: MUST NOT if any |col|?
        ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align   => FEATURE_M12N10_REC,
      char    => FEATURE_M12N10_REC,
      charoff => FEATURE_M12N10_REC,
      lang    => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      span    => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      valign  => FEATURE_M12N10_REC,
      width   => FEATURE_M12N10_REC,
    },
  ),
  check_child_element => sub {
    my ($self, $item, $child, $child_ns, $child_name,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_ns}->{$child_name}) {
      $self->{onerror}->(node  => $child,
                         type  => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_ns}->{$child_name}) {
      ## Explicitly permitted in this context.
    } elsif ($child_ns eq $HTML_NS and $child_name eq 'col') {
      ## The one regular child of |colgroup|.
    } else {
      $self->{onerror}->(node => $child, type => 'element not allowed');
    }
  },
  check_child_text => sub {
    my ($self, $item, $text_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $text_node, type => 'character not allowed')
        if $has_significant;
  },
};
3358    
## |col|: empty element; |span| must be an integer greater than zero.
$Element->{$HTML_NS}->{col} = {
  %HTMLEmptyChecker,
  status      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align   => FEATURE_M12N10_REC,
      char    => FEATURE_M12N10_REC,
      charoff => FEATURE_M12N10_REC,
      lang    => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      span    => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      valign  => FEATURE_M12N10_REC,
      width   => FEATURE_M12N10_REC,
    },
  ),
};
3376    
## |tbody|: a row group.  Only |tr| element children are accepted and
## at least one of them is required when the element ends.
$Element->{$HTML_NS}->{tbody} = {
  %HTMLChecker,
  status      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align   => FEATURE_M12N10_REC,
      char    => FEATURE_M12N10_REC,
      charoff => FEATURE_M12N10_REC,
      lang    => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      valign  => FEATURE_M12N10_REC,
    },
  ),
  check_child_element => sub {
    my ($self, $item, $child, $child_ns, $child_name,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_ns}->{$child_name}) {
      $self->{onerror}->(node  => $child,
                         type  => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_ns}->{$child_name}) {
      ## Explicitly permitted in this context.
    } elsif ($child_ns eq $HTML_NS and $child_name eq 'tr') {
      ## Remembered for the "at least one row" check in |check_end|.
      $element_state->{has_tr} = 1;
    } else {
      $self->{onerror}->(node => $child, type => 'element not allowed');
    }
  },
  check_child_text => sub {
    my ($self, $item, $text_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $text_node, type => 'character not allowed')
        if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->{onerror}->(node => $item->{node},
                       type => 'child element missing:tr')
        unless $element_state->{has_tr};

    ## Delegate to the generic end-of-element handling.
    $HTMLChecker{check_end}->(@_);
  },
};
3420    
## |thead| is checked by a shallow copy of the |tbody| definition.
$Element->{$HTML_NS}->{thead} = { %{ $Element->{$HTML_NS}->{tbody} } };
3424    
## |tfoot| is checked by a shallow copy of the |tbody| definition.
$Element->{$HTML_NS}->{tfoot} = { %{ $Element->{$HTML_NS}->{tbody} } };
3428    
## |tr|: a table row.  Only |td| and |th| element children are accepted
## and at least one cell is required when the element ends.
$Element->{$HTML_NS}->{tr} = {
  %HTMLChecker,
  status      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align   => FEATURE_M12N10_REC,
      bgcolor => FEATURE_M12N10_REC_DEPRECATED,
      char    => FEATURE_M12N10_REC,
      charoff => FEATURE_M12N10_REC,
      lang    => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      valign  => FEATURE_M12N10_REC,
    },
  ),
  check_child_element => sub {
    my ($self, $item, $child, $child_ns, $child_name,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_ns}->{$child_name}) {
      $self->{onerror}->(node  => $child,
                         type  => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_ns}->{$child_name}) {
      ## Explicitly permitted in this context.
    } elsif ($child_ns eq $HTML_NS and
             ($child_name eq 'td' or $child_name eq 'th')) {
      ## Remembered for the "at least one cell" check in |check_end|.
      $element_state->{has_cell} = 1;
    } else {
      $self->{onerror}->(node => $child, type => 'element not allowed');
    }
  },
  check_child_text => sub {
    my ($self, $item, $text_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $text_node, type => 'character not allowed')
        if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->{onerror}->(node => $item->{node},
                       type => 'child element missing:td|th')
        unless $element_state->{has_cell};

    ## Delegate to the generic end-of-element handling.
    $HTMLChecker{check_end}->(@_);
  },
};
3474    
## |td|: a data cell with prose content.  |colspan| and |rowspan| must
## be integers greater than zero.
$Element->{$HTML_NS}->{td} = {
  %HTMLProseContentChecker,
  status      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
      rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      abbr    => FEATURE_M12N10_REC,
      align   => FEATURE_M12N10_REC,
      axis    => FEATURE_M12N10_REC,
      bgcolor => FEATURE_M12N10_REC_DEPRECATED,
      char    => FEATURE_M12N10_REC,
      charoff => FEATURE_M12N10_REC,
      colspan => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      headers => FEATURE_M12N10_REC,
      height  => FEATURE_M12N10_REC_DEPRECATED,
      lang    => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      nowrap  => FEATURE_M12N10_REC_DEPRECATED,
      rowspan => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      scope   => FEATURE_M12N10_REC,
      valign  => FEATURE_M12N10_REC,
      width   => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),
};
3501    
## |th|: a header cell with phrasing content.  |colspan| and |rowspan|
## must be integers greater than zero; |scope| is an enumerated
## attribute.
$Element->{$HTML_NS}->{th} = {
  %HTMLPhrasingContentChecker,
  status      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
      rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
      scope   => $GetHTMLEnumeratedAttrChecker->({
        map { $_ => 1 } qw/row col rowgroup colgroup/
      }),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      abbr    => FEATURE_M12N10_REC,
      align   => FEATURE_M12N10_REC,
      axis    => FEATURE_M12N10_REC,
      bgcolor => FEATURE_M12N10_REC_DEPRECATED,
      char    => FEATURE_M12N10_REC,
      charoff => FEATURE_M12N10_REC,
      colspan => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      headers => FEATURE_M12N10_REC,
      height  => FEATURE_M12N10_REC_DEPRECATED,
      lang    => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      nowrap  => FEATURE_M12N10_REC_DEPRECATED,
      rowspan => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      scope   => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      valign  => FEATURE_M12N10_REC,
      width   => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),
};
3530    
## Placeholder attribute checker for attributes whose value syntax is
## not checked yet: reports the attribute node to |onerror| at the
## 'unsupported' level instead of validating it.
my $AttrCheckerNotImplemented = sub {
  my ($checker, $attr_node) = @_;
  my $report = $checker->{onerror};
  $report->(node => $attr_node, level => 'unsupported', type => 'attribute');
};
3536    
## |form|: prose content, with HTML4 (M12N) and Web Forms 2.0 [WF2]
## attributes.
$Element->{$HTML_NS}->{form} = {
  %HTMLProseContentChecker, ## NOTE: Flow* [WF2]
  ## TODO: form in form is allowed in XML [WF2]
  status      => FEATURE_WF2 | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      accept => $AttrCheckerNotImplemented, ## TODO: ContentTypes [WF2]
      'accept-charset' => $AttrCheckerNotImplemented, ## TODO: Charsets
      action => $HTMLURIAttrChecker, ## TODO: "User agent behavior for a value other than HTTP URI is undefined" [HTML4]
      data => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
      enctype => $HTMLIMTAttrChecker, ## TODO: "multipart/form-data" should be used when type=file is used [HTML4] ## TODO: MUST NOT parameter [WF2]
      method => $GetHTMLEnumeratedAttrChecker->({
        map { $_ => 1 } qw/get post put delete/
      }),
        ## NOTE: "get" SHOULD be used for idempotent submission,
        ## "post" SHOULD be used otherwise [HTML4].  This cannot be tested.
      name => sub { }, # CDATA in HTML4 ## TODO: must be same as |id| (informative!) [XHTML10]
      onreceived => $HTMLEventHandlerAttrChecker,
      replace => $GetHTMLEnumeratedAttrChecker->({
        map { $_ => 1 } qw/document values/
      }),
      target => $HTMLTargetAttrChecker,
      ## TODO: Warn for combination whose behavior is not defined.
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      accept           => FEATURE_WF2 | FEATURE_M12N10_REC,
      'accept-charset' => FEATURE_M12N10_REC,
      action           => FEATURE_WF2 | FEATURE_M12N10_REC,
      data             => FEATURE_WF2,
      enctype          => FEATURE_WF2 | FEATURE_M12N10_REC,
      lang             => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      method           => FEATURE_WF2 | FEATURE_M12N10_REC,
      name             => FEATURE_M12N10_REC_DEPRECATED,
      onreceived       => FEATURE_WF2,
      onreset          => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onsubmit         => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      replace          => FEATURE_WF2,
      target           => FEATURE_M12N10_REC,
    },
  ),
  ## TODO: Tests
  ## TODO: Tests for <nest/> in <form>
};
3577    
## |fieldset|: prose content; |disabled| is a boolean attribute.
$Element->{$HTML_NS}->{fieldset} = {
  %HTMLProseContentChecker, ## NOTE: legend, %Flow; ## TODO: legend
  status      => FEATURE_WF2 | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      disabled => $GetHTMLBooleanAttrChecker->('disabled'),
      ## TODO: form [WF2]
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      disabled => FEATURE_WF2,
      form     => FEATURE_WF2,
      lang     => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
  ## TODO: Tests
  ## TODO: Tests for <nest/> in <fieldset>
};
3594    
## |input|: empty element with HTML4 (M12N) and Web Forms 2.0 [WF2]
## attributes.
##
## The first hash passed to |$GetHTMLAttrsChecker| maps attribute names
## to value checkers, the second maps attribute names to their
## specification status flags.
$Element->{$HTML_NS}->{input} = {
  %HTMLEmptyChecker, ## MUST [WF2]
  status => FEATURE_WF2 | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    accept => $AttrCheckerNotImplemented, ## TODO: ContentTypes [WF2]
    accesskey => $AttrCheckerNotImplemented, ## TODO: Character
    ## TODO: "Note. Authors should consider the input method of the expected reader when specifying an accesskey." [HTML4]
    ## "We recommend that authors include the access key in label text or wherever the access key is to apply." [HTML4]
    action => $HTMLURIAttrChecker,
    align => $GetHTMLEnumeratedAttrChecker->({
      top => 1, middle => 1, bottom => 1, left => 1, right => 1,
    }),
    alt => sub {}, ## NOTE: Text [M12N] ## TODO: |alt| should be provided for |type=image| [HTML4]
    ## NOTE: HTML4 has a "should" for accessibility, which cannot be tested
    ## here.
    autocomplete => $GetHTMLEnumeratedAttrChecker->({on => 1, off => 1}),
    autofocus => $GetHTMLBooleanAttrChecker->('autofocus'),
    checked => $GetHTMLBooleanAttrChecker->('checked'),
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    enctype => $HTMLIMTAttrChecker,
    ## TODO: form [WF2]
    ## TODO: inputmode [WF2]
    ismap => $GetHTMLBooleanAttrChecker->('ismap'),
    ## TODO: list [WF2]
    ## TODO: max [WF2]
    maxlength => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
    method => $GetHTMLEnumeratedAttrChecker->({
      get => 1, post => 1, put => 1, delete => 1,
    }),
    ## TODO: min [WF2]
    name => sub {}, ## NOTE: CDATA [M12N]
    readonly => $GetHTMLBooleanAttrChecker->('readonly'),
    replace => $GetHTMLEnumeratedAttrChecker->({document => 1, values => 1}),
    required => $GetHTMLBooleanAttrChecker->('required'),
    size => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
    src => $HTMLURIAttrChecker,
    ## TODO: step [WF2]
    target => $HTMLTargetAttrChecker,
    ## TODO: template
    type => $GetHTMLEnumeratedAttrChecker->({
      text => 1, password => 1, checkbox => 1, radio => 1, submit => 1,
      reset => 1, file => 1, hidden => 1, image => 1, button => 1,
      ## [WF2]
      ## NOTE: The keyword is |datetime|; it used to be misspelled
      ## |datatime| here, which rejected the valid keyword and accepted
      ## an invalid one.
      datetime => 1, 'datetime-local' => 1, date => 1, month => 1, week => 1,
      time => 1, number => 1, range => 1, email => 1, url => 1,
      add => 1, remove => 1, 'move-up' => 1, 'move-down' => 1,
    }),
    usemap => $HTMLUsemapAttrChecker,
    value => sub {}, ## NOTE: CDATA [M12N] ## TODO: "optional except when the type attribute has the value "radio" or "checkbox"" [HTML4] ## TODO: constraints [WF2]
    ## TODO: "authors should ensure that in each set of radio buttons that one is initially "on"." [HTML4] [WF2]
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accept => FEATURE_WF2 | FEATURE_M12N10_REC,
    accesskey => FEATURE_M12N10_REC,
    action => FEATURE_WF2,
    align => FEATURE_M12N10_REC_DEPRECATED,
    alt => FEATURE_M12N10_REC,
    autocomplete => FEATURE_WF2,
    autofocus => FEATURE_WF2,
    checked => FEATURE_M12N10_REC,
    datafld => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datasrc => FEATURE_HTML4_REC_RESERVED,
    disabled => FEATURE_WF2 | FEATURE_M12N10_REC,
    ## |enctype|, |replace|, and |target| have value checkers above, so
    ## they need a status entry as well (same status as |action| and
    ## |method|).
    enctype => FEATURE_WF2,
    form => FEATURE_WF2,
    inputmode => FEATURE_WF2 | FEATURE_XHTMLBASIC11_CR,
    ismap => FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    list => FEATURE_WF2,
    max => FEATURE_WF2,
    maxlength => FEATURE_WF2 | FEATURE_M12N10_REC,
    method => FEATURE_WF2,
    min => FEATURE_WF2,
    name => FEATURE_M12N10_REC,
    onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onchange => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onselect => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    readonly => FEATURE_WF2 | FEATURE_M12N10_REC,
    replace => FEATURE_WF2,
    required => FEATURE_WF2,
    size => FEATURE_WF2_DEPRECATED | FEATURE_M12N10_REC,
    src => FEATURE_M12N10_REC,
    step => FEATURE_WF2,
    tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    target => FEATURE_WF2,
    template => FEATURE_WF2,
    type => FEATURE_M12N10_REC,
    usemap => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
    value => FEATURE_M12N10_REC,
  }),
  ## TODO: Tests
  ## TODO: Tests for <nest/> in <input>
};
3688    
3689 wakaba 1.56 ## TODO: Form |name| attributes: MUST NOT conflict with RFC 3106 [WF2]
3690    
## |button|: prose content with HTML4 (M12N) and Web Forms 2.0 [WF2]
## attributes.
##
## The first hash passed to |$GetHTMLAttrsChecker| maps attribute names
## to value checkers, the second maps attribute names to their
## specification status flags.
$Element->{$HTML_NS}->{button} = {
  %HTMLProseContentChecker, ## NOTE: %Flow; - something [XHTML10]
  ## TODO: -A|%formctrl;|form|fieldset [HTML4]
  ## TODO: image map (img) in |button| is "illegal" [HTML4].
  status => FEATURE_WF2 | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    accesskey => $AttrCheckerNotImplemented, ## TODO: Character
    action => $HTMLURIAttrChecker,
    autofocus => $GetHTMLBooleanAttrChecker->('autofocus'),
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    ## |enctype| is listed in the status table below, so it needs a
    ## value checker as well; same checker as on |form| and |input|.
    enctype => $HTMLIMTAttrChecker,
    ## TODO: form [WF2]
    method => $GetHTMLEnumeratedAttrChecker->({
      get => 1, post => 1, put => 1, delete => 1,
    }),
    name => sub {}, ## NOTE: CDATA [M12N]
    oninvalid => $HTMLEventHandlerAttrChecker,
    replace => $GetHTMLEnumeratedAttrChecker->({document => 1, values => 1}),
    target => $HTMLTargetAttrChecker,
    ## TODO: template [WF2]
    type => $GetHTMLEnumeratedAttrChecker->({
      button => 1, submit => 1, reset => 1,
    }),
    value => sub {}, ## NOTE: CDATA [M12N]
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accesskey => FEATURE_M12N10_REC,
    action => FEATURE_WF2,
    autofocus => FEATURE_WF2,
    datafld => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datasrc => FEATURE_HTML4_REC_RESERVED,
    disabled => FEATURE_WF2 | FEATURE_M12N10_REC,
    enctype => FEATURE_WF2,
    form => FEATURE_WF2,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    method => FEATURE_WF2,
    name => FEATURE_M12N10_REC,
    onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    oninvalid => FEATURE_WF2,
    replace => FEATURE_WF2,
    tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    target => FEATURE_WF2,
    template => FEATURE_WF2,
    type => FEATURE_M12N10_REC,
    value => FEATURE_M12N10_REC,
  }),
  ## TODO: Tests
  ## TODO: Tests for <nest/> in <button>
};
3742    
## |label|: phrasing content.  Neither |accesskey| nor |for| is
## validated yet; both are reported by the not-implemented checker.
$Element->{$HTML_NS}->{label} = {
  %HTMLPhrasingContentChecker, ## NOTE: %Inline - label [XHTML10] ## TODO: -label
  ## TODO: At most one form control [WF2]
  status      => FEATURE_WF2 | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      accesskey => $AttrCheckerNotImplemented, ## TODO: Character
      for => $AttrCheckerNotImplemented, ## TODO: IDREF ## TODO: Must be |id| of control [HTML4] ## TODO: Or, "may only contain one control element"
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      accesskey => FEATURE_WF2 | FEATURE_M12N10_REC,
      for       => FEATURE_M12N10_REC,
      lang      => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      onblur    => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onfocus   => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    },
  ),
  ## TODO: Tests
  ## TODO: Tests for <nest/> in <label>
};
3762    
## |select|: currently checked as prose content (see the TODOs for the
## intended content model); flagged with |is_root|.
$Element->{$HTML_NS}->{select} = {
  %HTMLProseContentChecker, ## TODO: (optgroup|option)* [HTML4] + [WF2] ## TODO: SHOULD avoid empty and visible [WF2]
  ## TODO: author should SELECTED at least one OPTION in non-MULTIPLE case [HTML4].
  ## TODO: more than one OPTION with SELECTED in non-MULTIPLE case is "error" [HTML4]
  status      => FEATURE_WF2 | FEATURE_M12N10_REC,
  is_root     => 1, ## TODO: SHOULD NOT in application/xhtml+xml [WF2]
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## TODO: accesskey [WF2]
      autofocus => $GetHTMLBooleanAttrChecker->('autofocus'),
      disabled  => $GetHTMLBooleanAttrChecker->('disabled'),
      data      => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
      ## TODO: form [WF2]
      multiple  => $GetHTMLBooleanAttrChecker->('multiple'),
      name      => sub {}, ## NOTE: CDATA [M12N]
      oninvalid => $HTMLEventHandlerAttrChecker,
      ## TODO: pattern [WF2] ## TODO: |title| semantics
      size      => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      accesskey    => FEATURE_WF2,
      autofocus    => FEATURE_WF2,
      data         => FEATURE_WF2,
      datafld      => FEATURE_HTML4_REC_RESERVED,
      dataformatas => FEATURE_HTML4_REC_RESERVED,
      datasrc      => FEATURE_HTML4_REC_RESERVED,
      disabled     => FEATURE_WF2 | FEATURE_M12N10_REC,
      form         => FEATURE_WF2,
      lang         => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      multiple     => FEATURE_M12N10_REC,
      name         => FEATURE_M12N10_REC,
      onblur       => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onchange     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onfocus      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      oninvalid    => FEATURE_WF2,
      pattern      => FEATURE_WF2,
      size         => FEATURE_M12N10_REC,
      tabindex     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    },
  ),
  ## TODO: Tests
  ## TODO: Tests for <nest/> in <select>
};
3805 wakaba 1.1
## |datalist| [WF2]: currently checked as prose content; |data| is
## checked as a URI.
$Element->{$HTML_NS}->{datalist} = {
  %HTMLProseContentChecker, ## TODO: (transparent | option)*
  ## TODO: |option| child MUST be empty [WF2]
  status      => FEATURE_WF2,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      data => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
    },
    {
      %HTMLAttrStatus,
      data => FEATURE_WF2,
    },
  ),
  ## TODO: Tests
  ## TODO: Tests for <nest/> in <datalist>
};
3819 wakaba 1.49
## Checker definition for the |optgroup| element.  The content callbacks
## are copied from |%HTMLProseContentChecker|.
{
  ## First argument of |$GetHTMLAttrsChecker|: attribute value checkers.
  my %attr_checker = (
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    label => sub { }, ## NOTE: Text [M12N] ## TODO: required
  );
  ## Second argument: per-attribute FEATURE_* status values.  The two
  ## shared tables come first so that the entries below override them.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    disabled => FEATURE_WF2 | FEATURE_M12N10_REC,
    label => FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  );

  $Element->{$HTML_NS}{optgroup} = {
    %HTMLProseContentChecker, ## TODO: (option|optgroup)* [HTML4] + [WF2] SHOULD avoid empty and visible [WF2]
    status => FEATURE_WF2 | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
    ## TODO: Tests
    ## TODO: Tests for <nest/> in <optgroup>
  };
}
3836    
## Checker definition for the |option| element.  The content callbacks
## are copied from |%HTMLTextChecker|.
$Element->{$HTML_NS}->{option} = {
  %HTMLTextChecker,
  status => FEATURE_WF2 | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    ## Attribute value checkers.
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    label => sub {}, ## NOTE: Text [M12N]
    selected => $GetHTMLBooleanAttrChecker->('selected'),
    value => sub {}, ## NOTE: CDATA [M12N]
  }, {
    ## Per-attribute FEATURE_* status values.  The shared tables come
    ## first so that the entries below override them.
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    ## NOTE: The two flags have to be combined with "|", as in the
    ## |optgroup| and |textarea| definitions.  With a "," between them
    ## the second flag becomes a list element of its own, which shifts
    ## every following key/value pair by one and leaves |label|, |lang|,
    ## |selected| and |value| without a status entry.
    disabled => FEATURE_WF2 | FEATURE_M12N10_REC,
    label => FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    selected => FEATURE_M12N10_REC,
    value => FEATURE_M12N10_REC,
  }),
  ## TODO: Tests
  ## TODO: Tests for <nest/> in <option>
};
3857 wakaba 1.49
## Checker definition for the |textarea| element.  The content callbacks
## are copied from |%HTMLTextChecker|.
{
  ## First argument of |$GetHTMLAttrsChecker|: attribute value checkers.
  my %attr_checker = (
    accept => $HTMLIMTAttrChecker, ## TODO: MUST be a text-based type
    accesskey => $AttrCheckerNotImplemented, ## TODO: Character
    autofocus => $GetHTMLBooleanAttrChecker->('autofocus'),
    cols => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }), ## TODO: SHOULD if wrap=hard [WF2]
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    ## TODO: form [WF2]
    ## TODO: inputmode [WF2]
    maxlength => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
    name => sub { }, ## NOTE: CDATA [M12N]
    ## TODO: pattern [WF2] ## TODO: |title| special semantics
    readonly => $GetHTMLBooleanAttrChecker->('readonly'),
    required => $GetHTMLBooleanAttrChecker->('required'),
    rows => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
    oninvalid => $HTMLEventHandlerAttrChecker,
    wrap => $GetHTMLEnumeratedAttrChecker->({soft => 1, hard => 1}),
  );

  ## Second argument: per-attribute FEATURE_* status values.  The two
  ## shared tables come first so that the entries below override them.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accept => FEATURE_WF2,
    accesskey => FEATURE_M12N10_REC,
    autofocus => FEATURE_WF2,
    cols => FEATURE_M12N10_REC,
    datafld => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datasrc => FEATURE_HTML4_REC_RESERVED,
    disabled => FEATURE_WF2 | FEATURE_M12N10_REC,
    form => FEATURE_WF2,
    inputmode => FEATURE_WF2 | FEATURE_XHTMLBASIC11_CR,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    maxlength => FEATURE_WF2,
    name => FEATURE_M12N10_REC,
    onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onchange => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    oninvalid => FEATURE_WF2,
    onselect => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    pattern => FEATURE_WF2,
    readonly => FEATURE_WF2 | FEATURE_M12N10_REC,
    required => FEATURE_WF2,
    rows => FEATURE_M12N10_REC,
    tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    wrap => FEATURE_WF2,
  );

  $Element->{$HTML_NS}{textarea} = {
    %HTMLTextChecker,
    status => FEATURE_WF2 | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
    ## TODO: Tests
    ## TODO: Tests for <nest/> in <textarea>
  };
}
3908 wakaba 1.49
## Checker definition for the |output| element [WF2].  The content
## callbacks are copied from |%HTMLPhrasingContentChecker|.
{
  ## No attribute value checker is implemented for this element yet.
  my %attr_checker = (
    ## TODO: for [WF2]
    ## TODO: form [WF2]
    ## TODO: name [WF2]
    ## onformchange[WF2]
    ## onforminput[WF2]
  );
  ## Per-attribute FEATURE_* status values.
  my %attr_status = (
    %HTMLAttrStatus,
    for => FEATURE_WF2,
    form => FEATURE_WF2,
    name => FEATURE_WF2,
    onchange => FEATURE_HTML5_DEFAULT | FEATURE_WF2,
    onformchange => FEATURE_WF2,
    onforminput => FEATURE_WF2,
  );

  $Element->{$HTML_NS}{output} = {
    %HTMLPhrasingContentChecker, ## Inline [WF2]
    status => FEATURE_WF2,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
    ## TODO: Tests
    ## TODO: Tests for <nest/> in <output>
    ## NOTE: "The output element should be used when ..." [WF2]
  };
}
3931    
3932     ## TODO: repetition template
3933    
## Checker definition for the |isindex| element.  The content callbacks
## are copied from |%HTMLEmptyChecker|.
{
  ## First argument of |$GetHTMLAttrsChecker|: attribute value checkers.
  my %attr_checker = (
    prompt => sub { }, ## NOTE: Text [M12N]
  );
  ## Second argument: per-attribute FEATURE_* status values.
  my %attr_status = (
    %HTMLAttrStatus,
    class => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    prompt => FEATURE_M12N10_REC_DEPRECATED,
    style => FEATURE_XHTML10_REC,
    title => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  );

  $Element->{$HTML_NS}{isindex} = {
    %HTMLEmptyChecker,
    status => FEATURE_M12N10_REC_DEPRECATED |
        Whatpm::ContentChecker::FEATURE_DEPRECATED_SHOULD, ## [HTML4]
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
    ## TODO: Tests
    ## TODO: Tests for <nest/> in <isindex>
  };
}
3953 wakaba 1.49
## Checker definition for the |script| element.
##
## A |script| element with a |src| attribute has to be empty.  For any
## other |script| element a script type is derived from the |type| and
## |language| attributes and reported at the end of the element with the
## "unsupported" level.
{
  ## First argument of |$GetHTMLAttrsChecker|: attribute value checkers.
  my %attr_checker = (
    src => $HTMLURIAttrChecker,
    defer => $GetHTMLBooleanAttrChecker->('defer'),
    async => $GetHTMLBooleanAttrChecker->('async'),
    type => $HTMLIMTAttrChecker,
  );
  ## Second argument: per-attribute FEATURE_* status values.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    async => FEATURE_HTML5_DEFAULT,
    charset => FEATURE_M12N10_REC,
    defer => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    event => FEATURE_HTML4_REC_RESERVED,
    for => FEATURE_HTML4_REC_RESERVED,
    id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    language => FEATURE_M12N10_REC_DEPRECATED,
    src => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    type => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  );

  ## Sets |must_be_empty| when |src| is present; otherwise stores the
  ## derived script type in |script_type|.
  my $on_start = sub {
    my ($self, $item, $element_state) = @_;
    my $script_el = $item->{node};

    if (not $script_el->has_attribute_ns (undef, 'src')) {
      ## NOTE: No content model conformance in HTML5 spec.
      my $type_attr = $script_el->get_attribute_ns (undef, 'type');
      my $lang_attr = $script_el->get_attribute_ns (undef, 'language');

      ## An empty |type| or an empty |language| selects the default
      ## type.  Otherwise |type| is preferred over |language|, and the
      ## default is used again when neither attribute is specified.
      ## NOTE(review): An empty |language| also overrides a non-empty
      ## |type| here - confirm that this is intended.
      my $has_empty_value = ((defined $type_attr and $type_attr eq '') or
                             (defined $lang_attr and $lang_attr eq ''));
      $element_state->{script_type}
          = $has_empty_value ? 'text/javascript'
          : defined ($type_attr) ? $type_attr
          : defined ($lang_attr) ? 'text/' . $lang_attr
          : 'text/javascript'; ## TODO: $type normalization
    } else {
      $element_state->{must_be_empty} = 1;
    }
  };

  ## Child elements are errors when they are in the "minus" set, or when
  ## the element must be empty and the child is not in the "plus" set.
  my $on_child_element = sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif (not $self->{plus_elements}->{$child_nsuri}->{$child_ln} and
             $element_state->{must_be_empty}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed');
    }
  };

  ## Significant text is an error when the element must be empty.
  my $on_child_text = sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($element_state->{must_be_empty}) {
      if ($has_significant) {
        $self->{onerror}->(node => $child_node,
                           type => 'character not allowed');
      }
    }
  };

  ## Inline scripts are reported as unsupported, then the generic
  ## |check_end| of |%HTMLChecker| runs.  Nothing is done for an element
  ## that must be empty.
  my $on_end = sub {
    my ($self, $item, $element_state) = @_;
    if (not $element_state->{must_be_empty}) {
      $self->{onerror}->(node => $item->{node}, level => 'unsupported',
                         type => 'script:'.$element_state->{script_type});
      ## TODO: text/javascript support

      $HTMLChecker{check_end}->(@_);
    }
  };

  $Element->{$HTML_NS}{script} = {
    %HTMLChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
    check_start => $on_start,
    check_child_element => $on_child_element,
    check_child_text => $on_child_text,
    check_end => $on_end,
  };
}
## ISSUE: Significant check and text child node
4033 wakaba 1.1
## NOTE: When script is disabled.
##
## Checker definition for the |noscript| element.  While the |in_head|
## flag is set the children are checked by the callbacks below; otherwise
## the callbacks delegate to |%HTMLTransparentChecker|.
{
  ## Per-attribute FEATURE_* status values.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  );

  ## Reports |noscript| in a non-HTML document and, when not in |head|,
  ## adds |noscript| itself to the "minus" elements.
  my $on_start = sub {
    my ($self, $item, $element_state) = @_;
    my $noscript_el = $item->{node};

    if (not $noscript_el->owner_document->manakai_is_html) {
      $self->{onerror}->(node => $noscript_el, type => 'in XML:noscript');
    }

    if (not $self->{flag}->{in_head}) {
      $self->_add_minus_elements ($element_state,
                                  {$HTML_NS => {noscript => 1}});
    }
  };

  ## In |head|, only |link|, |style| without |scoped| and |meta| with an
  ## |http-equiv| value other than "content-type" pass without an error
  ## (besides elements in the "plus" set).
  my $on_child_element = sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    return $HTMLTransparentChecker{check_child_element}->(@_)
        unless $self->{flag}->{in_head};

    my $error_type; ## Left undefined when the child is acceptable.
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $error_type = 'element not allowed:minus';
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'link') {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
      if ($child_el->has_attribute_ns (undef, 'scoped')) {
        $error_type = 'element not allowed:head noscript';
      }
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'meta') {
      my $http_equiv_attr
          = $child_el->get_attribute_node_ns (undef, 'http-equiv');
      ## TODO: case
      if (not $http_equiv_attr or
          lc $http_equiv_attr->value eq 'content-type') {
        $error_type = 'element not allowed:head noscript';
      }
    } else {
      $error_type = 'element not allowed:head noscript';
    }

    if (defined $error_type) {
      $self->{onerror}->(node => $child_el,
                         type => $error_type,
                         level => $self->{must_level});
    }
  };

  ## In |head|, significant text is an error.
  my $on_child_text = sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    return $HTMLTransparentChecker{check_child_text}->(@_)
        unless $self->{flag}->{in_head};

    if ($has_significant) {
      $self->{onerror}->(node => $child_node,
                         type => 'character not allowed');
    }
  };

  ## Removes the "minus" elements again and runs the |check_end| of
  ## |%HTMLChecker| (in |head|) or of |%HTMLPhrasingContentChecker|.
  my $on_end = sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    my $base_checker = $self->{flag}->{in_head}
        ? \%HTMLChecker : \%HTMLPhrasingContentChecker;
    $base_checker->{check_end}->(@_);
  };

  $Element->{$HTML_NS}{noscript} = {
    %HTMLTransparentChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
    check_start => $on_start,
    check_child_element => $on_child_element,
    check_child_text => $on_child_text,
    check_end => $on_end,
  };
}
## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
4121 wakaba 1.1
## Checker definition for the |event-source| element.  The content
## callbacks are copied from |%HTMLEmptyChecker|.
{
  ## Attribute value checkers and per-attribute FEATURE_* status values.
  my %attr_checker = (src => $HTMLURIAttrChecker);
  my %attr_status = (
    %HTMLAttrStatus,
    src => FEATURE_HTML5_LC,
  );

  $Element->{$HTML_NS}{'event-source'} = {
    %HTMLEmptyChecker,
    status => FEATURE_HTML5_LC,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
  };
}
4132    
## Checker definition for the |details| element.
##
## The callbacks track two keys in the element state: |has_legend| (a
## |legend| child was seen) and |has_non_legend| (something was seen
## after which a |legend| child is reported as an error).
{
  ## Attribute value checkers and per-attribute FEATURE_* status values.
  my %attr_checker = (open => $GetHTMLBooleanAttrChecker->('open'));
  my %attr_status = (
    %HTMLAttrStatus,
    open => FEATURE_HTML5_WD,
  );

  ## NOTE: legend, Prose
  my $on_child_element = sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      $element_state->{has_non_legend} = 1;
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'legend') {
      ## A |legend| that follows other content (including an earlier
      ## |legend|) is an error.
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:details legend',
                         level => $self->{must_level})
          if $element_state->{has_non_legend};
      $element_state->{has_legend} = 1;
      $element_state->{has_non_legend} = 1;
    } else {
      $HTMLProseContentChecker{check_child_element}->(@_);
      if (not $child_is_transparent) {
        $element_state->{has_non_legend} = 1;
      }
      ## ISSUE: |<details><object><legend>xx</legend></object>..</details>|
      ## is conforming?
    }
  };

  ## Significant text counts as content other than |legend|.
  my $on_child_text = sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $element_state->{has_non_legend} = 1 if $has_significant;
  };

  ## Reports a missing |legend|, then runs the prose content |check_end|.
  my $on_end = sub {
    my ($self, $item, $element_state) = @_;

    if (not $element_state->{has_legend}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'element missing:legend',
                         level => $self->{must_level});
    }

    $HTMLProseContentChecker{check_end}->(@_);
    ## ISSUE: |<details><legend>aa</legend></details>| error?
  };

  $Element->{$HTML_NS}{details} = {
    %HTMLProseContentChecker,
    status => FEATURE_HTML5_WD,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
    check_child_element => $on_child_element,
    check_child_text => $on_child_text,
    check_end => $on_end,
  };
}
4187    
## Checker definition for the |datagrid| element.
##
## The element state key |phase| starts as 'any'.  A |table|, |select| or
## |datalist| child switches it to 'none' (nothing else is accepted);
## prose content or significant text switches it to 'prose'.
{
  ## Attribute value checkers and per-attribute FEATURE_* status values.
  my %attr_checker = (
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    multiple => $GetHTMLBooleanAttrChecker->('multiple'),
  );
  my %attr_status = (
    %HTMLAttrStatus,
    disabled => FEATURE_HTML5_WD,
    multiple => FEATURE_HTML5_WD,
  );

  ## HTML elements that switch the phase from 'any' to 'none'.
  my %sole_child = (table => 1, select => 1, datalist => 1);

  my $on_start = sub {
    my ($self, $item, $element_state) = @_;

    $self->_add_minus_elements ($element_state,
                                {$HTML_NS => {a => 1, datagrid => 1}});
    $element_state->{phase} = 'any';
  };

  ## Prose -(text* table Prose*) | table | select | datalist | Empty
  my $on_child_element = sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    my $not_allowed = sub {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed');
    };

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($element_state->{phase} eq 'prose') {
      ## Rejected: non-prose elements, and a |table| that is the first
      ## element child seen in this phase.
      if (not $HTMLProseContent->{$child_nsuri}->{$child_ln} or
          (not $element_state->{has_element} and
           $child_nsuri eq $HTML_NS and
           $child_ln eq 'table')) {
        $not_allowed->();
      }
      $element_state->{has_element} = 1;
    } elsif ($element_state->{phase} eq 'any') {
      if ($child_nsuri eq $HTML_NS and $sole_child{$child_ln}) {
        $element_state->{phase} = 'none';
      } elsif ($HTMLProseContent->{$child_nsuri}->{$child_ln}) {
        $element_state->{has_element} = 1;
        $element_state->{phase} = 'prose';
        ## TODO: transparent?
      } else {
        $not_allowed->();
      }
    } elsif ($element_state->{phase} eq 'none') {
      $not_allowed->();
    } else {
      die "check_child_element: Bad |datagrid| phase: $element_state->{phase}";
    }
  };

  ## Significant text is accepted in the 'prose' phase, starts that
  ## phase from 'any', and is an error otherwise.
  my $on_child_text = sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      if ($element_state->{phase} eq 'prose') {
        #
      } elsif ($element_state->{phase} eq 'any') {
        $element_state->{phase} = 'prose';
      } else {
        $self->{onerror}->(node => $child_node,
                           type => 'character not allowed');
      }
    }
  };

  ## Removes the "minus" elements again and runs the |check_end| of
  ## |%HTMLChecker| (phase 'none') or |%HTMLPhrasingContentChecker|.
  my $on_end = sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    my $base_checker = $element_state->{phase} eq 'none'
        ? \%HTMLChecker : \%HTMLPhrasingContentChecker;
    $base_checker->{check_end}->(@_);
  };

  $Element->{$HTML_NS}{datagrid} = {
    %HTMLProseContentChecker,
    status => FEATURE_HTML5_WD,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
    check_start => $on_start,
    check_child_element => $on_child_element,
    check_child_text => $on_child_text,
    check_end => $on_end,
    ## ISSUE: "xxx<table/>" is disallowed; "<select/>aaa" and "<datalist/>aa"
    ## are not disallowed (assuming that form control contents are also
    ## prose content).
  };
}
4277    
## Checker definition for the |command| element.  The content callbacks
## are copied from |%HTMLEmptyChecker|.
{
  ## Values accepted for the |type| attribute.
  my %allowed_type = (command => 1, checkbox => 1, radio => 1);

  ## First argument of |$GetHTMLAttrsChecker|: attribute value checkers.
  my %attr_checker = (
    checked => $GetHTMLBooleanAttrChecker->('checked'),
    default => $GetHTMLBooleanAttrChecker->('default'),
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    hidden => $GetHTMLBooleanAttrChecker->('hidden'),
    icon => $HTMLURIAttrChecker,
    label => sub { }, ## NOTE: No conformance criteria
    radiogroup => sub { }, ## NOTE: No conformance criteria
    type => sub {
      my ($self, $attr) = @_;
      my $type_value = $attr->value;
      if (not $allowed_type{$type_value}) {
        $self->{onerror}->(node => $attr, type => 'attribute value not allowed');
      }
    },
  );

  ## Second argument: per-attribute FEATURE_* status values.
  my %attr_status = (
    %HTMLAttrStatus,
    checked => FEATURE_HTML5_WD,
    default => FEATURE_HTML5_WD,
    disabled => FEATURE_HTML5_WD,
    hidden => FEATURE_HTML5_WD,
    icon => FEATURE_HTML5_WD,
    label => FEATURE_HTML5_WD,
    radiogroup => FEATURE_HTML5_WD,
    type => FEATURE_HTML5_WD,
  );

  $Element->{$HTML_NS}{command} = {
    %HTMLEmptyChecker,
    status => FEATURE_HTML5_WD,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
  };
}
4308    
## Checker definition for the |menu| element.
##
## The element state key |phase| starts as 'li or phrasing' and is fixed
## to either 'li' or 'phrasing' by the first |li| child, phrasing
## element child or significant text; content of the other kind is then
## reported as an error.
{
  ## Returns true if content of kind |$wanted| ('li' or 'phrasing') is
  ## acceptable, fixing the still undecided phase to |$wanted| if needed.
  my $enter_phase = sub {
    my ($element_state, $wanted) = @_;
    if ($element_state->{phase} eq $wanted) {
      return 1;
    } elsif ($element_state->{phase} eq 'li or phrasing') {
      $element_state->{phase} = $wanted;
      return 1;
    }
    return 0;
  };

  ## NOTE: same as global |id=""|, with |$self->{menu}| registration
  my $check_id = sub {
    my ($self, $attr) = @_;
    my $id = $attr->value;
    if (length ($id) > 0) {
      unless ($self->{id}->{$id}) {
        $self->{id}->{$id} = [$attr];
      } else {
        $self->{onerror}->(node => $attr, type => 'duplicate ID');
        push @{$self->{id}->{$id}}, $attr;
      }
    } else {
      ## NOTE: MUST contain at least one character
      $self->{onerror}->(node => $attr, type => 'empty attribute value');
    }
    if ($id =~ /[\x09-\x0D\x20]/) {
      $self->{onerror}->(node => $attr, type => 'space in ID');
    }
    ## The first |menu| with a given ID is the one that is registered.
    $self->{menu}->{$id} ||= $attr;
    ## ISSUE: <menu id=""><p contextmenu=""> match?
  };

  ## First argument of |$GetHTMLAttrsChecker|: attribute value checkers.
  my %attr_checker = (
    autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
    id => $check_id,
    label => sub { }, ## NOTE: No conformance criteria
    type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
  );
  ## Second argument: per-attribute FEATURE_* status values.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    autosubmit => FEATURE_HTML5_WD,
    compat => FEATURE_M12N10_REC_DEPRECATED,
    label => FEATURE_HTML5_WD,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    type => FEATURE_HTML5_WD,
  );

  ## Initializes the phase and sets the |in_menu| flag, remembering its
  ## previous value in the element state.
  my $on_start = sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'li or phrasing';
    $element_state->{in_menu_original} = $self->{flag}->{in_menu};
    $self->{flag}->{in_menu} = 1;
  };

  my $on_child_element = sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'li') {
      unless ($enter_phase->($element_state, 'li')) {
        $self->{onerror}->(node => $child_el, type => 'element not allowed');
      }
    } elsif ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
      unless ($enter_phase->($element_state, 'phrasing')) {
        $self->{onerror}->(node => $child_el, type => 'element not allowed');
      }
    } else {
      $self->{onerror}->(node => $child_el, type => 'element not allowed');
    }
  };

  ## Significant text is handled as phrasing content.
  my $on_child_text = sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      unless ($enter_phase->($element_state, 'phrasing')) {
        $self->{onerror}->(node => $child_node,
                           type => 'character not allowed');
      }
    }
  };

  ## Drops the |in_menu| flag unless it was already set before this
  ## element, then runs the |check_end| that matches the final phase.
  my $on_end = sub {
    my ($self, $item, $element_state) = @_;
    delete $self->{flag}->{in_menu} unless $element_state->{in_menu_original};

    my $base_checker = $element_state->{phase} eq 'li'
        ? \%HTMLChecker
        : \%HTMLPhrasingContentChecker; # 'phrasing' or 'li or phrasing'
    $base_checker->{check_end}->(@_);
  };

  $Element->{$HTML_NS}{menu} = {
    %HTMLPhrasingContentChecker,
    #status => FEATURE_M12N10_REC_DEPRECATED | FEATURE_HTML5_WD,
    status => FEATURE_M12N10_REC | FEATURE_HTML5_WD,
        ## NOTE: We don't want any |menu| element warned as deprecated.
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
    check_start => $on_start,
    check_child_element => $on_child_element,
    check_child_text => $on_child_text,
    check_end => $on_end,
  };
}
4407    
## Checker definition for the |datatemplate| element.  Apart from
## elements in the "plus" set, only HTML |rule| children are accepted,
## and significant text is an error.
{
  my $on_child_element = sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif (not ($child_nsuri eq $HTML_NS and $child_ln eq 'rule')) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:datatemplate');
    }
  };

  my $on_child_text = sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node, type => 'character not allowed')
        if $has_significant;
  };

  $Element->{$HTML_NS}{datatemplate} = {
    %HTMLChecker,
    status => FEATURE_HTML5_AT_RISK,
    check_child_element => $on_child_element,
    check_child_text => $on_child_text,
    is_xml_root => 1,
  };
}
4435    
## Checker definition for the |rule| element.  Its children and text are
## not checked at all; |nest| is added to the "plus" elements while the
## element is being checked.
{
  ## Attribute value checkers and per-attribute FEATURE_* status values.
  my %attr_checker = (
    condition => $HTMLSelectorsAttrChecker,
    mode => $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker,
  );
  my %attr_status = (
    %HTMLAttrStatus,
    condition => FEATURE_HTML5_AT_RISK,
    mode => FEATURE_HTML5_AT_RISK,
  );

  my $on_start = sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_plus_elements ($element_state, {$HTML_NS => {nest => 1}});
  };

  my $on_end = sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_plus_elements ($element_state);
    $HTMLChecker{check_end}->(@_);
  };

  $Element->{$HTML_NS}{rule} = {
    %HTMLChecker,
    status => FEATURE_HTML5_AT_RISK,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
    check_start => $on_start,
    check_child_element => sub { },
    check_child_text => sub { },
    check_end => $on_end,
    ## NOTE: "MAY be anything that, when the parent |datatemplate|
    ## is applied to some conforming data, results in a conforming DOM tree.":
    ## We don't check against this.
  };
}
4462    
## Checker definition for the |nest| element.  The content callbacks are
## copied from |%HTMLEmptyChecker|.
{
  ## |mode| has to be one or more characters, none of which is in the
  ## range U+0009..U+000D or U+0020.
  my $check_mode = sub {
    my ($self, $attr) = @_;
    my $mode = $attr->value;
    unless ($mode =~ /\A[^\x09-\x0D\x20]+\z/) {
      $self->{onerror}->(node => $attr, type => 'mode:syntax error');
    }
  };

  ## Attribute value checkers and per-attribute FEATURE_* status values.
  my %attr_checker = (
    filter => $HTMLSelectorsAttrChecker,
    mode => $check_mode,
  );
  my %attr_status = (
    %HTMLAttrStatus,
    filter => FEATURE_HTML5_AT_RISK,
    mode => FEATURE_HTML5_AT_RISK,
  );

  $Element->{$HTML_NS}{nest} = {
    %HTMLEmptyChecker,
    status => FEATURE_HTML5_AT_RISK,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
  };
}
4481    
## Checker definition for the |legend| element.  The content callbacks
## are copied from |%HTMLPhrasingContentChecker|.
{
  ## No attribute value checker is enabled; the entries below are kept
  ## commented out.
  my %attr_checker = (
    # accesskey => $AttrCheckerNotImplemented, ## TODO: Character ## TODO: This attribute is not part of HTML5
    # align => $GetHTMLEnumeratedAttrChecker->({
    #   top => 1, bottom => 1, left => 1, right => 1,
    # }),
  );
  ## Per-attribute FEATURE_* status values.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accesskey => FEATURE_M12N10_REC,
    align => FEATURE_M12N10_REC_DEPRECATED,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  );

  $Element->{$HTML_NS}{legend} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
  };
}
4498    
## Checker definition for the |div| element.  The content callbacks are
## copied from |%HTMLProseContentChecker|; no attribute value checker is
## specific to this element.
{
  ## Per-attribute FEATURE_* status values.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_M12N10_REC_DEPRECATED,
    datafld => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datasrc => FEATURE_HTML4_REC_RESERVED,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  );

  $Element->{$HTML_NS}{div} = {
    %HTMLProseContentChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
}
4512    
## Checker definition for the |font| element.  The content callbacks are
## copied from |%HTMLTransparentChecker|.
{
  ## No attribute value checker is implemented yet.
  my %attr_checker = ( ## TODO
  );
  ## Per-attribute FEATURE_* status values.
  my %attr_status = (
    %HTMLAttrStatus,
    class => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    color => FEATURE_M12N10_REC_DEPRECATED,
    dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    face => FEATURE_M12N10_REC_DEPRECATED,
    id => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    size => FEATURE_M12N10_REC_DEPRECATED,
    style => FEATURE_HTML5_AT_RISK | FEATURE_XHTML10_REC,
    title => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  );

  $Element->{$HTML_NS}{font} = {
    %HTMLTransparentChecker,
    status => FEATURE_HTML5_AT_RISK | FEATURE_M12N10_REC_DEPRECATED,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
  };
}
4530 wakaba 1.49
4531     ## TODO: frameset FEATURE_M12N10_REC
4532     ## class title id cols rows onload onunload style(x10)
4533     ## frame frameborder longdesc marginheight marginwidth noresize scrolling src name(deprecated) class,id,title,style(x10)
4534     ## noframes Common, lang(xhtml10)
4535    
4536     ## TODO: deprecated:
4537     ## basefont color face id size
4538     ## center Common lang(xhtml10)
4539     ## dir Common compat lang(xhtml10)
4540    
4541     ## TODO: CR: ruby rb rt rp rbc rtc @rbspan
4542 wakaba 1.56
4543     =pod
4544    
4545     WF2: Documents MUST comply to [CHARMOD].
4546     WF2: Vendor extensions MUST NOT be used.
4547    
4548     =cut
4549 wakaba 1.1
## Set the |loaded| flag of the HTML namespace entry.
$Whatpm::ContentChecker::Namespace->{$HTML_NS}{loaded} = 1;

1; ## The file has to end with a true value.

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24