/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.67 - (hide annotations) (download)
Sat Mar 22 05:45:37 2008 UTC (16 years, 7 months ago) by wakaba
Branch: MAIN
Changes since 1.66: +25 -7 lines
++ whatpm/t/ChangeLog	22 Mar 2008 05:45:31 -0000
	* content-model-2.dat: Test data on |@profile| and |@version| are
	added.

2008-03-22  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	22 Mar 2008 05:43:37 -0000
	* RDFXML.pm: Typo fixed.

2008-03-22  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ContentChecker/ChangeLog	22 Mar 2008 05:45:17 -0000
	* HTML.pm: Typo fixed.  html/@version implemented (as do-nothing
	checker). head/@profile implemented.  meta/@scheme implemented (as
	do-nothing checker).

2008-03-22  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
6    
## Status of the HTML5 |role| attribute proposal: working-draft status
## only (the FEATURE_ALLOWED flag is not set).
## TODO: svg:*/@role
sub FEATURE_HTML5_ROLE () { Whatpm::ContentChecker::FEATURE_STATUS_WD }

11    
## Status constants for features taken from the HTML5 draft.  Each one
## is a bitwise OR of flags defined by Whatpm::ContentChecker.

## Last Call level, allowed.
sub FEATURE_HTML5_LC () {
  (Whatpm::ContentChecker::FEATURE_STATUS_LC
   | Whatpm::ContentChecker::FEATURE_ALLOWED)
}

## Marked "at risk": working-draft level, allowed.
sub FEATURE_HTML5_AT_RISK () {
  (Whatpm::ContentChecker::FEATURE_STATUS_WD
   | Whatpm::ContentChecker::FEATURE_ALLOWED)
}

## Working-draft level, allowed.
sub FEATURE_HTML5_WD () {
  (Whatpm::ContentChecker::FEATURE_STATUS_WD
   | Whatpm::ContentChecker::FEATURE_ALLOWED)
}

## "FD": currently the same flags as the working-draft level, allowed.
sub FEATURE_HTML5_FD () {
  (Whatpm::ContentChecker::FEATURE_STATUS_WD
   | Whatpm::ContentChecker::FEATURE_ALLOWED)
}

## Default for HTML5 features: working-draft level, allowed.
sub FEATURE_HTML5_DEFAULT () {
  (Whatpm::ContentChecker::FEATURE_STATUS_WD
   | Whatpm::ContentChecker::FEATURE_ALLOWED)
}

## NOTE: Was part of HTML5, but was dropped.  Working-draft status
## without the FEATURE_ALLOWED flag.
sub FEATURE_HTML5_DROPPED () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD
}
## Web Forms 2.0 feature: Last Call level, allowed.
sub FEATURE_WF2 () {
  (Whatpm::ContentChecker::FEATURE_STATUS_LC
   | Whatpm::ContentChecker::FEATURE_ALLOWED)
}

## Web Forms 2.0 feature that is deprecated: Last Call level, but
## without the FEATURE_ALLOWED flag.
## NOTE: MUST NOT be used.
sub FEATURE_WF2_DEPRECATED () {
  Whatpm::ContentChecker::FEATURE_STATUS_LC
}
44 wakaba 1.49
## NOTE: Metainformation Attributes Module by W3C XHTML2 WG.
## Last Call status only.
sub FEATURE_RDFA_LC () { Whatpm::ContentChecker::FEATURE_STATUS_LC }
49 wakaba 1.58
50     ## NOTE: XHTML Role LCWD has almost no information on how the |role|
51     ## attribute can be used- the only requirements for that matter is:
52     ## "the attribute MUST be referenced using its namespace-qualified form" (and
53     ## this is a host language conformance!).
54    
## XHTML Basic 1.1 feature: Candidate Recommendation status.
## NOTE: Only additions to M12N10_REC are marked.
sub FEATURE_XHTMLBASIC11_CR () {
  Whatpm::ContentChecker::FEATURE_STATUS_CR
}

## XHTML Basic 1.1 feature that is deprecated: CR status combined with
## the FEATURE_DEPRECATED_INFO flag.
sub FEATURE_XHTMLBASIC11_CR_DEPRECATED () {
  (Whatpm::ContentChecker::FEATURE_STATUS_CR
   | Whatpm::ContentChecker::FEATURE_DEPRECATED_INFO)
}
63    
## NOTE: M12N10 status is based on its abstract module definition.
## It contains a number of problems.  (However, again, it's a REC!)

## XHTML Modularization 1.0 feature: Recommendation status.
## NOTE: XHTML m12n 1.0 did pass the CR phase, hence the REC status.
sub FEATURE_M12N10_REC () {
  Whatpm::ContentChecker::FEATURE_STATUS_REC
}

## XHTML Modularization 1.0 feature that is deprecated: REC status
## combined with the FEATURE_DEPRECATED_INFO flag.
sub FEATURE_M12N10_REC_DEPRECATED () {
  (Whatpm::ContentChecker::FEATURE_STATUS_REC
   | Whatpm::ContentChecker::FEATURE_DEPRECATED_INFO)
}
74 wakaba 1.58 ## NOTE: XHTML M12N 1.1 is a LC at the time of writing and no
75     ## addition from 1.0.
76 wakaba 1.49
## NOTE: XHTML10 status is based on its transitional and frameset DTDs
## (second edition).  Only missing attributes from M12N10 abstract
## definition are added.
## NOTE(review): despite the "_REC" name this yields FEATURE_STATUS_CR;
## presumably intentional - confirm before changing.
sub FEATURE_XHTML10_REC () { Whatpm::ContentChecker::FEATURE_STATUS_CR }
83    
## ISO-HTML "preparation" features (informative documentation), treated
## as CR status.
## NOTE: Diff from HTML4.
sub FEATURE_ISOHTML_PREPARATION () {
  Whatpm::ContentChecker::FEATURE_STATUS_CR
}

## NOTE: HTML4 status is based on its transitional and frameset DTDs (HTML
## 4.01).  Only missing attributes from XHTML10 are added.

## Reserved HTML4 feature: working-draft status only.
sub FEATURE_HTML4_REC_RESERVED () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD
}
94    
95     ## TODO: According to HTML4 definition, authors SHOULD use style sheets
96     ## rather than presentational attributes (deprecated or not deprecated).
97 wakaba 1.48
## Status constants for features that only appear in older HTML
## specifications.  All of them use the CR status.

## HTML 3.2 obsolete feature: CR status plus FEATURE_DEPRECATED_SHOULD.
## NOTE: Diff from HTML4.
## NOTE: Lowercase normative "should".
sub FEATURE_HTML32_REC_OBSOLETE () {
  (Whatpm::ContentChecker::FEATURE_STATUS_CR
   | Whatpm::ContentChecker::FEATURE_DEPRECATED_SHOULD)
}

## RFC 2659 (Experimental RFC).
sub FEATURE_RFC2659 () { Whatpm::ContentChecker::FEATURE_STATUS_CR }

## HTML 2.x (Proposed Standard, obsolete).
## NOTE: HTML 2.x - diff from HTML 2.0 and not in newer versions.
sub FEATURE_HTML2X_RFC () { Whatpm::ContentChecker::FEATURE_STATUS_CR }

## RFC 1942 (Experimental RFC, obsolete).
## NOTE: Diff from HTML 2.0.
sub FEATURE_RFC1942 () { Whatpm::ContentChecker::FEATURE_STATUS_CR }

## HTML 2.0 (Proposed Standard, obsolete).
## NOTE: Diff from HTML 3.2.
sub FEATURE_HTML20_RFC () { Whatpm::ContentChecker::FEATURE_STATUS_CR }
123 wakaba 1.58
124 wakaba 1.29 ## December 2007 HTML5 Classification
125    
## Metadata content: namespace URI => {local name => 1}.
## NOTE: A |meta| with no |name| element is not allowed as
## a metadata content other than |head| element.
## NOTE: |form| is metadata content only when empty [WF2].
my $HTMLMetadataContent = {
  $HTML_NS => {
    map { ($_ => 1) } qw/
      title base link style script noscript
      event-source command datatemplate
      meta
      form
    /,
  },
  ## NOTE: RDF is mentioned in the HTML5 spec.
  ## TODO: Other RDF elements?
  q<http://www.w3.org/1999/02/22-rdf-syntax-ns#> => {RDF => 1},
};
140    
## Prose content: namespace URI => {local name => 1}.
##
## ISSUE: |details| and |datagrid| are "Prose element" in spec.
## ISSUE: |div| has no category in spec.
## ISSUE: |datagrid| is also "Interactive element" in the spec.
## NOTE: |style| is only allowed if |scoped| attribute is specified.
## Additionally, it must be before any other element or
## non-inter-element-whitespace text node.
## NOTE: |area| is allowed only as a descendant of |map|.
## NOTE: |menu|: If there is a |menu| ancestor, phrasing.  Otherwise, prose.
## NOTE(review): the phrasing content table in this file also lists
## |input| and |datalist| (WF2), which are absent here - confirm whether
## that difference is intended.
my $HTMLProseContent = {
  $HTML_NS => {
    map { ($_ => 1) } qw/
      section nav article blockquote aside
      h1 h2 h3 h4 h5 h6 header
      footer address p hr dialog pre
      ol ul dl figure map table
      details datagrid datatemplate div style

      br q cite em strong small mark
      dfn abbr time progress meter code
      var samp kbd sub sup span i
      b bdo script noscript event-source
      command font a area

      ins del

      menu

      img iframe embed object video audio canvas
    /,
  },

  ## NOTE: Embedded
  q<http://www.w3.org/1998/Math/MathML> => {math => 1},
  q<http://www.w3.org/2000/svg> => {svg => 1},
};
179    
## Sectioning content: namespace URI => {local name => 1}.
## NOTE: |body| is only allowed in |html| element.
my $HTMLSectioningContent = {
  $HTML_NS => {map { ($_ => 1) } qw/section nav article aside body/},
};

## Sectioning roots.
my $HTMLSectioningRoot = {
  $HTML_NS => {map { ($_ => 1) } qw/blockquote datagrid figure td/},
};

## Heading content.
my $HTMLHeadingContent = {
  $HTML_NS => {map { ($_ => 1) } qw/h1 h2 h3 h4 h5 h6 header/},
};
199    
## Phrasing content: namespace URI => {local name => 1}.
##
## NOTE: All phrasing content is also prose content.
## ISSUE: |datagrid| is "Interactive element" in the spec.
## NOTE: |area| is allowed only as a descendant of |map|.
## NOTE: |ins| and |del| are transparent.
## NOTE: |menu|: If there is a |menu| ancestor, phrasing.  Otherwise, prose.
## NOTE: WF2: |input| (type=hidden), |datalist| (block | where |select|
## allowed).
## NOTE: And non-inter-element-whitespace text nodes.
my $HTMLPhrasingContent = {
  $HTML_NS => {
    map { ($_ => 1) } qw/
      br q cite em strong small mark
      dfn abbr time progress meter code
      var samp kbd sub sup span i
      b bdo script noscript event-source
      command font a datagrid area

      ins del

      menu

      img iframe embed object video audio canvas

      input datalist
    /,
  },

  ## NOTE: Embedded
  q<http://www.w3.org/1998/Math/MathML> => {math => 1},
  q<http://www.w3.org/2000/svg> => {svg => 1},
};
233    
234 wakaba 1.40 ## $HTMLEmbeddedContent: See Whatpm::ContentChecker.
235 wakaba 1.29
## Interactive content: namespace URI => {local name => 1}.
## ISSUE: |datagrid| is categorized as "Interactive element".
my $HTMLInteractiveContent = {
  $HTML_NS => {map { ($_ => 1) } qw/a datagrid/},
};
242    
243 wakaba 1.36 ## NOTE: $HTMLTransparentElements: See Whatpm::ContentChecker.
244     ## NOTE: Semi-transparent elements: See Whatpm::ContentChecker.
245    
246     ## -- Common attribute syntax checkers
247    
248 wakaba 1.1 our $AttrChecker;
249    
## Builds a checker for an enumerated attribute.
##
## The argument is a hash reference mapping each known (lowercase)
## keyword to a positive number if it is conforming, or to a negative
## number if it is recognized but non-conforming.  The returned checker
## takes ($self, $attr), lowercases the attribute value and reports
## |enumerated:non-conforming| or |enumerated:invalid| via
## |$self->{onerror}| when the keyword is not a conforming one.
my $GetHTMLEnumeratedAttrChecker = sub {
  my ($states) = @_; # {value => conforming ? 1 : -1}
  return sub {
    my ($self, $attr) = @_;
    my $keyword = lc $attr->value; ## TODO: ASCII case insensitivity?
    my $state = $states->{$keyword};
    unless ($state > 0) {
      ## A true (negative) state is a known keyword that is not allowed;
      ## a false state is an unknown keyword.
      my $error_type = $state ? 'enumerated:non-conforming'
                              : 'enumerated:invalid';
      $self->{onerror}->(node => $attr, type => $error_type);
    }
  };
}; # $GetHTMLEnumeratedAttrChecker
264    
## Builds a checker for a boolean attribute.
##
## The argument is the attribute's local name.  The returned checker
## takes ($self, $attr) and reports |boolean:invalid| via
## |$self->{onerror}| unless the value is the empty string or exactly
## the local name (case-sensitive comparison).
my $GetHTMLBooleanAttrChecker = sub {
  my ($local_name) = @_;
  return sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    if ($value ne $local_name and $value ne '') {
      $self->{onerror}->(node => $attr, type => 'boolean:invalid');
    }
  };
}; # $GetHTMLBooleanAttrChecker
275    
## Unordered set of unique space-separated tokens.
##
## Takes ($self, $attr).  The value is split on each space character
## (U+0009..U+000D, U+0020); empty pieces are ignored.  Every repeated
## occurrence of a token is reported as |duplicate token:TOKEN| via
## |$self->{onerror}|.
my $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker = sub {
  my ($self, $attr) = @_;
  my %seen;
  for my $token (split /[\x09-\x0D\x20]/, $attr->value) {
    next unless length $token;
    if ($seen{$token}) {
      $self->{onerror}->(node => $attr, type => 'duplicate token:'.$token);
    } else {
      $seen{$token} = 1;
    }
  }
}; # $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker
288 wakaba 1.8
## |rel| attribute (unordered set of space separated tokens,
## whose allowed values are defined by the section on link types)
##
## Arguments: ($a_or_area, $todo, $self, $attr, $item, $element_state).
##   $a_or_area - used as the index into each link type's |effect|
##                array and as a flag: when true the |href| is always
##                treated as a hyperlink.  (Presumably 0 for |link| and
##                1 for |a|/|area| - confirm against callers.)
##   $todo      - hash; |has_hyperlink_link_type| is set if any
##                hyperlink link type was found.
##   $element_state - hash; |uri_info->{href}->{type}| is annotated.
##   $item      - not used here.
## Errors are reported via |$self->{onerror}|; link types with the
## |unique| flag are tracked in |$self->{has_link_type}|.
my $HTMLLinkTypesAttrChecker = sub {
  my ($a_or_area, $todo, $self, $attr, $item, $element_state) = @_;

  ## Collect the distinct tokens.  A repeated |up| is not an error
  ## (<link rel="up up index"> is allowed); any other repetition is.
  my %word;
  for my $token (split /[\x09-\x0D\x20]/, $attr->value) {
    next unless length $token;
    if ($word{$token}) {
      $self->{onerror}->(node => $attr, type => 'duplicate token:'.$token)
          unless $token eq 'up';
    } else {
      $word{$token} = 1;
    }
  }
  ## NOTE: Case sensitive match (since HTML5 spec does not say link
  ## types are case-insensitive and it says "The value should not
  ## be confusingly similar to any other defined value (e.g.
  ## differing only in case).").
  ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
  ## "MAY"s and "only ... MAY" restrict non-standard non-registered
  ## values to be used conformingly.

  my $is_hyperlink;
  my $is_resource;
  require Whatpm::_LinkTypeList;
  our $LinkType;
  for my $link_type (keys %word) {
    my $def = $LinkType->{$link_type};
    unless (defined $def) {
      $self->{onerror}->(node => $attr, level => 'unsupported',
                         type => 'link type:'.$link_type);
      next;
    }

    ## Effect of this link type in the current context, if any.
    my $effect = $def->{effect}->[$a_or_area];
    my $status = $def->{status};

    if ($status eq 'proposal') {
      $self->{onerror}->(node => $attr, level => 's',
                         type => 'link type:proposed:'.$link_type);
    }
    if ($status eq 'accepted' or $status eq 'proposal') {
      unless (defined $effect) {
        $self->{onerror}->(node => $attr,
                           type => 'link type:bad context:'.$link_type);
      }
    } else { # rejected or synonym
      $self->{onerror}->(node => $attr,
                         type => 'link type:non-conforming:'.$link_type);
    }

    if ($def->{unique}) {
      if ($self->{has_link_type}->{$link_type}) {
        $self->{onerror}->(node => $attr,
                           type => 'link type:duplicate:'.$link_type);
      } else {
        $self->{has_link_type}->{$link_type} = 1;
      }
    }

    ## |alternate| is decided after the loop, since its effect depends
    ## on whether |stylesheet| is also specified.
    if (defined $effect and $link_type ne 'alternate') {
      if ($effect eq 'hyperlink') {
        $is_hyperlink = 1;
      } elsif ($effect eq 'external resource') {
        $is_resource = 1;
      }
    }
  }
  if ($word{alternate} and not $word{stylesheet}) {
    $is_hyperlink = 1;
  }
  ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
  ## says that using both X-Pingback: header field and HTML
  ## <link rel=pingback> is deprecated and if both appears they
  ## SHOULD contain exactly the same value.
  ## ISSUE: Pingback 1.0 specification defines the exact representation
  ## of its link element, which cannot be tested by the current arch.
  ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
  ## include any string that matches to the pattern for the rel=pingback link,
  ## which again is impossible to test.
  ## ISSUE: rel=pingback href MUST NOT include entities other than predefined 4.

  ## NOTE: <link rel="up index"><link rel="up up index"> is not an error.
  ## NOTE: We can't check "If the page is part of multiple hierarchies,
  ## then they SHOULD be described in different paragraphs.".

  if ($is_hyperlink) {
    $todo->{has_hyperlink_link_type} = 1;
  }
  if ($is_hyperlink or $a_or_area) {
    $element_state->{uri_info}->{href}->{type}->{hyperlink} = 1;
  }
  if ($is_resource and not $a_or_area) {
    $element_state->{uri_info}->{href}->{type}->{resource} = 1;
  }
}; # $HTMLLinkTypesAttrChecker
387 wakaba 1.20
388     ## TODO: "When an author uses a new type not defined by either this specification or the Wiki page, conformance checkers should offer to add the value to the Wiki, with the details described above, with the "proposal" status."
389 wakaba 1.1
## URI (or IRI)
##
## Takes ($self, $attr, $item, $element_state); $item is not used.
## The value is validated by Whatpm::URIChecker->check_iri_reference,
## whose diagnostics are forwarded to |$self->{onerror}| with the type
## |URI::TYPE| (plus |:POSITION| when a position is given).  The
## attribute's entry in |$element_state->{uri_info}| gets its |node|
## set and is appended to |$self->{return}->{uri}->{VALUE}|.
my $HTMLURIAttrChecker = sub {
  my ($self, $attr, $item, $element_state) = @_;
  ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
  my $value = $attr->value;
  my $report = sub {
    my %opt = @_;
    my $position = defined $opt{position} ? ':'.$opt{position} : '';
    $self->{onerror}->(node => $attr, level => $opt{level},
                       type => 'URI::'.$opt{type}.$position);
  };
  Whatpm::URIChecker->check_iri_reference ($value, $report);
  $self->{has_uri_attr} = 1; ## TODO: <html manifest>

  ## The same hash is shared between the element state and the list of
  ## URIs returned to the caller.
  my $uri_info = ($element_state->{uri_info}->{$attr->name} ||= {});
  $uri_info->{node} = $attr;
  ## TODO: absolute
  push @{$self->{return}->{uri}->{$value} ||= []}, $uri_info;
}; # $HTMLURIAttrChecker
409    
## A space separated list of one or more URIs (or IRIs)
##
## Takes ($self, $attr).  The value is split on runs of space
## characters and every piece is validated by
## Whatpm::URIChecker->check_iri_reference.  Diagnostics are forwarded
## to |$self->{onerror}| with the type |URIs::TYPE:INDEX| (plus
## |:POSITION| when given), INDEX being the zero-based item index.
## Every piece is also recorded in |$self->{return}->{uri}| together
## with a usage type chosen from the attribute name.
my $HTMLSpaceURIsAttrChecker = sub {
  my ($self, $attr) = @_;

  ## How the URIs of each supported attribute are used.
  my %usage_of = (
    ping => 'action',
    profile => 'namespace',
    archive => 'resource',
  );
  my $type = $usage_of{$attr->name};

  my $i = 0;
  for my $value (split /[\x09-\x0D\x20]+/, $attr->value) {
    my $report = sub {
      my %opt = @_;
      my $suffix = defined $opt{position} ? ':'.$opt{position} : '';
      $self->{onerror}->(node => $attr, level => $opt{level},
                         type => 'URIs:'.':'.
                             $opt{type}.':'.$i.
                             $suffix);
    };
    Whatpm::URIChecker->check_iri_reference ($value, $report);

    ## TODO: absolute
    push @{$self->{return}->{uri}->{$value} ||= []},
        {node => $attr, type => {$type => 1}};

    $i++;
  }
  ## ISSUE: Relative references? (especially, in profile="")
  ## ISSUE: Leading or trailing white spaces are conformant?
  ## ISSUE: A sequence of white space characters are conformant?
  ## ISSUE: A zero-length string is conformant? (It does contain a relative reference, i.e. same as base URI.)
  ## NOTE: Duplication seems not an error.
  $self->{has_uri_attr} = 1;
}; # $HTMLSpaceURIsAttrChecker
441    
## Checks a date-and-time attribute value of the form
## YYYY-MM-DD, a separator (space characters and/or "T"), hh:mm with
## optional :ss and optional .fraction, then "Z" or a +hh:mm / -hh:mm
## time zone offset.
##
## Takes ($self, $attr).  Reports |datetime:syntax error| when the
## value does not match that form; otherwise reports out-of-range
## fields (|datetime:bad month|, |bad day|, |bad hour|, |bad minute|,
## |bad second|, |bad timezone hour|, |bad timezone minute|) via
## |$self->{onerror}|.  The fraction part is not range-checked.
my $HTMLDatetimeAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;
  ## ISSUE: "space", not "space character" (in parsing algorithm, "space character")
  if ($value =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})(?>[\x09-\x0D\x20]+(?>T[\x09-\x0D\x20]*)?|T[\x09-\x0D\x20]*)([0-9]{2}):([0-9]{2})(?>:([0-9]{2}))?(?>\.([0-9]+))?[\x09-\x0D\x20]*(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
    ## $7 (fraction of second) is syntactically checked only.
    my ($y, $M, $d, $h, $m, $s, $zh, $zm)
        = ($1, $2, $3, $4, $5, $6, $8, $9);
    if (0 < $M and $M < 13) { ## ISSUE: This is not explicitly specified (though in parsing algorithm)
      ## Index 0 is a placeholder so that the month number can be used
      ## directly; February is 29 here and narrowed by the leap test.
      my @days_in_month = (0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31);
      my $is_leap_year
          = ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
      if ($d < 1 or $d > $days_in_month[$M]) {
        $self->{onerror}->(node => $attr, type => 'datetime:bad day');
      } elsif ($M == 2 and $d == 29 and not $is_leap_year) {
        $self->{onerror}->(node => $attr, type => 'datetime:bad day');
      }
    } else {
      $self->{onerror}->(node => $attr, type => 'datetime:bad month');
    }
    $self->{onerror}->(node => $attr, type => 'datetime:bad hour') if $h > 23;
    $self->{onerror}->(node => $attr, type => 'datetime:bad minute') if $m > 59;
    $self->{onerror}->(node => $attr, type => 'datetime:bad second')
        if defined $s and $s > 59;
    ## The offset captures are undefined when the zone is "Z"; only
    ## range-check them when an explicit offset was given.
    if (defined $zh) {
      $self->{onerror}->(node => $attr, type => 'datetime:bad timezone hour')
          if $zh > 23;
      $self->{onerror}->(node => $attr, type => 'datetime:bad timezone minute')
          if $zm > 59;
    }
    ## ISSUE: Maybe timezone -00:00 should have same semantics as in RFC 3339.
  } else {
    $self->{onerror}->(node => $attr, type => 'datetime:syntax error');
  }
}; # $HTMLDatetimeAttrChecker
472    
## Checks that an attribute value is an integer: one or more ASCII
## digits, optionally preceded by a single "-".
##
## Takes ($self, $attr) and reports |integer:syntax error| via
## |$self->{onerror}| otherwise.
my $HTMLIntegerAttrChecker = sub {
  my ($self, $attr) = @_;
  if ($attr->value !~ /\A-?[0-9]+\z/) {
    $self->{onerror}->(node => $attr, type => 'integer:syntax error');
  }
}; # $HTMLIntegerAttrChecker
480    
## Builds a checker for a non-negative integer attribute.
##
## The argument is a code reference that receives the numeric value and
## returns true if it is within the allowed range.  The returned
## checker takes ($self, $attr) and reports |nninteger:syntax error|
## when the value is not a run of ASCII digits, or
## |nninteger:out of range| when the range check fails.
my $GetHTMLNonNegativeIntegerAttrChecker = sub {
  my ($range_check) = @_;
  return sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    my $error_type;
    if ($value !~ /\A[0-9]+\z/) {
      $error_type = 'nninteger:syntax error';
    } elsif (not $range_check->($value + 0)) {
      $error_type = 'nninteger:out of range';
    }
    if (defined $error_type) {
      $self->{onerror}->(node => $attr, type => $error_type);
    }
  };
}; # $GetHTMLNonNegativeIntegerAttrChecker
496    
## Builds a checker for a floating point number attribute.
##
## The argument is a code reference that receives the numeric value and
## returns true if it is within the allowed range.  The returned
## checker takes ($self, $attr).  A value is syntactically valid when
## it consists of one or more ASCII digits with at most one "." placed
## before, between or after them, optionally preceded by "-" (e.g.
## "1", "1.", ".5", "-1.5").  Anything else - including values with
## more than one ".", such as "1.2.3" - is reported as
## |float:syntax error|; a valid value failing the range check is
## reported as |float:out of range|.
my $GetHTMLFloatingPointNumberAttrChecker = sub {
  my $range_check = shift;
  return sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    ## Either digits with an optional "." and optional further digits,
    ## or a leading "." followed by at least one digit.
    if ($value =~ /\A-?(?:[0-9]+\.?[0-9]*|\.[0-9]+)\z/) {
      unless ($range_check->($value + 0)) {
        $self->{onerror}->(node => $attr, type => 'float:out of range');
      }
    } else {
      $self->{onerror}->(node => $attr,
                         type => 'float:syntax error');
    }
  };
}; # $GetHTMLFloatingPointNumberAttrChecker
512    
513     ## "A valid MIME type, optionally with parameters. [RFC 2046]"
514     ## ISSUE: RFC 2046 does not define syntax of media types.
515     ## ISSUE: The definition of "a valid MIME type" is unknown.
516     ## Syntactical correctness?
## Checks an attribute whose value is an Internet media type with
## optional parameters (type "/" subtype *(";" name "=" value)).
##
## Takes ($self, $attr).  When the value matches the syntax, the type,
## the subtype and the parameter name/value pairs are passed as a flat
## list to Whatpm::IMTChecker->check_imt, whose diagnostics are
## forwarded to |$self->{onerror}| with the type |IMT:TYPE|.  For a
## parameter written as a quoted-string, the surrounding quotation
## marks are removed and backslash escapes are resolved before the
## value is passed on.  Otherwise |IMT:syntax error| is reported.
my $HTMLIMTAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;
  ## ISSUE: RFC 2045 Content-Type header field allows insertion
  ## of LWS/comments between tokens. Is it allowed in HTML? Maybe no.
  ## ISSUE: RFC 2231 extension? Maybe no.
  my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
  my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
  my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
  if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
    my @type = ($1, $2);
    my $param = $3;
    ## Consume the parameters one by one from the front of $param.
    while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
      ## Copy the captures at once: $2 is set for a token value, $3 for
      ## a quoted-string value.
      my ($name, $token_value, $quoted_value) = ($1, $2, $3);
      if (defined $token_value) {
        push @type, $name => $token_value;
      } else {
        ## Drop the enclosing quotation marks, then unescape
        ## quoted-pairs.
        my $v = substr ($quoted_value, 1, length ($quoted_value) - 2);
        $v =~ s/\\(.)/$1/gs;
        push @type, $name => $v;
      }
    }
    require Whatpm::IMTChecker;
    Whatpm::IMTChecker->check_imt (sub {
      my %opt = @_;
      $self->{onerror}->(node => $attr, level => $opt{level},
                         type => 'IMT:'.$opt{type});
    }, @type);
  } else {
    $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
  }
}; # $HTMLIMTAttrChecker
549    
## Checks that an attribute value is a language tag, by delegating to
## Whatpm::LangTag->check_rfc3066_language_tag.
##
## Takes ($self, $attr).  Each diagnostic from the language tag checker
## is forwarded to |$self->{onerror}| with the type |LangTag:TYPE|
## (plus |:SUBTAG| when a subtag is given), together with its |value|
## and |level|.
my $HTMLLanguageTagAttrChecker = sub {
  ## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm.

  my ($self, $attr) = @_;
  my $value = $attr->value;
  require Whatpm::LangTag;
  my $report = sub {
    my %opt = @_;
    my @type = ('LangTag:'.$opt{type});
    push @type, $opt{subtag} if defined $opt{subtag};
    $self->{onerror}->(node => $attr, type => join (':', @type),
                       value => $opt{value},
                       level => $opt{level});
  };
  Whatpm::LangTag->check_rfc3066_language_tag ($value, $report);
  ## ISSUE: RFC 4646 (3066bis)?

  ## TODO: testdata
}; # $HTMLLanguageTagAttrChecker
567    
568     ## "A valid media query [MQ]"
## "A valid media query [MQ]"
##
## Takes ($self, $attr).  Media queries are not validated; every use
## is reported via |$self->{onerror}| with the level |unsupported|.
my $HTMLMQAttrChecker = sub {
  my ($self, $attr) = @_;
  my %report = (node => $attr, level => 'unsupported',
                type => 'media query');
  $self->{onerror}->(node => $report{node}, level => $report{level},
                     type => $report{type});
  ## ISSUE: What is "a valid media query"?
}; # $HTMLMQAttrChecker
575    
## Checker for event handler content attributes.
##
## Takes ($self, $attr).  The script is not validated; every use is
## reported via |$self->{onerror}| with the level |unsupported|.
my $HTMLEventHandlerAttrChecker = sub {
  my ($self, $attr) = @_;
  my $onerror = $self->{onerror};
  $onerror->(node => $attr, level => 'unsupported',
             type => 'event handler');
  ## TODO: MUST contain valid ECMAScript code matching the
  ## ECMAScript |FunctionBody| production. [ECMA262]
  ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
  ## ISSUE: Automatic semicolon insertion does not apply?
  ## ISSUE: Other script languages?
}; # $HTMLEventHandlerAttrChecker
586    
## Checks a |usemap| attribute, which MUST be a valid hashed ID
## reference to a |map| element.
##
## Takes ($self, $attr).  If the value starts with "#", the rest of the
## value is queued with the attribute node in |$self->{usemap}|;
## otherwise |#idref:syntax error| is reported via |$self->{onerror}|.
my $HTMLUsemapAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;
  if (index ($value, '#') == 0) {
    ## ISSUE: Is |usemap="#"| conformant? (c.f. |id=""| is non-conformant.)
    my $map_id = substr ($value, 1);
    push @{$self->{usemap}}, [$map_id => $attr];
  } else {
    $self->{onerror}->(node => $attr, type => '#idref:syntax error');
  }
  ## NOTE: Space characters in hashed ID references are conforming.
  ## ISSUE: UA algorithm for matching is case-insensitive; IDs only different in cases should be reported
}; # $HTMLUsemapAttrChecker
600    
## Checks a browsing context name (|target| attribute).
##
## Takes ($self, $attr).  A value beginning with "_" must be, after
## lowercasing, one of |_self|, |_parent| or |_top|; otherwise
## |reserved browsing context name| is reported via
## |$self->{onerror}|.  Any other value is accepted.
my $HTMLTargetAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;
  ## NOTE: An empty string is a valid browsing context name (same as _self).
  if (index ($value, '_') == 0) {
    my $keyword = lc $value; ## ISSUE: ASCII case-insensitive?
    unless ($keyword eq '_self' or $keyword eq '_parent' or
            $keyword eq '_top') {
      $self->{onerror}->(node => $attr,
                         type => 'reserved browsing context name');
    }
  }
}; # $HTMLTargetAttrChecker
616    
## Checks that an attribute value is a group of selectors, using
## Whatpm::CSS::SelectorsParser.
##
## Takes ($self, $attr).  The parser is configured with the supported
## pseudo-classes and pseudo-elements and with |$self->{must_level}|;
## its errors are forwarded to |$self->{onerror}| with the type
## prefixed by |selectors:| and the attribute as the node.
my $HTMLSelectorsAttrChecker = sub {
  my ($self, $attr) = @_;

  ## ISSUE: Namespace resolution?

  my $value = $attr->value;

  require Whatpm::CSS::SelectorsParser;
  my $parser = Whatpm::CSS::SelectorsParser->new;

  my @pseudo_classes = qw/
    active checked disabled empty enabled first-child first-of-type
    focus hover indeterminate last-child last-of-type link only-child
    only-of-type root target visited
    lang nth-child nth-last-child nth-of-type nth-last-of-type not
    -manakai-contains -manakai-current
  /;
  for my $name (@pseudo_classes) {
    $parser->{pseudo_class}->{$name} = 1;
  }

  my @pseudo_elements = qw/
    after before first-letter first-line
  /;
  for my $name (@pseudo_elements) {
    $parser->{pseudo_element}->{$name} = 1;
  }

  $parser->{must_level} = $self->{must_level};
  $parser->{onerror} = sub {
    my %error = @_;
    $error{type} = 'selectors:'.$error{type};
    $self->{onerror}->(%error, node => $attr);
  };
  $parser->parse_string ($value);
}; # $HTMLSelectorsAttrChecker
646    
## Checks an |accesskey| attribute value, which must be exactly one
## character long.
##
## Takes ($self, $attr) and reports |char:syntax error| at the level
## |$self->{fact_level}| via |$self->{onerror}| otherwise.
my $HTMLAccesskeyAttrChecker = sub {
  my ($self, $attr) = @_;

  ## NOTE: "character" or |%Character;| in HTML4.

  my $value = $attr->value;
  unless (length ($value) == 1) {
    $self->{onerror}->(node => $attr, type => 'char:syntax error',
                       level => $self->{fact_level}); ## TODO: type
  }

  ## NOTE: "Note. Authors should consider the input method of the expected
  ## reader when specifying an accesskey." [HTML4] This is hard to implement,
  ## since it depends on keyboard and so on.
  ## NOTE: "We recommend that authors include the access key in label text
  ## or wherever the access key is to apply." [HTML4] (informative)
}; # $HTMLAccesskeyAttrChecker
664    
## Checkers for the global HTML attributes: attribute local name =>
## code reference taking ($self, $attr).
my $HTMLAttrChecker = {
  ## TODO: aria-* ## TODO: svg:*/@aria-* [HTML5ROLE] -> [STATES]

  ## |id|: must be non-empty, unique and free of space characters.  All
  ## attributes seen for a given ID are collected in |$self->{id}|.
  id => sub {
    ## NOTE: |map| has its own variant of |id=""| checker
    my ($self, $attr) = @_;
    my $id = $attr->value;
    if ($id eq '') {
      ## NOTE: MUST contain at least one character
      $self->{onerror}->(node => $attr, type => 'empty attribute value');
    } else {
      my $known = $self->{id}->{$id};
      if ($known) {
        $self->{onerror}->(node => $attr, type => 'duplicate ID');
        push @$known, $attr;
      } else {
        $self->{id}->{$id} = [$attr];
      }
      if ($id =~ /[\x09-\x0D\x20]/) {
        $self->{onerror}->(node => $attr, type => 'space in ID');
      }
    }
  },

  title => sub {}, ## NOTE: No conformance criteria

  ## |lang|: empty, or a language tag; not allowed in XML documents.
  lang => sub {
    my ($self, $attr) = @_;
    my $tag = $attr->value;
    unless ($tag eq '') {
      require Whatpm::LangTag;
      my $report = sub {
        my %opt = @_;
        my $type = 'LangTag:'.$opt{type};
        $type .= ':' . $opt{subtag} if defined $opt{subtag};
        $self->{onerror}->(node => $attr, type => $type,
                           value => $opt{value},
                           level => $opt{level});
      };
      Whatpm::LangTag->check_rfc3066_language_tag ($tag, $report);
    }
    ## ISSUE: RFC 4646 (3066bis)?
    if (not $attr->owner_document->manakai_is_html) {
      $self->{onerror}->(node => $attr, type => 'in XML:lang');
    }

    ## TODO: test data
  },

  dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),

  ## |class|: set of unique space-separated tokens.  The first
  ## occurrence of each token is recorded in
  ## |$self->{return}->{class}|.
  class => sub {
    my ($self, $attr) = @_;
    my %seen;
    for my $token (split /[\x09-\x0D\x20]/, $attr->value) {
      next unless length $token;
      if ($seen{$token}) {
        $self->{onerror}->(node => $attr, type => 'duplicate token:'.$token);
      } else {
        $seen{$token} = 1;
        push @{$self->{return}->{class}->{$token}||=[]}, $attr;
      }
    }
  },

  contenteditable => $GetHTMLEnumeratedAttrChecker->({
    true => 1, false => 1, '' => 1,
  }),

  ## |contextmenu|: the referenced ID is queued in
  ## |$self->{contextmenu}| together with the attribute node.
  contextmenu => sub {
    my ($self, $attr) = @_;
    push @{$self->{contextmenu}}, [$attr->value => $attr];
    ## ISSUE: "The value must be the ID of a menu element in the DOM."
    ## What is "in the DOM"?  A menu Element node that is not part
    ## of the Document tree is in the DOM?  A menu Element node that
    ## belong to another Document tree is in the DOM?
  },

  irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
  ## TODO: repeat, repeat-start, repeat-min, repeat-max, repeat-template ## TODO: global
  ## TODO: role [HTML5ROLE] ## TODO: global @role [XHTML1ROLE]
  tabindex => $HTMLIntegerAttrChecker,
  ## TODO: ref, template, registrationmark
};
740    
## Standardization status of the global HTML attributes: attribute
## local name => FEATURE_* flags.  Entries are grouped by status.
my %HTMLAttrStatus = (
  (map { ($_ => FEATURE_HTML5_DEFAULT) } qw/
    class contenteditable dir id lang tabindex title
  /),
  (map { ($_ => FEATURE_HTML5_WD) } qw/
    contextmenu irrelevant
  /),
  draggable => FEATURE_HTML5_LC,
  (map { ($_ => FEATURE_HTML5_AT_RISK) } qw/
    ref registrationmark template
  /),
  (map { ($_ => FEATURE_WF2) } qw/
    repeat repeat-max repeat-min repeat-start repeat-template
  /),
  role => FEATURE_HTML5_ROLE,
);
762    
## Status flags, keyed by attribute local name, for attributes that carry
## |FEATURE_M12N10_REC| and/or |FEATURE_RDFA_LC|.  In this part of the
## file it is merged after |%HTMLAttrStatus| in the |link| status table,
## so its entries override the plain HTML5 ones there.
## NOTE(review): The name suggests the XHTML Modularization "Common"
## attribute collection; confirm against the constant definitions.
763     my %HTMLM12NCommonAttrStatus = (
764 wakaba 1.61 about => FEATURE_RDFA_LC,
765 wakaba 1.50 class => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
766 wakaba 1.61 content => FEATURE_RDFA_LC,
767     datatype => FEATURE_RDFA_LC,
768 wakaba 1.50 dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
769     id => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
770 wakaba 1.61 instanceof => FEATURE_RDFA_LC,
771 wakaba 1.50 onclick => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
772     ondblclick => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
773     onmousedown => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
774     onmouseup => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
775     onmouseover => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
776     onmousemove => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
777     onmouseout => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
778     onkeypress => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
779     onkeydown => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
780     onkeyup => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
781 wakaba 1.61 property => FEATURE_RDFA_LC,
782     rel => FEATURE_RDFA_LC,
783     resource => FEATURE_RDFA_LC,
784     rev => FEATURE_RDFA_LC,
785 wakaba 1.55 style => FEATURE_HTML5_DEFAULT | FEATURE_XHTMLBASIC11_CR_DEPRECATED |
786     FEATURE_M12N10_REC,
787 wakaba 1.50 title => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
788 wakaba 1.49 );
789    
## Registers every event handler content attribute listed below as a
## common HTML attribute: each one gets the shared
## |$HTMLEventHandlerAttrChecker| and the |FEATURE_HTML5_DEFAULT| status.
790 wakaba 1.1 for (qw/
791     onabort onbeforeunload onblur onchange onclick oncontextmenu
792     ondblclick ondrag ondragend ondragenter ondragleave ondragover
793     ondragstart ondrop onerror onfocus onkeydown onkeypress
794     onkeyup onload onmessage onmousedown onmousemove onmouseout
795     onmouseover onmouseup onmousewheel onresize onscroll onselect
796     onsubmit onunload
797     /) {
798     $HTMLAttrChecker->{$_} = $HTMLEventHandlerAttrChecker;
799 wakaba 1.50 $HTMLAttrStatus{$_} = FEATURE_HTML5_DEFAULT;
800 wakaba 1.1 }
801    
## Builds a |check_attrs| callback for an element.
##
## Arguments:
##   $element_specific_checker - Hash reference mapping an attribute
##     local name to a checker code reference.  It is consulted before
##     the common |$HTMLAttrChecker| table.
##   $element_specific_status - Hash reference mapping an attribute local
##     name to its status flags, which are passed to |_attr_status_info|.
##
## Returns a code reference taking |($self, $item, $element_state)| that
## examines every attribute of |$item->{node}|.
802     my $GetHTMLAttrsChecker = sub {
803     my $element_specific_checker = shift;
804 wakaba 1.49 my $element_specific_status = shift;
805 wakaba 1.1 return sub {
806 wakaba 1.40 my ($self, $item, $element_state) = @_;
807     for my $attr (@{$item->{node}->attributes}) {
808 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
## A null namespace is represented by the empty string from here on.
809     $attr_ns = '' unless defined $attr_ns;
810     my $attr_ln = $attr->manakai_local_name;
811     my $checker;
## Null-namespace attribute: the element-specific checker takes
## precedence over the common HTML attribute checker.
812     if ($attr_ns eq '') {
813     $checker = $element_specific_checker->{$attr_ln}
814 wakaba 1.40 || $HTMLAttrChecker->{$attr_ln};
815 wakaba 1.1 }
## Otherwise fall back to the namespace-keyed table: first the exact
## local name, then that namespace's wildcard (|''|) entry.
816     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
817 wakaba 1.40 || $AttrChecker->{$attr_ns}->{''};
818 wakaba 1.1 if ($checker) {
819 wakaba 1.66 $checker->($self, $attr, $item, $element_state);
820 wakaba 1.62 } elsif ($attr_ns eq '' and not $element_specific_status->{$attr_ln}) {
## No checker and no status entry for a null-namespace attribute: this
## branch reports nothing.  (Presumably |_attr_status_info| below is
## what reports such an attribute -- TODO confirm.)
821 wakaba 1.54 #
822 wakaba 1.1 } else {
## The attribute is namespaced, or has a status entry, but no checker
## is available for it.
823     $self->{onerror}->(node => $attr, level => 'unsupported',
824     type => 'attribute');
825 wakaba 1.49 ## ISSUE: No conformance criteria for unknown attributes in the spec
826     }
## The status is looked up for null-namespace attributes only; it is
## |undef| when the element's status table has no entry.
827     if ($attr_ns eq '') {
828     $self->_attr_status_info ($attr, $element_specific_status->{$attr_ln});
829 wakaba 1.1 }
830 wakaba 1.49 ## TODO: global attribute
831 wakaba 1.1 }
832     };
833     }; # $GetHTMLAttrsChecker
834    
## Base set of checker callbacks for HTML elements: the callbacks of
## |%Whatpm::ContentChecker::AnyChecker| with |check_attrs| replaced by
## an HTML attribute checker that has no element-specific checkers and
## uses the common |%HTMLAttrStatus| table.
835 wakaba 1.40 my %HTMLChecker = (
836     %Whatpm::ContentChecker::AnyChecker,
837 wakaba 1.49 check_attrs => $GetHTMLAttrsChecker->({}, \%HTMLAttrStatus),
838 wakaba 1.40 );
839    
## Checker callbacks for elements whose content model is empty (used by
## |base|, |link| and |meta| below).  Any child element that is not in
## |$self->{plus_elements}| and any significant text is an error.
840     my %HTMLEmptyChecker = (
841     %HTMLChecker,
842     check_child_element => sub {
843     my ($self, $item, $child_el, $child_nsuri, $child_ln,
844     $child_is_transparent, $element_state) = @_;
## Explicitly excluded elements are tested first.
845     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
846     $self->{onerror}->(node => $child_el,
847     type => 'element not allowed:minus',
848     level => $self->{must_level});
849     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
## Explicitly included elements are accepted without further checks.
850     #
851     } else {
852     $self->{onerror}->(node => $child_el,
853     type => 'element not allowed:empty',
854     level => $self->{must_level});
855     }
856     },
857     check_child_text => sub {
858     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
## Text for which |$has_significant| is false is tolerated.
859     if ($has_significant) {
860     $self->{onerror}->(node => $child_node,
861     type => 'character not allowed:empty',
862     level => $self->{must_level});
863     }
864     },
865     );
866    
## Checker callbacks for elements that accept no child elements other
## than those in |$self->{plus_elements}| (used by |title| below).
## |check_child_text| is inherited from |%HTMLChecker| unchanged.
867     my %HTMLTextChecker = (
868     %HTMLChecker,
869     check_child_element => sub {
870     my ($self, $item, $child_el, $child_nsuri, $child_ln,
871     $child_is_transparent, $element_state) = @_;
872     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
873     $self->{onerror}->(node => $child_el,
874     type => 'element not allowed:minus',
875     level => $self->{must_level});
876     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
877     #
878     } else {
## NOTE: Unlike the branch above, no |level| is passed here.
879     $self->{onerror}->(node => $child_el, type => 'element not allowed');
880     }
881     },
882     );
883    
884 wakaba 1.58 ## TODO: Rename as "FlowContent" (HTML5 revision 1261)
## Checker callbacks for elements whose content model is prose content.
##
## State kept in |$element_state|:
##   has_non_style - Set once anything other than a leading |style|
##     element has been seen; a later |style| child is then an error.
##   has_significant - Read by |check_end|.  It is not set in this block
##     (presumably it is set by the generic checker or by descendants'
##     |check_end| -- TODO confirm).
885 wakaba 1.40 my %HTMLProseContentChecker = (
886     %HTMLChecker,
887     check_child_element => sub {
888     my ($self, $item, $child_el, $child_nsuri, $child_ln,
889     $child_is_transparent, $element_state) = @_;
890     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
891     $self->{onerror}->(node => $child_el,
892     type => 'element not allowed:minus',
893     level => $self->{must_level});
894     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
895     #
896     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
## A |style| child is accepted only if it has the |scoped| attribute and
## no non-|style| content precedes it.
897     if ($element_state->{has_non_style} or
898     not $child_el->has_attribute_ns (undef, 'scoped')) {
899     $self->{onerror}->(node => $child_el,
900     type => 'element not allowed:prose style',
901     level => $self->{must_level});
902     }
903     } elsif ($HTMLProseContent->{$child_nsuri}->{$child_ln}) {
## A transparent child does not by itself end the leading |style| run.
904 wakaba 1.43 $element_state->{has_non_style} = 1 unless $child_is_transparent;
905 wakaba 1.40 } else {
906     $element_state->{has_non_style} = 1;
907     $self->{onerror}->(node => $child_el,
908     type => 'element not allowed:prose',
909     level => $self->{must_level})
910     }
911     },
912     check_child_text => sub {
913     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
## Significant text ends the leading |style| run as well.
914     if ($has_significant) {
915     $element_state->{has_non_style} = 1;
916     }
917     },
918     check_end => sub {
919     my ($self, $item, $element_state) = @_;
## Propagate "has significant content" to the real parent's state;
## otherwise report a should-level error unless the item is transparent.
920     if ($element_state->{has_significant}) {
921 wakaba 1.46 $item->{real_parent_state}->{has_significant} = 1;
922 wakaba 1.40 } elsif ($item->{transparent}) {
923     #
924     } else {
925     $self->{onerror}->(node => $item->{node},
926     level => $self->{should_level},
927     type => 'no significant content');
928     }
929     },
930     );
931    
## Checker callbacks for elements whose content model is phrasing
## content.  A child element must be listed in |$HTMLPhrasingContent|
## (or in |$self->{plus_elements}|).  |check_child_text| is inherited
## from |%HTMLChecker|; |check_end| is shared with the prose content
## checker.
932     my %HTMLPhrasingContentChecker = (
933     %HTMLChecker,
934     check_child_element => sub {
935     my ($self, $item, $child_el, $child_nsuri, $child_ln,
936     $child_is_transparent, $element_state) = @_;
937     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
938     $self->{onerror}->(node => $child_el,
939     type => 'element not allowed:minus',
940     level => $self->{must_level});
941     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
942     #
943     } elsif ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
944     #
945     } else {
946     $self->{onerror}->(node => $child_el,
947     type => 'element not allowed:phrasing',
948     level => $self->{must_level});
949     }
950     },
951     check_end => $HTMLProseContentChecker{check_end},
952     ## NOTE: The definition for |li| assumes that the only differences
953     ## between prose and phrasing content checkers are |check_child_element|
954     ## and |check_child_text|.
955     );
956    
## Checker callbacks for transparent elements: a shallow copy of the
## prose content checker.
957     my %HTMLTransparentChecker = %HTMLProseContentChecker;
958     ## ISSUE: Should the significant content rule be applied to a
959 wakaba 1.46 ## transparent element with a parent?
960 wakaba 1.40
## Package variables holding the element definitions.  |$Element| is
## indexed by namespace URI and then by local name.
961 wakaba 1.1 our $Element;
962     our $ElementDefault;
963    
## Entry under the empty local name in the HTML namespace.  (Presumably
## this is the fallback for HTML elements that have no definition of
## their own -- TODO confirm against the lookup code.)
964     $Element->{$HTML_NS}->{''} = {
965 wakaba 1.40 %HTMLChecker,
966 wakaba 1.1 };
967    
## |html| element.  Its children must be one |head| followed by one
## |body|.  Progress is tracked in |$element_state->{phase}|, which moves
## from 'before head' to 'after head' to 'after body'.
968     $Element->{$HTML_NS}->{html} = {
969 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
970 wakaba 1.1 is_root => 1,
971 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
972 wakaba 1.16 manifest => $HTMLURIAttrChecker,
973 wakaba 1.1 xmlns => sub {
974     my ($self, $attr) = @_;
975     my $value = $attr->value;
## The value has to be the HTML namespace URI itself.
976     unless ($value eq $HTML_NS) {
977     $self->{onerror}->(node => $attr, type => 'invalid attribute value');
978     }
## Reported when the owner document is not flagged as an HTML document.
979     unless ($attr->owner_document->manakai_is_html) {
980     $self->{onerror}->(node => $attr, type => 'in XML:xmlns');
981     ## TODO: Test
982     }
983 wakaba 1.66
984     ## TODO: Should be resolved?
## Record the value in the returned URI table as a namespace URI.
985     push @{$self->{return}->{uri}->{$value} ||= []},
986     {node => $attr, type => {namespace => 1}};
987 wakaba 1.1 },
988 wakaba 1.67 version => sub {
989     ## NOTE: According to HTML4 prose, this is a "cdata" attribute.
990     ## Though DTDs of various versions of HTML define the attribute
991     ## as |#FIXED|, this conformance checker does no check for
992     ## the attribute value, since what kind of check should be done
993     ## is unknown.
994     },
995 wakaba 1.49 }, {
996     %HTMLAttrStatus,
997 wakaba 1.61 class => FEATURE_HTML5_DEFAULT | FEATURE_HTML2X_RFC,
998 wakaba 1.50 dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
999     id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
1000     lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
1001     manifest => FEATURE_HTML5_DEFAULT,
1002 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
1003 wakaba 1.49 version => FEATURE_M12N10_REC,
1004 wakaba 1.50 xmlns => FEATURE_HTML5_DEFAULT,
1005 wakaba 1.1 }),
1006 wakaba 1.40 check_start => sub {
1007     my ($self, $item, $element_state) = @_;
1008     $element_state->{phase} = 'before head';
## Marks the URI in |manifest| as a resource reference.  (Presumably
## this is read by |$HTMLURIAttrChecker| -- TODO confirm.)
1009 wakaba 1.66 $element_state->{uri_info}->{manifest}->{type}->{resource} = 1;
1010 wakaba 1.40 },
1011     check_child_element => sub {
1012     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1013     $child_is_transparent, $element_state) = @_;
1014     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
1015     $self->{onerror}->(node => $child_el,
1016     type => 'element not allowed:minus',
1017     level => $self->{must_level});
1018     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1019     #
1020     } elsif ($element_state->{phase} eq 'before head') {
1021     if ($child_nsuri eq $HTML_NS and $child_ln eq 'head') {
1022     $element_state->{phase} = 'after head';
1023     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'body') {
## A |body| with no preceding |head|: report the missing |head| and
## continue as if the |body| had been seen in its proper place.
1024     $self->{onerror}->(node => $child_el,
1025     type => 'ps element missing:head');
1026     $element_state->{phase} = 'after body';
1027     } else {
1028     $self->{onerror}->(node => $child_el,
1029     type => 'element not allowed');
1030     }
1031     } elsif ($element_state->{phase} eq 'after head') {
1032     if ($child_nsuri eq $HTML_NS and $child_ln eq 'body') {
1033     $element_state->{phase} = 'after body';
1034     } else {
1035     $self->{onerror}->(node => $child_el,
1036     type => 'element not allowed');
1037     }
1038     } elsif ($element_state->{phase} eq 'after body') {
## Nothing is allowed after |body|.
1039     $self->{onerror}->(node => $child_el,
1040     type => 'element not allowed');
1041     } else {
1042     die "check_child_element: Bad |html| phase: $element_state->{phase}";
1043     }
1044     },
1045     check_child_text => sub {
1046     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1047     if ($has_significant) {
1048     $self->{onerror}->(node => $child_node,
1049     type => 'character not allowed');
1050     }
1051     },
1052     check_end => sub {
1053     my ($self, $item, $element_state) = @_;
## Report whichever of |head| and |body| were never seen.
1054     if ($element_state->{phase} eq 'after body') {
1055     #
1056     } elsif ($element_state->{phase} eq 'before head') {
1057     $self->{onerror}->(node => $item->{node},
1058     type => 'child element missing:head');
1059     $self->{onerror}->(node => $item->{node},
1060     type => 'child element missing:body');
1061     } elsif ($element_state->{phase} eq 'after head') {
1062     $self->{onerror}->(node => $item->{node},
1063     type => 'child element missing:body');
1064     } else {
1065     die "check_end: Bad |html| phase: $element_state->{phase}";
1066     }
1067 wakaba 1.1
## Finish with the generic end-of-element processing.
1068 wakaba 1.40 $HTMLChecker{check_end}->(@_);
1069     },
1070     };
1071 wakaba 1.25
## |head| element.  Children must be metadata content, with exactly one
## |title| and no |style| element that has the |scoped| attribute.
##
## While the children of |head| are being processed,
## |$self->{flag}->{in_head}| is true; the value it had before is kept in
## |$element_state->{in_head_original}| and restored by |check_end|.
1072 wakaba 1.40 $Element->{$HTML_NS}->{head} = {
1073 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1074 wakaba 1.67 check_attrs => $GetHTMLAttrsChecker->({
1075     profile => $HTMLSpaceURIsAttrChecker, ## NOTE: MUST be profile URIs.
1076     }, {
1077 wakaba 1.49 %HTMLAttrStatus,
1078 wakaba 1.61 class => FEATURE_HTML5_DEFAULT | FEATURE_HTML2X_RFC,
1079 wakaba 1.50 dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1080     id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
1081     lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
1082 wakaba 1.49 profile => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
1083     }),
1084 wakaba 1.40 check_child_element => sub {
1085     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1086     $child_is_transparent, $element_state) = @_;
1087     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
1088     $self->{onerror}->(node => $child_el,
1089     type => 'element not allowed:minus',
1090     level => $self->{must_level});
1091     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1092     #
1093     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'title') {
## Only the first |title| is accepted.
1094     unless ($element_state->{has_title}) {
1095     $element_state->{has_title} = 1;
1096     } else {
1097     $self->{onerror}->(node => $child_el,
1098     type => 'element not allowed:head title',
1099     level => $self->{must_level});
1100     }
1101     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
## |style scoped| is not allowed in |head|.
1102     if ($child_el->has_attribute_ns (undef, 'scoped')) {
1103     $self->{onerror}->(node => $child_el,
1104     type => 'element not allowed:head style',
1105     level => $self->{must_level});
1106 wakaba 1.1 }
1107 wakaba 1.40 } elsif ($HTMLMetadataContent->{$child_nsuri}->{$child_ln}) {
1108     #
1109    
1110     ## NOTE: |meta| is a metadata content. However, strictly speaking,
1111     ## a |meta| element with none of |charset|, |name|,
1112     ## or |http-equiv| attribute is not allowed. It is non-conforming
1113     ## anyway.
1114 wakaba 1.56
1115     ## TODO: |form| MUST be empty and in XML [WF2].
1116 wakaba 1.40 } else {
1117     $self->{onerror}->(node => $child_el,
1118     type => 'element not allowed:metadata',
1119     level => $self->{must_level});
1120     }
## NOTE: This callback runs once for every child element, so the
## original |in_head| flag is saved on the first call only.  Saving it
## unconditionally would overwrite it with the 1 set by the first call,
## and |check_end| could then never restore the value the flag had
## before this |head| element.
1121     $element_state->{in_head_original} = $self->{flag}->{in_head} unless exists $element_state->{in_head_original};
1122     $self->{flag}->{in_head} = 1;
1123     },
1124     check_child_text => sub {
1125     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1126     if ($has_significant) {
1127     $self->{onerror}->(node => $child_node, type => 'character not allowed');
1128 wakaba 1.1 }
1129 wakaba 1.40 },
1130     check_end => sub {
1131     my ($self, $item, $element_state) = @_;
1132     unless ($element_state->{has_title}) {
1133     $self->{onerror}->(node => $item->{node},
1134     type => 'child element missing:title');
1135 wakaba 1.1 }
## Restore the flag to its value from before this |head| element.  If
## the element had no child elements, the flag was never changed and
## |in_head_original| is undefined.
1136 wakaba 1.40 $self->{flag}->{in_head} = $element_state->{in_head_original};
1137 wakaba 1.1
1138 wakaba 1.40 $HTMLChecker{check_end}->(@_);
1139 wakaba 1.1 },
1140     };
1141    
## |title| element.  It uses the text-only content checker and adds no
## element-specific attribute checkers; only the attribute status table
## is extended.
1142 wakaba 1.40 $Element->{$HTML_NS}->{title} = {
1143     %HTMLTextChecker,
1144 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1145     check_attrs => $GetHTMLAttrsChecker->({}, {
1146     %HTMLAttrStatus,
1147 wakaba 1.61 class => FEATURE_HTML5_DEFAULT | FEATURE_HTML2X_RFC,
1148 wakaba 1.50 dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1149     id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
1150     lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
1151 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
1152 wakaba 1.49 }),
1153 wakaba 1.40 };
1154 wakaba 1.1
## |base| element: an empty element.  Its |check_attrs| callback
##   - reports a second |base| element (tracked by |$self->{has_base}|),
##   - reports |href| when |$self->{has_uri_attr}| is already set and
##     |target| when |$self->{has_hyperlink_element}| is already set,
##   - requires at least one of |href| and |target|, and
##   - then runs the ordinary per-attribute checks.
1155 wakaba 1.40 $Element->{$HTML_NS}->{base} = {
1156 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1157 wakaba 1.40 %HTMLEmptyChecker,
1158     check_attrs => sub {
1159     my ($self, $item, $element_state) = @_;
1160 wakaba 1.1
1161 wakaba 1.40 if ($self->{has_base}) {
1162     $self->{onerror}->(node => $item->{node},
1163     type => 'element not allowed:base');
1164     } else {
1165     $self->{has_base} = 1;
1166 wakaba 1.29 }
1167    
1168 wakaba 1.40 my $has_href = $item->{node}->has_attribute_ns (undef, 'href');
1169     my $has_target = $item->{node}->has_attribute_ns (undef, 'target');
1170 wakaba 1.14
1171     if ($self->{has_uri_attr} and $has_href) {
1172 wakaba 1.4 ## ISSUE: Are these examples conforming?
1173     ## <head profile="a b c"><base href> (except for |profile|'s
1174     ## non-conformance)
1175     ## <title xml:base="relative"/><base href/> (maybe it should be)
1176     ## <unknown xmlns="relative"/><base href/> (assuming that
1177     ## |{relative}:unknown| is allowed before XHTML |base| (unlikely, though))
1178     ## <style>@import 'relative';</style><base href>
1179     ## <script>location.href = 'relative';</script><base href>
1180 wakaba 1.14 ## NOTE: <html manifest=".."><head><base href=""/> is conforming as
1181     ## an exception.
1182 wakaba 1.40 $self->{onerror}->(node => $item->{node},
1183 wakaba 1.4 type => 'basehref after URI attribute');
1184     }
1185 wakaba 1.14 if ($self->{has_hyperlink_element} and $has_target) {
1186 wakaba 1.4 ## ISSUE: Are these examples conforming?
1187     ## <head><title xlink:href=""/><base target="name"/></head>
1188     ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
1189     ## (assuming that |xbl:xbl| is allowed before |base|)
1190     ## NOTE: These are non-conformant anyway because of |head|'s content model:
1191     ## <link href=""/><base target="name"/>
1192     ## <link rel=unknown href=""><base target=name>
1193 wakaba 1.40 $self->{onerror}->(node => $item->{node},
1194 wakaba 1.4 type => 'basetarget after hyperlink');
1195     }
1196    
1197 wakaba 1.14 if (not $has_href and not $has_target) {
1198 wakaba 1.40 $self->{onerror}->(node => $item->{node},
1199 wakaba 1.14 type => 'attribute missing:href|target');
1200     }
1201    
## Marks the URI in |href| as a base URI.  (Presumably this is read by
## |$HTMLURIAttrChecker| -- TODO confirm.)
1202 wakaba 1.66 $element_state->{uri_info}->{href}->{type}->{base} = 1;
1203    
## NOTE: The attribute checker is built anew on every call and invoked
## immediately; its return value is returned to the caller.
1204 wakaba 1.4 return $GetHTMLAttrsChecker->({
1205     href => $HTMLURIAttrChecker,
1206     target => $HTMLTargetAttrChecker,
1207 wakaba 1.49 }, {
1208     %HTMLAttrStatus,
1209 wakaba 1.50 href => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1210     id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
1211     target => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1212 wakaba 1.40 })->($self, $item, $element_state);
1213 wakaba 1.4 },
1214 wakaba 1.1 };
1215    
## |link| element: an empty element.  Its |check_attrs| callback runs
## the per-attribute checks and then requires both |href| and |rel|.
1216     $Element->{$HTML_NS}->{link} = {
1217 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1218 wakaba 1.40 %HTMLEmptyChecker,
1219     check_attrs => sub {
1220     my ($self, $item, $element_state) = @_;
## The checker is built inside the callback so that the |rel| checker
## can close over this call's |$item|.
1221 wakaba 1.1 $GetHTMLAttrsChecker->({
1222     href => $HTMLURIAttrChecker,
## NOTE(review): The meaning of the leading |0| argument is defined by
## |$HTMLLinkTypesAttrChecker|, which is outside this section.
1223 wakaba 1.40 rel => sub { $HTMLLinkTypesAttrChecker->(0, $item, @_) },
1224 wakaba 1.1 media => $HTMLMQAttrChecker,
1225     hreflang => $HTMLLanguageTagAttrChecker,
1226     type => $HTMLIMTAttrChecker,
1227     ## NOTE: Though |title| has special semantics,
1228     ## syntactically same as the |title| as global attribute.
1229 wakaba 1.49 }, {
1230     %HTMLAttrStatus,
1231     %HTMLM12NCommonAttrStatus,
1232     charset => FEATURE_M12N10_REC,
1233 wakaba 1.50 href => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1234     hreflang => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1235     lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
1236     media => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1237 wakaba 1.61 methods => FEATURE_HTML20_RFC,
1238 wakaba 1.50 rel => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1239 wakaba 1.49 rev => FEATURE_M12N10_REC,
1240 wakaba 1.61 sdapref => FEATURE_HTML20_RFC,
1241 wakaba 1.49 target => FEATURE_M12N10_REC,
1242 wakaba 1.50 type => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1243 wakaba 1.61 urn => FEATURE_HTML20_RFC,
1244 wakaba 1.40 })->($self, $item, $element_state);
## With |href| present, remember that a hyperlink element has been seen
## if |$item->{has_hyperlink_link_type}| is set (presumably by the |rel|
## checker above -- TODO confirm).  The |base| checker reads this flag.
1245     if ($item->{node}->has_attribute_ns (undef, 'href')) {
1246     $self->{has_hyperlink_element} = 1 if $item->{has_hyperlink_link_type};
1247 wakaba 1.4 } else {
1248 wakaba 1.40 $self->{onerror}->(node => $item->{node},
1249 wakaba 1.1 type => 'attribute missing:href');
1250     }
1251 wakaba 1.40 unless ($item->{node}->has_attribute_ns (undef, 'rel')) {
1252     $self->{onerror}->(node => $item->{node},
1253 wakaba 1.1 type => 'attribute missing:rel');
1254     }
1255     },
1256     };
1257    
## |meta| element: an empty element.  Its |check_attrs| callback does not
## use |$GetHTMLAttrsChecker|; it walks the attributes itself so that the
## |name|, |http-equiv|, |charset| and |content| attribute nodes can be
## captured.  It then checks
##   1. which combinations of those four attributes are present,
##   2. the |http-equiv| keyword, and
##   3. any character encoding declaration (|charset|, or |http-equiv|
##      with the |content-type| keyword).
1258     $Element->{$HTML_NS}->{meta} = {
1259 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1260 wakaba 1.40 %HTMLEmptyChecker,
1261     check_attrs => sub {
1262     my ($self, $item, $element_state) = @_;
1263 wakaba 1.1 my $name_attr;
1264     my $http_equiv_attr;
1265     my $charset_attr;
1266     my $content_attr;
1267 wakaba 1.40 for my $attr (@{$item->{node}->attributes}) {
1268 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
1269     $attr_ns = '' unless defined $attr_ns;
1270     my $attr_ln = $attr->manakai_local_name;
1271     my $checker;
## For the four attributes captured here, |$checker| is set to the plain
## true value 1: the attribute is known, but there is no code reference
## to call (see the |ref $checker| guard below).
1272     if ($attr_ns eq '') {
1273     if ($attr_ln eq 'content') {
1274     $content_attr = $attr;
1275     $checker = 1;
1276     } elsif ($attr_ln eq 'name') {
1277     $name_attr = $attr;
1278     $checker = 1;
1279     } elsif ($attr_ln eq 'http-equiv') {
1280     $http_equiv_attr = $attr;
1281     $checker = 1;
1282     } elsif ($attr_ln eq 'charset') {
1283     $charset_attr = $attr;
1284     $checker = 1;
1285 wakaba 1.67 } elsif ($attr_ln eq 'scheme') {
1286     $checker = sub {};
1287     ## NOTE: According to HTML4, values for the |scheme| attribute
1288     ## depend on |name| attribute and |profile| of |head|. Otherwise
1289     ## it is "cdata". The only profile with any scheme value defined
1290     ## is <http://dublincore.org/documents/dcq-html/> (and those
1291     ## referencing that profile; see
1292     ## <http://suika.fam.cx/gate/2005/sw/scheme#anchor-55> for more
1293     ## information).
1294     ## TODO: Should we implement the checking against the profile above?
1295     ## (But we don't want to implement its namespace bits. It is
1296     ## poorly designed and obsolete in favor of HTML5's new ecosystem.)
1297 wakaba 1.1 } else {
1298     $checker = $HTMLAttrChecker->{$attr_ln}
1299 wakaba 1.67 || $AttrChecker->{$attr_ns}->{$attr_ln}
1300 wakaba 1.1 || $AttrChecker->{$attr_ns}->{''};
1301     }
1302     } else {
1303     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1304     || $AttrChecker->{$attr_ns}->{''};
1305     }
1306 wakaba 1.62
## Status flags of this attribute, or |undef| if the table has no entry.
## NOTE(review): The anonymous hash is rebuilt for every attribute; it
## does not depend on the loop variable and could be hoisted.
1307     my $status = {
1308     %HTMLAttrStatus,
1309     charset => FEATURE_HTML5_DEFAULT,
1310     content => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1311     dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1312     'http-equiv' => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1313     id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
1314     lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
1315     name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1316     scheme => FEATURE_M12N10_REC,
1317     }->{$attr_ln};
1318    
1319 wakaba 1.1 if ($checker) {
1320 wakaba 1.66 $checker->($self, $attr, $item, $element_state) if ref $checker;
1321 wakaba 1.62 } elsif ($attr_ns eq '' and not $status) {
1322 wakaba 1.54 #
1323 wakaba 1.1 } else {
1324     $self->{onerror}->(node => $attr, level => 'unsupported',
1325     type => 'attribute');
1326 wakaba 1.49 ## ISSUE: No conformance criteria for unknown attributes in the spec
1327     }
1328    
1329     if ($attr_ns eq '') {
1330 wakaba 1.62 $self->_attr_status_info ($attr, $status);
1331 wakaba 1.1 }
1332     }
1333    
## Attribute combination rules.  |name| takes precedence over
## |http-equiv|, which takes precedence over |charset|.
1334     if (defined $name_attr) {
1335     if (defined $http_equiv_attr) {
1336     $self->{onerror}->(node => $http_equiv_attr,
1337     type => 'attribute not allowed');
1338     } elsif (defined $charset_attr) {
1339     $self->{onerror}->(node => $charset_attr,
1340     type => 'attribute not allowed');
1341     }
## NOTE(review): |$metadata_name| and |$metadata_value| are assigned but
## not read anywhere in this callback (see "TODO: metadata conformance"
## below).
1342     my $metadata_name = $name_attr->value;
1343     my $metadata_value;
1344     if (defined $content_attr) {
1345     $metadata_value = $content_attr->value;
1346     } else {
1347 wakaba 1.40 $self->{onerror}->(node => $item->{node},
1348 wakaba 1.1 type => 'attribute missing:content');
1349     $metadata_value = '';
1350     }
1351     } elsif (defined $http_equiv_attr) {
1352     if (defined $charset_attr) {
1353     $self->{onerror}->(node => $charset_attr,
1354     type => 'attribute not allowed');
1355     }
1356     unless (defined $content_attr) {
1357 wakaba 1.40 $self->{onerror}->(node => $item->{node},
1358 wakaba 1.1 type => 'attribute missing:content');
1359     }
1360     } elsif (defined $charset_attr) {
1361     if (defined $content_attr) {
1362     $self->{onerror}->(node => $content_attr,
1363     type => 'attribute not allowed');
1364     }
1365     } else {
## None of |name|, |http-equiv| and |charset| is specified.
1366     if (defined $content_attr) {
1367     $self->{onerror}->(node => $content_attr,
1368     type => 'attribute not allowed');
1369 wakaba 1.40 $self->{onerror}->(node => $item->{node},
1370 wakaba 1.1 type => 'attribute missing:name|http-equiv');
1371     } else {
1372 wakaba 1.40 $self->{onerror}->(node => $item->{node},
1373 wakaba 1.1 type => 'attribute missing:name|http-equiv|charset');
1374     }
1375     }
1376    
## Checks where a character encoding declaration appears: the |meta|
## must be the first element child of the document's |head|, and the
## owner document must be flagged as an HTML document.
1377 wakaba 1.32 my $check_charset_decl = sub () {
1378 wakaba 1.40 my $parent = $item->{node}->manakai_parent_element;
1379 wakaba 1.29 if ($parent and $parent eq $parent->owner_document->manakai_head) {
## Only the first element child is examined; the loop ends after it.
1380     for my $el (@{$parent->child_nodes}) {
1381     next unless $el->node_type == 1; # ELEMENT_NODE
1382 wakaba 1.40 unless ($el eq $item->{node}) {
1383 wakaba 1.29 ## NOTE: Not the first child element.
1384 wakaba 1.40 $self->{onerror}->(node => $item->{node},
1385 wakaba 1.32 type => 'element not allowed:meta charset',
1386     level => $self->{must_level});
1387 wakaba 1.29 }
1388     last;
1389     ## NOTE: Entity references are not supported.
1390     }
1391     } else {
1392 wakaba 1.40 $self->{onerror}->(node => $item->{node},
1393 wakaba 1.32 type => 'element not allowed:meta charset',
1394     level => $self->{must_level});
1395 wakaba 1.29 }
1396    
1397 wakaba 1.40 unless ($item->{node}->owner_document->manakai_is_html) {
1398     $self->{onerror}->(node => $item->{node},
1399 wakaba 1.32 type => 'in XML:charset',
1400     level => $self->{must_level});
1401 wakaba 1.1 }
1402 wakaba 1.32 }; # $check_charset_decl
1403 wakaba 1.21
## Checks a charset name.  |$attr| is the attribute node that errors are
## reported against; |$charset_value| is the charset name taken from it.
1404 wakaba 1.32 my $check_charset = sub ($$) {
1405     my ($attr, $charset_value) = @_;
1406 wakaba 1.21 ## NOTE: Though the case-sensitivity of |charset| attribute value
1407     ## is not explicitly spelled in the HTML5 spec, the Character Set
1408     ## registry of IANA, which is referenced from HTML5 spec, says that
1409     ## charset name is case-insensitive.
1410     $charset_value =~ tr/A-Z/a-z/; ## NOTE: ASCII Case-insensitive.
1411    
1412     require Message::Charset::Info;
1413     my $charset = $Message::Charset::Info::IANACharset->{$charset_value};
1414 wakaba 1.40 my $ic = $item->{node}->owner_document->input_encoding;
1415 wakaba 1.21 if (defined $ic) {
1416     ## TODO: Test for this case
1417     my $ic_charset = $Message::Charset::Info::IANACharset->{$ic};
## NOTE(review): |ne| is applied to the two table entries themselves;
## either may be |undef| when the name is not in the table -- confirm
## that this is intended.
1418     if ($charset ne $ic_charset) {
1419 wakaba 1.32 $self->{onerror}->(node => $attr,
1420 wakaba 1.21 type => 'mismatched charset name:'.$ic.
1421 wakaba 1.32 ':'.$charset_value, ## TODO: This should be a |value| value.
1422     level => $self->{must_level});
1423 wakaba 1.21 }
1424     } else {
1425     ## NOTE: MUST, but not checkable, since the document is not originally
1426     ## in serialized form (or the parser does not preserve the input
1427     ## encoding information).
1428 wakaba 1.32 $self->{onerror}->(node => $attr,
1429     type => 'mismatched charset name::'.$charset_value, ## TODO: |value|
1430 wakaba 1.21 level => 'unsupported');
1431     }
1432    
1433     ## ISSUE: What is "valid character encoding name"? Syntactically valid?
1434     ## Syntactically valid and registered? What about x-charset names?
1435     unless (Message::Charset::Info::is_syntactically_valid_iana_charset_name
1436     ($charset_value)) {
1437 wakaba 1.32 $self->{onerror}->(node => $attr,
1438     type => 'charset:syntax error:'.$charset_value, ## TODO
1439     level => $self->{must_level});
1440 wakaba 1.21 }
1441    
## Known charset: the name must carry the "preferred" flag, and a
## good-level message is issued if it does not carry the "registered"
## flag.  Unknown charset: only a good-level message is issued.
1442     if ($charset) {
1443     ## ISSUE: What is "the preferred name for that encoding" (for a charset
1444     ## with no "preferred MIME name" label)?
1445     my $charset_status = $charset->{iana_names}->{$charset_value} || 0;
1446     if (($charset_status &
1447     Message::Charset::Info::PREFERRED_CHARSET_NAME ())
1448     != Message::Charset::Info::PREFERRED_CHARSET_NAME ()) {
1449 wakaba 1.32 $self->{onerror}->(node => $attr,
1450 wakaba 1.21 type => 'charset:not preferred:'.
1451 wakaba 1.32 $charset_value, ## TODO
1452     level => $self->{must_level});
1453 wakaba 1.21 }
1454     if (($charset_status &
1455     Message::Charset::Info::REGISTERED_CHARSET_NAME ())
1456     != Message::Charset::Info::REGISTERED_CHARSET_NAME ()) {
1457     if ($charset_value =~ /^x-/) {
1458 wakaba 1.32 $self->{onerror}->(node => $attr,
1459     type => 'charset:private:'.$charset_value, ## TODO
1460 wakaba 1.21 level => $self->{good_level});
1461     } else {
1462 wakaba 1.32 $self->{onerror}->(node => $attr,
1463 wakaba 1.21 type => 'charset:not registered:'.
1464 wakaba 1.32 $charset_value, ## TODO
1465 wakaba 1.21 level => $self->{good_level});
1466     }
1467     }
1468     } elsif ($charset_value =~ /^x-/) {
1469 wakaba 1.32 $self->{onerror}->(node => $attr,
1470     type => 'charset:private:'.$charset_value, ## TODO
1471 wakaba 1.21 level => $self->{good_level});
1472     } else {
1473 wakaba 1.32 $self->{onerror}->(node => $attr,
1474     type => 'charset:not registered:'.$charset_value, ## TODO
1475 wakaba 1.21 level => $self->{good_level});
1476     }
1477    
## The attribute is flagged by the |manakai_has_reference| user data
## (presumably set by the parser when the value was written with a
## character reference -- TODO confirm).
1478 wakaba 1.32 if ($attr->get_user_data ('manakai_has_reference')) {
1479     $self->{onerror}->(node => $attr,
1480 wakaba 1.22 type => 'character reference in charset',
1481     level => $self->{must_level});
1482     }
1483 wakaba 1.32 }; # $check_charset
1484    
1485     ## TODO: metadata conformance
1486    
1487     ## TODO: pragma conformance
1488     if (defined $http_equiv_attr) { ## An enumerated attribute
1489     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
1490     if ({
1491     'refresh' => 1,
1492     'default-style' => 1,
1493     }->{$keyword}) {
1494     #
1495 wakaba 1.33
1496     ## TODO: More than one occurrence is a MUST-level error (revision 1180).
1497 wakaba 1.32 } elsif ($keyword eq 'content-type') {
1498 wakaba 1.58 ## TODO: refs in "text/html; charset=" are not disallowed since rev.1275.
1499 wakaba 1.33
1500 wakaba 1.32 $check_charset_decl->();
1501     if ($content_attr) {
1502     my $content = $content_attr->value;
1503 wakaba 1.58 if ($content =~ m!^[Tt][Ee][Xx][Tt]/[Hh][Tt][Mm][Ll];
1504     [\x09-\x0D\x20]*[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
1505     =(.+)\z!sx) {
1506 wakaba 1.32 $check_charset->($content_attr, $1);
1507     } else {
1508     $self->{onerror}->(node => $content_attr,
1509     type => 'meta content-type syntax error',
1510     level => $self->{must_level});
1511     }
1512     }
1513     } else {
1514     $self->{onerror}->(node => $http_equiv_attr,
1515     type => 'enumerated:invalid');
1516     }
1517     }
1518    
1519     if (defined $charset_attr) {
1520     $check_charset_decl->();
1521     $check_charset->($charset_attr, $charset_attr->value);
1522 wakaba 1.1 }
1523     },
1524     };
1525    
## Checker for the HTML |style| element.
##
## When the |type| attribute is missing or is |text/css| (ASCII
## case-insensitive, optionally surrounded by folded whitespace around the
## type, the slash and the subtype), child elements are rejected and the
## collected text is handed to |$self->{onsubdoc}| as a |text/css|
## subdocument at the end of the element.  For any other |type|, child
## elements are tolerated and an |unsupported|-level |style:<type>|
## message is reported instead.
$Element->{$HTML_NS}->{style} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  %HTMLChecker,
  check_attrs => $GetHTMLAttrsChecker->({
    type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
    media => $HTMLMQAttrChecker,
    scoped => $GetHTMLBooleanAttrChecker->('scoped'),
    ## NOTE: |title| has special semantics for |style|s, but is syntactically
    ## not different
  }, {
    %HTMLAttrStatus,
    dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    media => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    scoped => FEATURE_HTML5_DEFAULT,
    title => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    type => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Start with an empty buffer so that an element without any text
    ## child still yields a defined string for |onsubdoc| in |check_end|.
    $element_state->{text} = '';

    ## NOTE: |html:style| itself has no conformance criteria on content model.
    my $type = $item->{node}->get_attribute_ns (undef, 'type');
    if (not defined $type or
        $type =~ m[\A(?>(?>\x0D\x0A)?[\x09\x20])*[Tt][Ee][Xx][Tt](?>(?>\x0D\x0A)?[\x09\x20])*/(?>(?>\x0D\x0A)?[\x09\x20])*[Cc][Ss][Ss](?>(?>\x0D\x0A)?[\x09\x20])*\z]) {
      $element_state->{allow_element} = 0;
      $element_state->{style_type} = 'text/css';
    } else {
      $element_state->{allow_element} = 1; # unknown
      $element_state->{style_type} = $type; ## TODO: $type normalization
    }
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($element_state->{allow_element}) {
      ## Unknown styling language: child elements cannot be judged.
    } else {
      $self->{onerror}->(node => $child_el, type => 'element not allowed');
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    ## Accumulate the style sheet source for |check_end|.
    $element_state->{text} .= $child_node->text_content;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if ($element_state->{style_type} eq 'text/css') {
      $self->{onsubdoc}->({s => $element_state->{text},
                           container_node => $item->{node},
                           media_type => 'text/css', is_char_string => 1});
    } else {
      $self->{onerror}->(node => $item->{node}, level => 'unsupported',
                         type => 'style:'.$element_state->{style_type});
    }

    $HTMLChecker{check_end}->(@_);
  },
};
1592 wakaba 1.25 ## ISSUE: Relationship to significant content check?
1593 wakaba 1.1
## |body| element: handlers come from |%HTMLProseContentChecker|; only the
## feature status and the attribute status table are specific to it.
$Element->{$HTML_NS}{body} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    ## Attributes that all carry the deprecated M12N status.
    (map { ($_ => FEATURE_M12N10_REC_DEPRECATED) }
         qw/alink background bgcolor link text vlink/),
    lang     => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    onload   => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onunload => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  }),
};
1611    
## |section| element: HTML5 status, handlers from |%HTMLProseContentChecker|.
$Element->{$HTML_NS}{section} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLProseContentChecker,
};
1616    
## |nav| element: HTML5 status, handlers from |%HTMLProseContentChecker|.
$Element->{$HTML_NS}{nav} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLProseContentChecker,
};
1621    
## |article| element: HTML5 status, handlers from |%HTMLProseContentChecker|.
$Element->{$HTML_NS}{article} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLProseContentChecker,
};
1626    
## |blockquote| element: handlers from |%HTMLProseContentChecker|, a URI
## checker for |cite|, and a |check_start| hook that flags the |cite|
## attribute in |uri_info| as a "cite" URI.
$Element->{$HTML_NS}{blockquote} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  %HTMLProseContentChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    {cite => $HTMLURIAttrChecker},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align   => FEATURE_HTML2X_RFC,
      cite    => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang    => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
  check_start => sub {
    my (undef, undef, $element_state) = @_;
    $element_state->{uri_info}->{cite}->{type}->{cite} = 1;
  },
};
1646    
## |aside| element: HTML5 status, handlers from |%HTMLProseContentChecker|.
$Element->{$HTML_NS}{aside} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLProseContentChecker,
};
1651    
## |h1| element: handlers from |%HTMLPhrasingContentChecker|.  Starting the
## element raises the |has_hn| flag, which the |header| checker reads.
$Element->{$HTML_NS}{h1} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align   => FEATURE_M12N10_REC_DEPRECATED,
    lang    => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
  }),
  check_start => sub {
    my $self = $_[0];
    $self->{flag}->{has_hn} = 1;
  },
};
1667    
## |h2| .. |h6| each get their own shallow copy of the |h1| definition.
for my $level (2 .. 6) {
  $Element->{$HTML_NS}{'h' . $level} = {%{$Element->{$HTML_NS}{h1}}};
}
1677 wakaba 1.1
1678 wakaba 1.29 ## TODO: Explicit sectioning is "encouraged".
1679    
## |header| element.
##
## While the element is open, |header|, |footer| and everything listed in
## |$HTMLSectioningContent| are added to the minus (forbidden) elements.
## The |has_hn| flag is reset on entry and must have been raised again by
## the time the element ends; otherwise |element missing:hn| is reported.
## The flag value seen on entry is restored on exit if nothing raised it.
$Element->{$HTML_NS}{header} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLProseContentChecker,
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my $forbidden = {$HTML_NS => {header => 1, footer => 1}};
    $self->_add_minus_elements
        ($element_state, $forbidden, $HTMLSectioningContent);
    $element_state->{has_hn_original} = $self->{flag}->{has_hn};
    $self->{flag}->{has_hn} = 0;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);
    $self->{onerror}->(node => $item->{node},
                       type => 'element missing:hn')
        if not $self->{flag}->{has_hn};
    ## Fall back to the value saved on entry when no heading was seen.
    $self->{flag}->{has_hn} = $element_state->{has_hn_original}
        unless $self->{flag}->{has_hn};

    $HTMLProseContentChecker{check_end}->(@_);
  },
  ## ISSUE: <header><del><h1>...</h1></del></header> is conforming?
};
1704    
## |footer| element: while it is open, |footer| itself plus the elements in
## |$HTMLSectioningContent| and |$HTMLHeadingContent| are added to the
## minus (forbidden) elements; they are removed again when it ends.
$Element->{$HTML_NS}{footer} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLProseContentChecker,
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my $forbidden = {$HTML_NS => {footer => 1}};
    $self->_add_minus_elements
        ($element_state, $forbidden,
         $HTMLSectioningContent, $HTMLHeadingContent);
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    $HTMLProseContentChecker{check_end}->(@_);
  },
};
1722    
## |address| element: handlers from |%HTMLProseContentChecker|; while it is
## open, |footer|, |address| and the elements in |$HTMLSectioningContent|
## and |$HTMLHeadingContent| are added to the minus (forbidden) elements.
$Element->{$HTML_NS}{address} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align   => FEATURE_HTML2X_RFC,
    lang    => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
    sdapref => FEATURE_HTML20_RFC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my $forbidden = {$HTML_NS => {footer => 1, address => 1}};
    $self->_add_minus_elements
        ($element_state, $forbidden,
         $HTMLSectioningContent, $HTMLHeadingContent);
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    $HTMLProseContentChecker{check_end}->(@_);
  },
};
1747    
## |p| element: handlers from |%HTMLPhrasingContentChecker| plus its own
## attribute status table.
$Element->{$HTML_NS}{p} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align   => FEATURE_M12N10_REC_DEPRECATED,
    lang    => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
  }),
};
1759    
## |hr| element: handlers from |%HTMLEmptyChecker| plus its own attribute
## status table.
$Element->{$HTML_NS}{hr} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    ## Attributes that all carry the deprecated M12N status.
    (map { ($_ => FEATURE_M12N10_REC_DEPRECATED) }
         qw/align noshade size width/),
    lang    => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    sdapref => FEATURE_HTML20_RFC,
  }),
};
1774    
## |br| element: handlers from |%HTMLEmptyChecker|.  Unlike most elements
## here it does not pull in |%HTMLM12NCommonAttrStatus|; the individual
## attribute statuses are listed explicitly instead.
$Element->{$HTML_NS}{br} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    class   => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    clear   => FEATURE_M12N10_REC_DEPRECATED,
    id      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    sdapref => FEATURE_HTML20_RFC,
    style   => FEATURE_XHTML10_REC,
    title   => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  }),
  ## NOTE: Blank line MUST NOT be used for presentation purpose.
  ## (This requirement is semantic so that we cannot check.)
};
1790    
## |dialog| element: children must be strictly alternating |dt| / |dd|
## pairs.  |$element_state->{phase}| records which of the two is expected
## next ('before dt' or 'before dd').
$Element->{$HTML_NS}{dialog} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLChecker,
  check_start => sub {
    my (undef, undef, $element_state) = @_;
    $element_state->{phase} = 'before dt';
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } else {
      ## Per phase: the expected child, the phase entered once it is seen,
      ## and the error reported when the other member of the pair shows up.
      my $rule = {
        'before dt' => {want => 'dt', next => 'before dd',
                        missing => 'ps element missing:dt'},
        'before dd' => {want => 'dd', next => 'before dt',
                        missing => 'ps element missing:dd'},
      }->{$element_state->{phase}}
          or die "check_child_element: Bad |dialog| phase: $element_state->{phase}";

      my $is_dt_or_dd = ($child_nsuri eq $HTML_NS
                         && ($child_ln eq 'dt' || $child_ln eq 'dd'));
      if ($is_dt_or_dd and $child_ln eq $rule->{want}) {
        $element_state->{phase} = $rule->{next};
      } elsif ($is_dt_or_dd) {
        ## The partner element is missing; the phase stays as it is.
        $self->{onerror}->(node => $child_el, type => $rule->{missing});
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed');
      }
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node, type => 'character not allowed')
        if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## A trailing |dt| without its |dd|.
    $self->{onerror}->(node => $item->{node},
                       type => 'child element missing:dd')
        if $element_state->{phase} eq 'before dd';

    $HTMLChecker{check_end}->(@_);
  },
};
1847    
## |pre| element: handlers from |%HTMLPhrasingContentChecker| plus its own
## attribute status table.
$Element->{$HTML_NS}{pre} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang    => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
    width   => FEATURE_M12N10_REC_DEPRECATED,
  }),
};
1859    
## |ol| element: only HTML |li| children (or elements currently in the
## plus set) are accepted, and significant text is rejected.
$Element->{$HTML_NS}{ol} = {
  %HTMLChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    start    => $HTMLIntegerAttrChecker,
    reversed => $GetHTMLBooleanAttrChecker->('reversed'),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align    => FEATURE_HTML2X_RFC,
    compact  => FEATURE_M12N10_REC_DEPRECATED,
    lang     => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    reversed => FEATURE_HTML5_DEFAULT,
    sdaform  => FEATURE_HTML20_RFC,
    ## Previously: FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC_DEPRECATED
    start    => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    type     => FEATURE_M12N10_REC_DEPRECATED,
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif (not $self->{plus_elements}->{$child_nsuri}->{$child_ln}
             and not ($child_nsuri eq $HTML_NS and $child_ln eq 'li')) {
      $self->{onerror}->(node => $child_el, type => 'element not allowed');
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node, type => 'character not allowed')
        if $has_significant;
  },
};
1900    
## |ul| element: starts as a copy of the |ol| definition, then replaces the
## status and the attribute checker (no |start| / |reversed| here).
$Element->{$HTML_NS}{ul} = {
  %{$Element->{$HTML_NS}{ol}},
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align   => FEATURE_HTML2X_RFC,
    compact => FEATURE_M12N10_REC_DEPRECATED,
    lang    => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
    type    => FEATURE_M12N10_REC_DEPRECATED,
  }),
};
1914    
## |dir| element: starts as a copy of the |ul| definition, marked with the
## deprecated M12N status and given its own attribute status table.
$Element->{$HTML_NS}{dir} = {
  ## TODO: %block; is not allowed [HTML4] ## TODO: Empty list allowed?
  %{$Element->{$HTML_NS}{ul}},
  status => FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align   => FEATURE_HTML2X_RFC,
    compact => FEATURE_M12N10_REC_DEPRECATED,
    lang    => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
    sdapref => FEATURE_HTML20_RFC,
  }),
};
1929    
## |li| element.
##
## The |value| attribute is reported as unsupported unless the parent
## element is an HTML |ol|, and is then always checked as an integer.
## Child checks are delegated to the phrasing checker while the |in_menu|
## flag is set and to the prose checker otherwise.
$Element->{$HTML_NS}{li} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    value => sub {
      my ($self, $attr) = @_;
      my $parent = $attr->owner_element->manakai_parent_element;
      if (defined $parent) {
        my $ns = $parent->namespace_uri;
        my $ln = $parent->manakai_local_name;
        my $parent_is_ol = (defined $ns && $ns eq $HTML_NS && $ln eq 'ol');
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute')
            unless $parent_is_ol;
      }
      $HTMLIntegerAttrChecker->($self, $attr);
    }, ## TODO: test
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align   => FEATURE_HTML2X_RFC,
    lang    => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
    type    => FEATURE_M12N10_REC_DEPRECATED,
    ## Previously: FEATURE_HTML5_DEFAULT | FEATURE_XHTMLBASIC11_CR |
    ##             FEATURE_M12N10_REC_DEPRECATED
    value   => FEATURE_HTML5_DEFAULT | FEATURE_XHTMLBASIC11_CR |
               FEATURE_M12N10_REC,
  }),
  check_child_element => sub {
    my $self = $_[0];
    my $model = $self->{flag}->{in_menu}
        ? \%HTMLPhrasingContentChecker : \%HTMLProseContentChecker;
    $model->{check_child_element}->(@_);
  },
  check_child_text => sub {
    my $self = $_[0];
    my $model = $self->{flag}->{in_menu}
        ? \%HTMLPhrasingContentChecker : \%HTMLProseContentChecker;
    $model->{check_child_text}->(@_);
  },
};
1978    
## |dl| element: children are groups of one or more |dt| followed by one or
## more |dd|.  |$element_state->{phase}| is one of 'before dt' (nothing
## seen yet), 'in dts' or 'in dds'.
$Element->{$HTML_NS}{dl} = {
  %HTMLChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    compact => FEATURE_M12N10_REC_DEPRECATED,
    lang    => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
    sdapref => FEATURE_HTML20_RFC,
    type    => FEATURE_M12N10_REC_DEPRECATED,
  }),
  check_start => sub {
    my (undef, undef, $element_state) = @_;
    $element_state->{phase} = 'before dt';
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } else {
      my $phase = $element_state->{phase};
      die "check_child_element: Bad |dl| phase: $element_state->{phase}"
          unless $phase eq 'in dds' or $phase eq 'in dts'
              or $phase eq 'before dt';

      my $in_html = ($child_nsuri eq $HTML_NS);
      if ($in_html and $child_ln eq 'dt') {
        ## A |dt| is acceptable in every phase and opens/continues a run.
        $element_state->{phase} = 'in dts';
      } elsif ($in_html and $child_ln eq 'dd') {
        ## A |dd| before any |dt| lacks its term.
        $self->{onerror}->(node => $child_el,
                           type => 'ps element missing:dt')
            if $phase eq 'before dt';
        $element_state->{phase} = 'in dds';
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed');
      }
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node, type => 'character not allowed')
        if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## The last group ended with |dt|s that have no |dd|.
    $self->{onerror}->(node => $item->{node},
                       type => 'child element missing:dd')
        if $element_state->{phase} eq 'in dts';

    $HTMLChecker{check_end}->(@_);
  },
};
2050    
## |dt| element: handlers from |%HTMLPhrasingContentChecker| plus its own
## attribute status table.
$Element->{$HTML_NS}{dt} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang    => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
  }),
};
2061    
## |dd| element: handlers from |%HTMLProseContentChecker| plus its own
## attribute status table.
$Element->{$HTML_NS}{dd} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang    => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
  }),
};
2072    
## Checker for the HTML |a| element.
##
## |check_attrs| validates every attribute, records the |a|-specific ones
## that are present, and then enforces that |target|, |ping|, |rel|,
## |media|, |hreflang| and |type| only appear together with |href|.  When
## |href| is present the |in_a_href| flag is raised for the duration of the
## element; |check_end| clears it again unless it was already set on entry.
## While the element is open, everything in |$HTMLInteractiveContent| is
## added to the minus (forbidden) elements.
$Element->{$HTML_NS}->{a} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Both lookup tables are the same for every attribute of the element,
    ## so they are built once per call rather than once per attribute.
    my %a_attr_checker = (
      accesskey => $HTMLAccesskeyAttrChecker,
      target => $HTMLTargetAttrChecker,
      href => $HTMLURIAttrChecker,
      ping => $HTMLSpaceURIsAttrChecker,
      rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
      media => $HTMLMQAttrChecker,
      hreflang => $HTMLLanguageTagAttrChecker,
      type => $HTMLIMTAttrChecker,
    );
    my %a_attr_status = (
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      accesskey => FEATURE_M12N10_REC,
      charset => FEATURE_M12N10_REC,
      coords => FEATURE_M12N10_REC,
      cryptopts => FEATURE_RFC2659,
      dn => FEATURE_RFC2659,
      href => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      hreflang => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      media => FEATURE_HTML5_DEFAULT,
      methods => FEATURE_HTML20_RFC,
      name => FEATURE_M12N10_REC_DEPRECATED,
      nonce => FEATURE_RFC2659,
      onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      ping => FEATURE_HTML5_DEFAULT,
      rel => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      rev => FEATURE_M12N10_REC,
      sdapref => FEATURE_HTML20_RFC,
      shape => FEATURE_M12N10_REC,
      tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      target => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      type => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      urn => FEATURE_HTML20_RFC,
    );

    ## |a|-specific attributes (those with an entry in |%a_attr_checker|)
    ## found on this element, keyed by local name.
    my %attr;
    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;
      my $checker;
      if ($attr_ns eq '') {
        $checker = $a_attr_checker{$attr_ln};
        if ($checker) {
          $attr{$attr_ln} = $attr;
        } else {
          $checker = $HTMLAttrChecker->{$attr_ln};
        }
      }
      ## Fall back to the namespace-wide checkers.
      $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
          || $AttrChecker->{$attr_ns}->{''};

      my $status = $a_attr_status{$attr_ln};

      if ($checker) {
        $checker->($self, $attr, $item, $element_state) if ref $checker;
      } elsif ($attr_ns eq '' and not $status) {
        #
      } else {
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute');
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }

      if ($attr_ns eq '') {
        $self->_attr_status_info ($attr, $status);
      }
    }

    $element_state->{in_a_href_original} = $self->{flag}->{in_a_href};
    if (defined $attr{href}) {
      $self->{has_hyperlink_element} = 1;
      $self->{flag}->{in_a_href} = 1;
    } else {
      ## Without |href| the link-related attributes are not allowed.
      for my $name (qw/target ping rel media hreflang type/) {
        if (defined $attr{$name}) {
          $self->{onerror}->(node => $attr{$name},
                             type => 'attribute not allowed');
        }
      }
    }

    $element_state->{uri_info}->{href}->{type}->{hyperlink} = 1;
  },
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements ($element_state, $HTMLInteractiveContent);
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);
    ## Keep the flag when an enclosing element had already set it.
    delete $self->{flag}->{in_a_href}
        unless $element_state->{in_a_href_original};

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
2175    
## |q| element: handlers from |%HTMLPhrasingContentChecker|, a URI checker
## for |cite|, and a |check_start| hook that flags the |cite| attribute in
## |uri_info| as a "cite" URI.
$Element->{$HTML_NS}{q} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    {cite => $HTMLURIAttrChecker},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      cite    => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang    => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      sdapref => FEATURE_HTML2X_RFC,
      sdasuff => FEATURE_HTML2X_RFC,
    },
  ),
  check_start => sub {
    my (undef, undef, $element_state) = @_;
    $element_state->{uri_info}->{cite}->{type}->{cite} = 1;
  },
};
2195    
## |cite| element: handlers from |%HTMLPhrasingContentChecker| plus its own
## attribute status table.
$Element->{$HTML_NS}{cite} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang    => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
  }),
};
2206    
## |em| element: handlers from |%HTMLPhrasingContentChecker| plus its own
## attribute status table.
$Element->{$HTML_NS}{em} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang    => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
  }),
};
2217    
## |strong| element: handlers from |%HTMLPhrasingContentChecker| plus its
## own attribute status table.
$Element->{$HTML_NS}{strong} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang    => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
  }),
};
2228    
## |small| element: handlers from |%HTMLPhrasingContentChecker| plus its
## own attribute status table.
$Element->{$HTML_NS}{small} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  }),
};
2238    
## |big| element: handlers from |%HTMLPhrasingContentChecker|; the element
## status is M12N-only (no HTML5 flag).
$Element->{$HTML_NS}{big} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  }),
};
2248    
## |mark| element: HTML5 status, handlers from |%HTMLPhrasingContentChecker|.
$Element->{$HTML_NS}{mark} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLPhrasingContentChecker,
};
2253    
## |dfn| element.
##
## Nested |dfn| is forbidden while the element is open.  On start the
## defining term is derived and recorded in |$self->{term}|; a term that
## was already recorded is reported as |duplicate term|.
##
## Term derivation: the |title| attribute if present; otherwise the |title|
## of an HTML |abbr| child, provided no second element child and no
## non-whitespace text / CDATA child is encountered while scanning;
## otherwise the element's text content.
$Element->{$HTML_NS}{dfn} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements ($element_state, {$HTML_NS => {dfn => 1}});

    my $node = $item->{node};
    my $term = $node->get_attribute_ns (undef, 'title');
    unless (defined $term) {
      CHILD: for my $child (@{$node->child_nodes}) {
        my $kind = $child->node_type;
        if ($kind == 1) { # ELEMENT_NODE
          if (defined $term) {
            ## An element after the term-bearing |abbr|: give up.
            undef $term;
            last CHILD;
          }
          next CHILD unless $child->manakai_local_name eq 'abbr';
          my $nsuri = $child->namespace_uri;
          next CHILD unless (defined $nsuri and $nsuri eq $HTML_NS);
          my $title_attr = $child->get_attribute_node_ns (undef, 'title');
          $term = $title_attr->value if $title_attr;
        } elsif ($kind == 3 or $kind == 4) {
          ## TEXT_NODE or CDATA_SECTION_NODE
          ## Inter-element whitespace is ignored.
          next CHILD if $child->data =~ /\A[\x09-\x0D\x20]+\z/;
          undef $term;
          last CHILD;
        }
      }
      $term = $node->text_content unless defined $term;
    }

    my $seen = $self->{term}->{$term};
    if ($seen) {
      $self->{onerror}->(node => $node, type => 'duplicate term');
      push @$seen, $node;
    } else {
      $self->{term}->{$term} = [$node];
    }
    ## ISSUE: The HTML5 algorithm does not work with |ruby| unless |dfn|
    ## has |title|.
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
2312    
## |abbr|: checked with the phrasing content checker; flagged as both an
## HTML5 and an M12N 1.0 feature.
$Element->{$HTML_NS}{abbr} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, ## No element-specific attribute checker.
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
};
2322    
## |acronym|: checked with the phrasing content checker; the element
## itself carries only the M12N 1.0 status flag.
$Element->{$HTML_NS}{acronym} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, ## No element-specific attribute checker.
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
};
2332    
## |time|: phrasing content.  The date or time string is taken from the
## |datetime| attribute if present, otherwise from the text content of
## the element, and is validated in |check_end|.
$Element->{$HTML_NS}->{time} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->({
    datetime => sub { 1 }, # checked in |check_end|
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    datetime => FEATURE_HTML5_DEFAULT,
  }),
  ## TODO: Write tests
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Choose the input string, the node errors are reported against and
    ## the kind of white space that may surround the components.
    my $attr = $item->{node}->get_attribute_node_ns (undef, 'datetime');
    my $input;
    my $reg_sp;
    my $input_node;
    if ($attr) {
      $input = $attr->value;
      $reg_sp = qr/[\x09-\x0D\x20]*/;
      $input_node = $attr;
    } else {
      $input = $item->{node}->text_content;
      $reg_sp = qr/\p{Zs}*/;
      $input_node = $item->{node};

      ## ISSUE: What is the definition for "successfully extracts a date
      ## or time"?  If the algorithm says the string is invalid but
      ## return some date or time, is it "successfully"?
    }

    ## Report an error of the given type against the input node.
    my $report = sub {
      $self->{onerror}->(node => $input_node, type => $_[0]);
    };

    ## NOTE: |$reg_sp| is interpolated so that the white space accepted
    ## depends on where the string comes from.  Previously the pattern
    ## hard-coded the attribute white space class and |$reg_sp| was unused.
    if ($input =~ /
      \A
      $reg_sp
      ([0-9]+) # 1
      (?>
        -([0-9]+) # 2
        -([0-9]+) # 3
        $reg_sp
        (?>
          T
          $reg_sp
        )?
        ([0-9]+) # 4
        :([0-9]+) # 5
        (?>
          :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6
        )?
        $reg_sp
        (?>
          Z
          $reg_sp
        |
          [+-]([0-9]+):([0-9]+) # 7, 8
          $reg_sp
        )?
        \z
      |
        :([0-9]+) # 9
        (?>
          :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10
        )?
        $reg_sp
        \z
      )
    /x) {
      ## Copy the captures at once so that the checks below do not depend
      ## on the match variables.  |$first| is the year of a date and time
      ## string or the hour of a time-only string.
      my ($first, $month, $day, $dt_hour, $dt_minute, $dt_second,
          $tz_hour, $tz_minute, $t_minute, $t_second)
          = ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10);

      my $hour;
      my $minute;
      my $second;
      if (defined $month) { ## YYYY-MM-DD T? hh:mm
        if (length $first != 4 or length $month != 2 or length $day != 2 or
            length $dt_hour != 2 or length $dt_minute != 2) {
          $report->('dateortime:syntax error');
        }

        if (1 <= $month and $month <= 12) {
          ## Index 0 is a placeholder; February allows 29 here and the
          ## leap year rule is applied separately below.
          $report->('datetime:bad day')
              if $day < 1 or
                  $day > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
                      ->[$month];
          $report->('datetime:bad day')
              if $month == 2 and $day == 29 and
                  not ($first % 400 == 0 or
                       ($first % 4 == 0 and $first % 100 != 0));
        } else {
          $report->('datetime:bad month');
        }

        ($hour, $minute, $second) = ($dt_hour, $dt_minute, $dt_second);

        if (defined $tz_hour) { ## [+-]hh:mm
          if (length $tz_hour != 2 or length $tz_minute != 2) {
            $report->('dateortime:syntax error');
          }

          $report->('datetime:bad timezone hour') if $tz_hour > 23;
          $report->('datetime:bad timezone minute') if $tz_minute > 59;
        }
      } else { ## hh:mm
        if (length $first != 2 or length $t_minute != 2) {
          $report->('dateortime:syntax error');
        }

        ($hour, $minute, $second) = ($first, $t_minute, $t_second);
      }

      $report->('datetime:bad hour') if $hour > 23;
      $report->('datetime:bad minute') if $minute > 59;

      if (defined $second) { ## s
        ## NOTE: The integer part of the second component does not have
        ## to be two digits long.

        ## A second component starting with "." has no integer part.
        if (substr ($second, 0, 1) eq '.') {
          $report->('dateortime:syntax error');
        }

        $report->('datetime:bad second') if $second >= 60;
      }
    } else {
      $report->('dateortime:syntax error');
    }

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
2469    
## |meter|: HTML5-only phrasing content.  Each of its six attributes is
## checked as a floating point number with a predicate that always
## accepts.
## TODO: "The recommended way of giving the value is to include it as
## contents of the element"
$Element->{$HTML_NS}{meter} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
      min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
      low => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
      high => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
      max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
      optimum => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
    },
    {
      %HTMLAttrStatus,
      high => FEATURE_HTML5_DEFAULT,
      low => FEATURE_HTML5_DEFAULT,
      max => FEATURE_HTML5_DEFAULT,
      min => FEATURE_HTML5_DEFAULT,
      optimum => FEATURE_HTML5_DEFAULT,
      value => FEATURE_HTML5_DEFAULT,
    },
  ),
};
2490    
## |progress|: HTML5-only phrasing content.  The predicate for |value|
## accepts numbers >= 0 and the predicate for |max| accepts numbers > 0.
## TODO: recommended to use content
$Element->{$HTML_NS}{progress} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      value => $GetHTMLFloatingPointNumberAttrChecker->(sub { $_[0] >= 0 }),
      max => $GetHTMLFloatingPointNumberAttrChecker->(sub { $_[0] > 0 }),
    },
    {
      %HTMLAttrStatus,
      max => FEATURE_HTML5_DEFAULT,
      value => FEATURE_HTML5_DEFAULT,
    },
  ),
};
2503    
## |code|: checked with the phrasing content checker; |sdaform| is
## flagged with the HTML 2.0 RFC status.
$Element->{$HTML_NS}{code} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, ## No element-specific attribute checker.
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
2514    
## |var|: checked with the phrasing content checker; |sdaform| is flagged
## with the HTML 2.0 RFC status.
$Element->{$HTML_NS}{var} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, ## No element-specific attribute checker.
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
2525    
## |samp|: checked with the phrasing content checker; |sdaform| is
## flagged with the HTML 2.0 RFC status.
$Element->{$HTML_NS}{samp} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, ## No element-specific attribute checker.
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
2536    
## |kbd|: checked with the phrasing content checker; |sdaform| is flagged
## with the HTML 2.0 RFC status.
$Element->{$HTML_NS}{kbd} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, ## No element-specific attribute checker.
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
2547    
## |sub|: checked with the phrasing content checker; |sdapref| is flagged
## with the HTML 2.x RFC status.
$Element->{$HTML_NS}{sub} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, ## No element-specific attribute checker.
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      sdapref => FEATURE_HTML2X_RFC,
    },
  ),
};

## |sup| shares the very same definition hash as |sub|.
$Element->{$HTML_NS}{sup} = $Element->{$HTML_NS}{sub};
2560 wakaba 1.1
## |span|: checked with the phrasing content checker.  The |data*|
## attributes carry the HTML4 "reserved" status.
$Element->{$HTML_NS}{span} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, ## No element-specific attribute checker.
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      datafld => FEATURE_HTML4_REC_RESERVED,
      dataformatas => FEATURE_HTML4_REC_RESERVED,
      datasrc => FEATURE_HTML4_REC_RESERVED,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML2X_RFC,
    },
  ),
};
2574    
## |i|: checked with the phrasing content checker; |sdaform| is flagged
## with the HTML 2.0 RFC status.
$Element->{$HTML_NS}{i} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, ## No element-specific attribute checker.
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};

## |b| shares the very same definition hash as |i|.
$Element->{$HTML_NS}{b} = $Element->{$HTML_NS}{i};
2587    
## |tt|: checked with the phrasing content checker; the element itself
## carries only the M12N 1.0 status flag.
$Element->{$HTML_NS}{tt} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, ## No element-specific attribute checker.
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
2598 wakaba 1.51
## |s|: checked with the phrasing content checker; flagged as deprecated
## in M12N 1.0.
$Element->{$HTML_NS}{s} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, ## No element-specific attribute checker.
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
};

## |strike| and |u| share the very same definition hash as |s|.
$Element->{$HTML_NS}{strike} = $Element->{$HTML_NS}{s};

$Element->{$HTML_NS}{u} = $Element->{$HTML_NS}{s};
2612    
## |bdo|: checked with the phrasing content checker.  After the generic
## attribute check, a missing |dir| attribute is reported.
$Element->{$HTML_NS}{bdo} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    my $check_attrs = $GetHTMLAttrsChecker->(
      {}, ## No element-specific attribute checker.
      {
        %HTMLAttrStatus,
        class => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
        dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
        id => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
        style => FEATURE_XHTML10_REC,
        title => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
        lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
        sdapref => FEATURE_HTML2X_RFC,
        sdasuff => FEATURE_HTML2X_RFC,
      },
    );
    $check_attrs->($self, $item, $element_state);

    if (not $item->{node}->has_attribute_ns (undef, 'dir')) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing:dir');
    }
  },
  ## ISSUE: The spec does not directly say that |dir| is an enumerated
  ## attribute.
};
2636    
2637 wakaba 1.29 =pod
2638    
2639     ## TODO:
2640    
2641     +
2642     + <p>Partly because of the confusion described above, authors are
2643     + strongly recommended to always mark up all paragraphs with the
2644     + <code>p</code> element, and to not have any <code>ins</code> or
2645     + <code>del</code> elements that cross across any <span
2646     + title="paragraph">implied paragraphs</span>.</p>
2647     +
2648     (An informative note)
2649    
2650     <p><code>ins</code> elements should not cross <span
2651     + title="paragraph">implied paragraph</span> boundaries.</p>
2652     (normative)
2653    
2654     + <p><code>del</code> elements should not cross <span
2655     + title="paragraph">implied paragraph</span> boundaries.</p>
2656     (normative)
2657    
2658     =cut
2659    
## |ins|: checked with the transparent checker.  |cite| is checked as a
## URI and |datetime| with the datetime attribute checker.
$Element->{$HTML_NS}{ins} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      cite => $HTMLURIAttrChecker,
      datetime => $HTMLDatetimeAttrChecker,
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      cite => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      datetime => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Mark the URI given by |cite| as being of the "cite" type.
    $element_state->{uri_info}->{cite}->{type}->{cite} = 1;
  },
};
2679    
## |del|: checked with the transparent checker, with its own |check_end|
## that does not propagate the significant content flag.
$Element->{$HTML_NS}{del} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      cite => $HTMLURIAttrChecker,
      datetime => $HTMLDatetimeAttrChecker,
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      cite => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      datetime => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Mark the URI given by |cite| as being of the "cite" type.
    $element_state->{uri_info}->{cite}->{type}->{cite} = 1;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if ($element_state->{has_significant}) {
      ## NOTE: Significantness flag does not propagate.
    } elsif ($item->{transparent}) {
      ## Nothing is reported when the item is flagged transparent.
    } else {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{should_level},
                         type => 'no significant content');
    }
  },
};
2711    
## |figure|: prose content with at most one |legend|, which has to be
## either the first or the last child.
##
## State used in |$element_state|:
##   has_legend_at_first - true once a |legend| was seen before anything
##                         else
##   has_legend          - the latest |legend| element seen after other
##                         content
##   has_non_legend      - true while non-|legend| content has been seen
##                         since the latest |legend|
$Element->{$HTML_NS}{figure} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_FD,
  ## NOTE: legend, Prose | Prose, legend?
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    my $is_legend = ($child_nsuri eq $HTML_NS and $child_ln eq 'legend');
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      $element_state->{has_non_legend} = 1;
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed; nothing to check or record.
    } elsif ($is_legend) {
      if ($element_state->{has_legend_at_first}) {
        ## A leading |legend| exists: this one is superfluous.
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:figure legend',
                           level => $self->{must_level});
      } elsif ($element_state->{has_legend}) {
        ## The earlier |legend| turns out not to be the last child.
        $self->{onerror}->(node => $element_state->{has_legend},
                           type => 'element not allowed:figure legend',
                           level => $self->{must_level});
        $element_state->{has_legend} = $child_el;
      } elsif ($element_state->{has_non_legend}) {
        $element_state->{has_legend} = $child_el;
      } else {
        $element_state->{has_legend_at_first} = 1;
      }
      delete $element_state->{has_non_legend};
    } else {
      $HTMLProseContentChecker{check_child_element}->(@_);
      $element_state->{has_non_legend} = 1 unless $child_is_transparent;
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $element_state->{has_non_legend} = 1 if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    if ($element_state->{has_legend_at_first}) {
      ## A leading |legend| is fine whatever follows it.
    } elsif ($element_state->{has_legend}) {
      ## A non-leading |legend| must not be followed by other content.
      if ($element_state->{has_non_legend}) {
        $self->{onerror}->(node => $element_state->{has_legend},
                           type => 'element not allowed:figure legend',
                           level => $self->{must_level});
      }
    }

    $HTMLProseContentChecker{check_end}->(@_);
    ## ISSUE: |<figure><legend>aa</legend></figure>| should be an error?
  },
};
2770 wakaba 1.8 ## TODO: Test for <nest/> in <figure/>
2771 wakaba 1.1
## |img|: checked with the empty checker.  |alt| is recommended (a
## missing |alt| is reported at the "should" level) and |src| is required.
$Element->{$HTML_NS}{img} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    my $check_attrs = $GetHTMLAttrsChecker->(
      {
        alt => sub { }, ## NOTE: No syntactical requirement
        src => $HTMLURIAttrChecker,
        usemap => $HTMLUsemapAttrChecker,
        ismap => sub {
          my ($self, $attr, $parent_item) = @_;
          ## |ismap| is reported unless the |in_a_href| flag is set.
          unless ($self->{flag}->{in_a_href}) {
            $self->{onerror}->(node => $attr,
                               type => 'attribute not allowed:ismap',
                               level => $self->{must_level});
          }
          $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_item);
        },
        ## TODO: height
        ## TODO: width
      },
      {
        %HTMLAttrStatus,
        %HTMLM12NCommonAttrStatus,
        align => FEATURE_M12N10_REC_DEPRECATED,
        alt => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
        border => FEATURE_M12N10_REC_DEPRECATED,
        height => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
        hspace => FEATURE_M12N10_REC_DEPRECATED,
        ismap => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
        lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
        longdesc => FEATURE_M12N10_REC,
        name => FEATURE_M12N10_REC_DEPRECATED,
        sdapref => FEATURE_HTML20_RFC,
        src => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
        usemap => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
        vspace => FEATURE_M12N10_REC_DEPRECATED,
        width => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      },
    );
    $check_attrs->($self, $item, $element_state);

    my $img_el = $item->{node};
    if (not $img_el->has_attribute_ns (undef, 'alt')) {
      $self->{onerror}->(node => $img_el,
                         type => 'attribute missing:alt',
                         level => $self->{should_level});
    }
    if (not $img_el->has_attribute_ns (undef, 'src')) {
      $self->{onerror}->(node => $img_el,
                         type => 'attribute missing:src');
    }

    ## Record the type of each URI attribute.
    my $uri_info = $element_state->{uri_info} ||= {};
    $uri_info->{src}->{type}->{embedded} = 1;
    $uri_info->{lowsrc}->{type}->{embedded} = 1;
    $uri_info->{dynsrc}->{type}->{embedded} = 1;
    $uri_info->{longdesc}->{type}->{cite} = 1;
  },
};
2826    
## |iframe|: checked with the text checker; |src| is checked as a URI.
$Element->{$HTML_NS}{iframe} = {
  %HTMLTextChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  ## NOTE: Not part of M12N10 Strict
  check_attrs => $GetHTMLAttrsChecker->(
    {
      src => $HTMLURIAttrChecker,
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align => FEATURE_XHTML10_REC,
      class => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      frameborder => FEATURE_M12N10_REC,
      height => FEATURE_M12N10_REC,
      id => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      longdesc => FEATURE_M12N10_REC,
      marginheight => FEATURE_M12N10_REC,
      marginwidth => FEATURE_M12N10_REC,
      name => FEATURE_M12N10_REC_DEPRECATED,
      scrolling => FEATURE_M12N10_REC,
      src => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      title => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      width => FEATURE_M12N10_REC,
    },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Mark the URI given by |src| as being of the "embedded" type.
    $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
  },
};
2856    
## |embed|: checked with the empty checker.  Attributes are checked by
## hand rather than via |$GetHTMLAttrsChecker| because any attribute in
## the null namespace is accepted.  |src| is required.
$Element->{$HTML_NS}->{embed} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Status of the known attributes.  The table does not depend on the
    ## attribute being checked, so it is built once per call rather than
    ## once per attribute.
    my $attr_status = {
      %HTMLAttrStatus,
      height => FEATURE_HTML5_DEFAULT,
      src => FEATURE_HTML5_DEFAULT,
      type => FEATURE_HTML5_DEFAULT,
      width => FEATURE_HTML5_DEFAULT,
    };

    my $has_src;
    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;

      ## Choose the checker: element-specific checkers first, then the
      ## common HTML attribute checkers, then the namespace-wide table.
      my $checker;
      if ($attr_ns eq '') {
        if ($attr_ln eq 'src') {
          $checker = $HTMLURIAttrChecker;
          $has_src = 1;
        } elsif ($attr_ln eq 'type') {
          $checker = $HTMLIMTAttrChecker;
        } else {
          ## TODO: height
          ## TODO: width
          $checker = $HTMLAttrChecker->{$attr_ln}
              || sub { }; ## NOTE: Any local attribute is ok.
        }
      }
      $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
          || $AttrChecker->{$attr_ns}->{''};

      my $status = $attr_status->{$attr_ln};

      if ($checker) {
        $checker->($self, $attr, $item, $element_state);
      } elsif ($attr_ns eq '' and not $status) {
        #
      } else {
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute');
        ## ISSUE: No conformance criteria for global attributes in the spec
      }

      if ($attr_ns eq '') {
        $self->_attr_status_info ($attr, $status) if $status;
      }
    }

    unless ($has_src) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing:src');
    }

    ## Mark the URI given by |src| as being of the "embedded" type.
    $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
  },
};
2915    
2916 wakaba 1.49 ## TODO:
2917     ## {applet} FEATURE_M12N10_REC_DEPRECATED
2918     ## class, id, title, alt, archive, code, codebase, height, object, width name style,hspace,vspace(xhtml10)
2919    
## |object|: checked with the transparent checker.  Its content model is
## zero or more |param| elements followed by transparent (prose) content.
## At least one of |data| and |type| has to be specified.
$Element->{$HTML_NS}->{object} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    $GetHTMLAttrsChecker->({
      data => $HTMLURIAttrChecker,
      type => $HTMLIMTAttrChecker,
      usemap => $HTMLUsemapAttrChecker,
      ## TODO: width
      ## TODO: height
    }, {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align => FEATURE_XHTML10_REC,
      archive => FEATURE_M12N10_REC,
      border => FEATURE_XHTML10_REC,
      classid => FEATURE_M12N10_REC,
      codebase => FEATURE_M12N10_REC,
      codetype => FEATURE_M12N10_REC,
      data => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      datafld => FEATURE_HTML4_REC_RESERVED,
      dataformatas => FEATURE_HTML4_REC_RESERVED,
      datasrc => FEATURE_HTML4_REC_RESERVED,
      declare => FEATURE_M12N10_REC,
      height => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      hspace => FEATURE_XHTML10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      name => FEATURE_M12N10_REC,
      standby => FEATURE_M12N10_REC,
      tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      type => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      usemap => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      vspace => FEATURE_XHTML10_REC,
      width => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    })->($self, $item, $element_state);
    unless ($item->{node}->has_attribute_ns (undef, 'data')) {
      unless ($item->{node}->has_attribute_ns (undef, 'type')) {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:data|type');
      }
    }

    ## Record the type of each URI attribute.
    $element_state->{uri_info}->{data}->{type}->{embedded} = 1;
    $element_state->{uri_info}->{classid}->{type}->{embedded} = 1;
    $element_state->{uri_info}->{codebase}->{type}->{base} = 1;
    ## TODO: archive
    $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
  },
  ## NOTE: param*, transparent (Prose)
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      ## A forbidden child still counts as non-|param| content, so a
      ## later |param| is reported.  (This used to set |has_non_legend|,
      ## a flag that only the |figure| checker reads.)
      $element_state->{has_non_param} = 1;
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'param') {
      ## |param| is only allowed before any other content.
      if ($element_state->{has_non_param}) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:prose',
                           level => $self->{must_level});
      }
    } else {
      $HTMLProseContentChecker{check_child_element}->(@_);
      $element_state->{has_non_param} = 1;
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $element_state->{has_non_param} = 1;
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if ($element_state->{has_significant}) {
      ## Propagate the significant content flag to the parent state.
      $item->{real_parent_state}->{has_significant} = 1;
    } elsif ($item->{node}->manakai_parent_element) {
      ## NOTE: Transparent.
    } else {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{should_level},
                         type => 'no significant content');
    }
  },
  ## TODO: Tests for <nest/> in <object/>
};
3011 wakaba 1.41 ## ISSUE: Is |<menu><object data><li>aa</li></object></menu>| conforming?
3012     ## What about |<section><object data><style scoped></style>x</object></section>|?
3013     ## |<section><ins></ins><object data><style scoped></style>x</object></section>|?
3014 wakaba 1.1
## |param|: checked with the empty checker.  Both |name| and |value| are
## required; neither has a syntactical requirement.
$Element->{$HTML_NS}{param} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    my $check_attrs = $GetHTMLAttrsChecker->(
      {
        name => sub { },
        value => sub { },
      },
      {
        %HTMLAttrStatus,
        id => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
        name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
        type => FEATURE_M12N10_REC,
        value => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
        valuetype => FEATURE_M12N10_REC,
      },
    );
    $check_attrs->(@_);

    ## Report each required attribute that is missing, |name| first.
    for my $required (qw(name value)) {
      next if $item->{node}->has_attribute_ns (undef, $required);
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing:' . $required);
    }

    ## Mark the URI given by |value| as being of the "resource" type.
    $element_state->{uri_info}->{value}->{type}->{resource} = 1;
  },
};
3043    
## |video|: checked with the transparent checker.  |source| children are
## only allowed when there is no |src| attribute and only before any
## other content; without |src| at least one |source| is required.
##
## State used in |$element_state|:
##   allow_source - true while a |source| child is still allowed
##   has_source   - 1 once a |source| child was seen, -1 if one is
##                  required but has not been seen yet
$Element->{$HTML_NS}{video} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      src => $HTMLURIAttrChecker,
      ## TODO: start, loopstart, loopend, end
      ## ISSUE: they MUST be "value time offset"s.  Value?
      ## ISSUE: playcount has no conformance criteria
      autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
      controls => $GetHTMLBooleanAttrChecker->('controls'),
      poster => $HTMLURIAttrChecker,
      ## TODO: width, height
    },
    {
      %HTMLAttrStatus,
      autoplay => FEATURE_HTML5_LC,
      controls => FEATURE_HTML5_LC,
      end => FEATURE_HTML5_LC,
      height => FEATURE_HTML5_LC,
      loopend => FEATURE_HTML5_LC,
      loopstart => FEATURE_HTML5_LC,
      playcount => FEATURE_HTML5_LC,
      poster => FEATURE_HTML5_LC,
      src => FEATURE_HTML5_LC,
      start => FEATURE_HTML5_LC,
      width => FEATURE_HTML5_LC,
    },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{allow_source}
        = !$item->{node}->has_attribute_ns (undef, 'src');
    $element_state->{has_source} ||= $element_state->{allow_source} * -1;
      ## NOTE: It might be set true by |check_element|.

    ## Record the type of each URI attribute.
    $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
    $element_state->{uri_info}->{poster}->{type}->{embedded} = 1;
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    my $is_source = ($child_nsuri eq $HTML_NS and $child_ln eq 'source');
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      delete $element_state->{allow_source};
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed; nothing to check or record.
    } elsif ($is_source) {
      if (not $element_state->{allow_source}) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:prose',
                           level => $self->{must_level});
      }
      $element_state->{has_source} = 1;
    } else {
      ## Any other element ends the run of |source| children.
      delete $element_state->{allow_source};
      $HTMLProseContentChecker{check_child_element}->(@_);
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    ## Significant text ends the run of |source| children.
    delete $element_state->{allow_source} if $has_significant;
    $HTMLProseContentChecker{check_child_text}->(@_);
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if ($element_state->{has_source} == -1) {
      $self->{onerror}->(node => $item->{node},
                         type => 'element missing:source',
                         level => $self->{must_level});
    }

    ## The rest is the same as the end-of-element check for |object|.
    $Element->{$HTML_NS}->{object}->{check_end}->(@_);
  },
};
3120    
## The |audio| element.  It reuses every callback of |video| and
## overrides only the status and the attribute checker (no |poster|,
## |width| or |height|).
$Element->{$HTML_NS}->{audio} = {
  %{$Element->{$HTML_NS}->{video}},
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->({
    src => $HTMLURIAttrChecker,
    ## TODO: start, loopstart, loopend, end
    ## ISSUE: they MUST be "value time offset"s.  Value?
    ## ISSUE: playcount has no conformance criteria
    autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
    controls => $GetHTMLBooleanAttrChecker->('controls'),
  }, {
    %HTMLAttrStatus,
    ## Every |audio|-specific attribute has the same status.
    (map { ($_ => FEATURE_HTML5_LC) } qw/
      autoplay controls end loopend loopstart playcount src start
    /),
  }),
};
3143    
## The |source| element: an empty element for which a missing |src|
## attribute is reported as an error.
$Element->{$HTML_NS}->{source} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Per-attribute checks first ...
    my $check_listed_attrs = $GetHTMLAttrsChecker->({
      src => $HTMLURIAttrChecker,
      type => $HTMLIMTAttrChecker,
      media => $HTMLMQAttrChecker,
    }, {
      %HTMLAttrStatus,
      media => FEATURE_HTML5_DEFAULT,
      src => FEATURE_HTML5_DEFAULT,
      type => FEATURE_HTML5_DEFAULT,
    });
    $check_listed_attrs->(@_);

    ## ... then the element-level requirement.
    if (not $item->{node}->has_attribute_ns (undef, 'src')) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing:src');
    }

    $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
  },
};
3167    
## The |canvas| element: transparent content; |height| and |width| are
## non-negative integers with no further restriction on the value.
$Element->{$HTML_NS}->{canvas} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
    },
    {
      %HTMLAttrStatus,
      height => FEATURE_HTML5_LC,
      width => FEATURE_HTML5_LC,
    },
  ),
};
3180    
## The |map| element: prose content, a mandatory |id| attribute, and an
## |in_map| flag that is raised while its descendants are checked.
$Element->{$HTML_NS}->{map} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $has_id; # set by the |id| checker below when it is invoked

    ## NOTE: same as global |id=""|, with |$self->{map}| registration
    my $check_map_id = sub {
      my ($self, $attr) = @_;
      my $value = $attr->value;
      if (length $value > 0) {
        my $same_id_attrs = $self->{id}->{$value};
        if ($same_id_attrs) {
          $self->{onerror}->(node => $attr, type => 'duplicate ID');
          push @$same_id_attrs, $attr;
        } else {
          $self->{id}->{$value} = [$attr];
        }
      } else {
        ## NOTE: MUST contain at least one character
        $self->{onerror}->(node => $attr, type => 'empty attribute value');
      }
      if ($value =~ /[\x09-\x0D\x20]/) {
        $self->{onerror}->(node => $attr, type => 'space in ID');
      }
      ## The first |map| seen with a given ID wins.
      $self->{map}->{$value} ||= $attr;
      $has_id = 1;
    };

    $GetHTMLAttrsChecker->({
      id => $check_map_id,
    }, {
      %HTMLAttrStatus,
      (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) } qw/
        class dir id
        onclick ondblclick onmousedown onmouseup onmouseover onmousemove
        onmouseout onkeypress onkeydown onkeyup
        title
      /),
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      name => FEATURE_M12N10_REC_DEPRECATED,
    })->(@_);

    if (not $has_id) {
      $self->{onerror}->(node => $item->{node}, type => 'attribute missing:id');
    }
  },
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    ## Remember the outer value so that |check_end| can restore it.
    $element_state->{in_map_original} = $self->{flag}->{in_map};
    $self->{flag}->{in_map} = 1;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if (not $element_state->{in_map_original}) {
      delete $self->{flag}->{in_map};
    }
    $HTMLProseContentChecker{check_end}->(@_);
  },
};
3242    
## The |area| element: an empty element.  |check_attrs| validates each
## attribute, then the relationships between |href|, |alt|, |shape| and
## |coords|.  |check_start| reports an |area| that has a parent element
## but is not inside a |map|.
$Element->{$HTML_NS}->{area} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my %attr;   # null-namespace attributes that have an |area| checker
    my $coords; # parsed |coords| list; stays undef on a syntax error

    ## Both lookup tables are the same for every attribute of the
    ## element, so they are built once here, not once per attribute.
    my $area_attr_checker = {
      accesskey => $HTMLAccesskeyAttrChecker,
      alt => sub { },
      ## NOTE: |alt| value has no conformance criteria.
      shape => $GetHTMLEnumeratedAttrChecker->({
        circ => -1, circle => 1,
        default => 1,
        poly => 1, polygon => -1,
        rect => 1, rectangle => -1,
      }),
      coords => sub {
        my ($self, $attr) = @_;
        my $value = $attr->value;
        if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
          $coords = [split /,/, $value];
        } else {
          $self->{onerror}->(node => $attr,
                             type => 'coords:syntax error');
        }
      },
      target => $HTMLTargetAttrChecker,
      href => $HTMLURIAttrChecker,
      ping => $HTMLSpaceURIsAttrChecker,
      rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
      media => $HTMLMQAttrChecker,
      hreflang => $HTMLLanguageTagAttrChecker,
      type => $HTMLIMTAttrChecker,
    };
    my $area_attr_status = {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      accesskey => FEATURE_M12N10_REC,
      alt => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      coords => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      href => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      hreflang => FEATURE_HTML5_DEFAULT,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      media => FEATURE_HTML5_DEFAULT,
      nohref => FEATURE_M12N10_REC,
      onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      ping => FEATURE_HTML5_DEFAULT,
      rel => FEATURE_HTML5_DEFAULT,
      shape => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      target => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      type => FEATURE_HTML5_DEFAULT,
    };

    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;

      ## Checker lookup order: |area|-specific, then generic HTML, then
      ## the per-namespace checker, then the namespace's default.
      my $checker;
      if ($attr_ns eq '') {
        $checker = $area_attr_checker->{$attr_ln};
        if ($checker) {
          $attr{$attr_ln} = $attr;
        } else {
          $checker = $HTMLAttrChecker->{$attr_ln};
        }
      }
      $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
          || $AttrChecker->{$attr_ns}->{''};

      my $status = $area_attr_status->{$attr_ln};

      if ($checker) {
        $checker->($self, $attr, $item, $element_state) if ref $checker;
      } elsif ($attr_ns eq '' and not $status) {
        #
      } else {
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute');
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }

      if ($attr_ns eq '') {
        $self->_attr_status_info ($attr, $status);
      }
    }

    if (defined $attr{href}) {
      $self->{has_hyperlink_element} = 1;
      unless (defined $attr{alt}) {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:alt');
      }
    } else {
      ## Without |href| none of the hyperlink attributes may be present.
      for my $name (qw/target ping rel media hreflang type alt/) {
        if (defined $attr{$name}) {
          $self->{onerror}->(node => $attr{$name},
                             type => 'attribute not allowed');
        }
      }
    }

    ## Normalize |shape| to its long keyword; a missing or unknown
    ## value is treated as |rectangle|.
    my $shape = 'rectangle';
    if (defined $attr{shape}) {
      $shape = {
        circ => 'circle', circle => 'circle',
        default => 'default',
        poly => 'polygon', polygon => 'polygon',
        rect => 'rectangle', rectangle => 'rectangle',
      }->{lc $attr{shape}->value} || 'rectangle';
      ## TODO: ASCII lowercase?
    }

    ## |$shape| is now exactly one of circle, default, polygon, rectangle.
    if ($shape eq 'default') {
      if (defined $attr{coords}) {
        $self->{onerror}->(node => $attr{coords},
                           type => 'attribute not allowed');
      }
    } elsif (not defined $attr{coords}) {
      ## Every other shape requires |coords|.
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing:coords');
    } elsif (not defined $coords) {
      ## NOTE: A syntax error has been reported.
    } elsif ($shape eq 'circle') {
      if (@$coords == 3) {
        if ($coords->[2] < 0) {
          $self->{onerror}->(node => $attr{coords},
                             type => 'coords:out of range:2');
        }
      } else {
        $self->{onerror}->(node => $attr{coords},
                           type => 'coords:number:3:'.@$coords);
      }
    } elsif ($shape eq 'polygon') {
      if (@$coords >= 6) {
        unless (@$coords % 2 == 0) {
          $self->{onerror}->(node => $attr{coords},
                             type => 'coords:number:even:'.@$coords);
        }
      } else {
        $self->{onerror}->(node => $attr{coords},
                           type => 'coords:number:>=6:'.@$coords);
      }
    } else { # rectangle
      if (@$coords == 4) {
        unless ($coords->[0] < $coords->[2]) {
          $self->{onerror}->(node => $attr{coords},
                             type => 'coords:out of range:0');
        }
        unless ($coords->[1] < $coords->[3]) {
          $self->{onerror}->(node => $attr{coords},
                             type => 'coords:out of range:1');
        }
      } else {
        $self->{onerror}->(node => $attr{coords},
                           type => 'coords:number:4:'.@$coords);
      }
    }

    $element_state->{uri_info}->{href}->{type}->{hyperlink} = 1;
  },
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    ## An |area| with no parent element is not reported here.
    unless ($self->{flag}->{in_map} or
            not $item->{node}->manakai_parent_element) {
      $self->{onerror}->(node => $item->{node},
                         type => 'element not allowed:area',
                         level => $self->{must_level});
    }
  },
};
3435    
## The |table| element.  Children are checked against an ordered
## content model tracked in |$element_state->{phase}|; text is not
## allowed; at the end the table model is formed by |Whatpm::HTMLTable|
## and its errors are reported with a |table:| prefix.
$Element->{$HTML_NS}->{table} = {
  %HTMLChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_M12N10_REC_DEPRECATED,
    bgcolor => FEATURE_M12N10_REC_DEPRECATED,
    border => FEATURE_M12N10_REC,
    cellpadding => FEATURE_M12N10_REC,
    cellspacing => FEATURE_M12N10_REC,
    cols => FEATURE_RFC1942,
    datafld => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datapagesize => FEATURE_M12N10_REC,
    datasrc => FEATURE_HTML4_REC_RESERVED,
    frame => FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    rules => FEATURE_M12N10_REC,
    summary => FEATURE_M12N10_REC,
    width => FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'before caption';

    $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
  },
  check_child_element => do {
    ## phase => { HTML child local name => phase after that child }.
    ## A child that is not listed for the current phase is an error.
    ##
    ## NOTE(review): a |tfoot| placed before the body rows used to move
    ## the phase to 'in tbodys', so |tr| rows after such a |tfoot| were
    ## reported as not allowed although |thead| followed by |tr| was
    ## accepted.  The HTML5 content model allows either |tbody|* or
    ## |tr|+ after the optional early |tfoot|, hence the separate
    ## 'after early tfoot' phase - confirm against the test data.
    my $next_phase_of = {
      'before caption' => {
        caption => 'in colgroup', colgroup => 'in colgroup',
        thead => 'after thead', tfoot => 'after early tfoot',
        tbody => 'in tbodys', tr => 'in trs',
      },
      'in colgroup' => {
        colgroup => 'in colgroup',
        thead => 'after thead', tfoot => 'after early tfoot',
        tbody => 'in tbodys', tr => 'in trs',
      },
      'after thead' => {
        tfoot => 'after early tfoot',
        tbody => 'in tbodys', tr => 'in trs',
      },
      'after early tfoot' => {tbody => 'in tbodys', tr => 'in trs'},
      'in tbodys' => {tbody => 'in tbodys', tfoot => 'after tfoot'},
      'in trs' => {tr => 'in trs', tfoot => 'after tfoot'},
      'after tfoot' => {},
    };

    sub {
      my ($self, $item, $child_el, $child_nsuri, $child_ln,
          $child_is_transparent, $element_state) = @_;
      if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:minus',
                           level => $self->{must_level});
      } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
        #
      } else {
        my $transitions = $next_phase_of->{$element_state->{phase}}
            or die "check_child_element: Bad |table| phase: $element_state->{phase}";

        ## Only HTML-namespace children can match the content model.
        my $html_ln = $child_nsuri eq $HTML_NS ? $child_ln : '';
        my $next_phase = $transitions->{$html_ln};

        ## There can be only one |tfoot| child in total.
        undef $next_phase
            if $html_ln eq 'tfoot' and $element_state->{has_tfoot};

        if (defined $next_phase) {
          $element_state->{phase} = $next_phase;
          $element_state->{has_tfoot} = 1 if $html_ln eq 'tfoot';
        } else {
          $self->{onerror}->(node => $child_el,
                             type => 'element not allowed');
        }
      }
    };
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $self->{onerror}->(node => $child_node, type => 'character not allowed');
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Table model errors
    require Whatpm::HTMLTable;
    Whatpm::HTMLTable->form_table ($item->{node}, sub {
      my %opt = @_;
      $self->{onerror}->(type => 'table:'.$opt{type}, node => $opt{node});
    });
    ## Every checked |table| node is collected in the result.
    push @{$self->{return}->{table}}, $item->{node};

    $HTMLChecker{check_end}->(@_);
  },
};
3562    
## The |caption| element: phrasing content; no element-specific
## attribute checkers, only status information.
$Element->{$HTML_NS}->{caption} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
};
3573    
## The |colgroup| element: only |col| children (plus whatever is
## currently in |plus_elements|) are accepted, and no significant text.
$Element->{$HTML_NS}->{colgroup} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    ## |span| must be greater than zero.
    span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
    ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
    ## TODO: "attribute not supported" if |col|.
    ## ISSUE: MUST NOT if any |col|?
    ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_M12N10_REC,
    char => FEATURE_M12N10_REC,
    charoff => FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    span => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    valign => FEATURE_M12N10_REC,
    width => FEATURE_M12N10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'col') {
      #
    } else {
      $self->{onerror}->(node => $child_el, type => 'element not allowed');
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $self->{onerror}->(node => $child_node, type => 'character not allowed');
    }
  },
};
3616    
## The |col| element: an empty element whose |span|, when present, must
## be an integer greater than zero.
$Element->{$HTML_NS}->{col} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align => FEATURE_M12N10_REC,
      char => FEATURE_M12N10_REC,
      charoff => FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      span => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      valign => FEATURE_M12N10_REC,
      width => FEATURE_M12N10_REC,
    },
  ),
};
3634    
## The |tbody| element: only |tr| children (plus |plus_elements|), no
## significant text, and at least one |tr| is required.
$Element->{$HTML_NS}->{tbody} = {
  %HTMLChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_M12N10_REC,
    char => FEATURE_M12N10_REC,
    charoff => FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    valign => FEATURE_M12N10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
      ## Remembered for the "at least one row" check in |check_end|.
      $element_state->{has_tr} = 1;
    } else {
      $self->{onerror}->(node => $child_el, type => 'element not allowed');
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $self->{onerror}->(node => $child_node, type => 'character not allowed');
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if (not $element_state->{has_tr}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing:tr');
    }

    $HTMLChecker{check_end}->(@_);
  },
};
3678    
## The |thead| element is checked with a shallow copy of the |tbody|
## definition.
$Element->{$HTML_NS}->{thead} = {%{$Element->{$HTML_NS}->{tbody}}};
3682    
## The |tfoot| element is checked with a shallow copy of the |tbody|
## definition.
$Element->{$HTML_NS}->{tfoot} = {%{$Element->{$HTML_NS}->{tbody}}};
3686    
## The |tr| element: only |td| and |th| children (plus |plus_elements|),
## no significant text, and at least one cell is required.
$Element->{$HTML_NS}->{tr} = {
  %HTMLChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_M12N10_REC,
    bgcolor => FEATURE_M12N10_REC_DEPRECATED,
    char => FEATURE_M12N10_REC,
    charoff => FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    valign => FEATURE_M12N10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and
             ($child_ln eq 'td' or $child_ln eq 'th')) {
      ## Remembered for the "at least one cell" check in |check_end|.
      $element_state->{has_cell} = 1;
    } else {
      $self->{onerror}->(node => $child_el, type => 'element not allowed');
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $self->{onerror}->(node => $child_node, type => 'character not allowed');
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if (not $element_state->{has_cell}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing:td|th');
    }

    $HTMLChecker{check_end}->(@_);
  },
};
3732    
## The |td| element: prose content; |colspan| and |rowspan|, when
## present, must be integers greater than zero.
$Element->{$HTML_NS}->{td} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
      rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      abbr => FEATURE_M12N10_REC,
      align => FEATURE_M12N10_REC,
      axis => FEATURE_M12N10_REC,
      bgcolor => FEATURE_M12N10_REC_DEPRECATED,
      char => FEATURE_M12N10_REC,
      charoff => FEATURE_M12N10_REC,
      colspan => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      headers => FEATURE_M12N10_REC,
      height => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      nowrap => FEATURE_M12N10_REC_DEPRECATED,
      rowspan => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      scope => FEATURE_M12N10_REC,
      valign => FEATURE_M12N10_REC,
      width => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),
};
3759    
## The |th| element: phrasing content; |colspan| and |rowspan| must be
## integers greater than zero, and |scope| is an enumerated attribute.
$Element->{$HTML_NS}->{th} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
      rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
      scope => $GetHTMLEnumeratedAttrChecker->({
        row => 1, col => 1, rowgroup => 1, colgroup => 1,
      }),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      abbr => FEATURE_M12N10_REC,
      align => FEATURE_M12N10_REC,
      axis => FEATURE_M12N10_REC,
      bgcolor => FEATURE_M12N10_REC_DEPRECATED,
      char => FEATURE_M12N10_REC,
      charoff => FEATURE_M12N10_REC,
      colspan => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      headers => FEATURE_M12N10_REC,
      height => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      nowrap => FEATURE_M12N10_REC_DEPRECATED,
      rowspan => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      scope => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      valign => FEATURE_M12N10_REC,
      width => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),
};
3788    
## Attribute checker for attributes whose value syntax is not checked
## yet: it always reports the attribute at the 'unsupported' level.
my $AttrCheckerNotImplemented = sub {
  my ($checker, $attr_node) = @_;
  my $report = $checker->{onerror};
  $report->(node => $attr_node, level => 'unsupported',
            type => 'attribute');
};
3794    
## The |form| element (Web Forms 2.0 and HTML4/M12N attributes).
$Element->{$HTML_NS}->{form} = {
  %HTMLProseContentChecker, ## NOTE: Flow* [WF2]
  ## TODO: form in form is allowed in XML [WF2]
  status => FEATURE_WF2 | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    ## TODO: ContentTypes [WF2]
    accept => $AttrCheckerNotImplemented,
    ## TODO: Charsets
    'accept-charset' => $AttrCheckerNotImplemented,
    ## TODO: "User agent behavior for a value other than HTTP URI is
    ## undefined" [HTML4]
    action => $HTMLURIAttrChecker,
    ## TODO: MUST point ... [WF2]
    data => $HTMLURIAttrChecker,
    ## TODO: "multipart/form-data" should be used when type=file is
    ## used [HTML4]
    ## TODO: MUST NOT parameter [WF2]
    enctype => $HTMLIMTAttrChecker,
    method => $GetHTMLEnumeratedAttrChecker->({
      get => 1, post => 1, put => 1, delete => 1,
    }),
    ## NOTE: "get" SHOULD be used for idempotent submission,
    ## "post" SHOULD be used otherwise [HTML4].  This cannot be tested.
    ## CDATA in HTML4
    ## TODO: must be same as |id| (informative!) [XHTML10]
    name => sub { },
    onreceived => $HTMLEventHandlerAttrChecker,
    replace => $GetHTMLEnumeratedAttrChecker->({document => 1, values => 1}),
    target => $HTMLTargetAttrChecker,
    ## TODO: Warn for combination whose behavior is not defined.
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accept => FEATURE_WF2 | FEATURE_M12N10_REC,
    'accept-charset' => FEATURE_M12N10_REC,
    action => FEATURE_WF2 | FEATURE_M12N10_REC,
    data => FEATURE_WF2,
    enctype => FEATURE_WF2 | FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    method => FEATURE_WF2 | FEATURE_M12N10_REC,
    name => FEATURE_M12N10_REC_DEPRECATED,
    onreceived => FEATURE_WF2,
    onreset => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onsubmit => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    replace => FEATURE_WF2,
    sdapref => FEATURE_HTML20_RFC,
    sdasuff => FEATURE_HTML20_RFC,
    target => FEATURE_M12N10_REC,
  }),
  ## TODO: Tests
  ## TODO: Tests for <nest/> in <form>
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Record how the URI-valued attributes of this element are used.
    my $uri_info = $element_state->{uri_info};
    $uri_info->{action}->{type}->{action} = 1;
    $uri_info->{data}->{type}->{resource} = 1;
  },
};
3843    
## |fieldset| element definition: prose content model, a boolean
## |disabled| attribute checker and the per-attribute status table.
{
  ## Attribute name => value checker.
  my $attr_checker = {
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    ## TODO: form [WF2]
  };

  ## Attribute name => feature status bits.  Keys listed after the two
  ## shared tables replace same-named keys coming from those tables.
  my $attr_status = {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    disabled => FEATURE_WF2,
    form     => FEATURE_WF2,
    lang     => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  };

  $Element->{$HTML_NS}->{fieldset} = {
    %HTMLProseContentChecker, ## NOTE: legend, %Flow; ## TODO: legend
    status      => FEATURE_WF2 | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->($attr_checker, $attr_status),
    ## TODO: Tests
    ## TODO: Tests for <nest/> in <fieldset>
  };
}
3860    
## |input| element definition.
##
## The element uses the empty-content checker.  |check_attrs| receives
## two tables: attribute name => value checker, and attribute name =>
## feature status bits (keys listed after the shared status tables
## replace same-named keys coming from those tables).
$Element->{$HTML_NS}->{input} = {
  %HTMLEmptyChecker, ## MUST [WF2]
  status => FEATURE_WF2 | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    accept => $AttrCheckerNotImplemented, ## TODO: ContentTypes [WF2]
    accesskey => $HTMLAccesskeyAttrChecker,
    action => $HTMLURIAttrChecker,
    align => $GetHTMLEnumeratedAttrChecker->({
      top => 1, middle => 1, bottom => 1, left => 1, right => 1,
    }),
    alt => sub {}, ## NOTE: Text [M12N] ## TODO: |alt| should be provided for |type=image| [HTML4]
    ## NOTE: HTML4 has a "should" for accessibility, which cannot be tested
    ## here.
    autocomplete => $GetHTMLEnumeratedAttrChecker->({on => 1, off => 1}),
    autofocus => $GetHTMLBooleanAttrChecker->('autofocus'),
    checked => $GetHTMLBooleanAttrChecker->('checked'),
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    enctype => $HTMLIMTAttrChecker,
    ## TODO: form [WF2]
    ## TODO: inputmode [WF2]
    ismap => $GetHTMLBooleanAttrChecker->('ismap'),
    ## TODO: list [WF2]
    ## TODO: max [WF2]
    maxlength => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
    method => $GetHTMLEnumeratedAttrChecker->({
      get => 1, post => 1, put => 1, delete => 1,
    }),
    ## TODO: min [WF2]
    name => sub {}, ## NOTE: CDATA [M12N]
    readonly => $GetHTMLBooleanAttrChecker->('readonly'),
    replace => $GetHTMLEnumeratedAttrChecker->({document => 1, values => 1}),
    required => $GetHTMLBooleanAttrChecker->('required'),
    size => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
    src => $HTMLURIAttrChecker,
    ## TODO: step [WF2]
    target => $HTMLTargetAttrChecker,
    ## TODO: template
    type => $GetHTMLEnumeratedAttrChecker->({
      text => 1, password => 1, checkbox => 1, radio => 1, submit => 1,
      reset => 1, file => 1, hidden => 1, image => 1, button => 1,
      ## [WF2]
      ## NOTE: |datetime| was previously misspelled |datatime|, so the
      ## table did not list the Web Forms 2.0 |datetime| type.
      datetime => 1, 'datetime-local' => 1, date => 1, month => 1, week => 1,
      time => 1, number => 1, range => 1, email => 1, url => 1,
      add => 1, remove => 1, 'move-up' => 1, 'move-down' => 1,
    }),
    usemap => $HTMLUsemapAttrChecker,
    value => sub {}, ## NOTE: CDATA [M12N] ## TODO: "optional except when the type attribute has the value "radio" or "checkbox"" [HTML4] ## TODO: constraints [WF2]
    ## TODO: "authors should ensure that in each set of radio buttons that one is initially "on"." [HTML4] [WF2]
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accept => FEATURE_WF2 | FEATURE_M12N10_REC,
    'accept-charset' => FEATURE_HTML2X_RFC,
    accesskey => FEATURE_M12N10_REC,
    action => FEATURE_WF2,
    align => FEATURE_M12N10_REC_DEPRECATED,
    alt => FEATURE_M12N10_REC,
    autocomplete => FEATURE_WF2,
    autofocus => FEATURE_WF2,
    checked => FEATURE_M12N10_REC,
    datafld => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datasrc => FEATURE_HTML4_REC_RESERVED,
    disabled => FEATURE_WF2 | FEATURE_M12N10_REC,
    enctype => FEATURE_WF2,
    form => FEATURE_WF2,
    inputmode => FEATURE_WF2 | FEATURE_XHTMLBASIC11_CR,
    ismap => FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    list => FEATURE_WF2,
    max => FEATURE_WF2,
    maxlength => FEATURE_WF2 | FEATURE_M12N10_REC,
    method => FEATURE_WF2,
    min => FEATURE_WF2,
    name => FEATURE_M12N10_REC,
    onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onchange => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onselect => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    readonly => FEATURE_WF2 | FEATURE_M12N10_REC,
    replace => FEATURE_WF2,
    required => FEATURE_WF2,
    sdapref => FEATURE_HTML20_RFC,
    size => FEATURE_WF2_DEPRECATED | FEATURE_M12N10_REC,
    src => FEATURE_M12N10_REC,
    step => FEATURE_WF2,
    tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    target => FEATURE_WF2,
    template => FEATURE_WF2,
    type => FEATURE_M12N10_REC,
    usemap => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
    value => FEATURE_M12N10_REC,
  }),
  ## TODO: Tests
  ## TODO: Tests for <nest/> in <input>

  ## Records, per URI-valued attribute, which kind of URI it holds:
  ## |action| => action, |datasrc| => resource, |src| => embedded.
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    $element_state->{uri_info}->{action}->{type}->{action} = 1;
    $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
    $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
  },
};
3964    
3965 wakaba 1.56 ## TODO: Form |name| attributes: MUST NOT conflict with RFC 3106 [WF2]
3966    
## |button| element definition: prose content model, attribute value
## checkers, attribute status table and a start hook that classifies
## the URI-valued attributes.
{
  ## Attribute name => value checker.
  my $attr_checker = {
    accesskey => $HTMLAccesskeyAttrChecker,
    action    => $HTMLURIAttrChecker,
    autofocus => $GetHTMLBooleanAttrChecker->('autofocus'),
    disabled  => $GetHTMLBooleanAttrChecker->('disabled'),
    ## TODO: form [WF2]
    method    => $GetHTMLEnumeratedAttrChecker->({
      map { ($_ => 1) } qw(get post put delete)
    }),
    name      => sub {}, ## NOTE: CDATA [M12N]
    oninvalid => $HTMLEventHandlerAttrChecker,
    replace   => $GetHTMLEnumeratedAttrChecker->({
      map { ($_ => 1) } qw(document values)
    }),
    target    => $HTMLTargetAttrChecker,
    ## TODO: template [WF2]
    type      => $GetHTMLEnumeratedAttrChecker->({
      map { ($_ => 1) } qw(button submit reset)
    }),
    value     => sub {}, ## NOTE: CDATA [M12N]
  };

  ## Attribute name => feature status bits.  Keys listed after the two
  ## shared tables replace same-named keys coming from those tables.
  my $attr_status = {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accesskey    => FEATURE_M12N10_REC,
    action       => FEATURE_WF2,
    autofocus    => FEATURE_WF2,
    datafld      => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datasrc      => FEATURE_HTML4_REC_RESERVED,
    disabled     => FEATURE_WF2 | FEATURE_M12N10_REC,
    enctype      => FEATURE_WF2,
    form         => FEATURE_WF2,
    lang         => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    method       => FEATURE_WF2,
    name         => FEATURE_M12N10_REC,
    onblur       => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onfocus      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    oninvalid    => FEATURE_WF2,
    replace      => FEATURE_WF2,
    tabindex     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    target       => FEATURE_WF2,
    template     => FEATURE_WF2,
    type         => FEATURE_M12N10_REC,
    value        => FEATURE_M12N10_REC,
  };

  $Element->{$HTML_NS}->{button} = {
    %HTMLProseContentChecker, ## NOTE: %Flow; - something [XHTML10]
    ## TODO: -A|%formctrl;|form|fieldset [HTML4]
    ## TODO: image map (img) in |button| is "illegal" [HTML4].
    status      => FEATURE_WF2 | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->($attr_checker, $attr_status),
    ## TODO: Tests
    ## TODO: Tests for <nest/> in <button>

    ## Tags |action| as an action URI and |datasrc| as a resource URI.
    check_start => sub {
      my ($self, $item, $element_state) = @_;

      $element_state->{uri_info}{action}{type}{action} = 1;
      $element_state->{uri_info}{datasrc}{type}{resource} = 1;
    },
  };
}
4024    
## |label| element definition: phrasing content model, attribute value
## checkers and attribute status table.
{
  ## Attribute name => value checker.
  my $attr_checker = {
    accesskey => $HTMLAccesskeyAttrChecker,
    for       => $AttrCheckerNotImplemented, ## TODO: IDREF ## TODO: Must be |id| of control [HTML4] ## TODO: Or, "may only contain one control element"
  };

  ## Attribute name => feature status bits.  Keys listed after the two
  ## shared tables replace same-named keys coming from those tables.
  my $attr_status = {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accesskey => FEATURE_WF2 | FEATURE_M12N10_REC,
    for       => FEATURE_M12N10_REC,
    lang      => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    onblur    => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onfocus   => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  };

  $Element->{$HTML_NS}->{label} = {
    %HTMLPhrasingContentChecker, ## NOTE: %Inline - label [XHTML10] ## TODO: -label
    ## TODO: At most one form control [WF2]
    status      => FEATURE_WF2 | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->($attr_checker, $attr_status),
    ## TODO: Tests
    ## TODO: Tests for <nest/> in <label>
  };
}
4044    
## |select| element definition: prose content model (see TODOs),
## flagged |is_root|, with attribute value checkers, attribute status
## table and a start hook that classifies the URI-valued attributes.
{
  ## Attribute name => value checker.
  my $attr_checker = {
    accesskey => $HTMLAccesskeyAttrChecker,
    autofocus => $GetHTMLBooleanAttrChecker->('autofocus'),
    disabled  => $GetHTMLBooleanAttrChecker->('disabled'),
    data      => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
    ## TODO: form [WF2]
    multiple  => $GetHTMLBooleanAttrChecker->('multiple'),
    name      => sub {}, ## NOTE: CDATA [M12N]
    oninvalid => $HTMLEventHandlerAttrChecker,
    ## TODO: pattern [WF2] ## TODO: |title| semantics
    size      => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
  };

  ## Attribute name => feature status bits.  Keys listed after the two
  ## shared tables replace same-named keys coming from those tables.
  my $attr_status = {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accesskey    => FEATURE_WF2,
    autofocus    => FEATURE_WF2,
    data         => FEATURE_WF2,
    datafld      => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datasrc      => FEATURE_HTML4_REC_RESERVED,
    disabled     => FEATURE_WF2 | FEATURE_M12N10_REC,
    form         => FEATURE_WF2,
    lang         => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    multiple     => FEATURE_M12N10_REC,
    name         => FEATURE_M12N10_REC,
    onblur       => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onchange     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onfocus      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    oninvalid    => FEATURE_WF2,
    pattern      => FEATURE_WF2,
    sdaform      => FEATURE_HTML20_RFC,
    sdapref      => FEATURE_HTML20_RFC,
    size         => FEATURE_M12N10_REC,
    tabindex     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  };

  $Element->{$HTML_NS}->{select} = {
    %HTMLProseContentChecker, ## TODO: (optgroup|option)* [HTML4] + [WF2] ## TODO: SHOULD avoid empty and visible [WF2]
    ## TODO: author should SELECTED at least one OPTION in non-MULTIPLE case [HTML4].
    ## TODO: more than one OPTION with SELECTED in non-MULTIPLE case is "error" [HTML4]
    status      => FEATURE_WF2 | FEATURE_M12N10_REC,
    is_root     => 1, ## TODO: SHOULD NOT in application/xhtml+xml [WF2]
    check_attrs => $GetHTMLAttrsChecker->($attr_checker, $attr_status),
    ## TODO: Tests
    ## TODO: Tests for <nest/> in <select>

    ## Tags both |data| and |datasrc| as resource URIs.
    check_start => sub {
      my ($self, $item, $element_state) = @_;

      $element_state->{uri_info}{data}{type}{resource} = 1;
      $element_state->{uri_info}{datasrc}{type}{resource} = 1;
    },
  };
}
4095 wakaba 1.1
## |datalist| element definition [WF2]: prose content model (see
## TODOs), a URI checker for |data| and a start hook that tags |data|
## as a resource URI.
{
  ## Attribute name => value checker.
  my $attr_checker = {
    data => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
  };

  ## Attribute name => feature status bits; |data| is listed after the
  ## shared table and so replaces any same-named key from it.
  my $attr_status = {
    %HTMLAttrStatus,
    data => FEATURE_WF2,
  };

  $Element->{$HTML_NS}->{datalist} = {
    %HTMLProseContentChecker, ## TODO: (transparent | option)*
    ## TODO: |option| child MUST be empty [WF2]
    status      => FEATURE_WF2,
    check_attrs => $GetHTMLAttrsChecker->($attr_checker, $attr_status),
    ## TODO: Tests
    ## TODO: Tests for <nest/> in <datalist>
    check_start => sub {
      my ($self, $item, $element_state) = @_;

      $element_state->{uri_info}{data}{type}{resource} = 1;
    },
  };
}
4114 wakaba 1.49
## |optgroup| element definition: prose content model (see TODO),
## attribute value checkers and attribute status table.
{
  ## Attribute name => value checker.
  my $attr_checker = {
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    label    => sub {}, ## NOTE: Text [M12N] ## TODO: required
  };

  ## Attribute name => feature status bits.  Keys listed after the two
  ## shared tables replace same-named keys coming from those tables.
  my $attr_status = {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    disabled => FEATURE_WF2 | FEATURE_M12N10_REC,
    label    => FEATURE_M12N10_REC,
    lang     => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  };

  $Element->{$HTML_NS}->{optgroup} = {
    %HTMLProseContentChecker, ## TODO: (option|optgroup)* [HTML4] + [WF2] SHOULD avoid empty and visible [WF2]
    status      => FEATURE_WF2 | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->($attr_checker, $attr_status),
    ## TODO: Tests
    ## TODO: Tests for <nest/> in <optgroup>
  };
}
4131    
## |option| element definition: text-only content model, attribute
## value checkers and attribute status table.
$Element->{$HTML_NS}->{option} = {
  %HTMLTextChecker,
  status => FEATURE_WF2 | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    label => sub {}, ## NOTE: Text [M12N]
    selected => $GetHTMLBooleanAttrChecker->('selected'),
    value => sub {}, ## NOTE: CDATA [M12N]
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    ## NOTE: The two status flags are combined with |, as for the
    ## other form controls.  A comma here would insert a stray list
    ## element and shift every following key/value pair of this hash.
    disabled => FEATURE_WF2 | FEATURE_M12N10_REC,
    label => FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
    sdapref => FEATURE_HTML20_RFC,
    selected => FEATURE_M12N10_REC,
    value => FEATURE_M12N10_REC,
  }),
  ## TODO: Tests
  ## TODO: Tests for <nest/> in <option>
};
4154 wakaba 1.49
## |textarea| element definition: text-only content model, attribute
## value checkers, attribute status table and a start hook that tags a
## URI-valued attribute.
{
  ## Attribute name => value checker.
  my $attr_checker = {
    accept    => $HTMLIMTAttrChecker, ## TODO: MUST be a text-based type
    accesskey => $HTMLAccesskeyAttrChecker,
    autofocus => $GetHTMLBooleanAttrChecker->('autofocus'),
    cols      => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }), ## TODO: SHOULD if wrap=hard [WF2]
    disabled  => $GetHTMLBooleanAttrChecker->('disabled'),
    ## TODO: form [WF2]
    ## TODO: inputmode [WF2]
    maxlength => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
    name      => sub {}, ## NOTE: CDATA [M12N]
    ## TODO: pattern [WF2] ## TODO: |title| special semantics
    readonly  => $GetHTMLBooleanAttrChecker->('readonly'),
    required  => $GetHTMLBooleanAttrChecker->('required'),
    rows      => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
    oninvalid => $HTMLEventHandlerAttrChecker,
    wrap      => $GetHTMLEnumeratedAttrChecker->({
      map { ($_ => 1) } qw(soft hard)
    }),
  };

  ## Attribute name => feature status bits.  Keys listed after the two
  ## shared tables replace same-named keys coming from those tables.
  my $attr_status = {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accept           => FEATURE_WF2,
    'accept-charset' => FEATURE_HTML2X_RFC,
    accesskey        => FEATURE_M12N10_REC,
    autofocus        => FEATURE_WF2,
    cols             => FEATURE_M12N10_REC,
    datafld          => FEATURE_HTML4_REC_RESERVED,
    dataformatas     => FEATURE_HTML4_REC_RESERVED,
    datasrc          => FEATURE_HTML4_REC_RESERVED,
    disabled         => FEATURE_WF2 | FEATURE_M12N10_REC,
    form             => FEATURE_WF2,
    inputmode        => FEATURE_WF2 | FEATURE_XHTMLBASIC11_CR,
    lang             => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    maxlength        => FEATURE_WF2,
    name             => FEATURE_M12N10_REC,
    onblur           => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onchange         => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onfocus          => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    oninvalid        => FEATURE_WF2,
    onselect         => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    pattern          => FEATURE_WF2,
    readonly         => FEATURE_WF2 | FEATURE_M12N10_REC,
    required         => FEATURE_WF2,
    rows             => FEATURE_M12N10_REC,
    sdaform          => FEATURE_HTML20_RFC,
    sdapref          => FEATURE_HTML20_RFC,
    tabindex         => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    wrap             => FEATURE_WF2,
  };

  $Element->{$HTML_NS}->{textarea} = {
    %HTMLTextChecker,
    status      => FEATURE_WF2 | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->($attr_checker, $attr_status),
    ## TODO: Tests
    ## TODO: Tests for <nest/> in <textarea>

    ## Tags |data| as a resource URI.
    ## NOTE(review): No |data| attribute appears in the tables of this
    ## element, while |datasrc| does; the other form controls tag
    ## |datasrc| here.  Possibly |datasrc| was intended - confirm.
    check_start => sub {
      my ($self, $item, $element_state) = @_;

      $element_state->{uri_info}{data}{type}{resource} = 1;
    },
  };
}
4213 wakaba 1.49
## |output| element definition [WF2]: phrasing content model.  No
## attribute value checker is registered yet (all are TODO); only the
## attribute status table is populated.
{
  ## Attribute name => value checker (none implemented so far).
  my $attr_checker = {
    ## TODO: for [WF2]
    ## TODO: form [WF2]
    ## TODO: name [WF2]
    ## onformchange[WF2]
    ## onforminput[WF2]
  };

  ## Attribute name => feature status bits.  Keys listed after the
  ## shared table replace same-named keys coming from it.
  my $attr_status = {
    %HTMLAttrStatus,
    for          => FEATURE_WF2,
    form         => FEATURE_WF2,
    name         => FEATURE_WF2,
    onchange     => FEATURE_HTML5_DEFAULT | FEATURE_WF2,
    onformchange => FEATURE_WF2,
    onforminput  => FEATURE_WF2,
  };

  $Element->{$HTML_NS}->{output} = {
    %HTMLPhrasingContentChecker, ## Inline [WF2]
    status      => FEATURE_WF2,
    check_attrs => $GetHTMLAttrsChecker->($attr_checker, $attr_status),
    ## TODO: Tests
    ## TODO: Tests for <nest/> in <output>
    ## NOTE: "The output element should be used when ..." [WF2]
  };
}
4236    
4237     ## TODO: repetition template
4238    
## |isindex| element definition: empty content model, deprecated
## status, a no-op checker for |prompt| and a start hook that tags
## |action| as an action URI.
{
  ## Attribute name => value checker.
  my $attr_checker = {
    prompt => sub {}, ## NOTE: Text [M12N]
  };

  ## Attribute name => feature status bits.  Keys listed after the
  ## shared table replace same-named keys coming from it.
  my $attr_status = {
    %HTMLAttrStatus,
    class   => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    dir     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    id      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    lang    => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    prompt  => FEATURE_M12N10_REC_DEPRECATED,
    sdapref => FEATURE_HTML20_RFC,
    style   => FEATURE_XHTML10_REC,
    title   => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  };

  $Element->{$HTML_NS}->{isindex} = {
    %HTMLEmptyChecker,
    status      => FEATURE_M12N10_REC_DEPRECATED |
                   Whatpm::ContentChecker::FEATURE_DEPRECATED_SHOULD, ## [HTML4]
    check_attrs => $GetHTMLAttrsChecker->($attr_checker, $attr_status),
    ## TODO: Tests
    ## TODO: Tests for <nest/> in <isindex>
    check_start => sub {
      my ($self, $item, $element_state) = @_;

      $element_state->{uri_info}{action}{type}{action} = 1;
    },
  };
}
4264 wakaba 1.49
## |script| element definition.
##
## With a |src| attribute the element must be empty; otherwise the
## effective script type is derived from |type| / |language| and kept
## in the element state so that |check_end| can report it.
$Element->{$HTML_NS}->{script} = {
  %HTMLChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    src   => $HTMLURIAttrChecker,
    defer => $GetHTMLBooleanAttrChecker->('defer'),
    async => $GetHTMLBooleanAttrChecker->('async'),
    type  => $HTMLIMTAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    async    => FEATURE_HTML5_DEFAULT,
    charset  => FEATURE_M12N10_REC,
    defer    => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    event    => FEATURE_HTML4_REC_RESERVED,
    for      => FEATURE_HTML4_REC_RESERVED,
    id       => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    language => FEATURE_M12N10_REC_DEPRECATED,
    src      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    type     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  }),

  ## Decides between "external script, must be empty" and "inline
  ## script of some type", then tags |src| as a resource URI.
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my $node = $item->{node};

    if ($node->has_attribute_ns (undef, 'src')) {
      $element_state->{must_be_empty} = 1;
    } else {
      ## NOTE: No content model conformance in HTML5 spec.
      my $type = $node->get_attribute_ns (undef, 'type');
      my $language = $node->get_attribute_ns (undef, 'language');

      ## An empty |type| or |language| forces the default type; after
      ## that |type| wins over |language|, and the default is used
      ## when neither attribute is specified.
      my $has_empty_attr = ((defined $type and $type eq '') or
                            (defined $language and $language eq ''));
      my $script_type
          = $has_empty_attr    ? 'text/javascript'
          : defined $type      ? $type
          : defined $language  ? 'text/' . $language
          :                      'text/javascript';
      $element_state->{script_type} = $script_type; ## TODO: $type normalization
    }

    $element_state->{uri_info}{src}{type}{resource} = 1;
  },

  ## Children in the "minus" set are always errors, children in the
  ## "plus" set are always accepted, and any other child is an error
  ## only when the element must be empty.
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      my @error = (node => $child_el,
                   type => 'element not allowed:minus',
                   level => $self->{must_level});
      $self->{onerror}->(@error);
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } else {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed')
          if $element_state->{must_be_empty};
    }
  },

  ## Significant text is an error when the element must be empty.
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed')
        if $has_significant and $element_state->{must_be_empty};
  },

  ## For inline scripts, reports the script type as unsupported and
  ## then runs the generic end check.
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if (not $element_state->{must_be_empty}) {
      my $error_type = 'script:'.$element_state->{script_type};
      $self->{onerror}->(node => $item->{node}, level => 'unsupported',
                         type => $error_type);
      ## TODO: text/javascript support

      $HTMLChecker{check_end}->(@_);
    }
  },
};
4345 wakaba 1.25 ## ISSUE: Significant check and text child node
4346 wakaba 1.1
4347     ## NOTE: When script is disabled.
## |noscript| element definition.
##
## Behaviour depends on the |in_head| flag: inside |head| only a
## restricted set of children is accepted; elsewhere the element is
## treated as transparent and nested |noscript| is excluded.
$Element->{$HTML_NS}->{noscript} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  }),

  ## Reports |noscript| in non-HTML (XML) documents and, outside
  ## |head|, adds |noscript| itself to the excluded ("minus") elements.
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my $node = $item->{node};

    $self->{onerror}->(node => $node, type => 'in XML:noscript')
        if not $node->owner_document->manakai_is_html;

    if (not $self->{flag}->{in_head}) {
      my $excluded = {$HTML_NS => {noscript => 1}};
      $self->_add_minus_elements ($element_state, $excluded);
    }
  },

  ## In |head|: |link|, |style| without |scoped|, and |meta| with an
  ## |http-equiv| other than Content-Type are accepted; every other
  ## child (apart from the plus/minus sets) is an error.
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if (not $self->{flag}->{in_head}) {
      $HTMLTransparentChecker{check_child_element}->(@_);
    } elsif ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } else {
      my $is_allowed = 0;
      if ($child_nsuri eq $HTML_NS) {
        if ($child_ln eq 'link') {
          $is_allowed = 1;
        } elsif ($child_ln eq 'style') {
          $is_allowed = not $child_el->has_attribute_ns (undef, 'scoped');
        } elsif ($child_ln eq 'meta') {
          my $http_equiv_attr
              = $child_el->get_attribute_node_ns (undef, 'http-equiv');
          ## TODO: case
          $is_allowed = ($http_equiv_attr and
                         lc $http_equiv_attr->value ne 'content-type');
        }
      }
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:head noscript',
                         level => $self->{must_level})
          unless $is_allowed;
    }
  },

  ## In |head| significant text is an error; elsewhere the transparent
  ## checker decides.
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if (not $self->{flag}->{in_head}) {
      $HTMLTransparentChecker{check_child_text}->(@_);
    } elsif ($has_significant) {
      $self->{onerror}->(node => $child_node,
                         type => 'character not allowed');
    }
  },

  ## Undoes the exclusion set up at start, then delegates to the end
  ## check that matches the context.
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);
    my $end_checker = $self->{flag}->{in_head}
        ? $HTMLChecker{check_end}
        : $HTMLPhrasingContentChecker{check_end};
    $end_checker->(@_);
  },
};
4433 wakaba 1.3 ## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
4434 wakaba 1.1
## |event-source| element definition: empty content model, a URI
## checker for |src| and a start hook that tags |src| as a resource URI.
{
  ## Attribute name => value checker.
  my $attr_checker = {
    src => $HTMLURIAttrChecker,
  };

  ## Attribute name => feature status bits; |src| is listed after the
  ## shared table and so replaces any same-named key from it.
  my $attr_status = {
    %HTMLAttrStatus,
    src => FEATURE_HTML5_LC,
  };

  $Element->{$HTML_NS}->{'event-source'} = {
    %HTMLEmptyChecker,
    status      => FEATURE_HTML5_LC,
    check_attrs => $GetHTMLAttrsChecker->($attr_checker, $attr_status),
    check_start => sub {
      my ($self, $item, $element_state) = @_;

      $element_state->{uri_info}{src}{type}{resource} = 1;
    },
  };
}
4450    
## |details| element definition: |legend| followed by prose content.
##
## Element state used here:
##   has_legend      - a |legend| child has been seen;
##   has_non_legend  - some child (or significant text) has been seen,
##                     so a later |legend| is out of place.
$Element->{$HTML_NS}->{details} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->({
    open => $GetHTMLBooleanAttrChecker->('open'),
  }, {
    %HTMLAttrStatus,
    open => FEATURE_HTML5_WD,
  }),
  ## NOTE: legend, Prose
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      my @error = (node => $child_el,
                   type => 'element not allowed:minus',
                   level => $self->{must_level});
      $self->{onerror}->(@error);
      $element_state->{has_non_legend} = 1;
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'legend') {
      ## A |legend| is only acceptable before anything else.
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:details legend',
                         level => $self->{must_level})
          if $element_state->{has_non_legend};
      @{$element_state}{qw(has_legend has_non_legend)} = (1, 1);
    } else {
      $HTMLProseContentChecker{check_child_element}->(@_);
      $element_state->{has_non_legend} = 1 unless $child_is_transparent;
      ## ISSUE: |<details><object><legend>xx</legend></object>..</details>|
      ## is conforming?
    }
  },

  ## Significant text counts as non-|legend| content.
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $element_state->{has_non_legend} = 1 if $has_significant;
  },

  ## Reports a missing |legend|, then runs the prose end check.
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    if (not $element_state->{has_legend}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'element missing:legend',
                         level => $self->{must_level});
    }

    $HTMLProseContentChecker{check_end}->(@_);
    ## ISSUE: |<details><legend>aa</legend></details>| error?
  },
};
4505    
## |datagrid| element definition.
##
## Content model:
##   Prose -(text* table Prose*) | table | select | datalist | Empty
##
## The |phase| entry of the element state tracks progress:
##   any   - nothing decisive seen yet;
##   prose - prose content has started;
##   none  - a |table|, |select| or |datalist| child was seen first, so
##           no further content is allowed.
$Element->{$HTML_NS}->{datagrid} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->({
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    multiple => $GetHTMLBooleanAttrChecker->('multiple'),
  }, {
    %HTMLAttrStatus,
    disabled => FEATURE_HTML5_WD,
    multiple => FEATURE_HTML5_WD,
  }),

  ## Excludes |a| and nested |datagrid|, and starts in phase |any|.
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    my $excluded = {$HTML_NS => {a => 1, datagrid => 1}};
    $self->_add_minus_elements ($element_state, $excluded);
    $element_state->{phase} = 'any';
  },

  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($element_state->{phase} eq 'prose') {
      ## Rejected: non-prose children, and a |table| that is the first
      ## element child (i.e. preceded by text only).
      if (not $HTMLProseContent->{$child_nsuri}->{$child_ln} or
          (not $element_state->{has_element} and
           $child_nsuri eq $HTML_NS and
           $child_ln eq 'table')) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed');
      }
      $element_state->{has_element} = 1;
    } elsif ($element_state->{phase} eq 'any') {
      if ($child_nsuri eq $HTML_NS and
          $child_ln =~ /\A(?:table|select|datalist)\z/) {
        $element_state->{phase} = 'none';
      } elsif ($HTMLProseContent->{$child_nsuri}->{$child_ln}) {
        @{$element_state}{qw(has_element phase)} = (1, 'prose');
        ## TODO: transparent?
      } else {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed');
      }
    } elsif ($element_state->{phase} eq 'none') {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed');
    } else {
      die "check_child_element: Bad |datagrid| phase: $element_state->{phase}";
    }
  },

  ## Significant text starts the prose phase; once the phase is
  ## neither |any| nor |prose| it is an error.
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      my $phase = $element_state->{phase};
      if ($phase eq 'any') {
        $element_state->{phase} = 'prose';
      } elsif ($phase ne 'prose') {
        $self->{onerror}->(node => $child_node,
                           type => 'character not allowed');
      }
    }
  },

  ## Undoes the exclusions set up at start, then delegates to the end
  ## check selected by the final phase.
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    my $end_checker = $element_state->{phase} eq 'none'
        ? $HTMLChecker{check_end}
        : $HTMLPhrasingContentChecker{check_end};
    $end_checker->(@_);
  },
  ## ISSUE: "xxx<table/>" is disallowed; "<select/>aaa" and "<datalist/>aa"
  ## are not disallowed (assuming that form control contents are also
  ## prose content).
};
4595    
## Checker definition for the HTML |command| element.  It starts from
## %HTMLEmptyChecker (defined elsewhere in this file) and overrides the
## feature status, the attribute checkers and the |check_start| hook.
4596     $Element->{$HTML_NS}->{command} = {
4597 wakaba 1.40 %HTMLEmptyChecker,
4598 wakaba 1.48 status => FEATURE_HTML5_WD,
## First hash: per-attribute value checkers.  Second hash: per-attribute
## feature status flags.
4599 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
4600 wakaba 1.1 checked => $GetHTMLBooleanAttrChecker->('checked'),
4601     default => $GetHTMLBooleanAttrChecker->('default'),
4602     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
4603     hidden => $GetHTMLBooleanAttrChecker->('hidden'),
4604     icon => $HTMLURIAttrChecker,
4605     label => sub { }, ## NOTE: No conformance criteria
4606     radiogroup => sub { }, ## NOTE: No conformance criteria
## |type| accepts only the exact strings |command|, |checkbox| and
## |radio|; the value is looked up as-is, so the match is case-sensitive.
## NOTE(review): |menu/@type| uses $GetHTMLEnumeratedAttrChecker instead;
## confirm whether the difference is intentional.
4607     type => sub {
4608     my ($self, $attr) = @_;
4609     my $value = $attr->value;
4610     unless ({command => 1, checkbox => 1, radio => 1}->{$value}) {
4611     $self->{onerror}->(node => $attr, type => 'attribute value not allowed');
4612     }
4613     },
4614 wakaba 1.50 }, {
4615     %HTMLAttrStatus,
4616     checked => FEATURE_HTML5_WD,
4617     default => FEATURE_HTML5_WD,
4618     disabled => FEATURE_HTML5_WD,
4619     hidden => FEATURE_HTML5_WD,
4620     icon => FEATURE_HTML5_WD,
4621     label => FEATURE_HTML5_WD,
4622     radiogroup => FEATURE_HTML5_WD,
4623     type => FEATURE_HTML5_WD,
4624 wakaba 1.1 }),
## Flags the |icon| attribute in the element state's |uri_info| as an
## embedded-type URI.  How the flag is consumed is not visible here.
4625 wakaba 1.66 check_start => sub {
4626     my ($self, $item, $element_state) = @_;
4627    
4628     $element_state->{uri_info}->{icon}->{type}->{embedded} = 1;
4629     },
4630 wakaba 1.1 };
4631    
## Checker definition for the HTML |menu| element.  Its content is either
## a run of |li| elements or phrasing content, never a mix of both; the
## choice is tracked in |$element_state->{phase}|, which takes the values
## 'li or phrasing' (undecided), 'li' and 'phrasing'.
4632     $Element->{$HTML_NS}->{menu} = {
4633 wakaba 1.40 %HTMLPhrasingContentChecker,
4634 wakaba 1.54 #status => FEATURE_M12N10_REC_DEPRECATED | FEATURE_HTML5_WD,
4635     status => FEATURE_M12N10_REC | FEATURE_HTML5_WD,
4636     ## NOTE: We don't want any |menu| element warned as deprecated.
4637 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
4638 wakaba 1.1 autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
4639     id => sub {
4640     ## NOTE: same as global |id=""|, with |$self->{menu}| registration
4641     my ($self, $attr) = @_;
4642     my $value = $attr->value;
4643     if (length $value > 0) {
## |$self->{id}->{$value}| collects every attribute node that carries
## this ID; a second occurrence is reported as a duplicate.
4644     if ($self->{id}->{$value}) {
4645     $self->{onerror}->(node => $attr, type => 'duplicate ID');
4646     push @{$self->{id}->{$value}}, $attr;
4647     } else {
4648     $self->{id}->{$value} = [$attr];
4649     }
4650     } else {
4651     ## NOTE: MUST contain at least one character
4652     $self->{onerror}->(node => $attr, type => 'empty attribute value');
4653     }
4654     if ($value =~ /[\x09-\x0D\x20]/) {
4655     $self->{onerror}->(node => $attr, type => 'space in ID');
4656     }
## The first |menu| seen with a given ID is kept (||=).  This line also
## runs for an empty value, which is what the ISSUE below refers to.
4657     $self->{menu}->{$value} ||= $attr;
4658     ## ISSUE: <menu id=""><p contextmenu=""> match?
4659     },
4660     label => sub { }, ## NOTE: No conformance criteria
4661     type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
4662 wakaba 1.49 }, {
4663     %HTMLAttrStatus,
4664     %HTMLM12NCommonAttrStatus,
4665 wakaba 1.61 align => FEATURE_HTML2X_RFC,
4666 wakaba 1.50 autosubmit => FEATURE_HTML5_WD,
4667 wakaba 1.49 compat => FEATURE_M12N10_REC_DEPRECATED,
4668 wakaba 1.50 label => FEATURE_HTML5_WD,
4669     lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
4670 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
4671     sdapref => FEATURE_HTML20_RFC,
4672 wakaba 1.50 type => FEATURE_HTML5_WD,
4673 wakaba 1.1 }),
## Starts in the undecided phase and remembers the enclosing value of the
## |in_menu| flag so that |check_end| can restore it for nested menus.
4674 wakaba 1.40 check_start => sub {
4675     my ($self, $item, $element_state) = @_;
4676     $element_state->{phase} = 'li or phrasing';
4677     $element_state->{in_menu_original} = $self->{flag}->{in_menu};
4678     $self->{flag}->{in_menu} = 1;
4679     },
## Order of tests: explicitly excluded ("minus") elements are errors,
## explicitly included ("plus") elements are always accepted, then |li|
## and phrasing content are checked against the current phase.  The first
## |li| or phrasing child decides the phase.
4680     check_child_element => sub {
4681     my ($self, $item, $child_el, $child_nsuri, $child_ln,
4682     $child_is_transparent, $element_state) = @_;
4683     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
4684     $self->{onerror}->(node => $child_el,
4685     type => 'element not allowed:minus',
4686     level => $self->{must_level});
4687     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
4688     #
4689     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'li') {
4690     if ($element_state->{phase} eq 'li') {
4691     #
4692     } elsif ($element_state->{phase} eq 'li or phrasing') {
4693     $element_state->{phase} = 'li';
4694     } else {
4695     $self->{onerror}->(node => $child_el, type => 'element not allowed');
4696     }
4697     } elsif ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
4698     if ($element_state->{phase} eq 'phrasing') {
4699     #
4700     } elsif ($element_state->{phase} eq 'li or phrasing') {
4701     $element_state->{phase} = 'phrasing';
4702     } else {
4703     $self->{onerror}->(node => $child_el, type => 'element not allowed');
4704     }
4705     } else {
4706     $self->{onerror}->(node => $child_el, type => 'element not allowed');
4707     }
4708     },
## Significant text counts as phrasing content: it fixes an undecided
## phase to 'phrasing' and is an error once the phase is 'li'.
4709     check_child_text => sub {
4710     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
4711     if ($has_significant) {
4712     if ($element_state->{phase} eq 'phrasing') {
4713     #
4714     } elsif ($element_state->{phase} eq 'li or phrasing') {
4715     $element_state->{phase} = 'phrasing';
4716     } else {
4717     $self->{onerror}->(node => $child_node,
4718     type => 'character not allowed');
4719 wakaba 1.1 }
4720     }
4721 wakaba 1.40 },
## Clears the |in_menu| flag unless an enclosing |menu| had already set
## it, then delegates to the end check that matches the final phase.
4722     check_end => sub {
4723     my ($self, $item, $element_state) = @_;
4724     delete $self->{flag}->{in_menu} unless $element_state->{in_menu_original};
4725    
4726     if ($element_state->{phase} eq 'li') {
4727     $HTMLChecker{check_end}->(@_);
4728     } else { # 'phrasing' or 'li or phrasing'
4729     $HTMLPhrasingContentChecker{check_end}->(@_);
4730 wakaba 1.1 }
4731     },
4732 wakaba 1.8 };
4733    
## Checker definition for the HTML |datatemplate| element.  Apart from
## elements on the current "plus" list, only HTML |rule| children are
## accepted, and significant text is an error.
4734     $Element->{$HTML_NS}->{datatemplate} = {
4735 wakaba 1.40 %HTMLChecker,
4736 wakaba 1.48 status => FEATURE_HTML5_AT_RISK,
4737 wakaba 1.40 check_child_element => sub {
4738     my ($self, $item, $child_el, $child_nsuri, $child_ln,
4739     $child_is_transparent, $element_state) = @_;
4740     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
4741     $self->{onerror}->(node => $child_el,
4742     type => 'element not allowed:minus',
4743     level => $self->{must_level});
4744     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
4745     #
4746     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'rule') {
4747     #
4748     } else {
## Any other child gets an error type specific to |datatemplate|.
4749     $self->{onerror}->(node => $child_el,
4750     type => 'element not allowed:datatemplate');
4751     }
4752     },
4753     check_child_text => sub {
4754     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
4755     if ($has_significant) {
4756     $self->{onerror}->(node => $child_node, type => 'character not allowed');
4757 wakaba 1.8 }
4758     },
## NOTE(review): presumably marks the element as usable as the root
## element of an XML document; confirm against the checker core.
4759     is_xml_root => 1,
4760     };
4761    
## Checker definition for the HTML |rule| element (child of
## |datatemplate|).  Its content is deliberately not validated: the child
## element and child text hooks are no-ops.
4762     $Element->{$HTML_NS}->{rule} = {
4763 wakaba 1.40 %HTMLChecker,
4764 wakaba 1.48 status => FEATURE_HTML5_AT_RISK,
4765 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
4766 wakaba 1.23 condition => $HTMLSelectorsAttrChecker,
4767 wakaba 1.18 mode => $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker,
4768 wakaba 1.50 }, {
4769     %HTMLAttrStatus,
4770     condition => FEATURE_HTML5_AT_RISK,
4771     mode => FEATURE_HTML5_AT_RISK,
4772 wakaba 1.8 }),
## Adds HTML |nest| to the "plus" element list for the duration of this
## element; |check_end| removes it again.
4773 wakaba 1.40 check_start => sub {
4774     my ($self, $item, $element_state) = @_;
4775     $self->_add_plus_elements ($element_state, {$HTML_NS => {nest => 1}});
4776     },
4777     check_child_element => sub { },
4778     check_child_text => sub { },
4779     check_end => sub {
4780     my ($self, $item, $element_state) = @_;
4781     $self->_remove_plus_elements ($element_state);
4782     $HTMLChecker{check_end}->(@_);
4783 wakaba 1.8 },
4784     ## NOTE: "MAY be anything that, when the parent |datatemplate|
4785     ## is applied to some conforming data, results in a conforming DOM tree.":
4786     ## We don't check against this.
4787     };
4788    
## Checker definition for the HTML |nest| element.  It starts from
## %HTMLEmptyChecker and adds checkers for |filter| and |mode|.
4789     $Element->{$HTML_NS}->{nest} = {
4790 wakaba 1.40 %HTMLEmptyChecker,
4791 wakaba 1.48 status => FEATURE_HTML5_AT_RISK,
4792 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
4793 wakaba 1.23 filter => $HTMLSelectorsAttrChecker,
## |mode| must be a non-empty string containing none of U+0009..U+000D
## or U+0020, i.e. a single token without whitespace.
4794     mode => sub {
4795     my ($self, $attr) = @_;
4796     my $value = $attr->value;
4797     if ($value !~ /\A[^\x09-\x0D\x20]+\z/) {
4798     $self->{onerror}->(node => $attr, type => 'mode:syntax error');
4799     }
4800     },
4801 wakaba 1.50 }, {
4802     %HTMLAttrStatus,
4803     filter => FEATURE_HTML5_AT_RISK,
4804     mode => FEATURE_HTML5_AT_RISK,
4805 wakaba 1.8 }),
4806 wakaba 1.1 };
4807    
## Checker definition for the HTML |legend| element: phrasing content,
## with a value checker for |accesskey| only.
4808     $Element->{$HTML_NS}->{legend} = {
4809 wakaba 1.40 %HTMLPhrasingContentChecker,
4810 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4811 wakaba 1.52 check_attrs => $GetHTMLAttrsChecker->({
4812 wakaba 1.66 accesskey => $HTMLAccesskeyAttrChecker,
## The |align| value checker is disabled; |align| keeps only its status
## entry in the second hash below.
4813 wakaba 1.52 # align => $GetHTMLEnumeratedAttrChecker->({
4814     # top => 1, bottom => 1, left => 1, right => 1,
4815     # }),
4816     }, {
4817 wakaba 1.49 %HTMLAttrStatus,
4818     %HTMLM12NCommonAttrStatus,
4819     accesskey => FEATURE_M12N10_REC,
4820     align => FEATURE_M12N10_REC_DEPRECATED,
4821 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
4822 wakaba 1.49 }),
4823 wakaba 1.1 };
4824    
## Checker definition for the HTML |div| element: prose content.  No
## attribute has a value checker here (the first hash is empty); only
## status flags are declared.
4825     $Element->{$HTML_NS}->{div} = {
4826 wakaba 1.40 %HTMLProseContentChecker,
4827 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4828     check_attrs => $GetHTMLAttrsChecker->({}, {
4829     %HTMLAttrStatus,
4830     %HTMLM12NCommonAttrStatus,
4831     align => FEATURE_M12N10_REC_DEPRECATED,
4832     datafld => FEATURE_HTML4_REC_RESERVED,
4833     dataformatas => FEATURE_HTML4_REC_RESERVED,
4834     datasrc => FEATURE_HTML4_REC_RESERVED,
4835 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
4836 wakaba 1.49 }),
## Flags the |datasrc| attribute in the element state's |uri_info| as a
## resource-type URI.  How the flag is consumed is not visible here.
4837 wakaba 1.66 check_start => sub {
4838     my ($self, $item, $element_state) = @_;
4839    
4840     $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
4841     },
4842 wakaba 1.1 };
4843    
## Checker definition for the HTML |center| element: prose content,
## with the element status set to FEATURE_M12N10_REC_DEPRECATED.  No
## attribute value checkers are declared, only status flags.
4844 wakaba 1.64 $Element->{$HTML_NS}->{center} = {
4845     %HTMLProseContentChecker,
4846     status => FEATURE_M12N10_REC_DEPRECATED,
4847     check_attrs => $GetHTMLAttrsChecker->({}, {
4848     %HTMLAttrStatus,
4849     %HTMLM12NCommonAttrStatus,
4850     lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
4851     }),
4852     };
4853    
## Checker definition for the HTML |font| element, built on
## %HTMLTransparentChecker.  Attribute value checking is still a TODO
## (the first hash is empty); only status flags are declared.
4854 wakaba 1.1 $Element->{$HTML_NS}->{font} = {
4855 wakaba 1.40 %HTMLTransparentChecker,
4856 wakaba 1.50 status => FEATURE_HTML5_AT_RISK | FEATURE_M12N10_REC_DEPRECATED,
4857 wakaba 1.49 check_attrs => $GetHTMLAttrsChecker->({ ## TODO
4858     }, {
4859     %HTMLAttrStatus,
4860 wakaba 1.50 class => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4861 wakaba 1.49 color => FEATURE_M12N10_REC_DEPRECATED,
4862 wakaba 1.50 dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4863 wakaba 1.49 face => FEATURE_M12N10_REC_DEPRECATED,
4864 wakaba 1.50 id => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4865     lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
4866 wakaba 1.49 size => FEATURE_M12N10_REC_DEPRECATED,
4867 wakaba 1.50 style => FEATURE_HTML5_AT_RISK | FEATURE_XHTML10_REC,
4868     title => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4869 wakaba 1.49 }),
4870 wakaba 1.1 };
4871 wakaba 1.49
## Checker definition for the HTML |basefont| element, built on
## %HTMLEmptyChecker, with the element status set to
## FEATURE_M12N10_REC_DEPRECATED.  Value checkers for |color|, |face|
## and |size| are still a TODO; only status flags are declared.
4872 wakaba 1.64 $Element->{$HTML_NS}->{basefont} = {
4873     %HTMLEmptyChecker,
4874     status => FEATURE_M12N10_REC_DEPRECATED,
4875     check_attrs => $GetHTMLAttrsChecker->({
4876     ## TODO: color, face, size
4877     }, {
4878     %HTMLAttrStatus,
4879     color => FEATURE_M12N10_REC_DEPRECATED,
4880     face => FEATURE_M12N10_REC_DEPRECATED,
## The DEPRECATED variant for |id| is commented out; the live entry uses
## the non-deprecated FEATURE_M12N10_REC flag.
4881     #id => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC_DEPRECATED,
4882     id => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4883     size => FEATURE_M12N10_REC_DEPRECATED,
4884     }),
4885     };
4886    
4887 wakaba 1.49 ## TODO: frameset FEATURE_M12N10_REC
4888     ## class title id cols rows onload onunload style(x10)
4889     ## frame frameborder longdesc marginheight marginwidth noresize scrolling src name(deprecated) class,id,title,style(x10)
4890     ## noframes Common, lang(xhtml10)
4891    
4892     ## TODO: CR: ruby rb rt rp rbc rtc @rbspan
4893 wakaba 1.56
4894 wakaba 1.61 ## TODO: xmp, listing, plaintext FEATURE_HTML32_REC_OBSOLETE
4895     ## TODO: ^^^ lang, dir, id, class [HTML 2.x] sdaform [HTML 2.0]
4896     ## xmp, listing sdapref [HTML 2.0]
4897    
4898 wakaba 1.56 =pod
4899    
4900     WF2: Documents MUST comply with [CHARMOD].
4901     WF2: Vendor extensions MUST NOT be used.
4902    
4903 wakaba 1.61 HTML 2.0 nextid @n
4904    
4905     RFC 2659: CERTS CRYPTOPTS
4906    
4907     ISO-HTML: pre-html, divN
4908    
4909 wakaba 1.56 =cut
4910 wakaba 1.61
4911     ## NOTE: Where RFC 2659 allows additional attributes is unclear.
4912     ## We added them only to |a|. |link| and |form| might also allow them
4913     ## in theory.
4914 wakaba 1.1
## Records in the shared namespace table that the HTML namespace
## definitions from this file have been loaded.
4915     $Whatpm::ContentChecker::Namespace->{$HTML_NS}->{loaded} = 1;
4916    
## True value returned to |require|.
4917     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24